aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/bio.c6
-rw-r--r--fs/block_dev.c37
-rw-r--r--fs/btrfs/backref.c28
-rw-r--r--fs/btrfs/backref.h4
-rw-r--r--fs/btrfs/ctree.c70
-rw-r--r--fs/btrfs/ctree.h3
-rw-r--r--fs/btrfs/extent_io.c4
-rw-r--r--fs/btrfs/inode.c7
-rw-r--r--fs/btrfs/ioctl.c6
-rw-r--r--fs/btrfs/qgroup.c17
-rw-r--r--fs/btrfs/send.c156
-rw-r--r--fs/btrfs/transaction.c2
-rw-r--r--fs/btrfs/volumes.c7
-rw-r--r--fs/ceph/export.c2
-rw-r--r--fs/char_dev.c18
-rw-r--r--fs/cifs/cifsacl.c49
-rw-r--r--fs/cifs/dir.c11
-rw-r--r--fs/compat_ioctl.c2
-rw-r--r--fs/coredump.c5
-rw-r--r--fs/eventpoll.c38
-rw-r--r--fs/exec.c3
-rw-r--r--fs/ext2/super.c4
-rw-r--r--fs/ext3/balloc.c5
-rw-r--r--fs/ext3/namei.c40
-rw-r--r--fs/ext3/namei.h19
-rw-r--r--fs/ext3/super.c4
-rw-r--r--fs/ext4/balloc.c8
-rw-r--r--fs/ext4/bitmap.c6
-rw-r--r--fs/ext4/ext4.h7
-rw-r--r--fs/ext4/ext4_jbd2.c8
-rw-r--r--fs/ext4/extents.c60
-rw-r--r--fs/ext4/ialloc.c23
-rw-r--r--fs/ext4/mballoc.c14
-rw-r--r--fs/ext4/resize.c3
-rw-r--r--fs/ext4/super.c9
-rw-r--r--fs/file.c5
-rw-r--r--fs/fs-writeback.c2
-rw-r--r--fs/gfs2/file.c14
-rw-r--r--fs/gfs2/lops.c16
-rw-r--r--fs/gfs2/quota.c7
-rw-r--r--fs/gfs2/rgrp.c33
-rw-r--r--fs/gfs2/super.c3
-rw-r--r--fs/gfs2/trans.c8
-rw-r--r--fs/inode.c16
-rw-r--r--fs/internal.h1
-rw-r--r--fs/jbd/transaction.c2
-rw-r--r--fs/jffs2/file.c39
-rw-r--r--fs/jfs/jfs_discard.c16
-rw-r--r--fs/lockd/clntxdr.c2
-rw-r--r--fs/lockd/mon.c57
-rw-r--r--fs/lockd/svcproc.c3
-rw-r--r--fs/namei.c4
-rw-r--r--fs/nfs/callback.c2
-rw-r--r--fs/nfs/dns_resolve.c5
-rw-r--r--fs/nfs/inode.c5
-rw-r--r--fs/nfs/internal.h6
-rw-r--r--fs/nfs/mount_clnt.c2
-rw-r--r--fs/nfs/namespace.c19
-rw-r--r--fs/nfs/nfs4filelayout.c21
-rw-r--r--fs/nfs/nfs4filelayout.h1
-rw-r--r--fs/nfs/nfs4filelayoutdev.c22
-rw-r--r--fs/nfs/nfs4getroot.c1
-rw-r--r--fs/nfs/nfs4namespace.c3
-rw-r--r--fs/nfs/nfs4proc.c46
-rw-r--r--fs/nfs/objlayout/objio_osd.c6
-rw-r--r--fs/nfs/pnfs.c4
-rw-r--r--fs/nfs/pnfs.h1
-rw-r--r--fs/nfs/super.c51
-rw-r--r--fs/nfs/unlink.c2
-rw-r--r--fs/notify/fanotify/fanotify.c1
-rw-r--r--fs/notify/fanotify/fanotify_user.c3
-rw-r--r--fs/proc/base.c114
-rw-r--r--fs/proc/internal.h4
-rw-r--r--fs/proc/stat.c14
-rw-r--r--fs/proc/task_mmu.c53
-rw-r--r--fs/pstore/platform.c3
-rw-r--r--fs/quota/dquot.c2
-rw-r--r--fs/reiserfs/inode.c10
-rw-r--r--fs/reiserfs/stree.c4
-rw-r--r--fs/reiserfs/super.c60
-rw-r--r--fs/sysfs/dir.c16
-rw-r--r--fs/ubifs/find.c12
-rw-r--r--fs/ubifs/lprops.c6
-rw-r--r--fs/ubifs/ubifs.h3
-rw-r--r--fs/xattr.c2
-rw-r--r--fs/xfs/xfs_alloc.c43
-rw-r--r--fs/xfs/xfs_alloc.h3
-rw-r--r--fs/xfs/xfs_alloc_btree.c2
-rw-r--r--fs/xfs/xfs_aops.c54
-rw-r--r--fs/xfs/xfs_attr_leaf.c20
-rw-r--r--fs/xfs/xfs_bmap.c63
-rw-r--r--fs/xfs/xfs_bmap.h9
-rw-r--r--fs/xfs/xfs_buf.c14
-rw-r--r--fs/xfs/xfs_buf_item.c18
-rw-r--r--fs/xfs/xfs_fsops.c21
-rw-r--r--fs/xfs/xfs_ialloc.c1
-rw-r--r--fs/xfs/xfs_inode.c3
-rw-r--r--fs/xfs/xfs_ioctl.c2
-rw-r--r--fs/xfs/xfs_iomap.c4
-rw-r--r--fs/xfs/xfs_log.c19
-rw-r--r--fs/xfs/xfs_log_recover.c2
101 files changed, 1154 insertions, 541 deletions
diff --git a/fs/bio.c b/fs/bio.c
index 9298c65ad9c7..b96fc6ce4855 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -75,6 +75,7 @@ static struct kmem_cache *bio_find_or_create_slab(unsigned int extra_size)
75 unsigned int sz = sizeof(struct bio) + extra_size; 75 unsigned int sz = sizeof(struct bio) + extra_size;
76 struct kmem_cache *slab = NULL; 76 struct kmem_cache *slab = NULL;
77 struct bio_slab *bslab, *new_bio_slabs; 77 struct bio_slab *bslab, *new_bio_slabs;
78 unsigned int new_bio_slab_max;
78 unsigned int i, entry = -1; 79 unsigned int i, entry = -1;
79 80
80 mutex_lock(&bio_slab_lock); 81 mutex_lock(&bio_slab_lock);
@@ -97,12 +98,13 @@ static struct kmem_cache *bio_find_or_create_slab(unsigned int extra_size)
97 goto out_unlock; 98 goto out_unlock;
98 99
99 if (bio_slab_nr == bio_slab_max && entry == -1) { 100 if (bio_slab_nr == bio_slab_max && entry == -1) {
100 bio_slab_max <<= 1; 101 new_bio_slab_max = bio_slab_max << 1;
101 new_bio_slabs = krealloc(bio_slabs, 102 new_bio_slabs = krealloc(bio_slabs,
102 bio_slab_max * sizeof(struct bio_slab), 103 new_bio_slab_max * sizeof(struct bio_slab),
103 GFP_KERNEL); 104 GFP_KERNEL);
104 if (!new_bio_slabs) 105 if (!new_bio_slabs)
105 goto out_unlock; 106 goto out_unlock;
107 bio_slab_max = new_bio_slab_max;
106 bio_slabs = new_bio_slabs; 108 bio_slabs = new_bio_slabs;
107 } 109 }
108 if (entry == -1) 110 if (entry == -1)
diff --git a/fs/block_dev.c b/fs/block_dev.c
index b3c1d3dae77d..1a1e5e3b1eaf 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1661,6 +1661,39 @@ static int blkdev_mmap(struct file *file, struct vm_area_struct *vma)
1661 return ret; 1661 return ret;
1662} 1662}
1663 1663
1664static ssize_t blkdev_splice_read(struct file *file, loff_t *ppos,
1665 struct pipe_inode_info *pipe, size_t len,
1666 unsigned int flags)
1667{
1668 ssize_t ret;
1669 struct block_device *bdev = I_BDEV(file->f_mapping->host);
1670
1671 percpu_down_read(&bdev->bd_block_size_semaphore);
1672
1673 ret = generic_file_splice_read(file, ppos, pipe, len, flags);
1674
1675 percpu_up_read(&bdev->bd_block_size_semaphore);
1676
1677 return ret;
1678}
1679
1680static ssize_t blkdev_splice_write(struct pipe_inode_info *pipe,
1681 struct file *file, loff_t *ppos, size_t len,
1682 unsigned int flags)
1683{
1684 ssize_t ret;
1685 struct block_device *bdev = I_BDEV(file->f_mapping->host);
1686
1687 percpu_down_read(&bdev->bd_block_size_semaphore);
1688
1689 ret = generic_file_splice_write(pipe, file, ppos, len, flags);
1690
1691 percpu_up_read(&bdev->bd_block_size_semaphore);
1692
1693 return ret;
1694}
1695
1696
1664/* 1697/*
1665 * Try to release a page associated with block device when the system 1698 * Try to release a page associated with block device when the system
1666 * is under memory pressure. 1699 * is under memory pressure.
@@ -1699,8 +1732,8 @@ const struct file_operations def_blk_fops = {
1699#ifdef CONFIG_COMPAT 1732#ifdef CONFIG_COMPAT
1700 .compat_ioctl = compat_blkdev_ioctl, 1733 .compat_ioctl = compat_blkdev_ioctl,
1701#endif 1734#endif
1702 .splice_read = generic_file_splice_read, 1735 .splice_read = blkdev_splice_read,
1703 .splice_write = generic_file_splice_write, 1736 .splice_write = blkdev_splice_write,
1704}; 1737};
1705 1738
1706int ioctl_by_bdev(struct block_device *bdev, unsigned cmd, unsigned long arg) 1739int ioctl_by_bdev(struct block_device *bdev, unsigned cmd, unsigned long arg)
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index f3187938e081..208d8aa5b07e 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -283,9 +283,7 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
283 goto out; 283 goto out;
284 } 284 }
285 285
286 rcu_read_lock(); 286 root_level = btrfs_old_root_level(root, time_seq);
287 root_level = btrfs_header_level(root->node);
288 rcu_read_unlock();
289 287
290 if (root_level + 1 == level) 288 if (root_level + 1 == level)
291 goto out; 289 goto out;
@@ -1177,16 +1175,15 @@ int btrfs_find_one_extref(struct btrfs_root *root, u64 inode_objectid,
1177 return ret; 1175 return ret;
1178} 1176}
1179 1177
1180static char *ref_to_path(struct btrfs_root *fs_root, 1178char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
1181 struct btrfs_path *path, 1179 u32 name_len, unsigned long name_off,
1182 u32 name_len, unsigned long name_off, 1180 struct extent_buffer *eb_in, u64 parent,
1183 struct extent_buffer *eb_in, u64 parent, 1181 char *dest, u32 size)
1184 char *dest, u32 size)
1185{ 1182{
1186 int slot; 1183 int slot;
1187 u64 next_inum; 1184 u64 next_inum;
1188 int ret; 1185 int ret;
1189 s64 bytes_left = size - 1; 1186 s64 bytes_left = ((s64)size) - 1;
1190 struct extent_buffer *eb = eb_in; 1187 struct extent_buffer *eb = eb_in;
1191 struct btrfs_key found_key; 1188 struct btrfs_key found_key;
1192 int leave_spinning = path->leave_spinning; 1189 int leave_spinning = path->leave_spinning;
@@ -1266,10 +1263,10 @@ char *btrfs_iref_to_path(struct btrfs_root *fs_root,
1266 struct extent_buffer *eb_in, u64 parent, 1263 struct extent_buffer *eb_in, u64 parent,
1267 char *dest, u32 size) 1264 char *dest, u32 size)
1268{ 1265{
1269 return ref_to_path(fs_root, path, 1266 return btrfs_ref_to_path(fs_root, path,
1270 btrfs_inode_ref_name_len(eb_in, iref), 1267 btrfs_inode_ref_name_len(eb_in, iref),
1271 (unsigned long)(iref + 1), 1268 (unsigned long)(iref + 1),
1272 eb_in, parent, dest, size); 1269 eb_in, parent, dest, size);
1273} 1270}
1274 1271
1275/* 1272/*
@@ -1715,9 +1712,8 @@ static int inode_to_path(u64 inum, u32 name_len, unsigned long name_off,
1715 ipath->fspath->bytes_left - s_ptr : 0; 1712 ipath->fspath->bytes_left - s_ptr : 0;
1716 1713
1717 fspath_min = (char *)ipath->fspath->val + (i + 1) * s_ptr; 1714 fspath_min = (char *)ipath->fspath->val + (i + 1) * s_ptr;
1718 fspath = ref_to_path(ipath->fs_root, ipath->btrfs_path, name_len, 1715 fspath = btrfs_ref_to_path(ipath->fs_root, ipath->btrfs_path, name_len,
1719 name_off, eb, inum, fspath_min, 1716 name_off, eb, inum, fspath_min, bytes_left);
1720 bytes_left);
1721 if (IS_ERR(fspath)) 1717 if (IS_ERR(fspath))
1722 return PTR_ERR(fspath); 1718 return PTR_ERR(fspath);
1723 1719
diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h
index e75533043a5f..d61feca79455 100644
--- a/fs/btrfs/backref.h
+++ b/fs/btrfs/backref.h
@@ -62,6 +62,10 @@ int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
62char *btrfs_iref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path, 62char *btrfs_iref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
63 struct btrfs_inode_ref *iref, struct extent_buffer *eb, 63 struct btrfs_inode_ref *iref, struct extent_buffer *eb,
64 u64 parent, char *dest, u32 size); 64 u64 parent, char *dest, u32 size);
65char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
66 u32 name_len, unsigned long name_off,
67 struct extent_buffer *eb_in, u64 parent,
68 char *dest, u32 size);
65 69
66struct btrfs_data_container *init_data_container(u32 total_bytes); 70struct btrfs_data_container *init_data_container(u32 total_bytes);
67struct inode_fs_paths *init_ipath(s32 total_bytes, struct btrfs_root *fs_root, 71struct inode_fs_paths *init_ipath(s32 total_bytes, struct btrfs_root *fs_root,
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index b33436211000..cdfb4c49a806 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -596,6 +596,11 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info,
596 if (tree_mod_dont_log(fs_info, eb)) 596 if (tree_mod_dont_log(fs_info, eb))
597 return 0; 597 return 0;
598 598
599 /*
600 * When we override something during the move, we log these removals.
601 * This can only happen when we move towards the beginning of the
602 * buffer, i.e. dst_slot < src_slot.
603 */
599 for (i = 0; i + dst_slot < src_slot && i < nr_items; i++) { 604 for (i = 0; i + dst_slot < src_slot && i < nr_items; i++) {
600 ret = tree_mod_log_insert_key_locked(fs_info, eb, i + dst_slot, 605 ret = tree_mod_log_insert_key_locked(fs_info, eb, i + dst_slot,
601 MOD_LOG_KEY_REMOVE_WHILE_MOVING); 606 MOD_LOG_KEY_REMOVE_WHILE_MOVING);
@@ -647,8 +652,6 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
647 if (tree_mod_dont_log(fs_info, NULL)) 652 if (tree_mod_dont_log(fs_info, NULL))
648 return 0; 653 return 0;
649 654
650 __tree_mod_log_free_eb(fs_info, old_root);
651
652 ret = tree_mod_alloc(fs_info, flags, &tm); 655 ret = tree_mod_alloc(fs_info, flags, &tm);
653 if (ret < 0) 656 if (ret < 0)
654 goto out; 657 goto out;
@@ -926,12 +929,7 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
926 ret = btrfs_dec_ref(trans, root, buf, 1, 1); 929 ret = btrfs_dec_ref(trans, root, buf, 1, 1);
927 BUG_ON(ret); /* -ENOMEM */ 930 BUG_ON(ret); /* -ENOMEM */
928 } 931 }
929 /* 932 tree_mod_log_free_eb(root->fs_info, buf);
930 * don't log freeing in case we're freeing the root node, this
931 * is done by tree_mod_log_set_root_pointer later
932 */
933 if (buf != root->node && btrfs_header_level(buf) != 0)
934 tree_mod_log_free_eb(root->fs_info, buf);
935 clean_tree_block(trans, root, buf); 933 clean_tree_block(trans, root, buf);
936 *last_ref = 1; 934 *last_ref = 1;
937 } 935 }
@@ -1225,6 +1223,8 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,
1225 free_extent_buffer(eb); 1223 free_extent_buffer(eb);
1226 1224
1227 __tree_mod_log_rewind(eb_rewin, time_seq, tm); 1225 __tree_mod_log_rewind(eb_rewin, time_seq, tm);
1226 WARN_ON(btrfs_header_nritems(eb_rewin) >
1227 BTRFS_NODEPTRS_PER_BLOCK(fs_info->fs_root));
1228 1228
1229 return eb_rewin; 1229 return eb_rewin;
1230} 1230}
@@ -1241,9 +1241,11 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
1241{ 1241{
1242 struct tree_mod_elem *tm; 1242 struct tree_mod_elem *tm;
1243 struct extent_buffer *eb; 1243 struct extent_buffer *eb;
1244 struct extent_buffer *old;
1244 struct tree_mod_root *old_root = NULL; 1245 struct tree_mod_root *old_root = NULL;
1245 u64 old_generation = 0; 1246 u64 old_generation = 0;
1246 u64 logical; 1247 u64 logical;
1248 u32 blocksize;
1247 1249
1248 eb = btrfs_read_lock_root_node(root); 1250 eb = btrfs_read_lock_root_node(root);
1249 tm = __tree_mod_log_oldest_root(root->fs_info, root, time_seq); 1251 tm = __tree_mod_log_oldest_root(root->fs_info, root, time_seq);
@@ -1259,14 +1261,32 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
1259 } 1261 }
1260 1262
1261 tm = tree_mod_log_search(root->fs_info, logical, time_seq); 1263 tm = tree_mod_log_search(root->fs_info, logical, time_seq);
1262 if (old_root) 1264 if (old_root && tm && tm->op != MOD_LOG_KEY_REMOVE_WHILE_FREEING) {
1265 btrfs_tree_read_unlock(root->node);
1266 free_extent_buffer(root->node);
1267 blocksize = btrfs_level_size(root, old_root->level);
1268 old = read_tree_block(root, logical, blocksize, 0);
1269 if (!old) {
1270 pr_warn("btrfs: failed to read tree block %llu from get_old_root\n",
1271 logical);
1272 WARN_ON(1);
1273 } else {
1274 eb = btrfs_clone_extent_buffer(old);
1275 free_extent_buffer(old);
1276 }
1277 } else if (old_root) {
1278 btrfs_tree_read_unlock(root->node);
1279 free_extent_buffer(root->node);
1263 eb = alloc_dummy_extent_buffer(logical, root->nodesize); 1280 eb = alloc_dummy_extent_buffer(logical, root->nodesize);
1264 else 1281 } else {
1265 eb = btrfs_clone_extent_buffer(root->node); 1282 eb = btrfs_clone_extent_buffer(root->node);
1266 btrfs_tree_read_unlock(root->node); 1283 btrfs_tree_read_unlock(root->node);
1267 free_extent_buffer(root->node); 1284 free_extent_buffer(root->node);
1285 }
1286
1268 if (!eb) 1287 if (!eb)
1269 return NULL; 1288 return NULL;
1289 extent_buffer_get(eb);
1270 btrfs_tree_read_lock(eb); 1290 btrfs_tree_read_lock(eb);
1271 if (old_root) { 1291 if (old_root) {
1272 btrfs_set_header_bytenr(eb, eb->start); 1292 btrfs_set_header_bytenr(eb, eb->start);
@@ -1279,11 +1299,28 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
1279 __tree_mod_log_rewind(eb, time_seq, tm); 1299 __tree_mod_log_rewind(eb, time_seq, tm);
1280 else 1300 else
1281 WARN_ON(btrfs_header_level(eb) != 0); 1301 WARN_ON(btrfs_header_level(eb) != 0);
1282 extent_buffer_get(eb); 1302 WARN_ON(btrfs_header_nritems(eb) > BTRFS_NODEPTRS_PER_BLOCK(root));
1283 1303
1284 return eb; 1304 return eb;
1285} 1305}
1286 1306
1307int btrfs_old_root_level(struct btrfs_root *root, u64 time_seq)
1308{
1309 struct tree_mod_elem *tm;
1310 int level;
1311
1312 tm = __tree_mod_log_oldest_root(root->fs_info, root, time_seq);
1313 if (tm && tm->op == MOD_LOG_ROOT_REPLACE) {
1314 level = tm->old_root.level;
1315 } else {
1316 rcu_read_lock();
1317 level = btrfs_header_level(root->node);
1318 rcu_read_unlock();
1319 }
1320
1321 return level;
1322}
1323
1287static inline int should_cow_block(struct btrfs_trans_handle *trans, 1324static inline int should_cow_block(struct btrfs_trans_handle *trans,
1288 struct btrfs_root *root, 1325 struct btrfs_root *root,
1289 struct extent_buffer *buf) 1326 struct extent_buffer *buf)
@@ -1725,6 +1762,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
1725 goto enospc; 1762 goto enospc;
1726 } 1763 }
1727 1764
1765 tree_mod_log_free_eb(root->fs_info, root->node);
1728 tree_mod_log_set_root_pointer(root, child); 1766 tree_mod_log_set_root_pointer(root, child);
1729 rcu_assign_pointer(root->node, child); 1767 rcu_assign_pointer(root->node, child);
1730 1768
@@ -2970,8 +3008,10 @@ static int push_node_left(struct btrfs_trans_handle *trans,
2970 push_items * sizeof(struct btrfs_key_ptr)); 3008 push_items * sizeof(struct btrfs_key_ptr));
2971 3009
2972 if (push_items < src_nritems) { 3010 if (push_items < src_nritems) {
2973 tree_mod_log_eb_move(root->fs_info, src, 0, push_items, 3011 /*
2974 src_nritems - push_items); 3012 * don't call tree_mod_log_eb_move here, key removal was already
3013 * fully logged by tree_mod_log_eb_copy above.
3014 */
2975 memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0), 3015 memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0),
2976 btrfs_node_key_ptr_offset(push_items), 3016 btrfs_node_key_ptr_offset(push_items),
2977 (src_nritems - push_items) * 3017 (src_nritems - push_items) *
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 926c9ffc66d9..c72ead869507 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3120,6 +3120,7 @@ static inline u64 btrfs_inc_tree_mod_seq(struct btrfs_fs_info *fs_info)
3120{ 3120{
3121 return atomic_inc_return(&fs_info->tree_mod_seq); 3121 return atomic_inc_return(&fs_info->tree_mod_seq);
3122} 3122}
3123int btrfs_old_root_level(struct btrfs_root *root, u64 time_seq);
3123 3124
3124/* root-item.c */ 3125/* root-item.c */
3125int btrfs_find_root_ref(struct btrfs_root *tree_root, 3126int btrfs_find_root_ref(struct btrfs_root *tree_root,
@@ -3338,6 +3339,8 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
3338int btrfs_update_inode(struct btrfs_trans_handle *trans, 3339int btrfs_update_inode(struct btrfs_trans_handle *trans,
3339 struct btrfs_root *root, 3340 struct btrfs_root *root,
3340 struct inode *inode); 3341 struct inode *inode);
3342int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans,
3343 struct btrfs_root *root, struct inode *inode);
3341int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode); 3344int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode);
3342int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode); 3345int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode);
3343int btrfs_orphan_cleanup(struct btrfs_root *root); 3346int btrfs_orphan_cleanup(struct btrfs_root *root);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 8036d3a84853..472873a94d96 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -4110,8 +4110,8 @@ struct extent_buffer *alloc_dummy_extent_buffer(u64 start, unsigned long len)
4110 4110
4111 return eb; 4111 return eb;
4112err: 4112err:
4113 for (i--; i >= 0; i--) 4113 for (; i > 0; i--)
4114 __free_page(eb->pages[i]); 4114 __free_page(eb->pages[i - 1]);
4115 __free_extent_buffer(eb); 4115 __free_extent_buffer(eb);
4116 return NULL; 4116 return NULL;
4117} 4117}
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 85a1e5053fe6..95542a1b3dfc 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -94,8 +94,6 @@ static noinline int cow_file_range(struct inode *inode,
94 struct page *locked_page, 94 struct page *locked_page,
95 u64 start, u64 end, int *page_started, 95 u64 start, u64 end, int *page_started,
96 unsigned long *nr_written, int unlock); 96 unsigned long *nr_written, int unlock);
97static noinline int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans,
98 struct btrfs_root *root, struct inode *inode);
99 97
100static int btrfs_init_inode_security(struct btrfs_trans_handle *trans, 98static int btrfs_init_inode_security(struct btrfs_trans_handle *trans,
101 struct inode *inode, struct inode *dir, 99 struct inode *inode, struct inode *dir,
@@ -2746,8 +2744,9 @@ noinline int btrfs_update_inode(struct btrfs_trans_handle *trans,
2746 return btrfs_update_inode_item(trans, root, inode); 2744 return btrfs_update_inode_item(trans, root, inode);
2747} 2745}
2748 2746
2749static noinline int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans, 2747noinline int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans,
2750 struct btrfs_root *root, struct inode *inode) 2748 struct btrfs_root *root,
2749 struct inode *inode)
2751{ 2750{
2752 int ret; 2751 int ret;
2753 2752
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 61168805f175..8fcf9a59c28d 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -343,7 +343,8 @@ static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg)
343 return -EOPNOTSUPP; 343 return -EOPNOTSUPP;
344 if (copy_from_user(&range, arg, sizeof(range))) 344 if (copy_from_user(&range, arg, sizeof(range)))
345 return -EFAULT; 345 return -EFAULT;
346 if (range.start > total_bytes) 346 if (range.start > total_bytes ||
347 range.len < fs_info->sb->s_blocksize)
347 return -EINVAL; 348 return -EINVAL;
348 349
349 range.len = min(range.len, total_bytes - range.start); 350 range.len = min(range.len, total_bytes - range.start);
@@ -570,7 +571,8 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
570 ret = btrfs_commit_transaction(trans, 571 ret = btrfs_commit_transaction(trans,
571 root->fs_info->extent_root); 572 root->fs_info->extent_root);
572 } 573 }
573 BUG_ON(ret); 574 if (ret)
575 goto fail;
574 576
575 ret = pending_snapshot->error; 577 ret = pending_snapshot->error;
576 if (ret) 578 if (ret)
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 5039686df6ae..fe9d02c45f8e 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -790,8 +790,10 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans,
790 } 790 }
791 791
792 path = btrfs_alloc_path(); 792 path = btrfs_alloc_path();
793 if (!path) 793 if (!path) {
794 return -ENOMEM; 794 ret = -ENOMEM;
795 goto out_free_root;
796 }
795 797
796 key.objectid = 0; 798 key.objectid = 0;
797 key.type = BTRFS_QGROUP_STATUS_KEY; 799 key.type = BTRFS_QGROUP_STATUS_KEY;
@@ -800,7 +802,7 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans,
800 ret = btrfs_insert_empty_item(trans, quota_root, path, &key, 802 ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
801 sizeof(*ptr)); 803 sizeof(*ptr));
802 if (ret) 804 if (ret)
803 goto out; 805 goto out_free_path;
804 806
805 leaf = path->nodes[0]; 807 leaf = path->nodes[0];
806 ptr = btrfs_item_ptr(leaf, path->slots[0], 808 ptr = btrfs_item_ptr(leaf, path->slots[0],
@@ -818,8 +820,15 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans,
818 fs_info->quota_root = quota_root; 820 fs_info->quota_root = quota_root;
819 fs_info->pending_quota_state = 1; 821 fs_info->pending_quota_state = 1;
820 spin_unlock(&fs_info->qgroup_lock); 822 spin_unlock(&fs_info->qgroup_lock);
821out: 823out_free_path:
822 btrfs_free_path(path); 824 btrfs_free_path(path);
825out_free_root:
826 if (ret) {
827 free_extent_buffer(quota_root->node);
828 free_extent_buffer(quota_root->commit_root);
829 kfree(quota_root);
830 }
831out:
823 return ret; 832 return ret;
824} 833}
825 834
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index c7beb543a4a8..e78b297b0b00 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -745,31 +745,36 @@ typedef int (*iterate_inode_ref_t)(int num, u64 dir, int index,
745 void *ctx); 745 void *ctx);
746 746
747/* 747/*
748 * Helper function to iterate the entries in ONE btrfs_inode_ref. 748 * Helper function to iterate the entries in ONE btrfs_inode_ref or
749 * btrfs_inode_extref.
749 * The iterate callback may return a non zero value to stop iteration. This can 750 * The iterate callback may return a non zero value to stop iteration. This can
750 * be a negative value for error codes or 1 to simply stop it. 751 * be a negative value for error codes or 1 to simply stop it.
751 * 752 *
752 * path must point to the INODE_REF when called. 753 * path must point to the INODE_REF or INODE_EXTREF when called.
753 */ 754 */
754static int iterate_inode_ref(struct send_ctx *sctx, 755static int iterate_inode_ref(struct send_ctx *sctx,
755 struct btrfs_root *root, struct btrfs_path *path, 756 struct btrfs_root *root, struct btrfs_path *path,
756 struct btrfs_key *found_key, int resolve, 757 struct btrfs_key *found_key, int resolve,
757 iterate_inode_ref_t iterate, void *ctx) 758 iterate_inode_ref_t iterate, void *ctx)
758{ 759{
759 struct extent_buffer *eb; 760 struct extent_buffer *eb = path->nodes[0];
760 struct btrfs_item *item; 761 struct btrfs_item *item;
761 struct btrfs_inode_ref *iref; 762 struct btrfs_inode_ref *iref;
763 struct btrfs_inode_extref *extref;
762 struct btrfs_path *tmp_path; 764 struct btrfs_path *tmp_path;
763 struct fs_path *p; 765 struct fs_path *p;
764 u32 cur; 766 u32 cur = 0;
765 u32 len;
766 u32 total; 767 u32 total;
767 int slot; 768 int slot = path->slots[0];
768 u32 name_len; 769 u32 name_len;
769 char *start; 770 char *start;
770 int ret = 0; 771 int ret = 0;
771 int num; 772 int num = 0;
772 int index; 773 int index;
774 u64 dir;
775 unsigned long name_off;
776 unsigned long elem_size;
777 unsigned long ptr;
773 778
774 p = fs_path_alloc_reversed(sctx); 779 p = fs_path_alloc_reversed(sctx);
775 if (!p) 780 if (!p)
@@ -781,24 +786,40 @@ static int iterate_inode_ref(struct send_ctx *sctx,
781 return -ENOMEM; 786 return -ENOMEM;
782 } 787 }
783 788
784 eb = path->nodes[0];
785 slot = path->slots[0];
786 item = btrfs_item_nr(eb, slot);
787 iref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
788 cur = 0;
789 len = 0;
790 total = btrfs_item_size(eb, item);
791 789
792 num = 0; 790 if (found_key->type == BTRFS_INODE_REF_KEY) {
791 ptr = (unsigned long)btrfs_item_ptr(eb, slot,
792 struct btrfs_inode_ref);
793 item = btrfs_item_nr(eb, slot);
794 total = btrfs_item_size(eb, item);
795 elem_size = sizeof(*iref);
796 } else {
797 ptr = btrfs_item_ptr_offset(eb, slot);
798 total = btrfs_item_size_nr(eb, slot);
799 elem_size = sizeof(*extref);
800 }
801
793 while (cur < total) { 802 while (cur < total) {
794 fs_path_reset(p); 803 fs_path_reset(p);
795 804
796 name_len = btrfs_inode_ref_name_len(eb, iref); 805 if (found_key->type == BTRFS_INODE_REF_KEY) {
797 index = btrfs_inode_ref_index(eb, iref); 806 iref = (struct btrfs_inode_ref *)(ptr + cur);
807 name_len = btrfs_inode_ref_name_len(eb, iref);
808 name_off = (unsigned long)(iref + 1);
809 index = btrfs_inode_ref_index(eb, iref);
810 dir = found_key->offset;
811 } else {
812 extref = (struct btrfs_inode_extref *)(ptr + cur);
813 name_len = btrfs_inode_extref_name_len(eb, extref);
814 name_off = (unsigned long)&extref->name;
815 index = btrfs_inode_extref_index(eb, extref);
816 dir = btrfs_inode_extref_parent(eb, extref);
817 }
818
798 if (resolve) { 819 if (resolve) {
799 start = btrfs_iref_to_path(root, tmp_path, iref, eb, 820 start = btrfs_ref_to_path(root, tmp_path, name_len,
800 found_key->offset, p->buf, 821 name_off, eb, dir,
801 p->buf_len); 822 p->buf, p->buf_len);
802 if (IS_ERR(start)) { 823 if (IS_ERR(start)) {
803 ret = PTR_ERR(start); 824 ret = PTR_ERR(start);
804 goto out; 825 goto out;
@@ -809,9 +830,10 @@ static int iterate_inode_ref(struct send_ctx *sctx,
809 p->buf_len + p->buf - start); 830 p->buf_len + p->buf - start);
810 if (ret < 0) 831 if (ret < 0)
811 goto out; 832 goto out;
812 start = btrfs_iref_to_path(root, tmp_path, iref, 833 start = btrfs_ref_to_path(root, tmp_path,
813 eb, found_key->offset, p->buf, 834 name_len, name_off,
814 p->buf_len); 835 eb, dir,
836 p->buf, p->buf_len);
815 if (IS_ERR(start)) { 837 if (IS_ERR(start)) {
816 ret = PTR_ERR(start); 838 ret = PTR_ERR(start);
817 goto out; 839 goto out;
@@ -820,21 +842,16 @@ static int iterate_inode_ref(struct send_ctx *sctx,
820 } 842 }
821 p->start = start; 843 p->start = start;
822 } else { 844 } else {
823 ret = fs_path_add_from_extent_buffer(p, eb, 845 ret = fs_path_add_from_extent_buffer(p, eb, name_off,
824 (unsigned long)(iref + 1), name_len); 846 name_len);
825 if (ret < 0) 847 if (ret < 0)
826 goto out; 848 goto out;
827 } 849 }
828 850
829 851 cur += elem_size + name_len;
830 len = sizeof(*iref) + name_len; 852 ret = iterate(num, dir, index, p, ctx);
831 iref = (struct btrfs_inode_ref *)((char *)iref + len);
832 cur += len;
833
834 ret = iterate(num, found_key->offset, index, p, ctx);
835 if (ret) 853 if (ret)
836 goto out; 854 goto out;
837
838 num++; 855 num++;
839 } 856 }
840 857
@@ -998,7 +1015,8 @@ static int get_inode_path(struct send_ctx *sctx, struct btrfs_root *root,
998 } 1015 }
999 btrfs_item_key_to_cpu(p->nodes[0], &found_key, p->slots[0]); 1016 btrfs_item_key_to_cpu(p->nodes[0], &found_key, p->slots[0]);
1000 if (found_key.objectid != ino || 1017 if (found_key.objectid != ino ||
1001 found_key.type != BTRFS_INODE_REF_KEY) { 1018 (found_key.type != BTRFS_INODE_REF_KEY &&
1019 found_key.type != BTRFS_INODE_EXTREF_KEY)) {
1002 ret = -ENOENT; 1020 ret = -ENOENT;
1003 goto out; 1021 goto out;
1004 } 1022 }
@@ -1551,8 +1569,8 @@ static int get_first_ref(struct send_ctx *sctx,
1551 struct btrfs_key key; 1569 struct btrfs_key key;
1552 struct btrfs_key found_key; 1570 struct btrfs_key found_key;
1553 struct btrfs_path *path; 1571 struct btrfs_path *path;
1554 struct btrfs_inode_ref *iref;
1555 int len; 1572 int len;
1573 u64 parent_dir;
1556 1574
1557 path = alloc_path_for_send(); 1575 path = alloc_path_for_send();
1558 if (!path) 1576 if (!path)
@@ -1568,27 +1586,41 @@ static int get_first_ref(struct send_ctx *sctx,
1568 if (!ret) 1586 if (!ret)
1569 btrfs_item_key_to_cpu(path->nodes[0], &found_key, 1587 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
1570 path->slots[0]); 1588 path->slots[0]);
1571 if (ret || found_key.objectid != key.objectid || 1589 if (ret || found_key.objectid != ino ||
1572 found_key.type != key.type) { 1590 (found_key.type != BTRFS_INODE_REF_KEY &&
1591 found_key.type != BTRFS_INODE_EXTREF_KEY)) {
1573 ret = -ENOENT; 1592 ret = -ENOENT;
1574 goto out; 1593 goto out;
1575 } 1594 }
1576 1595
1577 iref = btrfs_item_ptr(path->nodes[0], path->slots[0], 1596 if (key.type == BTRFS_INODE_REF_KEY) {
1578 struct btrfs_inode_ref); 1597 struct btrfs_inode_ref *iref;
1579 len = btrfs_inode_ref_name_len(path->nodes[0], iref); 1598 iref = btrfs_item_ptr(path->nodes[0], path->slots[0],
1580 ret = fs_path_add_from_extent_buffer(name, path->nodes[0], 1599 struct btrfs_inode_ref);
1581 (unsigned long)(iref + 1), len); 1600 len = btrfs_inode_ref_name_len(path->nodes[0], iref);
1601 ret = fs_path_add_from_extent_buffer(name, path->nodes[0],
1602 (unsigned long)(iref + 1),
1603 len);
1604 parent_dir = found_key.offset;
1605 } else {
1606 struct btrfs_inode_extref *extref;
1607 extref = btrfs_item_ptr(path->nodes[0], path->slots[0],
1608 struct btrfs_inode_extref);
1609 len = btrfs_inode_extref_name_len(path->nodes[0], extref);
1610 ret = fs_path_add_from_extent_buffer(name, path->nodes[0],
1611 (unsigned long)&extref->name, len);
1612 parent_dir = btrfs_inode_extref_parent(path->nodes[0], extref);
1613 }
1582 if (ret < 0) 1614 if (ret < 0)
1583 goto out; 1615 goto out;
1584 btrfs_release_path(path); 1616 btrfs_release_path(path);
1585 1617
1586 ret = get_inode_info(root, found_key.offset, NULL, dir_gen, NULL, NULL, 1618 ret = get_inode_info(root, parent_dir, NULL, dir_gen, NULL, NULL,
1587 NULL, NULL); 1619 NULL, NULL);
1588 if (ret < 0) 1620 if (ret < 0)
1589 goto out; 1621 goto out;
1590 1622
1591 *dir = found_key.offset; 1623 *dir = parent_dir;
1592 1624
1593out: 1625out:
1594 btrfs_free_path(path); 1626 btrfs_free_path(path);
@@ -2430,7 +2462,8 @@ verbose_printk("btrfs: send_create_inode %llu\n", ino);
2430 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_LINK, p); 2462 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_LINK, p);
2431 } else if (S_ISCHR(mode) || S_ISBLK(mode) || 2463 } else if (S_ISCHR(mode) || S_ISBLK(mode) ||
2432 S_ISFIFO(mode) || S_ISSOCK(mode)) { 2464 S_ISFIFO(mode) || S_ISSOCK(mode)) {
2433 TLV_PUT_U64(sctx, BTRFS_SEND_A_RDEV, rdev); 2465 TLV_PUT_U64(sctx, BTRFS_SEND_A_RDEV, new_encode_dev(rdev));
2466 TLV_PUT_U64(sctx, BTRFS_SEND_A_MODE, mode);
2434 } 2467 }
2435 2468
2436 ret = send_cmd(sctx); 2469 ret = send_cmd(sctx);
@@ -3226,7 +3259,8 @@ static int process_all_refs(struct send_ctx *sctx,
3226 btrfs_item_key_to_cpu(eb, &found_key, slot); 3259 btrfs_item_key_to_cpu(eb, &found_key, slot);
3227 3260
3228 if (found_key.objectid != key.objectid || 3261 if (found_key.objectid != key.objectid ||
3229 found_key.type != key.type) 3262 (found_key.type != BTRFS_INODE_REF_KEY &&
3263 found_key.type != BTRFS_INODE_EXTREF_KEY))
3230 break; 3264 break;
3231 3265
3232 ret = iterate_inode_ref(sctx, root, path, &found_key, 0, cb, 3266 ret = iterate_inode_ref(sctx, root, path, &found_key, 0, cb,
@@ -3987,7 +4021,7 @@ static int process_recorded_refs_if_needed(struct send_ctx *sctx, int at_end)
3987 if (sctx->cur_ino == 0) 4021 if (sctx->cur_ino == 0)
3988 goto out; 4022 goto out;
3989 if (!at_end && sctx->cur_ino == sctx->cmp_key->objectid && 4023 if (!at_end && sctx->cur_ino == sctx->cmp_key->objectid &&
3990 sctx->cmp_key->type <= BTRFS_INODE_REF_KEY) 4024 sctx->cmp_key->type <= BTRFS_INODE_EXTREF_KEY)
3991 goto out; 4025 goto out;
3992 if (list_empty(&sctx->new_refs) && list_empty(&sctx->deleted_refs)) 4026 if (list_empty(&sctx->new_refs) && list_empty(&sctx->deleted_refs))
3993 goto out; 4027 goto out;
@@ -4033,22 +4067,21 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end)
4033 if (ret < 0) 4067 if (ret < 0)
4034 goto out; 4068 goto out;
4035 4069
4036 if (!S_ISLNK(sctx->cur_inode_mode)) { 4070 if (!sctx->parent_root || sctx->cur_inode_new) {
4037 if (!sctx->parent_root || sctx->cur_inode_new) { 4071 need_chown = 1;
4072 if (!S_ISLNK(sctx->cur_inode_mode))
4038 need_chmod = 1; 4073 need_chmod = 1;
4039 need_chown = 1; 4074 } else {
4040 } else { 4075 ret = get_inode_info(sctx->parent_root, sctx->cur_ino,
4041 ret = get_inode_info(sctx->parent_root, sctx->cur_ino, 4076 NULL, NULL, &right_mode, &right_uid,
4042 NULL, NULL, &right_mode, &right_uid, 4077 &right_gid, NULL);
4043 &right_gid, NULL); 4078 if (ret < 0)
4044 if (ret < 0) 4079 goto out;
4045 goto out;
4046 4080
4047 if (left_uid != right_uid || left_gid != right_gid) 4081 if (left_uid != right_uid || left_gid != right_gid)
4048 need_chown = 1; 4082 need_chown = 1;
4049 if (left_mode != right_mode) 4083 if (!S_ISLNK(sctx->cur_inode_mode) && left_mode != right_mode)
4050 need_chmod = 1; 4084 need_chmod = 1;
4051 }
4052 } 4085 }
4053 4086
4054 if (S_ISREG(sctx->cur_inode_mode)) { 4087 if (S_ISREG(sctx->cur_inode_mode)) {
@@ -4335,7 +4368,8 @@ static int changed_cb(struct btrfs_root *left_root,
4335 4368
4336 if (key->type == BTRFS_INODE_ITEM_KEY) 4369 if (key->type == BTRFS_INODE_ITEM_KEY)
4337 ret = changed_inode(sctx, result); 4370 ret = changed_inode(sctx, result);
4338 else if (key->type == BTRFS_INODE_REF_KEY) 4371 else if (key->type == BTRFS_INODE_REF_KEY ||
4372 key->type == BTRFS_INODE_EXTREF_KEY)
4339 ret = changed_ref(sctx, result); 4373 ret = changed_ref(sctx, result);
4340 else if (key->type == BTRFS_XATTR_ITEM_KEY) 4374 else if (key->type == BTRFS_XATTR_ITEM_KEY)
4341 ret = changed_xattr(sctx, result); 4375 ret = changed_xattr(sctx, result);
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 77db875b5116..04bbfb1052eb 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -1200,7 +1200,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
1200 btrfs_i_size_write(parent_inode, parent_inode->i_size + 1200 btrfs_i_size_write(parent_inode, parent_inode->i_size +
1201 dentry->d_name.len * 2); 1201 dentry->d_name.len * 2);
1202 parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME; 1202 parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME;
1203 ret = btrfs_update_inode(trans, parent_root, parent_inode); 1203 ret = btrfs_update_inode_fallback(trans, parent_root, parent_inode);
1204 if (ret) 1204 if (ret)
1205 btrfs_abort_transaction(trans, root, ret); 1205 btrfs_abort_transaction(trans, root, ret);
1206fail: 1206fail:
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 029b903a4ae3..0f5ebb72a5ea 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1819,6 +1819,13 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
1819 "Failed to relocate sys chunks after " 1819 "Failed to relocate sys chunks after "
1820 "device initialization. This can be fixed " 1820 "device initialization. This can be fixed "
1821 "using the \"btrfs balance\" command."); 1821 "using the \"btrfs balance\" command.");
1822 trans = btrfs_attach_transaction(root);
1823 if (IS_ERR(trans)) {
1824 if (PTR_ERR(trans) == -ENOENT)
1825 return 0;
1826 return PTR_ERR(trans);
1827 }
1828 ret = btrfs_commit_transaction(trans, root);
1822 } 1829 }
1823 1830
1824 return ret; 1831 return ret;
diff --git a/fs/ceph/export.c b/fs/ceph/export.c
index 02ce90972d81..9349bb37a2fe 100644
--- a/fs/ceph/export.c
+++ b/fs/ceph/export.c
@@ -90,6 +90,8 @@ static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len,
90 *max_len = handle_length; 90 *max_len = handle_length;
91 type = 255; 91 type = 255;
92 } 92 }
93 if (dentry)
94 dput(dentry);
93 return type; 95 return type;
94} 96}
95 97
diff --git a/fs/char_dev.c b/fs/char_dev.c
index 3f152b92a94a..afc2bb691780 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -471,9 +471,19 @@ static int exact_lock(dev_t dev, void *data)
471 */ 471 */
472int cdev_add(struct cdev *p, dev_t dev, unsigned count) 472int cdev_add(struct cdev *p, dev_t dev, unsigned count)
473{ 473{
474 int error;
475
474 p->dev = dev; 476 p->dev = dev;
475 p->count = count; 477 p->count = count;
476 return kobj_map(cdev_map, dev, count, NULL, exact_match, exact_lock, p); 478
479 error = kobj_map(cdev_map, dev, count, NULL,
480 exact_match, exact_lock, p);
481 if (error)
482 return error;
483
484 kobject_get(p->kobj.parent);
485
486 return 0;
477} 487}
478 488
479static void cdev_unmap(dev_t dev, unsigned count) 489static void cdev_unmap(dev_t dev, unsigned count)
@@ -498,14 +508,20 @@ void cdev_del(struct cdev *p)
498static void cdev_default_release(struct kobject *kobj) 508static void cdev_default_release(struct kobject *kobj)
499{ 509{
500 struct cdev *p = container_of(kobj, struct cdev, kobj); 510 struct cdev *p = container_of(kobj, struct cdev, kobj);
511 struct kobject *parent = kobj->parent;
512
501 cdev_purge(p); 513 cdev_purge(p);
514 kobject_put(parent);
502} 515}
503 516
504static void cdev_dynamic_release(struct kobject *kobj) 517static void cdev_dynamic_release(struct kobject *kobj)
505{ 518{
506 struct cdev *p = container_of(kobj, struct cdev, kobj); 519 struct cdev *p = container_of(kobj, struct cdev, kobj);
520 struct kobject *parent = kobj->parent;
521
507 cdev_purge(p); 522 cdev_purge(p);
508 kfree(p); 523 kfree(p);
524 kobject_put(parent);
509} 525}
510 526
511static struct kobj_type ktype_cdev_default = { 527static struct kobj_type ktype_cdev_default = {
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index fc783e264420..0fb15bbbe43c 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -225,6 +225,13 @@ sid_to_str(struct cifs_sid *sidptr, char *sidstr)
225} 225}
226 226
227static void 227static void
228cifs_copy_sid(struct cifs_sid *dst, const struct cifs_sid *src)
229{
230 memcpy(dst, src, sizeof(*dst));
231 dst->num_subauth = min_t(u8, src->num_subauth, NUM_SUBAUTHS);
232}
233
234static void
228id_rb_insert(struct rb_root *root, struct cifs_sid *sidptr, 235id_rb_insert(struct rb_root *root, struct cifs_sid *sidptr,
229 struct cifs_sid_id **psidid, char *typestr) 236 struct cifs_sid_id **psidid, char *typestr)
230{ 237{
@@ -248,7 +255,7 @@ id_rb_insert(struct rb_root *root, struct cifs_sid *sidptr,
248 } 255 }
249 } 256 }
250 257
251 memcpy(&(*psidid)->sid, sidptr, sizeof(struct cifs_sid)); 258 cifs_copy_sid(&(*psidid)->sid, sidptr);
252 (*psidid)->time = jiffies - (SID_MAP_RETRY + 1); 259 (*psidid)->time = jiffies - (SID_MAP_RETRY + 1);
253 (*psidid)->refcount = 0; 260 (*psidid)->refcount = 0;
254 261
@@ -354,7 +361,7 @@ id_to_sid(unsigned long cid, uint sidtype, struct cifs_sid *ssid)
354 * any fields of the node after a reference is put . 361 * any fields of the node after a reference is put .
355 */ 362 */
356 if (test_bit(SID_ID_MAPPED, &psidid->state)) { 363 if (test_bit(SID_ID_MAPPED, &psidid->state)) {
357 memcpy(ssid, &psidid->sid, sizeof(struct cifs_sid)); 364 cifs_copy_sid(ssid, &psidid->sid);
358 psidid->time = jiffies; /* update ts for accessing */ 365 psidid->time = jiffies; /* update ts for accessing */
359 goto id_sid_out; 366 goto id_sid_out;
360 } 367 }
@@ -370,14 +377,14 @@ id_to_sid(unsigned long cid, uint sidtype, struct cifs_sid *ssid)
370 if (IS_ERR(sidkey)) { 377 if (IS_ERR(sidkey)) {
371 rc = -EINVAL; 378 rc = -EINVAL;
372 cFYI(1, "%s: Can't map and id to a SID", __func__); 379 cFYI(1, "%s: Can't map and id to a SID", __func__);
380 } else if (sidkey->datalen < sizeof(struct cifs_sid)) {
381 rc = -EIO;
382 cFYI(1, "%s: Downcall contained malformed key "
383 "(datalen=%hu)", __func__, sidkey->datalen);
373 } else { 384 } else {
374 lsid = (struct cifs_sid *)sidkey->payload.data; 385 lsid = (struct cifs_sid *)sidkey->payload.data;
375 memcpy(&psidid->sid, lsid, 386 cifs_copy_sid(&psidid->sid, lsid);
376 sidkey->datalen < sizeof(struct cifs_sid) ? 387 cifs_copy_sid(ssid, &psidid->sid);
377 sidkey->datalen : sizeof(struct cifs_sid));
378 memcpy(ssid, &psidid->sid,
379 sidkey->datalen < sizeof(struct cifs_sid) ?
380 sidkey->datalen : sizeof(struct cifs_sid));
381 set_bit(SID_ID_MAPPED, &psidid->state); 388 set_bit(SID_ID_MAPPED, &psidid->state);
382 key_put(sidkey); 389 key_put(sidkey);
383 kfree(psidid->sidstr); 390 kfree(psidid->sidstr);
@@ -396,7 +403,7 @@ id_to_sid(unsigned long cid, uint sidtype, struct cifs_sid *ssid)
396 return rc; 403 return rc;
397 } 404 }
398 if (test_bit(SID_ID_MAPPED, &psidid->state)) 405 if (test_bit(SID_ID_MAPPED, &psidid->state))
399 memcpy(ssid, &psidid->sid, sizeof(struct cifs_sid)); 406 cifs_copy_sid(ssid, &psidid->sid);
400 else 407 else
401 rc = -EINVAL; 408 rc = -EINVAL;
402 } 409 }
@@ -675,8 +682,6 @@ int compare_sids(const struct cifs_sid *ctsid, const struct cifs_sid *cwsid)
675static void copy_sec_desc(const struct cifs_ntsd *pntsd, 682static void copy_sec_desc(const struct cifs_ntsd *pntsd,
676 struct cifs_ntsd *pnntsd, __u32 sidsoffset) 683 struct cifs_ntsd *pnntsd, __u32 sidsoffset)
677{ 684{
678 int i;
679
680 struct cifs_sid *owner_sid_ptr, *group_sid_ptr; 685 struct cifs_sid *owner_sid_ptr, *group_sid_ptr;
681 struct cifs_sid *nowner_sid_ptr, *ngroup_sid_ptr; 686 struct cifs_sid *nowner_sid_ptr, *ngroup_sid_ptr;
682 687
@@ -692,26 +697,14 @@ static void copy_sec_desc(const struct cifs_ntsd *pntsd,
692 owner_sid_ptr = (struct cifs_sid *)((char *)pntsd + 697 owner_sid_ptr = (struct cifs_sid *)((char *)pntsd +
693 le32_to_cpu(pntsd->osidoffset)); 698 le32_to_cpu(pntsd->osidoffset));
694 nowner_sid_ptr = (struct cifs_sid *)((char *)pnntsd + sidsoffset); 699 nowner_sid_ptr = (struct cifs_sid *)((char *)pnntsd + sidsoffset);
695 700 cifs_copy_sid(nowner_sid_ptr, owner_sid_ptr);
696 nowner_sid_ptr->revision = owner_sid_ptr->revision;
697 nowner_sid_ptr->num_subauth = owner_sid_ptr->num_subauth;
698 for (i = 0; i < 6; i++)
699 nowner_sid_ptr->authority[i] = owner_sid_ptr->authority[i];
700 for (i = 0; i < 5; i++)
701 nowner_sid_ptr->sub_auth[i] = owner_sid_ptr->sub_auth[i];
702 701
703 /* copy group sid */ 702 /* copy group sid */
704 group_sid_ptr = (struct cifs_sid *)((char *)pntsd + 703 group_sid_ptr = (struct cifs_sid *)((char *)pntsd +
705 le32_to_cpu(pntsd->gsidoffset)); 704 le32_to_cpu(pntsd->gsidoffset));
706 ngroup_sid_ptr = (struct cifs_sid *)((char *)pnntsd + sidsoffset + 705 ngroup_sid_ptr = (struct cifs_sid *)((char *)pnntsd + sidsoffset +
707 sizeof(struct cifs_sid)); 706 sizeof(struct cifs_sid));
708 707 cifs_copy_sid(ngroup_sid_ptr, group_sid_ptr);
709 ngroup_sid_ptr->revision = group_sid_ptr->revision;
710 ngroup_sid_ptr->num_subauth = group_sid_ptr->num_subauth;
711 for (i = 0; i < 6; i++)
712 ngroup_sid_ptr->authority[i] = group_sid_ptr->authority[i];
713 for (i = 0; i < 5; i++)
714 ngroup_sid_ptr->sub_auth[i] = group_sid_ptr->sub_auth[i];
715 708
716 return; 709 return;
717} 710}
@@ -1120,8 +1113,7 @@ static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd,
1120 kfree(nowner_sid_ptr); 1113 kfree(nowner_sid_ptr);
1121 return rc; 1114 return rc;
1122 } 1115 }
1123 memcpy(owner_sid_ptr, nowner_sid_ptr, 1116 cifs_copy_sid(owner_sid_ptr, nowner_sid_ptr);
1124 sizeof(struct cifs_sid));
1125 kfree(nowner_sid_ptr); 1117 kfree(nowner_sid_ptr);
1126 *aclflag = CIFS_ACL_OWNER; 1118 *aclflag = CIFS_ACL_OWNER;
1127 } 1119 }
@@ -1139,8 +1131,7 @@ static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd,
1139 kfree(ngroup_sid_ptr); 1131 kfree(ngroup_sid_ptr);
1140 return rc; 1132 return rc;
1141 } 1133 }
1142 memcpy(group_sid_ptr, ngroup_sid_ptr, 1134 cifs_copy_sid(group_sid_ptr, ngroup_sid_ptr);
1143 sizeof(struct cifs_sid));
1144 kfree(ngroup_sid_ptr); 1135 kfree(ngroup_sid_ptr);
1145 *aclflag = CIFS_ACL_GROUP; 1136 *aclflag = CIFS_ACL_GROUP;
1146 } 1137 }
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 7c0a81283645..d3671f2acb29 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -398,7 +398,16 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry,
398 * in network traffic in the other paths. 398 * in network traffic in the other paths.
399 */ 399 */
400 if (!(oflags & O_CREAT)) { 400 if (!(oflags & O_CREAT)) {
401 struct dentry *res = cifs_lookup(inode, direntry, 0); 401 struct dentry *res;
402
403 /*
404 * Check for hashed negative dentry. We have already revalidated
405 * the dentry and it is fine. No need to perform another lookup.
406 */
407 if (!d_unhashed(direntry))
408 return -ENOENT;
409
410 res = cifs_lookup(inode, direntry, 0);
402 if (IS_ERR(res)) 411 if (IS_ERR(res))
403 return PTR_ERR(res); 412 return PTR_ERR(res);
404 413
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index f5054025f9da..4c6285fff598 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -210,6 +210,8 @@ static int do_video_set_spu_palette(unsigned int fd, unsigned int cmd,
210 210
211 err = get_user(palp, &up->palette); 211 err = get_user(palp, &up->palette);
212 err |= get_user(length, &up->length); 212 err |= get_user(length, &up->length);
213 if (err)
214 return -EFAULT;
213 215
214 up_native = compat_alloc_user_space(sizeof(struct video_spu_palette)); 216 up_native = compat_alloc_user_space(sizeof(struct video_spu_palette));
215 err = put_user(compat_ptr(palp), &up_native->palette); 217 err = put_user(compat_ptr(palp), &up_native->palette);
diff --git a/fs/coredump.c b/fs/coredump.c
index fd37facac8dc..ce47379bfa61 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -450,11 +450,12 @@ static int umh_pipe_setup(struct subprocess_info *info, struct cred *new)
450 450
451 cp->file = files[1]; 451 cp->file = files[1];
452 452
453 replace_fd(0, files[0], 0); 453 err = replace_fd(0, files[0], 0);
454 fput(files[0]);
454 /* and disallow core files too */ 455 /* and disallow core files too */
455 current->signal->rlim[RLIMIT_CORE] = (struct rlimit){1, 1}; 456 current->signal->rlim[RLIMIT_CORE] = (struct rlimit){1, 1};
456 457
457 return 0; 458 return err;
458} 459}
459 460
460void do_coredump(siginfo_t *siginfo, struct pt_regs *regs) 461void do_coredump(siginfo_t *siginfo, struct pt_regs *regs)
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index da72250ddc1c..cd96649bfe62 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -346,7 +346,7 @@ static inline struct epitem *ep_item_from_epqueue(poll_table *p)
346/* Tells if the epoll_ctl(2) operation needs an event copy from userspace */ 346/* Tells if the epoll_ctl(2) operation needs an event copy from userspace */
347static inline int ep_op_has_event(int op) 347static inline int ep_op_has_event(int op)
348{ 348{
349 return op == EPOLL_CTL_ADD || op == EPOLL_CTL_MOD; 349 return op != EPOLL_CTL_DEL;
350} 350}
351 351
352/* Initialize the poll safe wake up structure */ 352/* Initialize the poll safe wake up structure */
@@ -676,34 +676,6 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi)
676 return 0; 676 return 0;
677} 677}
678 678
679/*
680 * Disables a "struct epitem" in the eventpoll set. Returns -EBUSY if the item
681 * had no event flags set, indicating that another thread may be currently
682 * handling that item's events (in the case that EPOLLONESHOT was being
683 * used). Otherwise a zero result indicates that the item has been disabled
684 * from receiving events. A disabled item may be re-enabled via
685 * EPOLL_CTL_MOD. Must be called with "mtx" held.
686 */
687static int ep_disable(struct eventpoll *ep, struct epitem *epi)
688{
689 int result = 0;
690 unsigned long flags;
691
692 spin_lock_irqsave(&ep->lock, flags);
693 if (epi->event.events & ~EP_PRIVATE_BITS) {
694 if (ep_is_linked(&epi->rdllink))
695 list_del_init(&epi->rdllink);
696 /* Ensure ep_poll_callback will not add epi back onto ready
697 list: */
698 epi->event.events &= EP_PRIVATE_BITS;
699 }
700 else
701 result = -EBUSY;
702 spin_unlock_irqrestore(&ep->lock, flags);
703
704 return result;
705}
706
707static void ep_free(struct eventpoll *ep) 679static void ep_free(struct eventpoll *ep)
708{ 680{
709 struct rb_node *rbp; 681 struct rb_node *rbp;
@@ -1048,6 +1020,8 @@ static void ep_rbtree_insert(struct eventpoll *ep, struct epitem *epi)
1048 rb_insert_color(&epi->rbn, &ep->rbr); 1020 rb_insert_color(&epi->rbn, &ep->rbr);
1049} 1021}
1050 1022
1023
1024
1051#define PATH_ARR_SIZE 5 1025#define PATH_ARR_SIZE 5
1052/* 1026/*
1053 * These are the number paths of length 1 to 5, that we are allowing to emanate 1027 * These are the number paths of length 1 to 5, that we are allowing to emanate
@@ -1813,12 +1787,6 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
1813 } else 1787 } else
1814 error = -ENOENT; 1788 error = -ENOENT;
1815 break; 1789 break;
1816 case EPOLL_CTL_DISABLE:
1817 if (epi)
1818 error = ep_disable(ep, epi);
1819 else
1820 error = -ENOENT;
1821 break;
1822 } 1790 }
1823 mutex_unlock(&ep->mtx); 1791 mutex_unlock(&ep->mtx);
1824 1792
diff --git a/fs/exec.c b/fs/exec.c
index 8b9011b67041..0039055b1fc6 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1083,7 +1083,8 @@ int flush_old_exec(struct linux_binprm * bprm)
1083 bprm->mm = NULL; /* We're using it now */ 1083 bprm->mm = NULL; /* We're using it now */
1084 1084
1085 set_fs(USER_DS); 1085 set_fs(USER_DS);
1086 current->flags &= ~(PF_RANDOMIZE | PF_FORKNOEXEC | PF_KTHREAD); 1086 current->flags &=
1087 ~(PF_RANDOMIZE | PF_FORKNOEXEC | PF_KTHREAD | PF_NOFREEZE);
1087 flush_thread(); 1088 flush_thread();
1088 current->personality &= ~bprm->per_clear; 1089 current->personality &= ~bprm->per_clear;
1089 1090
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 6c205d0c565b..fa04d023177e 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -469,7 +469,7 @@ static int parse_options(char *options, struct super_block *sb)
469 uid = make_kuid(current_user_ns(), option); 469 uid = make_kuid(current_user_ns(), option);
470 if (!uid_valid(uid)) { 470 if (!uid_valid(uid)) {
471 ext2_msg(sb, KERN_ERR, "Invalid uid value %d", option); 471 ext2_msg(sb, KERN_ERR, "Invalid uid value %d", option);
472 return -1; 472 return 0;
473 473
474 } 474 }
475 sbi->s_resuid = uid; 475 sbi->s_resuid = uid;
@@ -480,7 +480,7 @@ static int parse_options(char *options, struct super_block *sb)
480 gid = make_kgid(current_user_ns(), option); 480 gid = make_kgid(current_user_ns(), option);
481 if (!gid_valid(gid)) { 481 if (!gid_valid(gid)) {
482 ext2_msg(sb, KERN_ERR, "Invalid gid value %d", option); 482 ext2_msg(sb, KERN_ERR, "Invalid gid value %d", option);
483 return -1; 483 return 0;
484 } 484 }
485 sbi->s_resgid = gid; 485 sbi->s_resgid = gid;
486 break; 486 break;
diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index 7320a66e958f..22548f56197b 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -2101,8 +2101,9 @@ int ext3_trim_fs(struct super_block *sb, struct fstrim_range *range)
2101 end = start + (range->len >> sb->s_blocksize_bits) - 1; 2101 end = start + (range->len >> sb->s_blocksize_bits) - 1;
2102 minlen = range->minlen >> sb->s_blocksize_bits; 2102 minlen = range->minlen >> sb->s_blocksize_bits;
2103 2103
2104 if (unlikely(minlen > EXT3_BLOCKS_PER_GROUP(sb)) || 2104 if (minlen > EXT3_BLOCKS_PER_GROUP(sb) ||
2105 unlikely(start >= max_blks)) 2105 start >= max_blks ||
2106 range->len < sb->s_blocksize)
2106 return -EINVAL; 2107 return -EINVAL;
2107 if (end >= max_blks) 2108 if (end >= max_blks)
2108 end = max_blks - 1; 2109 end = max_blks - 1;
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 8f4fddac01a6..890b8947c546 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -46,8 +46,7 @@ static struct buffer_head *ext3_append(handle_t *handle,
46 46
47 *block = inode->i_size >> inode->i_sb->s_blocksize_bits; 47 *block = inode->i_size >> inode->i_sb->s_blocksize_bits;
48 48
49 bh = ext3_bread(handle, inode, *block, 1, err); 49 if ((bh = ext3_dir_bread(handle, inode, *block, 1, err))) {
50 if (bh) {
51 inode->i_size += inode->i_sb->s_blocksize; 50 inode->i_size += inode->i_sb->s_blocksize;
52 EXT3_I(inode)->i_disksize = inode->i_size; 51 EXT3_I(inode)->i_disksize = inode->i_size;
53 *err = ext3_journal_get_write_access(handle, bh); 52 *err = ext3_journal_get_write_access(handle, bh);
@@ -339,8 +338,10 @@ dx_probe(struct qstr *entry, struct inode *dir,
339 u32 hash; 338 u32 hash;
340 339
341 frame->bh = NULL; 340 frame->bh = NULL;
342 if (!(bh = ext3_bread (NULL,dir, 0, 0, err))) 341 if (!(bh = ext3_dir_bread(NULL, dir, 0, 0, err))) {
342 *err = ERR_BAD_DX_DIR;
343 goto fail; 343 goto fail;
344 }
344 root = (struct dx_root *) bh->b_data; 345 root = (struct dx_root *) bh->b_data;
345 if (root->info.hash_version != DX_HASH_TEA && 346 if (root->info.hash_version != DX_HASH_TEA &&
346 root->info.hash_version != DX_HASH_HALF_MD4 && 347 root->info.hash_version != DX_HASH_HALF_MD4 &&
@@ -436,8 +437,10 @@ dx_probe(struct qstr *entry, struct inode *dir,
436 frame->entries = entries; 437 frame->entries = entries;
437 frame->at = at; 438 frame->at = at;
438 if (!indirect--) return frame; 439 if (!indirect--) return frame;
439 if (!(bh = ext3_bread (NULL,dir, dx_get_block(at), 0, err))) 440 if (!(bh = ext3_dir_bread(NULL, dir, dx_get_block(at), 0, err))) {
441 *err = ERR_BAD_DX_DIR;
440 goto fail2; 442 goto fail2;
443 }
441 at = entries = ((struct dx_node *) bh->b_data)->entries; 444 at = entries = ((struct dx_node *) bh->b_data)->entries;
442 if (dx_get_limit(entries) != dx_node_limit (dir)) { 445 if (dx_get_limit(entries) != dx_node_limit (dir)) {
443 ext3_warning(dir->i_sb, __func__, 446 ext3_warning(dir->i_sb, __func__,
@@ -535,8 +538,8 @@ static int ext3_htree_next_block(struct inode *dir, __u32 hash,
535 * block so no check is necessary 538 * block so no check is necessary
536 */ 539 */
537 while (num_frames--) { 540 while (num_frames--) {
538 if (!(bh = ext3_bread(NULL, dir, dx_get_block(p->at), 541 if (!(bh = ext3_dir_bread(NULL, dir, dx_get_block(p->at),
539 0, &err))) 542 0, &err)))
540 return err; /* Failure */ 543 return err; /* Failure */
541 p++; 544 p++;
542 brelse (p->bh); 545 brelse (p->bh);
@@ -559,10 +562,11 @@ static int htree_dirblock_to_tree(struct file *dir_file,
559{ 562{
560 struct buffer_head *bh; 563 struct buffer_head *bh;
561 struct ext3_dir_entry_2 *de, *top; 564 struct ext3_dir_entry_2 *de, *top;
562 int err, count = 0; 565 int err = 0, count = 0;
563 566
564 dxtrace(printk("In htree dirblock_to_tree: block %d\n", block)); 567 dxtrace(printk("In htree dirblock_to_tree: block %d\n", block));
565 if (!(bh = ext3_bread (NULL, dir, block, 0, &err))) 568
569 if (!(bh = ext3_dir_bread(NULL, dir, block, 0, &err)))
566 return err; 570 return err;
567 571
568 de = (struct ext3_dir_entry_2 *) bh->b_data; 572 de = (struct ext3_dir_entry_2 *) bh->b_data;
@@ -976,7 +980,7 @@ static struct buffer_head * ext3_dx_find_entry(struct inode *dir,
976 return NULL; 980 return NULL;
977 do { 981 do {
978 block = dx_get_block(frame->at); 982 block = dx_get_block(frame->at);
979 if (!(bh = ext3_bread (NULL,dir, block, 0, err))) 983 if (!(bh = ext3_dir_bread (NULL, dir, block, 0, err)))
980 goto errout; 984 goto errout;
981 985
982 retval = search_dirblock(bh, dir, entry, 986 retval = search_dirblock(bh, dir, entry,
@@ -1458,9 +1462,9 @@ static int ext3_add_entry (handle_t *handle, struct dentry *dentry,
1458 } 1462 }
1459 blocks = dir->i_size >> sb->s_blocksize_bits; 1463 blocks = dir->i_size >> sb->s_blocksize_bits;
1460 for (block = 0; block < blocks; block++) { 1464 for (block = 0; block < blocks; block++) {
1461 bh = ext3_bread(handle, dir, block, 0, &retval); 1465 if (!(bh = ext3_dir_bread(handle, dir, block, 0, &retval)))
1462 if(!bh)
1463 return retval; 1466 return retval;
1467
1464 retval = add_dirent_to_buf(handle, dentry, inode, NULL, bh); 1468 retval = add_dirent_to_buf(handle, dentry, inode, NULL, bh);
1465 if (retval != -ENOSPC) 1469 if (retval != -ENOSPC)
1466 return retval; 1470 return retval;
@@ -1500,7 +1504,7 @@ static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
1500 entries = frame->entries; 1504 entries = frame->entries;
1501 at = frame->at; 1505 at = frame->at;
1502 1506
1503 if (!(bh = ext3_bread(handle,dir, dx_get_block(frame->at), 0, &err))) 1507 if (!(bh = ext3_dir_bread(handle, dir, dx_get_block(frame->at), 0, &err)))
1504 goto cleanup; 1508 goto cleanup;
1505 1509
1506 BUFFER_TRACE(bh, "get_write_access"); 1510 BUFFER_TRACE(bh, "get_write_access");
@@ -1790,8 +1794,7 @@ retry:
1790 inode->i_op = &ext3_dir_inode_operations; 1794 inode->i_op = &ext3_dir_inode_operations;
1791 inode->i_fop = &ext3_dir_operations; 1795 inode->i_fop = &ext3_dir_operations;
1792 inode->i_size = EXT3_I(inode)->i_disksize = inode->i_sb->s_blocksize; 1796 inode->i_size = EXT3_I(inode)->i_disksize = inode->i_sb->s_blocksize;
1793 dir_block = ext3_bread (handle, inode, 0, 1, &err); 1797 if (!(dir_block = ext3_dir_bread(handle, inode, 0, 1, &err)))
1794 if (!dir_block)
1795 goto out_clear_inode; 1798 goto out_clear_inode;
1796 1799
1797 BUFFER_TRACE(dir_block, "get_write_access"); 1800 BUFFER_TRACE(dir_block, "get_write_access");
@@ -1859,7 +1862,7 @@ static int empty_dir (struct inode * inode)
1859 1862
1860 sb = inode->i_sb; 1863 sb = inode->i_sb;
1861 if (inode->i_size < EXT3_DIR_REC_LEN(1) + EXT3_DIR_REC_LEN(2) || 1864 if (inode->i_size < EXT3_DIR_REC_LEN(1) + EXT3_DIR_REC_LEN(2) ||
1862 !(bh = ext3_bread (NULL, inode, 0, 0, &err))) { 1865 !(bh = ext3_dir_bread(NULL, inode, 0, 0, &err))) {
1863 if (err) 1866 if (err)
1864 ext3_error(inode->i_sb, __func__, 1867 ext3_error(inode->i_sb, __func__,
1865 "error %d reading directory #%lu offset 0", 1868 "error %d reading directory #%lu offset 0",
@@ -1890,9 +1893,8 @@ static int empty_dir (struct inode * inode)
1890 (void *) de >= (void *) (bh->b_data+sb->s_blocksize)) { 1893 (void *) de >= (void *) (bh->b_data+sb->s_blocksize)) {
1891 err = 0; 1894 err = 0;
1892 brelse (bh); 1895 brelse (bh);
1893 bh = ext3_bread (NULL, inode, 1896 if (!(bh = ext3_dir_bread (NULL, inode,
1894 offset >> EXT3_BLOCK_SIZE_BITS(sb), 0, &err); 1897 offset >> EXT3_BLOCK_SIZE_BITS(sb), 0, &err))) {
1895 if (!bh) {
1896 if (err) 1898 if (err)
1897 ext3_error(sb, __func__, 1899 ext3_error(sb, __func__,
1898 "error %d reading directory" 1900 "error %d reading directory"
@@ -2388,7 +2390,7 @@ static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry,
2388 goto end_rename; 2390 goto end_rename;
2389 } 2391 }
2390 retval = -EIO; 2392 retval = -EIO;
2391 dir_bh = ext3_bread (handle, old_inode, 0, 0, &retval); 2393 dir_bh = ext3_dir_bread(handle, old_inode, 0, 0, &retval);
2392 if (!dir_bh) 2394 if (!dir_bh)
2393 goto end_rename; 2395 goto end_rename;
2394 if (le32_to_cpu(PARENT_INO(dir_bh->b_data)) != old_dir->i_ino) 2396 if (le32_to_cpu(PARENT_INO(dir_bh->b_data)) != old_dir->i_ino)
diff --git a/fs/ext3/namei.h b/fs/ext3/namei.h
index f2ce2b0065c9..46304d8c9f0a 100644
--- a/fs/ext3/namei.h
+++ b/fs/ext3/namei.h
@@ -6,3 +6,22 @@
6*/ 6*/
7 7
8extern struct dentry *ext3_get_parent(struct dentry *child); 8extern struct dentry *ext3_get_parent(struct dentry *child);
9
10static inline struct buffer_head *ext3_dir_bread(handle_t *handle,
11 struct inode *inode,
12 int block, int create,
13 int *err)
14{
15 struct buffer_head *bh;
16
17 bh = ext3_bread(handle, inode, block, create, err);
18
19 if (!bh && !(*err)) {
20 *err = -EIO;
21 ext3_error(inode->i_sb, __func__,
22 "Directory hole detected on inode %lu\n",
23 inode->i_ino);
24 return NULL;
25 }
26 return bh;
27}
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 29e79713c7eb..5366393528df 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -1001,7 +1001,7 @@ static int parse_options (char *options, struct super_block *sb,
1001 uid = make_kuid(current_user_ns(), option); 1001 uid = make_kuid(current_user_ns(), option);
1002 if (!uid_valid(uid)) { 1002 if (!uid_valid(uid)) {
1003 ext3_msg(sb, KERN_ERR, "Invalid uid value %d", option); 1003 ext3_msg(sb, KERN_ERR, "Invalid uid value %d", option);
1004 return -1; 1004 return 0;
1005 1005
1006 } 1006 }
1007 sbi->s_resuid = uid; 1007 sbi->s_resuid = uid;
@@ -1012,7 +1012,7 @@ static int parse_options (char *options, struct super_block *sb,
1012 gid = make_kgid(current_user_ns(), option); 1012 gid = make_kgid(current_user_ns(), option);
1013 if (!gid_valid(gid)) { 1013 if (!gid_valid(gid)) {
1014 ext3_msg(sb, KERN_ERR, "Invalid gid value %d", option); 1014 ext3_msg(sb, KERN_ERR, "Invalid gid value %d", option);
1015 return -1; 1015 return 0;
1016 } 1016 }
1017 sbi->s_resgid = gid; 1017 sbi->s_resgid = gid;
1018 break; 1018 break;
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 1b5089067d01..cf1821784a16 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -174,8 +174,7 @@ void ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
174 ext4_free_inodes_set(sb, gdp, 0); 174 ext4_free_inodes_set(sb, gdp, 0);
175 ext4_itable_unused_set(sb, gdp, 0); 175 ext4_itable_unused_set(sb, gdp, 0);
176 memset(bh->b_data, 0xff, sb->s_blocksize); 176 memset(bh->b_data, 0xff, sb->s_blocksize);
177 ext4_block_bitmap_csum_set(sb, block_group, gdp, bh, 177 ext4_block_bitmap_csum_set(sb, block_group, gdp, bh);
178 EXT4_BLOCKS_PER_GROUP(sb) / 8);
179 return; 178 return;
180 } 179 }
181 memset(bh->b_data, 0, sb->s_blocksize); 180 memset(bh->b_data, 0, sb->s_blocksize);
@@ -212,8 +211,7 @@ void ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
212 */ 211 */
213 ext4_mark_bitmap_end(num_clusters_in_group(sb, block_group), 212 ext4_mark_bitmap_end(num_clusters_in_group(sb, block_group),
214 sb->s_blocksize * 8, bh->b_data); 213 sb->s_blocksize * 8, bh->b_data);
215 ext4_block_bitmap_csum_set(sb, block_group, gdp, bh, 214 ext4_block_bitmap_csum_set(sb, block_group, gdp, bh);
216 EXT4_BLOCKS_PER_GROUP(sb) / 8);
217 ext4_group_desc_csum_set(sb, block_group, gdp); 215 ext4_group_desc_csum_set(sb, block_group, gdp);
218} 216}
219 217
@@ -350,7 +348,7 @@ void ext4_validate_block_bitmap(struct super_block *sb,
350 return; 348 return;
351 } 349 }
352 if (unlikely(!ext4_block_bitmap_csum_verify(sb, block_group, 350 if (unlikely(!ext4_block_bitmap_csum_verify(sb, block_group,
353 desc, bh, EXT4_BLOCKS_PER_GROUP(sb) / 8))) { 351 desc, bh))) {
354 ext4_unlock_group(sb, block_group); 352 ext4_unlock_group(sb, block_group);
355 ext4_error(sb, "bg %u: bad block bitmap checksum", block_group); 353 ext4_error(sb, "bg %u: bad block bitmap checksum", block_group);
356 return; 354 return;
diff --git a/fs/ext4/bitmap.c b/fs/ext4/bitmap.c
index 5c2d1813ebe9..3285aa5a706a 100644
--- a/fs/ext4/bitmap.c
+++ b/fs/ext4/bitmap.c
@@ -58,11 +58,12 @@ void ext4_inode_bitmap_csum_set(struct super_block *sb, ext4_group_t group,
58 58
59int ext4_block_bitmap_csum_verify(struct super_block *sb, ext4_group_t group, 59int ext4_block_bitmap_csum_verify(struct super_block *sb, ext4_group_t group,
60 struct ext4_group_desc *gdp, 60 struct ext4_group_desc *gdp,
61 struct buffer_head *bh, int sz) 61 struct buffer_head *bh)
62{ 62{
63 __u32 hi; 63 __u32 hi;
64 __u32 provided, calculated; 64 __u32 provided, calculated;
65 struct ext4_sb_info *sbi = EXT4_SB(sb); 65 struct ext4_sb_info *sbi = EXT4_SB(sb);
66 int sz = EXT4_CLUSTERS_PER_GROUP(sb) / 8;
66 67
67 if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, 68 if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
68 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) 69 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
@@ -84,8 +85,9 @@ int ext4_block_bitmap_csum_verify(struct super_block *sb, ext4_group_t group,
84 85
85void ext4_block_bitmap_csum_set(struct super_block *sb, ext4_group_t group, 86void ext4_block_bitmap_csum_set(struct super_block *sb, ext4_group_t group,
86 struct ext4_group_desc *gdp, 87 struct ext4_group_desc *gdp,
87 struct buffer_head *bh, int sz) 88 struct buffer_head *bh)
88{ 89{
90 int sz = EXT4_CLUSTERS_PER_GROUP(sb) / 8;
89 __u32 csum; 91 __u32 csum;
90 struct ext4_sb_info *sbi = EXT4_SB(sb); 92 struct ext4_sb_info *sbi = EXT4_SB(sb);
91 93
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 3ab2539b7b2e..3c20de1d59d0 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1882,10 +1882,10 @@ int ext4_inode_bitmap_csum_verify(struct super_block *sb, ext4_group_t group,
1882 struct buffer_head *bh, int sz); 1882 struct buffer_head *bh, int sz);
1883void ext4_block_bitmap_csum_set(struct super_block *sb, ext4_group_t group, 1883void ext4_block_bitmap_csum_set(struct super_block *sb, ext4_group_t group,
1884 struct ext4_group_desc *gdp, 1884 struct ext4_group_desc *gdp,
1885 struct buffer_head *bh, int sz); 1885 struct buffer_head *bh);
1886int ext4_block_bitmap_csum_verify(struct super_block *sb, ext4_group_t group, 1886int ext4_block_bitmap_csum_verify(struct super_block *sb, ext4_group_t group,
1887 struct ext4_group_desc *gdp, 1887 struct ext4_group_desc *gdp,
1888 struct buffer_head *bh, int sz); 1888 struct buffer_head *bh);
1889 1889
1890/* balloc.c */ 1890/* balloc.c */
1891extern void ext4_validate_block_bitmap(struct super_block *sb, 1891extern void ext4_validate_block_bitmap(struct super_block *sb,
@@ -2063,8 +2063,7 @@ extern int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count);
2063extern int ext4_calculate_overhead(struct super_block *sb); 2063extern int ext4_calculate_overhead(struct super_block *sb);
2064extern int ext4_superblock_csum_verify(struct super_block *sb, 2064extern int ext4_superblock_csum_verify(struct super_block *sb,
2065 struct ext4_super_block *es); 2065 struct ext4_super_block *es);
2066extern void ext4_superblock_csum_set(struct super_block *sb, 2066extern void ext4_superblock_csum_set(struct super_block *sb);
2067 struct ext4_super_block *es);
2068extern void *ext4_kvmalloc(size_t size, gfp_t flags); 2067extern void *ext4_kvmalloc(size_t size, gfp_t flags);
2069extern void *ext4_kvzalloc(size_t size, gfp_t flags); 2068extern void *ext4_kvzalloc(size_t size, gfp_t flags);
2070extern void ext4_kvfree(void *ptr); 2069extern void ext4_kvfree(void *ptr);
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index bfa65b49d424..b4323ba846b5 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -143,17 +143,13 @@ int __ext4_handle_dirty_super(const char *where, unsigned int line,
143 struct buffer_head *bh = EXT4_SB(sb)->s_sbh; 143 struct buffer_head *bh = EXT4_SB(sb)->s_sbh;
144 int err = 0; 144 int err = 0;
145 145
146 ext4_superblock_csum_set(sb);
146 if (ext4_handle_valid(handle)) { 147 if (ext4_handle_valid(handle)) {
147 ext4_superblock_csum_set(sb,
148 (struct ext4_super_block *)bh->b_data);
149 err = jbd2_journal_dirty_metadata(handle, bh); 148 err = jbd2_journal_dirty_metadata(handle, bh);
150 if (err) 149 if (err)
151 ext4_journal_abort_handle(where, line, __func__, 150 ext4_journal_abort_handle(where, line, __func__,
152 bh, handle, err); 151 bh, handle, err);
153 } else { 152 } else
154 ext4_superblock_csum_set(sb,
155 (struct ext4_super_block *)bh->b_data);
156 mark_buffer_dirty(bh); 153 mark_buffer_dirty(bh);
157 }
158 return err; 154 return err;
159} 155}
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 1c94cca35ed1..7011ac967208 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -52,6 +52,9 @@
52#define EXT4_EXT_MARK_UNINIT1 0x2 /* mark first half uninitialized */ 52#define EXT4_EXT_MARK_UNINIT1 0x2 /* mark first half uninitialized */
53#define EXT4_EXT_MARK_UNINIT2 0x4 /* mark second half uninitialized */ 53#define EXT4_EXT_MARK_UNINIT2 0x4 /* mark second half uninitialized */
54 54
55#define EXT4_EXT_DATA_VALID1 0x8 /* first half contains valid data */
56#define EXT4_EXT_DATA_VALID2 0x10 /* second half contains valid data */
57
55static __le32 ext4_extent_block_csum(struct inode *inode, 58static __le32 ext4_extent_block_csum(struct inode *inode,
56 struct ext4_extent_header *eh) 59 struct ext4_extent_header *eh)
57{ 60{
@@ -2914,6 +2917,9 @@ static int ext4_split_extent_at(handle_t *handle,
2914 unsigned int ee_len, depth; 2917 unsigned int ee_len, depth;
2915 int err = 0; 2918 int err = 0;
2916 2919
2920 BUG_ON((split_flag & (EXT4_EXT_DATA_VALID1 | EXT4_EXT_DATA_VALID2)) ==
2921 (EXT4_EXT_DATA_VALID1 | EXT4_EXT_DATA_VALID2));
2922
2917 ext_debug("ext4_split_extents_at: inode %lu, logical" 2923 ext_debug("ext4_split_extents_at: inode %lu, logical"
2918 "block %llu\n", inode->i_ino, (unsigned long long)split); 2924 "block %llu\n", inode->i_ino, (unsigned long long)split);
2919 2925
@@ -2972,7 +2978,14 @@ static int ext4_split_extent_at(handle_t *handle,
2972 2978
2973 err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); 2979 err = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
2974 if (err == -ENOSPC && (EXT4_EXT_MAY_ZEROOUT & split_flag)) { 2980 if (err == -ENOSPC && (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
2975 err = ext4_ext_zeroout(inode, &orig_ex); 2981 if (split_flag & (EXT4_EXT_DATA_VALID1|EXT4_EXT_DATA_VALID2)) {
2982 if (split_flag & EXT4_EXT_DATA_VALID1)
2983 err = ext4_ext_zeroout(inode, ex2);
2984 else
2985 err = ext4_ext_zeroout(inode, ex);
2986 } else
2987 err = ext4_ext_zeroout(inode, &orig_ex);
2988
2976 if (err) 2989 if (err)
2977 goto fix_extent_len; 2990 goto fix_extent_len;
2978 /* update the extent length and mark as initialized */ 2991 /* update the extent length and mark as initialized */
@@ -3025,12 +3038,13 @@ static int ext4_split_extent(handle_t *handle,
3025 uninitialized = ext4_ext_is_uninitialized(ex); 3038 uninitialized = ext4_ext_is_uninitialized(ex);
3026 3039
3027 if (map->m_lblk + map->m_len < ee_block + ee_len) { 3040 if (map->m_lblk + map->m_len < ee_block + ee_len) {
3028 split_flag1 = split_flag & EXT4_EXT_MAY_ZEROOUT ? 3041 split_flag1 = split_flag & EXT4_EXT_MAY_ZEROOUT;
3029 EXT4_EXT_MAY_ZEROOUT : 0;
3030 flags1 = flags | EXT4_GET_BLOCKS_PRE_IO; 3042 flags1 = flags | EXT4_GET_BLOCKS_PRE_IO;
3031 if (uninitialized) 3043 if (uninitialized)
3032 split_flag1 |= EXT4_EXT_MARK_UNINIT1 | 3044 split_flag1 |= EXT4_EXT_MARK_UNINIT1 |
3033 EXT4_EXT_MARK_UNINIT2; 3045 EXT4_EXT_MARK_UNINIT2;
3046 if (split_flag & EXT4_EXT_DATA_VALID2)
3047 split_flag1 |= EXT4_EXT_DATA_VALID1;
3034 err = ext4_split_extent_at(handle, inode, path, 3048 err = ext4_split_extent_at(handle, inode, path,
3035 map->m_lblk + map->m_len, split_flag1, flags1); 3049 map->m_lblk + map->m_len, split_flag1, flags1);
3036 if (err) 3050 if (err)
@@ -3043,8 +3057,8 @@ static int ext4_split_extent(handle_t *handle,
3043 return PTR_ERR(path); 3057 return PTR_ERR(path);
3044 3058
3045 if (map->m_lblk >= ee_block) { 3059 if (map->m_lblk >= ee_block) {
3046 split_flag1 = split_flag & EXT4_EXT_MAY_ZEROOUT ? 3060 split_flag1 = split_flag & (EXT4_EXT_MAY_ZEROOUT |
3047 EXT4_EXT_MAY_ZEROOUT : 0; 3061 EXT4_EXT_DATA_VALID2);
3048 if (uninitialized) 3062 if (uninitialized)
3049 split_flag1 |= EXT4_EXT_MARK_UNINIT1; 3063 split_flag1 |= EXT4_EXT_MARK_UNINIT1;
3050 if (split_flag & EXT4_EXT_MARK_UNINIT2) 3064 if (split_flag & EXT4_EXT_MARK_UNINIT2)
@@ -3323,26 +3337,47 @@ static int ext4_split_unwritten_extents(handle_t *handle,
3323 3337
3324 split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0; 3338 split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0;
3325 split_flag |= EXT4_EXT_MARK_UNINIT2; 3339 split_flag |= EXT4_EXT_MARK_UNINIT2;
3326 3340 if (flags & EXT4_GET_BLOCKS_CONVERT)
3341 split_flag |= EXT4_EXT_DATA_VALID2;
3327 flags |= EXT4_GET_BLOCKS_PRE_IO; 3342 flags |= EXT4_GET_BLOCKS_PRE_IO;
3328 return ext4_split_extent(handle, inode, path, map, split_flag, flags); 3343 return ext4_split_extent(handle, inode, path, map, split_flag, flags);
3329} 3344}
3330 3345
3331static int ext4_convert_unwritten_extents_endio(handle_t *handle, 3346static int ext4_convert_unwritten_extents_endio(handle_t *handle,
3332 struct inode *inode, 3347 struct inode *inode,
3333 struct ext4_ext_path *path) 3348 struct ext4_map_blocks *map,
3349 struct ext4_ext_path *path)
3334{ 3350{
3335 struct ext4_extent *ex; 3351 struct ext4_extent *ex;
3352 ext4_lblk_t ee_block;
3353 unsigned int ee_len;
3336 int depth; 3354 int depth;
3337 int err = 0; 3355 int err = 0;
3338 3356
3339 depth = ext_depth(inode); 3357 depth = ext_depth(inode);
3340 ex = path[depth].p_ext; 3358 ex = path[depth].p_ext;
3359 ee_block = le32_to_cpu(ex->ee_block);
3360 ee_len = ext4_ext_get_actual_len(ex);
3341 3361
3342 ext_debug("ext4_convert_unwritten_extents_endio: inode %lu, logical" 3362 ext_debug("ext4_convert_unwritten_extents_endio: inode %lu, logical"
3343 "block %llu, max_blocks %u\n", inode->i_ino, 3363 "block %llu, max_blocks %u\n", inode->i_ino,
3344 (unsigned long long)le32_to_cpu(ex->ee_block), 3364 (unsigned long long)ee_block, ee_len);
3345 ext4_ext_get_actual_len(ex)); 3365
3366 /* If extent is larger than requested then split is required */
3367 if (ee_block != map->m_lblk || ee_len > map->m_len) {
3368 err = ext4_split_unwritten_extents(handle, inode, map, path,
3369 EXT4_GET_BLOCKS_CONVERT);
3370 if (err < 0)
3371 goto out;
3372 ext4_ext_drop_refs(path);
3373 path = ext4_ext_find_extent(inode, map->m_lblk, path);
3374 if (IS_ERR(path)) {
3375 err = PTR_ERR(path);
3376 goto out;
3377 }
3378 depth = ext_depth(inode);
3379 ex = path[depth].p_ext;
3380 }
3346 3381
3347 err = ext4_ext_get_access(handle, inode, path + depth); 3382 err = ext4_ext_get_access(handle, inode, path + depth);
3348 if (err) 3383 if (err)
@@ -3652,7 +3687,7 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
3652 } 3687 }
3653 /* IO end_io complete, convert the filled extent to written */ 3688 /* IO end_io complete, convert the filled extent to written */
3654 if ((flags & EXT4_GET_BLOCKS_CONVERT)) { 3689 if ((flags & EXT4_GET_BLOCKS_CONVERT)) {
3655 ret = ext4_convert_unwritten_extents_endio(handle, inode, 3690 ret = ext4_convert_unwritten_extents_endio(handle, inode, map,
3656 path); 3691 path);
3657 if (ret >= 0) { 3692 if (ret >= 0) {
3658 ext4_update_inode_fsync_trans(handle, inode, 1); 3693 ext4_update_inode_fsync_trans(handle, inode, 1);
@@ -4428,6 +4463,9 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
4428 */ 4463 */
4429 if (len <= EXT_UNINIT_MAX_LEN << blkbits) 4464 if (len <= EXT_UNINIT_MAX_LEN << blkbits)
4430 flags |= EXT4_GET_BLOCKS_NO_NORMALIZE; 4465 flags |= EXT4_GET_BLOCKS_NO_NORMALIZE;
4466
4467 /* Prevent race condition between unwritten */
4468 ext4_flush_unwritten_io(inode);
4431retry: 4469retry:
4432 while (ret >= 0 && ret < max_blocks) { 4470 while (ret >= 0 && ret < max_blocks) {
4433 map.m_lblk = map.m_lblk + ret; 4471 map.m_lblk = map.m_lblk + ret;
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index fa36372f3fdf..3a100e7a62a8 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -725,6 +725,10 @@ repeat_in_this_group:
725 "inode=%lu", ino + 1); 725 "inode=%lu", ino + 1);
726 continue; 726 continue;
727 } 727 }
728 BUFFER_TRACE(inode_bitmap_bh, "get_write_access");
729 err = ext4_journal_get_write_access(handle, inode_bitmap_bh);
730 if (err)
731 goto fail;
728 ext4_lock_group(sb, group); 732 ext4_lock_group(sb, group);
729 ret2 = ext4_test_and_set_bit(ino, inode_bitmap_bh->b_data); 733 ret2 = ext4_test_and_set_bit(ino, inode_bitmap_bh->b_data);
730 ext4_unlock_group(sb, group); 734 ext4_unlock_group(sb, group);
@@ -738,6 +742,11 @@ repeat_in_this_group:
738 goto out; 742 goto out;
739 743
740got: 744got:
745 BUFFER_TRACE(inode_bitmap_bh, "call ext4_handle_dirty_metadata");
746 err = ext4_handle_dirty_metadata(handle, NULL, inode_bitmap_bh);
747 if (err)
748 goto fail;
749
741 /* We may have to initialize the block bitmap if it isn't already */ 750 /* We may have to initialize the block bitmap if it isn't already */
742 if (ext4_has_group_desc_csum(sb) && 751 if (ext4_has_group_desc_csum(sb) &&
743 gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { 752 gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
@@ -762,9 +771,7 @@ got:
762 ext4_free_group_clusters_set(sb, gdp, 771 ext4_free_group_clusters_set(sb, gdp,
763 ext4_free_clusters_after_init(sb, group, gdp)); 772 ext4_free_clusters_after_init(sb, group, gdp));
764 ext4_block_bitmap_csum_set(sb, group, gdp, 773 ext4_block_bitmap_csum_set(sb, group, gdp,
765 block_bitmap_bh, 774 block_bitmap_bh);
766 EXT4_BLOCKS_PER_GROUP(sb) /
767 8);
768 ext4_group_desc_csum_set(sb, group, gdp); 775 ext4_group_desc_csum_set(sb, group, gdp);
769 } 776 }
770 ext4_unlock_group(sb, group); 777 ext4_unlock_group(sb, group);
@@ -773,11 +780,6 @@ got:
773 goto fail; 780 goto fail;
774 } 781 }
775 782
776 BUFFER_TRACE(inode_bitmap_bh, "get_write_access");
777 err = ext4_journal_get_write_access(handle, inode_bitmap_bh);
778 if (err)
779 goto fail;
780
781 BUFFER_TRACE(group_desc_bh, "get_write_access"); 783 BUFFER_TRACE(group_desc_bh, "get_write_access");
782 err = ext4_journal_get_write_access(handle, group_desc_bh); 784 err = ext4_journal_get_write_access(handle, group_desc_bh);
783 if (err) 785 if (err)
@@ -825,11 +827,6 @@ got:
825 } 827 }
826 ext4_unlock_group(sb, group); 828 ext4_unlock_group(sb, group);
827 829
828 BUFFER_TRACE(inode_bitmap_bh, "call ext4_handle_dirty_metadata");
829 err = ext4_handle_dirty_metadata(handle, NULL, inode_bitmap_bh);
830 if (err)
831 goto fail;
832
833 BUFFER_TRACE(group_desc_bh, "call ext4_handle_dirty_metadata"); 830 BUFFER_TRACE(group_desc_bh, "call ext4_handle_dirty_metadata");
834 err = ext4_handle_dirty_metadata(handle, NULL, group_desc_bh); 831 err = ext4_handle_dirty_metadata(handle, NULL, group_desc_bh);
835 if (err) 832 if (err)
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index f8b27bf80aca..526e55358606 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2805,8 +2805,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
2805 } 2805 }
2806 len = ext4_free_group_clusters(sb, gdp) - ac->ac_b_ex.fe_len; 2806 len = ext4_free_group_clusters(sb, gdp) - ac->ac_b_ex.fe_len;
2807 ext4_free_group_clusters_set(sb, gdp, len); 2807 ext4_free_group_clusters_set(sb, gdp, len);
2808 ext4_block_bitmap_csum_set(sb, ac->ac_b_ex.fe_group, gdp, bitmap_bh, 2808 ext4_block_bitmap_csum_set(sb, ac->ac_b_ex.fe_group, gdp, bitmap_bh);
2809 EXT4_BLOCKS_PER_GROUP(sb) / 8);
2810 ext4_group_desc_csum_set(sb, ac->ac_b_ex.fe_group, gdp); 2809 ext4_group_desc_csum_set(sb, ac->ac_b_ex.fe_group, gdp);
2811 2810
2812 ext4_unlock_group(sb, ac->ac_b_ex.fe_group); 2811 ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
@@ -4666,8 +4665,7 @@ do_more:
4666 4665
4667 ret = ext4_free_group_clusters(sb, gdp) + count_clusters; 4666 ret = ext4_free_group_clusters(sb, gdp) + count_clusters;
4668 ext4_free_group_clusters_set(sb, gdp, ret); 4667 ext4_free_group_clusters_set(sb, gdp, ret);
4669 ext4_block_bitmap_csum_set(sb, block_group, gdp, bitmap_bh, 4668 ext4_block_bitmap_csum_set(sb, block_group, gdp, bitmap_bh);
4670 EXT4_BLOCKS_PER_GROUP(sb) / 8);
4671 ext4_group_desc_csum_set(sb, block_group, gdp); 4669 ext4_group_desc_csum_set(sb, block_group, gdp);
4672 ext4_unlock_group(sb, block_group); 4670 ext4_unlock_group(sb, block_group);
4673 percpu_counter_add(&sbi->s_freeclusters_counter, count_clusters); 4671 percpu_counter_add(&sbi->s_freeclusters_counter, count_clusters);
@@ -4811,8 +4809,7 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
4811 mb_free_blocks(NULL, &e4b, bit, count); 4809 mb_free_blocks(NULL, &e4b, bit, count);
4812 blk_free_count = blocks_freed + ext4_free_group_clusters(sb, desc); 4810 blk_free_count = blocks_freed + ext4_free_group_clusters(sb, desc);
4813 ext4_free_group_clusters_set(sb, desc, blk_free_count); 4811 ext4_free_group_clusters_set(sb, desc, blk_free_count);
4814 ext4_block_bitmap_csum_set(sb, block_group, desc, bitmap_bh, 4812 ext4_block_bitmap_csum_set(sb, block_group, desc, bitmap_bh);
4815 EXT4_BLOCKS_PER_GROUP(sb) / 8);
4816 ext4_group_desc_csum_set(sb, block_group, desc); 4813 ext4_group_desc_csum_set(sb, block_group, desc);
4817 ext4_unlock_group(sb, block_group); 4814 ext4_unlock_group(sb, block_group);
4818 percpu_counter_add(&sbi->s_freeclusters_counter, 4815 percpu_counter_add(&sbi->s_freeclusters_counter,
@@ -4993,8 +4990,9 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
4993 minlen = EXT4_NUM_B2C(EXT4_SB(sb), 4990 minlen = EXT4_NUM_B2C(EXT4_SB(sb),
4994 range->minlen >> sb->s_blocksize_bits); 4991 range->minlen >> sb->s_blocksize_bits);
4995 4992
4996 if (unlikely(minlen > EXT4_CLUSTERS_PER_GROUP(sb)) || 4993 if (minlen > EXT4_CLUSTERS_PER_GROUP(sb) ||
4997 unlikely(start >= max_blks)) 4994 start >= max_blks ||
4995 range->len < sb->s_blocksize)
4998 return -EINVAL; 4996 return -EINVAL;
4999 if (end >= max_blks) 4997 if (end >= max_blks)
5000 end = max_blks - 1; 4998 end = max_blks - 1;
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 7a75e1086961..47bf06a2765d 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -1212,8 +1212,7 @@ static int ext4_set_bitmap_checksums(struct super_block *sb,
1212 bh = ext4_get_bitmap(sb, group_data->block_bitmap); 1212 bh = ext4_get_bitmap(sb, group_data->block_bitmap);
1213 if (!bh) 1213 if (!bh)
1214 return -EIO; 1214 return -EIO;
1215 ext4_block_bitmap_csum_set(sb, group, gdp, bh, 1215 ext4_block_bitmap_csum_set(sb, group, gdp, bh);
1216 EXT4_BLOCKS_PER_GROUP(sb) / 8);
1217 brelse(bh); 1216 brelse(bh);
1218 1217
1219 return 0; 1218 return 0;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 7265a0367476..80928f716850 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -143,9 +143,10 @@ int ext4_superblock_csum_verify(struct super_block *sb,
143 return es->s_checksum == ext4_superblock_csum(sb, es); 143 return es->s_checksum == ext4_superblock_csum(sb, es);
144} 144}
145 145
146void ext4_superblock_csum_set(struct super_block *sb, 146void ext4_superblock_csum_set(struct super_block *sb)
147 struct ext4_super_block *es)
148{ 147{
148 struct ext4_super_block *es = EXT4_SB(sb)->s_es;
149
149 if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, 150 if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
150 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) 151 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
151 return; 152 return;
@@ -1963,7 +1964,7 @@ static int ext4_fill_flex_info(struct super_block *sb)
1963 sbi->s_log_groups_per_flex = 0; 1964 sbi->s_log_groups_per_flex = 0;
1964 return 1; 1965 return 1;
1965 } 1966 }
1966 groups_per_flex = 1 << sbi->s_log_groups_per_flex; 1967 groups_per_flex = 1U << sbi->s_log_groups_per_flex;
1967 1968
1968 err = ext4_alloc_flex_bg_array(sb, sbi->s_groups_count); 1969 err = ext4_alloc_flex_bg_array(sb, sbi->s_groups_count);
1969 if (err) 1970 if (err)
@@ -4381,7 +4382,7 @@ static int ext4_commit_super(struct super_block *sb, int sync)
4381 cpu_to_le32(percpu_counter_sum_positive( 4382 cpu_to_le32(percpu_counter_sum_positive(
4382 &EXT4_SB(sb)->s_freeinodes_counter)); 4383 &EXT4_SB(sb)->s_freeinodes_counter));
4383 BUFFER_TRACE(sbh, "marking dirty"); 4384 BUFFER_TRACE(sbh, "marking dirty");
4384 ext4_superblock_csum_set(sb, es); 4385 ext4_superblock_csum_set(sb);
4385 mark_buffer_dirty(sbh); 4386 mark_buffer_dirty(sbh);
4386 if (sync) { 4387 if (sync) {
4387 error = sync_dirty_buffer(sbh); 4388 error = sync_dirty_buffer(sbh);
diff --git a/fs/file.c b/fs/file.c
index d3b5fa80b71b..7cb71b992603 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -685,7 +685,6 @@ void do_close_on_exec(struct files_struct *files)
685 struct fdtable *fdt; 685 struct fdtable *fdt;
686 686
687 /* exec unshares first */ 687 /* exec unshares first */
688 BUG_ON(atomic_read(&files->count) != 1);
689 spin_lock(&files->file_lock); 688 spin_lock(&files->file_lock);
690 for (i = 0; ; i++) { 689 for (i = 0; ; i++) {
691 unsigned long set; 690 unsigned long set;
@@ -900,7 +899,7 @@ int replace_fd(unsigned fd, struct file *file, unsigned flags)
900 return __close_fd(files, fd); 899 return __close_fd(files, fd);
901 900
902 if (fd >= rlimit(RLIMIT_NOFILE)) 901 if (fd >= rlimit(RLIMIT_NOFILE))
903 return -EMFILE; 902 return -EBADF;
904 903
905 spin_lock(&files->file_lock); 904 spin_lock(&files->file_lock);
906 err = expand_files(files, fd); 905 err = expand_files(files, fd);
@@ -926,7 +925,7 @@ SYSCALL_DEFINE3(dup3, unsigned int, oldfd, unsigned int, newfd, int, flags)
926 return -EINVAL; 925 return -EINVAL;
927 926
928 if (newfd >= rlimit(RLIMIT_NOFILE)) 927 if (newfd >= rlimit(RLIMIT_NOFILE))
929 return -EMFILE; 928 return -EBADF;
930 929
931 spin_lock(&files->file_lock); 930 spin_lock(&files->file_lock);
932 err = expand_files(files, newfd); 931 err = expand_files(files, newfd);
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 51ea267d444c..3e3422f7f0a4 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -228,6 +228,8 @@ static void requeue_io(struct inode *inode, struct bdi_writeback *wb)
228static void inode_sync_complete(struct inode *inode) 228static void inode_sync_complete(struct inode *inode)
229{ 229{
230 inode->i_state &= ~I_SYNC; 230 inode->i_state &= ~I_SYNC;
231 /* If inode is clean an unused, put it into LRU now... */
232 inode_add_lru(inode);
231 /* Waiters must see I_SYNC cleared before being woken up */ 233 /* Waiters must see I_SYNC cleared before being woken up */
232 smp_mb(); 234 smp_mb();
233 wake_up_bit(&inode->i_state, __I_SYNC); 235 wake_up_bit(&inode->i_state, __I_SYNC);
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 0def0504afc1..e056b4ce4877 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -516,15 +516,13 @@ static int gfs2_mmap(struct file *file, struct vm_area_struct *vma)
516 struct gfs2_holder i_gh; 516 struct gfs2_holder i_gh;
517 int error; 517 int error;
518 518
519 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh); 519 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY,
520 error = gfs2_glock_nq(&i_gh); 520 &i_gh);
521 if (error == 0) {
522 file_accessed(file);
523 gfs2_glock_dq(&i_gh);
524 }
525 gfs2_holder_uninit(&i_gh);
526 if (error) 521 if (error)
527 return error; 522 return error;
523 /* grab lock to update inode */
524 gfs2_glock_dq_uninit(&i_gh);
525 file_accessed(file);
528 } 526 }
529 vma->vm_ops = &gfs2_vm_ops; 527 vma->vm_ops = &gfs2_vm_ops;
530 528
@@ -677,10 +675,8 @@ static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
677 size_t writesize = iov_length(iov, nr_segs); 675 size_t writesize = iov_length(iov, nr_segs);
678 struct dentry *dentry = file->f_dentry; 676 struct dentry *dentry = file->f_dentry;
679 struct gfs2_inode *ip = GFS2_I(dentry->d_inode); 677 struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
680 struct gfs2_sbd *sdp;
681 int ret; 678 int ret;
682 679
683 sdp = GFS2_SB(file->f_mapping->host);
684 ret = gfs2_rs_alloc(ip); 680 ret = gfs2_rs_alloc(ip);
685 if (ret) 681 if (ret)
686 return ret; 682 return ret;
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 8ff95a2d54ee..9ceccb1595a3 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -393,12 +393,10 @@ static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
393 struct gfs2_meta_header *mh; 393 struct gfs2_meta_header *mh;
394 struct gfs2_trans *tr; 394 struct gfs2_trans *tr;
395 395
396 lock_buffer(bd->bd_bh);
397 gfs2_log_lock(sdp);
398 tr = current->journal_info; 396 tr = current->journal_info;
399 tr->tr_touched = 1; 397 tr->tr_touched = 1;
400 if (!list_empty(&bd->bd_list)) 398 if (!list_empty(&bd->bd_list))
401 goto out; 399 return;
402 set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags); 400 set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
403 set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags); 401 set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags);
404 mh = (struct gfs2_meta_header *)bd->bd_bh->b_data; 402 mh = (struct gfs2_meta_header *)bd->bd_bh->b_data;
@@ -414,9 +412,6 @@ static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
414 sdp->sd_log_num_buf++; 412 sdp->sd_log_num_buf++;
415 list_add(&bd->bd_list, &sdp->sd_log_le_buf); 413 list_add(&bd->bd_list, &sdp->sd_log_le_buf);
416 tr->tr_num_buf_new++; 414 tr->tr_num_buf_new++;
417out:
418 gfs2_log_unlock(sdp);
419 unlock_buffer(bd->bd_bh);
420} 415}
421 416
422static void gfs2_check_magic(struct buffer_head *bh) 417static void gfs2_check_magic(struct buffer_head *bh)
@@ -621,7 +616,6 @@ static void revoke_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
621 616
622static void revoke_lo_before_commit(struct gfs2_sbd *sdp) 617static void revoke_lo_before_commit(struct gfs2_sbd *sdp)
623{ 618{
624 struct gfs2_log_descriptor *ld;
625 struct gfs2_meta_header *mh; 619 struct gfs2_meta_header *mh;
626 unsigned int offset; 620 unsigned int offset;
627 struct list_head *head = &sdp->sd_log_le_revoke; 621 struct list_head *head = &sdp->sd_log_le_revoke;
@@ -634,7 +628,6 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp)
634 628
635 length = gfs2_struct2blk(sdp, sdp->sd_log_num_revoke, sizeof(u64)); 629 length = gfs2_struct2blk(sdp, sdp->sd_log_num_revoke, sizeof(u64));
636 page = gfs2_get_log_desc(sdp, GFS2_LOG_DESC_REVOKE, length, sdp->sd_log_num_revoke); 630 page = gfs2_get_log_desc(sdp, GFS2_LOG_DESC_REVOKE, length, sdp->sd_log_num_revoke);
637 ld = page_address(page);
638 offset = sizeof(struct gfs2_log_descriptor); 631 offset = sizeof(struct gfs2_log_descriptor);
639 632
640 list_for_each_entry(bd, head, bd_list) { 633 list_for_each_entry(bd, head, bd_list) {
@@ -777,12 +770,10 @@ static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
777 struct address_space *mapping = bd->bd_bh->b_page->mapping; 770 struct address_space *mapping = bd->bd_bh->b_page->mapping;
778 struct gfs2_inode *ip = GFS2_I(mapping->host); 771 struct gfs2_inode *ip = GFS2_I(mapping->host);
779 772
780 lock_buffer(bd->bd_bh);
781 gfs2_log_lock(sdp);
782 if (tr) 773 if (tr)
783 tr->tr_touched = 1; 774 tr->tr_touched = 1;
784 if (!list_empty(&bd->bd_list)) 775 if (!list_empty(&bd->bd_list))
785 goto out; 776 return;
786 set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags); 777 set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
787 set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags); 778 set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags);
788 if (gfs2_is_jdata(ip)) { 779 if (gfs2_is_jdata(ip)) {
@@ -793,9 +784,6 @@ static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
793 } else { 784 } else {
794 list_add_tail(&bd->bd_list, &sdp->sd_log_le_ordered); 785 list_add_tail(&bd->bd_list, &sdp->sd_log_le_ordered);
795 } 786 }
796out:
797 gfs2_log_unlock(sdp);
798 unlock_buffer(bd->bd_bh);
799} 787}
800 788
801/** 789/**
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 40c4b0d42fa8..c5af8e18f27a 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -497,8 +497,11 @@ int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid)
497 struct gfs2_quota_data **qd; 497 struct gfs2_quota_data **qd;
498 int error; 498 int error;
499 499
500 if (ip->i_res == NULL) 500 if (ip->i_res == NULL) {
501 gfs2_rs_alloc(ip); 501 error = gfs2_rs_alloc(ip);
502 if (error)
503 return error;
504 }
502 505
503 qd = ip->i_res->rs_qa_qd; 506 qd = ip->i_res->rs_qa_qd;
504 507
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 3cc402ce6fea..38fe18f2f055 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -553,7 +553,6 @@ void gfs2_free_clones(struct gfs2_rgrpd *rgd)
553 */ 553 */
554int gfs2_rs_alloc(struct gfs2_inode *ip) 554int gfs2_rs_alloc(struct gfs2_inode *ip)
555{ 555{
556 int error = 0;
557 struct gfs2_blkreserv *res; 556 struct gfs2_blkreserv *res;
558 557
559 if (ip->i_res) 558 if (ip->i_res)
@@ -561,7 +560,7 @@ int gfs2_rs_alloc(struct gfs2_inode *ip)
561 560
562 res = kmem_cache_zalloc(gfs2_rsrv_cachep, GFP_NOFS); 561 res = kmem_cache_zalloc(gfs2_rsrv_cachep, GFP_NOFS);
563 if (!res) 562 if (!res)
564 error = -ENOMEM; 563 return -ENOMEM;
565 564
566 RB_CLEAR_NODE(&res->rs_node); 565 RB_CLEAR_NODE(&res->rs_node);
567 566
@@ -571,7 +570,7 @@ int gfs2_rs_alloc(struct gfs2_inode *ip)
571 else 570 else
572 ip->i_res = res; 571 ip->i_res = res;
573 up_write(&ip->i_rw_mutex); 572 up_write(&ip->i_rw_mutex);
574 return error; 573 return 0;
575} 574}
576 575
577static void dump_rs(struct seq_file *seq, const struct gfs2_blkreserv *rs) 576static void dump_rs(struct seq_file *seq, const struct gfs2_blkreserv *rs)
@@ -1263,7 +1262,9 @@ int gfs2_fitrim(struct file *filp, void __user *argp)
1263 int ret = 0; 1262 int ret = 0;
1264 u64 amt; 1263 u64 amt;
1265 u64 trimmed = 0; 1264 u64 trimmed = 0;
1265 u64 start, end, minlen;
1266 unsigned int x; 1266 unsigned int x;
1267 unsigned bs_shift = sdp->sd_sb.sb_bsize_shift;
1267 1268
1268 if (!capable(CAP_SYS_ADMIN)) 1269 if (!capable(CAP_SYS_ADMIN))
1269 return -EPERM; 1270 return -EPERM;
@@ -1271,19 +1272,25 @@ int gfs2_fitrim(struct file *filp, void __user *argp)
1271 if (!blk_queue_discard(q)) 1272 if (!blk_queue_discard(q))
1272 return -EOPNOTSUPP; 1273 return -EOPNOTSUPP;
1273 1274
1274 if (argp == NULL) { 1275 if (copy_from_user(&r, argp, sizeof(r)))
1275 r.start = 0;
1276 r.len = ULLONG_MAX;
1277 r.minlen = 0;
1278 } else if (copy_from_user(&r, argp, sizeof(r)))
1279 return -EFAULT; 1276 return -EFAULT;
1280 1277
1281 ret = gfs2_rindex_update(sdp); 1278 ret = gfs2_rindex_update(sdp);
1282 if (ret) 1279 if (ret)
1283 return ret; 1280 return ret;
1284 1281
1285 rgd = gfs2_blk2rgrpd(sdp, r.start, 0); 1282 start = r.start >> bs_shift;
1286 rgd_end = gfs2_blk2rgrpd(sdp, r.start + r.len, 0); 1283 end = start + (r.len >> bs_shift);
1284 minlen = max_t(u64, r.minlen,
1285 q->limits.discard_granularity) >> bs_shift;
1286
1287 rgd = gfs2_blk2rgrpd(sdp, start, 0);
1288 rgd_end = gfs2_blk2rgrpd(sdp, end - 1, 0);
1289
1290 if (end <= start ||
1291 minlen > sdp->sd_max_rg_data ||
1292 start > rgd_end->rd_data0 + rgd_end->rd_data)
1293 return -EINVAL;
1287 1294
1288 while (1) { 1295 while (1) {
1289 1296
@@ -1295,7 +1302,9 @@ int gfs2_fitrim(struct file *filp, void __user *argp)
1295 /* Trim each bitmap in the rgrp */ 1302 /* Trim each bitmap in the rgrp */
1296 for (x = 0; x < rgd->rd_length; x++) { 1303 for (x = 0; x < rgd->rd_length; x++) {
1297 struct gfs2_bitmap *bi = rgd->rd_bits + x; 1304 struct gfs2_bitmap *bi = rgd->rd_bits + x;
1298 ret = gfs2_rgrp_send_discards(sdp, rgd->rd_data0, NULL, bi, r.minlen, &amt); 1305 ret = gfs2_rgrp_send_discards(sdp,
1306 rgd->rd_data0, NULL, bi, minlen,
1307 &amt);
1299 if (ret) { 1308 if (ret) {
1300 gfs2_glock_dq_uninit(&gh); 1309 gfs2_glock_dq_uninit(&gh);
1301 goto out; 1310 goto out;
@@ -1324,7 +1333,7 @@ int gfs2_fitrim(struct file *filp, void __user *argp)
1324 1333
1325out: 1334out:
1326 r.len = trimmed << 9; 1335 r.len = trimmed << 9;
1327 if (argp && copy_to_user(argp, &r, sizeof(r))) 1336 if (copy_to_user(argp, &r, sizeof(r)))
1328 return -EFAULT; 1337 return -EFAULT;
1329 1338
1330 return ret; 1339 return ret;
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index bc737261f234..d6488674d916 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -810,7 +810,8 @@ static void gfs2_dirty_inode(struct inode *inode, int flags)
810 return; 810 return;
811 } 811 }
812 need_unlock = 1; 812 need_unlock = 1;
813 } 813 } else if (WARN_ON_ONCE(ip->i_gl->gl_state != LM_ST_EXCLUSIVE))
814 return;
814 815
815 if (current->journal_info == NULL) { 816 if (current->journal_info == NULL) {
816 ret = gfs2_trans_begin(sdp, RES_DINODE, 0); 817 ret = gfs2_trans_begin(sdp, RES_DINODE, 0);
diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c
index adbd27875ef9..413627072f36 100644
--- a/fs/gfs2/trans.c
+++ b/fs/gfs2/trans.c
@@ -155,14 +155,22 @@ void gfs2_trans_add_bh(struct gfs2_glock *gl, struct buffer_head *bh, int meta)
155 struct gfs2_sbd *sdp = gl->gl_sbd; 155 struct gfs2_sbd *sdp = gl->gl_sbd;
156 struct gfs2_bufdata *bd; 156 struct gfs2_bufdata *bd;
157 157
158 lock_buffer(bh);
159 gfs2_log_lock(sdp);
158 bd = bh->b_private; 160 bd = bh->b_private;
159 if (bd) 161 if (bd)
160 gfs2_assert(sdp, bd->bd_gl == gl); 162 gfs2_assert(sdp, bd->bd_gl == gl);
161 else { 163 else {
164 gfs2_log_unlock(sdp);
165 unlock_buffer(bh);
162 gfs2_attach_bufdata(gl, bh, meta); 166 gfs2_attach_bufdata(gl, bh, meta);
163 bd = bh->b_private; 167 bd = bh->b_private;
168 lock_buffer(bh);
169 gfs2_log_lock(sdp);
164 } 170 }
165 lops_add(sdp, bd); 171 lops_add(sdp, bd);
172 gfs2_log_unlock(sdp);
173 unlock_buffer(bh);
166} 174}
167 175
168void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd) 176void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
diff --git a/fs/inode.c b/fs/inode.c
index b03c71957246..64999f144153 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -408,6 +408,19 @@ static void inode_lru_list_add(struct inode *inode)
408 spin_unlock(&inode->i_sb->s_inode_lru_lock); 408 spin_unlock(&inode->i_sb->s_inode_lru_lock);
409} 409}
410 410
411/*
412 * Add inode to LRU if needed (inode is unused and clean).
413 *
414 * Needs inode->i_lock held.
415 */
416void inode_add_lru(struct inode *inode)
417{
418 if (!(inode->i_state & (I_DIRTY | I_SYNC | I_FREEING | I_WILL_FREE)) &&
419 !atomic_read(&inode->i_count) && inode->i_sb->s_flags & MS_ACTIVE)
420 inode_lru_list_add(inode);
421}
422
423
411static void inode_lru_list_del(struct inode *inode) 424static void inode_lru_list_del(struct inode *inode)
412{ 425{
413 spin_lock(&inode->i_sb->s_inode_lru_lock); 426 spin_lock(&inode->i_sb->s_inode_lru_lock);
@@ -1390,8 +1403,7 @@ static void iput_final(struct inode *inode)
1390 1403
1391 if (!drop && (sb->s_flags & MS_ACTIVE)) { 1404 if (!drop && (sb->s_flags & MS_ACTIVE)) {
1392 inode->i_state |= I_REFERENCED; 1405 inode->i_state |= I_REFERENCED;
1393 if (!(inode->i_state & (I_DIRTY|I_SYNC))) 1406 inode_add_lru(inode);
1394 inode_lru_list_add(inode);
1395 spin_unlock(&inode->i_lock); 1407 spin_unlock(&inode->i_lock);
1396 return; 1408 return;
1397 } 1409 }
diff --git a/fs/internal.h b/fs/internal.h
index 916b7cbf3e3e..2f6af7f645eb 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -110,6 +110,7 @@ extern int open_check_o_direct(struct file *f);
110 * inode.c 110 * inode.c
111 */ 111 */
112extern spinlock_t inode_sb_list_lock; 112extern spinlock_t inode_sb_list_lock;
113extern void inode_add_lru(struct inode *inode);
113 114
114/* 115/*
115 * fs-writeback.c 116 * fs-writeback.c
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 78b7f84241d4..7f5120bf0ec2 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -1961,7 +1961,9 @@ retry:
1961 spin_unlock(&journal->j_list_lock); 1961 spin_unlock(&journal->j_list_lock);
1962 jbd_unlock_bh_state(bh); 1962 jbd_unlock_bh_state(bh);
1963 spin_unlock(&journal->j_state_lock); 1963 spin_unlock(&journal->j_state_lock);
1964 unlock_buffer(bh);
1964 log_wait_commit(journal, tid); 1965 log_wait_commit(journal, tid);
1966 lock_buffer(bh);
1965 goto retry; 1967 goto retry;
1966 } 1968 }
1967 /* 1969 /*
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
index 60ef3fb707ff..1506673c087e 100644
--- a/fs/jffs2/file.c
+++ b/fs/jffs2/file.c
@@ -138,33 +138,39 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
138 struct page *pg; 138 struct page *pg;
139 struct inode *inode = mapping->host; 139 struct inode *inode = mapping->host;
140 struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode); 140 struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
141 struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb);
142 struct jffs2_raw_inode ri;
143 uint32_t alloc_len = 0;
141 pgoff_t index = pos >> PAGE_CACHE_SHIFT; 144 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
142 uint32_t pageofs = index << PAGE_CACHE_SHIFT; 145 uint32_t pageofs = index << PAGE_CACHE_SHIFT;
143 int ret = 0; 146 int ret = 0;
144 147
148 jffs2_dbg(1, "%s()\n", __func__);
149
150 if (pageofs > inode->i_size) {
151 ret = jffs2_reserve_space(c, sizeof(ri), &alloc_len,
152 ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE);
153 if (ret)
154 return ret;
155 }
156
157 mutex_lock(&f->sem);
145 pg = grab_cache_page_write_begin(mapping, index, flags); 158 pg = grab_cache_page_write_begin(mapping, index, flags);
146 if (!pg) 159 if (!pg) {
160 if (alloc_len)
161 jffs2_complete_reservation(c);
162 mutex_unlock(&f->sem);
147 return -ENOMEM; 163 return -ENOMEM;
164 }
148 *pagep = pg; 165 *pagep = pg;
149 166
150 jffs2_dbg(1, "%s()\n", __func__); 167 if (alloc_len) {
151
152 if (pageofs > inode->i_size) {
153 /* Make new hole frag from old EOF to new page */ 168 /* Make new hole frag from old EOF to new page */
154 struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb);
155 struct jffs2_raw_inode ri;
156 struct jffs2_full_dnode *fn; 169 struct jffs2_full_dnode *fn;
157 uint32_t alloc_len;
158 170
159 jffs2_dbg(1, "Writing new hole frag 0x%x-0x%x between current EOF and new page\n", 171 jffs2_dbg(1, "Writing new hole frag 0x%x-0x%x between current EOF and new page\n",
160 (unsigned int)inode->i_size, pageofs); 172 (unsigned int)inode->i_size, pageofs);
161 173
162 ret = jffs2_reserve_space(c, sizeof(ri), &alloc_len,
163 ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE);
164 if (ret)
165 goto out_page;
166
167 mutex_lock(&f->sem);
168 memset(&ri, 0, sizeof(ri)); 174 memset(&ri, 0, sizeof(ri));
169 175
170 ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK); 176 ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
@@ -191,7 +197,6 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
191 if (IS_ERR(fn)) { 197 if (IS_ERR(fn)) {
192 ret = PTR_ERR(fn); 198 ret = PTR_ERR(fn);
193 jffs2_complete_reservation(c); 199 jffs2_complete_reservation(c);
194 mutex_unlock(&f->sem);
195 goto out_page; 200 goto out_page;
196 } 201 }
197 ret = jffs2_add_full_dnode_to_inode(c, f, fn); 202 ret = jffs2_add_full_dnode_to_inode(c, f, fn);
@@ -206,12 +211,10 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
206 jffs2_mark_node_obsolete(c, fn->raw); 211 jffs2_mark_node_obsolete(c, fn->raw);
207 jffs2_free_full_dnode(fn); 212 jffs2_free_full_dnode(fn);
208 jffs2_complete_reservation(c); 213 jffs2_complete_reservation(c);
209 mutex_unlock(&f->sem);
210 goto out_page; 214 goto out_page;
211 } 215 }
212 jffs2_complete_reservation(c); 216 jffs2_complete_reservation(c);
213 inode->i_size = pageofs; 217 inode->i_size = pageofs;
214 mutex_unlock(&f->sem);
215 } 218 }
216 219
217 /* 220 /*
@@ -220,18 +223,18 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
220 * case of a short-copy. 223 * case of a short-copy.
221 */ 224 */
222 if (!PageUptodate(pg)) { 225 if (!PageUptodate(pg)) {
223 mutex_lock(&f->sem);
224 ret = jffs2_do_readpage_nolock(inode, pg); 226 ret = jffs2_do_readpage_nolock(inode, pg);
225 mutex_unlock(&f->sem);
226 if (ret) 227 if (ret)
227 goto out_page; 228 goto out_page;
228 } 229 }
230 mutex_unlock(&f->sem);
229 jffs2_dbg(1, "end write_begin(). pg->flags %lx\n", pg->flags); 231 jffs2_dbg(1, "end write_begin(). pg->flags %lx\n", pg->flags);
230 return ret; 232 return ret;
231 233
232out_page: 234out_page:
233 unlock_page(pg); 235 unlock_page(pg);
234 page_cache_release(pg); 236 page_cache_release(pg);
237 mutex_unlock(&f->sem);
235 return ret; 238 return ret;
236} 239}
237 240
diff --git a/fs/jfs/jfs_discard.c b/fs/jfs/jfs_discard.c
index 9947563e4175..dfcd50304559 100644
--- a/fs/jfs/jfs_discard.c
+++ b/fs/jfs/jfs_discard.c
@@ -83,7 +83,7 @@ int jfs_ioc_trim(struct inode *ip, struct fstrim_range *range)
83 struct bmap *bmp = JFS_SBI(ip->i_sb)->bmap; 83 struct bmap *bmp = JFS_SBI(ip->i_sb)->bmap;
84 struct super_block *sb = ipbmap->i_sb; 84 struct super_block *sb = ipbmap->i_sb;
85 int agno, agno_end; 85 int agno, agno_end;
86 s64 start, end, minlen; 86 u64 start, end, minlen;
87 u64 trimmed = 0; 87 u64 trimmed = 0;
88 88
89 /** 89 /**
@@ -93,15 +93,19 @@ int jfs_ioc_trim(struct inode *ip, struct fstrim_range *range)
93 * minlen: minimum extent length in Bytes 93 * minlen: minimum extent length in Bytes
94 */ 94 */
95 start = range->start >> sb->s_blocksize_bits; 95 start = range->start >> sb->s_blocksize_bits;
96 if (start < 0)
97 start = 0;
98 end = start + (range->len >> sb->s_blocksize_bits) - 1; 96 end = start + (range->len >> sb->s_blocksize_bits) - 1;
99 if (end >= bmp->db_mapsize)
100 end = bmp->db_mapsize - 1;
101 minlen = range->minlen >> sb->s_blocksize_bits; 97 minlen = range->minlen >> sb->s_blocksize_bits;
102 if (minlen <= 0) 98 if (minlen == 0)
103 minlen = 1; 99 minlen = 1;
104 100
101 if (minlen > bmp->db_agsize ||
102 start >= bmp->db_mapsize ||
103 range->len < sb->s_blocksize)
104 return -EINVAL;
105
106 if (end >= bmp->db_mapsize)
107 end = bmp->db_mapsize - 1;
108
105 /** 109 /**
106 * we trim all ag's within the range 110 * we trim all ag's within the range
107 */ 111 */
diff --git a/fs/lockd/clntxdr.c b/fs/lockd/clntxdr.c
index d269ada7670e..982d2676e1f8 100644
--- a/fs/lockd/clntxdr.c
+++ b/fs/lockd/clntxdr.c
@@ -223,7 +223,7 @@ static void encode_nlm_stat(struct xdr_stream *xdr,
223{ 223{
224 __be32 *p; 224 __be32 *p;
225 225
226 BUG_ON(be32_to_cpu(stat) > NLM_LCK_DENIED_GRACE_PERIOD); 226 WARN_ON_ONCE(be32_to_cpu(stat) > NLM_LCK_DENIED_GRACE_PERIOD);
227 p = xdr_reserve_space(xdr, 4); 227 p = xdr_reserve_space(xdr, 4);
228 *p = stat; 228 *p = stat;
229} 229}
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index e4fb3ba5a58a..3d7e09bcc0e9 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -85,29 +85,38 @@ static struct rpc_clnt *nsm_create(struct net *net)
85 return rpc_create(&args); 85 return rpc_create(&args);
86} 86}
87 87
88static struct rpc_clnt *nsm_client_set(struct lockd_net *ln,
89 struct rpc_clnt *clnt)
90{
91 spin_lock(&ln->nsm_clnt_lock);
92 if (ln->nsm_users == 0) {
93 if (clnt == NULL)
94 goto out;
95 ln->nsm_clnt = clnt;
96 }
97 clnt = ln->nsm_clnt;
98 ln->nsm_users++;
99out:
100 spin_unlock(&ln->nsm_clnt_lock);
101 return clnt;
102}
103
88static struct rpc_clnt *nsm_client_get(struct net *net) 104static struct rpc_clnt *nsm_client_get(struct net *net)
89{ 105{
90 static DEFINE_MUTEX(nsm_create_mutex); 106 struct rpc_clnt *clnt, *new;
91 struct rpc_clnt *clnt;
92 struct lockd_net *ln = net_generic(net, lockd_net_id); 107 struct lockd_net *ln = net_generic(net, lockd_net_id);
93 108
94 spin_lock(&ln->nsm_clnt_lock); 109 clnt = nsm_client_set(ln, NULL);
95 if (ln->nsm_users) { 110 if (clnt != NULL)
96 ln->nsm_users++;
97 clnt = ln->nsm_clnt;
98 spin_unlock(&ln->nsm_clnt_lock);
99 goto out; 111 goto out;
100 }
101 spin_unlock(&ln->nsm_clnt_lock);
102 112
103 mutex_lock(&nsm_create_mutex); 113 clnt = new = nsm_create(net);
104 clnt = nsm_create(net); 114 if (IS_ERR(clnt))
105 if (!IS_ERR(clnt)) { 115 goto out;
106 ln->nsm_clnt = clnt; 116
107 smp_wmb(); 117 clnt = nsm_client_set(ln, new);
108 ln->nsm_users = 1; 118 if (clnt != new)
109 } 119 rpc_shutdown_client(new);
110 mutex_unlock(&nsm_create_mutex);
111out: 120out:
112 return clnt; 121 return clnt;
113} 122}
@@ -115,18 +124,16 @@ out:
115static void nsm_client_put(struct net *net) 124static void nsm_client_put(struct net *net)
116{ 125{
117 struct lockd_net *ln = net_generic(net, lockd_net_id); 126 struct lockd_net *ln = net_generic(net, lockd_net_id);
118 struct rpc_clnt *clnt = ln->nsm_clnt; 127 struct rpc_clnt *clnt = NULL;
119 int shutdown = 0;
120 128
121 spin_lock(&ln->nsm_clnt_lock); 129 spin_lock(&ln->nsm_clnt_lock);
122 if (ln->nsm_users) { 130 ln->nsm_users--;
123 if (--ln->nsm_users) 131 if (ln->nsm_users == 0) {
124 ln->nsm_clnt = NULL; 132 clnt = ln->nsm_clnt;
125 shutdown = !ln->nsm_users; 133 ln->nsm_clnt = NULL;
126 } 134 }
127 spin_unlock(&ln->nsm_clnt_lock); 135 spin_unlock(&ln->nsm_clnt_lock);
128 136 if (clnt != NULL)
129 if (shutdown)
130 rpc_shutdown_client(clnt); 137 rpc_shutdown_client(clnt);
131} 138}
132 139
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index 3009a365e082..21171f0c6477 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -68,7 +68,8 @@ nlmsvc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp,
68 68
69 /* Obtain file pointer. Not used by FREE_ALL call. */ 69 /* Obtain file pointer. Not used by FREE_ALL call. */
70 if (filp != NULL) { 70 if (filp != NULL) {
71 if ((error = nlm_lookup_file(rqstp, &file, &lock->fh)) != 0) 71 error = cast_status(nlm_lookup_file(rqstp, &file, &lock->fh));
72 if (error != 0)
72 goto no_locks; 73 goto no_locks;
73 *filp = file; 74 *filp = file;
74 75
diff --git a/fs/namei.c b/fs/namei.c
index d1895f308156..937f9d50c84b 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -705,8 +705,8 @@ static inline void put_link(struct nameidata *nd, struct path *link, void *cooki
705 path_put(link); 705 path_put(link);
706} 706}
707 707
708int sysctl_protected_symlinks __read_mostly = 1; 708int sysctl_protected_symlinks __read_mostly = 0;
709int sysctl_protected_hardlinks __read_mostly = 1; 709int sysctl_protected_hardlinks __read_mostly = 0;
710 710
711/** 711/**
712 * may_follow_link - Check symlink following for unsafe situations 712 * may_follow_link - Check symlink following for unsafe situations
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 9a521fb39869..5088b57b078a 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -241,7 +241,7 @@ static int nfs_callback_start_svc(int minorversion, struct rpc_xprt *xprt,
241 svc_exit_thread(cb_info->rqst); 241 svc_exit_thread(cb_info->rqst);
242 cb_info->rqst = NULL; 242 cb_info->rqst = NULL;
243 cb_info->task = NULL; 243 cb_info->task = NULL;
244 return PTR_ERR(cb_info->task); 244 return ret;
245 } 245 }
246 dprintk("nfs_callback_up: service started\n"); 246 dprintk("nfs_callback_up: service started\n");
247 return 0; 247 return 0;
diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c
index 31c26c4dcc23..ca4b11ec87a2 100644
--- a/fs/nfs/dns_resolve.c
+++ b/fs/nfs/dns_resolve.c
@@ -217,7 +217,7 @@ static int nfs_dns_parse(struct cache_detail *cd, char *buf, int buflen)
217{ 217{
218 char buf1[NFS_DNS_HOSTNAME_MAXLEN+1]; 218 char buf1[NFS_DNS_HOSTNAME_MAXLEN+1];
219 struct nfs_dns_ent key, *item; 219 struct nfs_dns_ent key, *item;
220 unsigned long ttl; 220 unsigned int ttl;
221 ssize_t len; 221 ssize_t len;
222 int ret = -EINVAL; 222 int ret = -EINVAL;
223 223
@@ -240,7 +240,8 @@ static int nfs_dns_parse(struct cache_detail *cd, char *buf, int buflen)
240 key.namelen = len; 240 key.namelen = len;
241 memset(&key.h, 0, sizeof(key.h)); 241 memset(&key.h, 0, sizeof(key.h));
242 242
243 ttl = get_expiry(&buf); 243 if (get_uint(&buf, &ttl) < 0)
244 goto out;
244 if (ttl == 0) 245 if (ttl == 0)
245 goto out; 246 goto out;
246 key.h.expiry_time = ttl + seconds_since_boot(); 247 key.h.expiry_time = ttl + seconds_since_boot();
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 5c7325c5c5e6..6fa01aea2488 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -685,7 +685,10 @@ static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync)
685 if (ctx->cred != NULL) 685 if (ctx->cred != NULL)
686 put_rpccred(ctx->cred); 686 put_rpccred(ctx->cred);
687 dput(ctx->dentry); 687 dput(ctx->dentry);
688 nfs_sb_deactive(sb); 688 if (is_sync)
689 nfs_sb_deactive(sb);
690 else
691 nfs_sb_deactive_async(sb);
689 kfree(ctx->mdsthreshold); 692 kfree(ctx->mdsthreshold);
690 kfree(ctx); 693 kfree(ctx);
691} 694}
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 59b133c5d652..05521cadac2e 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -351,10 +351,12 @@ extern int __init register_nfs_fs(void);
351extern void __exit unregister_nfs_fs(void); 351extern void __exit unregister_nfs_fs(void);
352extern void nfs_sb_active(struct super_block *sb); 352extern void nfs_sb_active(struct super_block *sb);
353extern void nfs_sb_deactive(struct super_block *sb); 353extern void nfs_sb_deactive(struct super_block *sb);
354extern void nfs_sb_deactive_async(struct super_block *sb);
354 355
355/* namespace.c */ 356/* namespace.c */
357#define NFS_PATH_CANONICAL 1
356extern char *nfs_path(char **p, struct dentry *dentry, 358extern char *nfs_path(char **p, struct dentry *dentry,
357 char *buffer, ssize_t buflen); 359 char *buffer, ssize_t buflen, unsigned flags);
358extern struct vfsmount *nfs_d_automount(struct path *path); 360extern struct vfsmount *nfs_d_automount(struct path *path);
359struct vfsmount *nfs_submount(struct nfs_server *, struct dentry *, 361struct vfsmount *nfs_submount(struct nfs_server *, struct dentry *,
360 struct nfs_fh *, struct nfs_fattr *); 362 struct nfs_fh *, struct nfs_fattr *);
@@ -498,7 +500,7 @@ static inline char *nfs_devname(struct dentry *dentry,
498 char *buffer, ssize_t buflen) 500 char *buffer, ssize_t buflen)
499{ 501{
500 char *dummy; 502 char *dummy;
501 return nfs_path(&dummy, dentry, buffer, buflen); 503 return nfs_path(&dummy, dentry, buffer, buflen, NFS_PATH_CANONICAL);
502} 504}
503 505
504/* 506/*
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index 8e65c7f1f87c..015f71f8f62c 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -181,7 +181,7 @@ int nfs_mount(struct nfs_mount_request *info)
181 else 181 else
182 msg.rpc_proc = &mnt_clnt->cl_procinfo[MOUNTPROC_MNT]; 182 msg.rpc_proc = &mnt_clnt->cl_procinfo[MOUNTPROC_MNT];
183 183
184 status = rpc_call_sync(mnt_clnt, &msg, 0); 184 status = rpc_call_sync(mnt_clnt, &msg, RPC_TASK_SOFT|RPC_TASK_TIMEOUT);
185 rpc_shutdown_client(mnt_clnt); 185 rpc_shutdown_client(mnt_clnt);
186 186
187 if (status < 0) 187 if (status < 0)
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 655925373b91..dd057bc6b65b 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -33,6 +33,7 @@ int nfs_mountpoint_expiry_timeout = 500 * HZ;
33 * @dentry - pointer to dentry 33 * @dentry - pointer to dentry
34 * @buffer - result buffer 34 * @buffer - result buffer
35 * @buflen - length of buffer 35 * @buflen - length of buffer
36 * @flags - options (see below)
36 * 37 *
37 * Helper function for constructing the server pathname 38 * Helper function for constructing the server pathname
38 * by arbitrary hashed dentry. 39 * by arbitrary hashed dentry.
@@ -40,8 +41,14 @@ int nfs_mountpoint_expiry_timeout = 500 * HZ;
40 * This is mainly for use in figuring out the path on the 41 * This is mainly for use in figuring out the path on the
41 * server side when automounting on top of an existing partition 42 * server side when automounting on top of an existing partition
42 * and in generating /proc/mounts and friends. 43 * and in generating /proc/mounts and friends.
44 *
45 * Supported flags:
46 * NFS_PATH_CANONICAL: ensure there is exactly one slash after
47 * the original device (export) name
48 * (if unset, the original name is returned verbatim)
43 */ 49 */
44char *nfs_path(char **p, struct dentry *dentry, char *buffer, ssize_t buflen) 50char *nfs_path(char **p, struct dentry *dentry, char *buffer, ssize_t buflen,
51 unsigned flags)
45{ 52{
46 char *end; 53 char *end;
47 int namelen; 54 int namelen;
@@ -74,7 +81,7 @@ rename_retry:
74 rcu_read_unlock(); 81 rcu_read_unlock();
75 goto rename_retry; 82 goto rename_retry;
76 } 83 }
77 if (*end != '/') { 84 if ((flags & NFS_PATH_CANONICAL) && *end != '/') {
78 if (--buflen < 0) { 85 if (--buflen < 0) {
79 spin_unlock(&dentry->d_lock); 86 spin_unlock(&dentry->d_lock);
80 rcu_read_unlock(); 87 rcu_read_unlock();
@@ -91,9 +98,11 @@ rename_retry:
91 return end; 98 return end;
92 } 99 }
93 namelen = strlen(base); 100 namelen = strlen(base);
94 /* Strip off excess slashes in base string */ 101 if (flags & NFS_PATH_CANONICAL) {
95 while (namelen > 0 && base[namelen - 1] == '/') 102 /* Strip off excess slashes in base string */
96 namelen--; 103 while (namelen > 0 && base[namelen - 1] == '/')
104 namelen--;
105 }
97 buflen -= namelen; 106 buflen -= namelen;
98 if (buflen < 0) { 107 if (buflen < 0) {
99 spin_unlock(&dentry->d_lock); 108 spin_unlock(&dentry->d_lock);
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index 52d847212066..2e45fd9c02a3 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -122,12 +122,21 @@ static void filelayout_reset_read(struct nfs_read_data *data)
122 } 122 }
123} 123}
124 124
125static void filelayout_fenceme(struct inode *inode, struct pnfs_layout_hdr *lo)
126{
127 if (!test_and_clear_bit(NFS_LAYOUT_RETURN, &lo->plh_flags))
128 return;
129 clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(inode)->flags);
130 pnfs_return_layout(inode);
131}
132
125static int filelayout_async_handle_error(struct rpc_task *task, 133static int filelayout_async_handle_error(struct rpc_task *task,
126 struct nfs4_state *state, 134 struct nfs4_state *state,
127 struct nfs_client *clp, 135 struct nfs_client *clp,
128 struct pnfs_layout_segment *lseg) 136 struct pnfs_layout_segment *lseg)
129{ 137{
130 struct inode *inode = lseg->pls_layout->plh_inode; 138 struct pnfs_layout_hdr *lo = lseg->pls_layout;
139 struct inode *inode = lo->plh_inode;
131 struct nfs_server *mds_server = NFS_SERVER(inode); 140 struct nfs_server *mds_server = NFS_SERVER(inode);
132 struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(lseg); 141 struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(lseg);
133 struct nfs_client *mds_client = mds_server->nfs_client; 142 struct nfs_client *mds_client = mds_server->nfs_client;
@@ -204,10 +213,8 @@ static int filelayout_async_handle_error(struct rpc_task *task,
204 dprintk("%s DS connection error %d\n", __func__, 213 dprintk("%s DS connection error %d\n", __func__,
205 task->tk_status); 214 task->tk_status);
206 nfs4_mark_deviceid_unavailable(devid); 215 nfs4_mark_deviceid_unavailable(devid);
207 clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(inode)->flags); 216 set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags);
208 _pnfs_return_layout(inode);
209 rpc_wake_up(&tbl->slot_tbl_waitq); 217 rpc_wake_up(&tbl->slot_tbl_waitq);
210 nfs4_ds_disconnect(clp);
211 /* fall through */ 218 /* fall through */
212 default: 219 default:
213reset: 220reset:
@@ -331,7 +338,9 @@ static void filelayout_read_count_stats(struct rpc_task *task, void *data)
331static void filelayout_read_release(void *data) 338static void filelayout_read_release(void *data)
332{ 339{
333 struct nfs_read_data *rdata = data; 340 struct nfs_read_data *rdata = data;
341 struct pnfs_layout_hdr *lo = rdata->header->lseg->pls_layout;
334 342
343 filelayout_fenceme(lo->plh_inode, lo);
335 nfs_put_client(rdata->ds_clp); 344 nfs_put_client(rdata->ds_clp);
336 rdata->header->mds_ops->rpc_release(data); 345 rdata->header->mds_ops->rpc_release(data);
337} 346}
@@ -429,7 +438,9 @@ static void filelayout_write_count_stats(struct rpc_task *task, void *data)
429static void filelayout_write_release(void *data) 438static void filelayout_write_release(void *data)
430{ 439{
431 struct nfs_write_data *wdata = data; 440 struct nfs_write_data *wdata = data;
441 struct pnfs_layout_hdr *lo = wdata->header->lseg->pls_layout;
432 442
443 filelayout_fenceme(lo->plh_inode, lo);
433 nfs_put_client(wdata->ds_clp); 444 nfs_put_client(wdata->ds_clp);
434 wdata->header->mds_ops->rpc_release(data); 445 wdata->header->mds_ops->rpc_release(data);
435} 446}
@@ -739,7 +750,7 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo,
739 goto out_err; 750 goto out_err;
740 751
741 if (fl->num_fh > 0) { 752 if (fl->num_fh > 0) {
742 fl->fh_array = kzalloc(fl->num_fh * sizeof(struct nfs_fh *), 753 fl->fh_array = kcalloc(fl->num_fh, sizeof(fl->fh_array[0]),
743 gfp_flags); 754 gfp_flags);
744 if (!fl->fh_array) 755 if (!fl->fh_array)
745 goto out_err; 756 goto out_err;
diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h
index dca47d786710..8c07241fe52b 100644
--- a/fs/nfs/nfs4filelayout.h
+++ b/fs/nfs/nfs4filelayout.h
@@ -149,6 +149,5 @@ extern void nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr);
149extern void nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr); 149extern void nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr);
150struct nfs4_file_layout_dsaddr * 150struct nfs4_file_layout_dsaddr *
151filelayout_get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gfp_t gfp_flags); 151filelayout_get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gfp_t gfp_flags);
152void nfs4_ds_disconnect(struct nfs_client *clp);
153 152
154#endif /* FS_NFS_NFS4FILELAYOUT_H */ 153#endif /* FS_NFS_NFS4FILELAYOUT_H */
diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c
index 3336d5eaf879..a8eaa9b7bb0f 100644
--- a/fs/nfs/nfs4filelayoutdev.c
+++ b/fs/nfs/nfs4filelayoutdev.c
@@ -149,28 +149,6 @@ _data_server_lookup_locked(const struct list_head *dsaddrs)
149} 149}
150 150
151/* 151/*
152 * Lookup DS by nfs_client pointer. Zero data server client pointer
153 */
154void nfs4_ds_disconnect(struct nfs_client *clp)
155{
156 struct nfs4_pnfs_ds *ds;
157 struct nfs_client *found = NULL;
158
159 dprintk("%s clp %p\n", __func__, clp);
160 spin_lock(&nfs4_ds_cache_lock);
161 list_for_each_entry(ds, &nfs4_data_server_cache, ds_node)
162 if (ds->ds_clp && ds->ds_clp == clp) {
163 found = ds->ds_clp;
164 ds->ds_clp = NULL;
165 }
166 spin_unlock(&nfs4_ds_cache_lock);
167 if (found) {
168 set_bit(NFS_CS_STOP_RENEW, &clp->cl_res_state);
169 nfs_put_client(clp);
170 }
171}
172
173/*
174 * Create an rpc connection to the nfs4_pnfs_ds data server 152 * Create an rpc connection to the nfs4_pnfs_ds data server
175 * Currently only supports IPv4 and IPv6 addresses 153 * Currently only supports IPv4 and IPv6 addresses
176 */ 154 */
diff --git a/fs/nfs/nfs4getroot.c b/fs/nfs/nfs4getroot.c
index 6a83780e0ce6..549462e5b9b0 100644
--- a/fs/nfs/nfs4getroot.c
+++ b/fs/nfs/nfs4getroot.c
@@ -5,6 +5,7 @@
5 5
6#include <linux/nfs_fs.h> 6#include <linux/nfs_fs.h>
7#include "nfs4_fs.h" 7#include "nfs4_fs.h"
8#include "internal.h"
8 9
9#define NFSDBG_FACILITY NFSDBG_CLIENT 10#define NFSDBG_FACILITY NFSDBG_CLIENT
10 11
diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c
index 79fbb61ce202..1e09eb78543b 100644
--- a/fs/nfs/nfs4namespace.c
+++ b/fs/nfs/nfs4namespace.c
@@ -81,7 +81,8 @@ static char *nfs_path_component(const char *nfspath, const char *end)
81static char *nfs4_path(struct dentry *dentry, char *buffer, ssize_t buflen) 81static char *nfs4_path(struct dentry *dentry, char *buffer, ssize_t buflen)
82{ 82{
83 char *limit; 83 char *limit;
84 char *path = nfs_path(&limit, dentry, buffer, buflen); 84 char *path = nfs_path(&limit, dentry, buffer, buflen,
85 NFS_PATH_CANONICAL);
85 if (!IS_ERR(path)) { 86 if (!IS_ERR(path)) {
86 char *path_component = nfs_path_component(path, limit); 87 char *path_component = nfs_path_component(path, limit);
87 if (path_component) 88 if (path_component)
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 68b21d81b7ac..5eec4429970c 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -339,8 +339,7 @@ static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struc
339 dprintk("%s ERROR: %d Reset session\n", __func__, 339 dprintk("%s ERROR: %d Reset session\n", __func__,
340 errorcode); 340 errorcode);
341 nfs4_schedule_session_recovery(clp->cl_session, errorcode); 341 nfs4_schedule_session_recovery(clp->cl_session, errorcode);
342 exception->retry = 1; 342 goto wait_on_recovery;
343 break;
344#endif /* defined(CONFIG_NFS_V4_1) */ 343#endif /* defined(CONFIG_NFS_V4_1) */
345 case -NFS4ERR_FILE_OPEN: 344 case -NFS4ERR_FILE_OPEN:
346 if (exception->timeout > HZ) { 345 if (exception->timeout > HZ) {
@@ -1572,9 +1571,11 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata)
1572 data->timestamp = jiffies; 1571 data->timestamp = jiffies;
1573 if (nfs4_setup_sequence(data->o_arg.server, 1572 if (nfs4_setup_sequence(data->o_arg.server,
1574 &data->o_arg.seq_args, 1573 &data->o_arg.seq_args,
1575 &data->o_res.seq_res, task)) 1574 &data->o_res.seq_res,
1576 return; 1575 task) != 0)
1577 rpc_call_start(task); 1576 nfs_release_seqid(data->o_arg.seqid);
1577 else
1578 rpc_call_start(task);
1578 return; 1579 return;
1579unlock_no_action: 1580unlock_no_action:
1580 rcu_read_unlock(); 1581 rcu_read_unlock();
@@ -1748,7 +1749,7 @@ static int nfs4_opendata_access(struct rpc_cred *cred,
1748 1749
1749 /* even though OPEN succeeded, access is denied. Close the file */ 1750 /* even though OPEN succeeded, access is denied. Close the file */
1750 nfs4_close_state(state, fmode); 1751 nfs4_close_state(state, fmode);
1751 return -NFS4ERR_ACCESS; 1752 return -EACCES;
1752} 1753}
1753 1754
1754/* 1755/*
@@ -2196,7 +2197,7 @@ static void nfs4_free_closedata(void *data)
2196 nfs4_put_open_state(calldata->state); 2197 nfs4_put_open_state(calldata->state);
2197 nfs_free_seqid(calldata->arg.seqid); 2198 nfs_free_seqid(calldata->arg.seqid);
2198 nfs4_put_state_owner(sp); 2199 nfs4_put_state_owner(sp);
2199 nfs_sb_deactive(sb); 2200 nfs_sb_deactive_async(sb);
2200 kfree(calldata); 2201 kfree(calldata);
2201} 2202}
2202 2203
@@ -2296,9 +2297,10 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
2296 if (nfs4_setup_sequence(NFS_SERVER(inode), 2297 if (nfs4_setup_sequence(NFS_SERVER(inode),
2297 &calldata->arg.seq_args, 2298 &calldata->arg.seq_args,
2298 &calldata->res.seq_res, 2299 &calldata->res.seq_res,
2299 task)) 2300 task) != 0)
2300 goto out; 2301 nfs_release_seqid(calldata->arg.seqid);
2301 rpc_call_start(task); 2302 else
2303 rpc_call_start(task);
2302out: 2304out:
2303 dprintk("%s: done!\n", __func__); 2305 dprintk("%s: done!\n", __func__);
2304} 2306}
@@ -4529,6 +4531,7 @@ static void nfs4_locku_done(struct rpc_task *task, void *data)
4529 if (nfs4_async_handle_error(task, calldata->server, NULL) == -EAGAIN) 4531 if (nfs4_async_handle_error(task, calldata->server, NULL) == -EAGAIN)
4530 rpc_restart_call_prepare(task); 4532 rpc_restart_call_prepare(task);
4531 } 4533 }
4534 nfs_release_seqid(calldata->arg.seqid);
4532} 4535}
4533 4536
4534static void nfs4_locku_prepare(struct rpc_task *task, void *data) 4537static void nfs4_locku_prepare(struct rpc_task *task, void *data)
@@ -4545,9 +4548,11 @@ static void nfs4_locku_prepare(struct rpc_task *task, void *data)
4545 calldata->timestamp = jiffies; 4548 calldata->timestamp = jiffies;
4546 if (nfs4_setup_sequence(calldata->server, 4549 if (nfs4_setup_sequence(calldata->server,
4547 &calldata->arg.seq_args, 4550 &calldata->arg.seq_args,
4548 &calldata->res.seq_res, task)) 4551 &calldata->res.seq_res,
4549 return; 4552 task) != 0)
4550 rpc_call_start(task); 4553 nfs_release_seqid(calldata->arg.seqid);
4554 else
4555 rpc_call_start(task);
4551} 4556}
4552 4557
4553static const struct rpc_call_ops nfs4_locku_ops = { 4558static const struct rpc_call_ops nfs4_locku_ops = {
@@ -4692,7 +4697,7 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata)
4692 /* Do we need to do an open_to_lock_owner? */ 4697 /* Do we need to do an open_to_lock_owner? */
4693 if (!(data->arg.lock_seqid->sequence->flags & NFS_SEQID_CONFIRMED)) { 4698 if (!(data->arg.lock_seqid->sequence->flags & NFS_SEQID_CONFIRMED)) {
4694 if (nfs_wait_on_sequence(data->arg.open_seqid, task) != 0) 4699 if (nfs_wait_on_sequence(data->arg.open_seqid, task) != 0)
4695 return; 4700 goto out_release_lock_seqid;
4696 data->arg.open_stateid = &state->stateid; 4701 data->arg.open_stateid = &state->stateid;
4697 data->arg.new_lock_owner = 1; 4702 data->arg.new_lock_owner = 1;
4698 data->res.open_seqid = data->arg.open_seqid; 4703 data->res.open_seqid = data->arg.open_seqid;
@@ -4701,10 +4706,15 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata)
4701 data->timestamp = jiffies; 4706 data->timestamp = jiffies;
4702 if (nfs4_setup_sequence(data->server, 4707 if (nfs4_setup_sequence(data->server,
4703 &data->arg.seq_args, 4708 &data->arg.seq_args,
4704 &data->res.seq_res, task)) 4709 &data->res.seq_res,
4710 task) == 0) {
4711 rpc_call_start(task);
4705 return; 4712 return;
4706 rpc_call_start(task); 4713 }
4707 dprintk("%s: done!, ret = %d\n", __func__, data->rpc_status); 4714 nfs_release_seqid(data->arg.open_seqid);
4715out_release_lock_seqid:
4716 nfs_release_seqid(data->arg.lock_seqid);
4717 dprintk("%s: done!, ret = %d\n", __func__, task->tk_status);
4708} 4718}
4709 4719
4710static void nfs4_recover_lock_prepare(struct rpc_task *task, void *calldata) 4720static void nfs4_recover_lock_prepare(struct rpc_task *task, void *calldata)
@@ -5667,7 +5677,7 @@ static void nfs4_add_and_init_slots(struct nfs4_slot_table *tbl,
5667 tbl->slots = new; 5677 tbl->slots = new;
5668 tbl->max_slots = max_slots; 5678 tbl->max_slots = max_slots;
5669 } 5679 }
5670 tbl->highest_used_slotid = -1; /* no slot is currently used */ 5680 tbl->highest_used_slotid = NFS4_NO_SLOT;
5671 for (i = 0; i < tbl->max_slots; i++) 5681 for (i = 0; i < tbl->max_slots; i++)
5672 tbl->slots[i].seq_nr = ivalue; 5682 tbl->slots[i].seq_nr = ivalue;
5673 spin_unlock(&tbl->slot_tbl_lock); 5683 spin_unlock(&tbl->slot_tbl_lock);
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index be731e6b7b9c..c6f990656f89 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -369,7 +369,7 @@ void objio_free_result(struct objlayout_io_res *oir)
369 kfree(objios); 369 kfree(objios);
370} 370}
371 371
372enum pnfs_osd_errno osd_pri_2_pnfs_err(enum osd_err_priority oep) 372static enum pnfs_osd_errno osd_pri_2_pnfs_err(enum osd_err_priority oep)
373{ 373{
374 switch (oep) { 374 switch (oep) {
375 case OSD_ERR_PRI_NO_ERROR: 375 case OSD_ERR_PRI_NO_ERROR:
@@ -574,7 +574,7 @@ static bool objio_pg_test(struct nfs_pageio_descriptor *pgio,
574 (unsigned long)pgio->pg_layout_private; 574 (unsigned long)pgio->pg_layout_private;
575} 575}
576 576
577void objio_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) 577static void objio_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
578{ 578{
579 pnfs_generic_pg_init_read(pgio, req); 579 pnfs_generic_pg_init_read(pgio, req);
580 if (unlikely(pgio->pg_lseg == NULL)) 580 if (unlikely(pgio->pg_lseg == NULL))
@@ -604,7 +604,7 @@ static bool aligned_on_raid_stripe(u64 offset, struct ore_layout *layout,
604 return false; 604 return false;
605} 605}
606 606
607void objio_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) 607static void objio_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
608{ 608{
609 unsigned long stripe_end = 0; 609 unsigned long stripe_end = 0;
610 u64 wb_size; 610 u64 wb_size;
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index fe624c91bd00..2878f97bd78d 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -925,8 +925,8 @@ pnfs_find_alloc_layout(struct inode *ino,
925 if (likely(nfsi->layout == NULL)) { /* Won the race? */ 925 if (likely(nfsi->layout == NULL)) { /* Won the race? */
926 nfsi->layout = new; 926 nfsi->layout = new;
927 return new; 927 return new;
928 } 928 } else if (new != NULL)
929 pnfs_free_layout_hdr(new); 929 pnfs_free_layout_hdr(new);
930out_existing: 930out_existing:
931 pnfs_get_layout_hdr(nfsi->layout); 931 pnfs_get_layout_hdr(nfsi->layout);
932 return nfsi->layout; 932 return nfsi->layout;
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 2d722dba1111..dbf7bba52da0 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -62,6 +62,7 @@ enum {
62 NFS_LAYOUT_RW_FAILED, /* get rw layout failed stop trying */ 62 NFS_LAYOUT_RW_FAILED, /* get rw layout failed stop trying */
63 NFS_LAYOUT_BULK_RECALL, /* bulk recall affecting layout */ 63 NFS_LAYOUT_BULK_RECALL, /* bulk recall affecting layout */
64 NFS_LAYOUT_ROC, /* some lseg had roc bit set */ 64 NFS_LAYOUT_ROC, /* some lseg had roc bit set */
65 NFS_LAYOUT_RETURN, /* Return this layout ASAP */
65}; 66};
66 67
67enum layoutdriver_policy_flags { 68enum layoutdriver_policy_flags {
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index e831bce49766..652d3f7176a9 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -54,6 +54,7 @@
54#include <linux/parser.h> 54#include <linux/parser.h>
55#include <linux/nsproxy.h> 55#include <linux/nsproxy.h>
56#include <linux/rcupdate.h> 56#include <linux/rcupdate.h>
57#include <linux/kthread.h>
57 58
58#include <asm/uaccess.h> 59#include <asm/uaccess.h>
59 60
@@ -415,6 +416,54 @@ void nfs_sb_deactive(struct super_block *sb)
415} 416}
416EXPORT_SYMBOL_GPL(nfs_sb_deactive); 417EXPORT_SYMBOL_GPL(nfs_sb_deactive);
417 418
419static int nfs_deactivate_super_async_work(void *ptr)
420{
421 struct super_block *sb = ptr;
422
423 deactivate_super(sb);
424 module_put_and_exit(0);
425 return 0;
426}
427
428/*
429 * same effect as deactivate_super, but will do final unmount in kthread
430 * context
431 */
432static void nfs_deactivate_super_async(struct super_block *sb)
433{
434 struct task_struct *task;
435 char buf[INET6_ADDRSTRLEN + 1];
436 struct nfs_server *server = NFS_SB(sb);
437 struct nfs_client *clp = server->nfs_client;
438
439 if (!atomic_add_unless(&sb->s_active, -1, 1)) {
440 rcu_read_lock();
441 snprintf(buf, sizeof(buf),
442 rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR));
443 rcu_read_unlock();
444
445 __module_get(THIS_MODULE);
446 task = kthread_run(nfs_deactivate_super_async_work, sb,
447 "%s-deactivate-super", buf);
448 if (IS_ERR(task)) {
449 pr_err("%s: kthread_run: %ld\n",
450 __func__, PTR_ERR(task));
451 /* make synchronous call and hope for the best */
452 deactivate_super(sb);
453 module_put(THIS_MODULE);
454 }
455 }
456}
457
458void nfs_sb_deactive_async(struct super_block *sb)
459{
460 struct nfs_server *server = NFS_SB(sb);
461
462 if (atomic_dec_and_test(&server->active))
463 nfs_deactivate_super_async(sb);
464}
465EXPORT_SYMBOL_GPL(nfs_sb_deactive_async);
466
418/* 467/*
419 * Deliver file system statistics to userspace 468 * Deliver file system statistics to userspace
420 */ 469 */
@@ -771,7 +820,7 @@ int nfs_show_devname(struct seq_file *m, struct dentry *root)
771 int err = 0; 820 int err = 0;
772 if (!page) 821 if (!page)
773 return -ENOMEM; 822 return -ENOMEM;
774 devname = nfs_path(&dummy, root, page, PAGE_SIZE); 823 devname = nfs_path(&dummy, root, page, PAGE_SIZE, 0);
775 if (IS_ERR(devname)) 824 if (IS_ERR(devname))
776 err = PTR_ERR(devname); 825 err = PTR_ERR(devname);
777 else 826 else
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index 13cea637eff8..3f79c77153b8 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -95,7 +95,7 @@ static void nfs_async_unlink_release(void *calldata)
95 95
96 nfs_dec_sillycount(data->dir); 96 nfs_dec_sillycount(data->dir);
97 nfs_free_unlinkdata(data); 97 nfs_free_unlinkdata(data);
98 nfs_sb_deactive(sb); 98 nfs_sb_deactive_async(sb);
99} 99}
100 100
101static void nfs_unlink_prepare(struct rpc_task *task, void *calldata) 101static void nfs_unlink_prepare(struct rpc_task *task, void *calldata)
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index f35794b97e8e..a50636025364 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -21,6 +21,7 @@ static bool should_merge(struct fsnotify_event *old, struct fsnotify_event *new)
21 if ((old->path.mnt == new->path.mnt) && 21 if ((old->path.mnt == new->path.mnt) &&
22 (old->path.dentry == new->path.dentry)) 22 (old->path.dentry == new->path.dentry))
23 return true; 23 return true;
24 break;
24 case (FSNOTIFY_EVENT_NONE): 25 case (FSNOTIFY_EVENT_NONE):
25 return true; 26 return true;
26 default: 27 default:
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 721d692fa8d4..6fcaeb8c902e 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -258,7 +258,8 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
258 if (ret) 258 if (ret)
259 goto out_close_fd; 259 goto out_close_fd;
260 260
261 fd_install(fd, f); 261 if (fd != FAN_NOFD)
262 fd_install(fd, f);
262 return fanotify_event_metadata.event_len; 263 return fanotify_event_metadata.event_len;
263 264
264out_close_fd: 265out_close_fd:
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 144a96732dd7..9e28356a959a 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -873,6 +873,113 @@ static const struct file_operations proc_environ_operations = {
873 .release = mem_release, 873 .release = mem_release,
874}; 874};
875 875
876static ssize_t oom_adj_read(struct file *file, char __user *buf, size_t count,
877 loff_t *ppos)
878{
879 struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
880 char buffer[PROC_NUMBUF];
881 int oom_adj = OOM_ADJUST_MIN;
882 size_t len;
883 unsigned long flags;
884
885 if (!task)
886 return -ESRCH;
887 if (lock_task_sighand(task, &flags)) {
888 if (task->signal->oom_score_adj == OOM_SCORE_ADJ_MAX)
889 oom_adj = OOM_ADJUST_MAX;
890 else
891 oom_adj = (task->signal->oom_score_adj * -OOM_DISABLE) /
892 OOM_SCORE_ADJ_MAX;
893 unlock_task_sighand(task, &flags);
894 }
895 put_task_struct(task);
896 len = snprintf(buffer, sizeof(buffer), "%d\n", oom_adj);
897 return simple_read_from_buffer(buf, count, ppos, buffer, len);
898}
899
900static ssize_t oom_adj_write(struct file *file, const char __user *buf,
901 size_t count, loff_t *ppos)
902{
903 struct task_struct *task;
904 char buffer[PROC_NUMBUF];
905 int oom_adj;
906 unsigned long flags;
907 int err;
908
909 memset(buffer, 0, sizeof(buffer));
910 if (count > sizeof(buffer) - 1)
911 count = sizeof(buffer) - 1;
912 if (copy_from_user(buffer, buf, count)) {
913 err = -EFAULT;
914 goto out;
915 }
916
917 err = kstrtoint(strstrip(buffer), 0, &oom_adj);
918 if (err)
919 goto out;
920 if ((oom_adj < OOM_ADJUST_MIN || oom_adj > OOM_ADJUST_MAX) &&
921 oom_adj != OOM_DISABLE) {
922 err = -EINVAL;
923 goto out;
924 }
925
926 task = get_proc_task(file->f_path.dentry->d_inode);
927 if (!task) {
928 err = -ESRCH;
929 goto out;
930 }
931
932 task_lock(task);
933 if (!task->mm) {
934 err = -EINVAL;
935 goto err_task_lock;
936 }
937
938 if (!lock_task_sighand(task, &flags)) {
939 err = -ESRCH;
940 goto err_task_lock;
941 }
942
943 /*
944 * Scale /proc/pid/oom_score_adj appropriately ensuring that a maximum
945 * value is always attainable.
946 */
947 if (oom_adj == OOM_ADJUST_MAX)
948 oom_adj = OOM_SCORE_ADJ_MAX;
949 else
950 oom_adj = (oom_adj * OOM_SCORE_ADJ_MAX) / -OOM_DISABLE;
951
952 if (oom_adj < task->signal->oom_score_adj &&
953 !capable(CAP_SYS_RESOURCE)) {
954 err = -EACCES;
955 goto err_sighand;
956 }
957
958 /*
959 * /proc/pid/oom_adj is provided for legacy purposes, ask users to use
960 * /proc/pid/oom_score_adj instead.
961 */
962 printk_once(KERN_WARNING "%s (%d): /proc/%d/oom_adj is deprecated, please use /proc/%d/oom_score_adj instead.\n",
963 current->comm, task_pid_nr(current), task_pid_nr(task),
964 task_pid_nr(task));
965
966 task->signal->oom_score_adj = oom_adj;
967 trace_oom_score_adj_update(task);
968err_sighand:
969 unlock_task_sighand(task, &flags);
970err_task_lock:
971 task_unlock(task);
972 put_task_struct(task);
973out:
974 return err < 0 ? err : count;
975}
976
977static const struct file_operations proc_oom_adj_operations = {
978 .read = oom_adj_read,
979 .write = oom_adj_write,
980 .llseek = generic_file_llseek,
981};
982
876static ssize_t oom_score_adj_read(struct file *file, char __user *buf, 983static ssize_t oom_score_adj_read(struct file *file, char __user *buf,
877 size_t count, loff_t *ppos) 984 size_t count, loff_t *ppos)
878{ 985{
@@ -1770,8 +1877,9 @@ static struct dentry *proc_map_files_lookup(struct inode *dir,
1770 if (!vma) 1877 if (!vma)
1771 goto out_no_vma; 1878 goto out_no_vma;
1772 1879
1773 result = proc_map_files_instantiate(dir, dentry, task, 1880 if (vma->vm_file)
1774 (void *)(unsigned long)vma->vm_file->f_mode); 1881 result = proc_map_files_instantiate(dir, dentry, task,
1882 (void *)(unsigned long)vma->vm_file->f_mode);
1775 1883
1776out_no_vma: 1884out_no_vma:
1777 up_read(&mm->mmap_sem); 1885 up_read(&mm->mmap_sem);
@@ -2598,6 +2706,7 @@ static const struct pid_entry tgid_base_stuff[] = {
2598 REG("cgroup", S_IRUGO, proc_cgroup_operations), 2706 REG("cgroup", S_IRUGO, proc_cgroup_operations),
2599#endif 2707#endif
2600 INF("oom_score", S_IRUGO, proc_oom_score), 2708 INF("oom_score", S_IRUGO, proc_oom_score),
2709 REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations),
2601 REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), 2710 REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
2602#ifdef CONFIG_AUDITSYSCALL 2711#ifdef CONFIG_AUDITSYSCALL
2603 REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), 2712 REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations),
@@ -2964,6 +3073,7 @@ static const struct pid_entry tid_base_stuff[] = {
2964 REG("cgroup", S_IRUGO, proc_cgroup_operations), 3073 REG("cgroup", S_IRUGO, proc_cgroup_operations),
2965#endif 3074#endif
2966 INF("oom_score", S_IRUGO, proc_oom_score), 3075 INF("oom_score", S_IRUGO, proc_oom_score),
3076 REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations),
2967 REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), 3077 REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
2968#ifdef CONFIG_AUDITSYSCALL 3078#ifdef CONFIG_AUDITSYSCALL
2969 REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), 3079 REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations),
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index cceaab07ad54..43973b084abf 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -12,6 +12,7 @@
12#include <linux/sched.h> 12#include <linux/sched.h>
13#include <linux/proc_fs.h> 13#include <linux/proc_fs.h>
14struct ctl_table_header; 14struct ctl_table_header;
15struct mempolicy;
15 16
16extern struct proc_dir_entry proc_root; 17extern struct proc_dir_entry proc_root;
17#ifdef CONFIG_PROC_SYSCTL 18#ifdef CONFIG_PROC_SYSCTL
@@ -74,6 +75,9 @@ struct proc_maps_private {
74#ifdef CONFIG_MMU 75#ifdef CONFIG_MMU
75 struct vm_area_struct *tail_vma; 76 struct vm_area_struct *tail_vma;
76#endif 77#endif
78#ifdef CONFIG_NUMA
79 struct mempolicy *task_mempolicy;
80#endif
77}; 81};
78 82
79void proc_init_inodecache(void); 83void proc_init_inodecache(void);
diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index 64c3b3172367..e296572c73ed 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -45,10 +45,13 @@ static cputime64_t get_iowait_time(int cpu)
45 45
46static u64 get_idle_time(int cpu) 46static u64 get_idle_time(int cpu)
47{ 47{
48 u64 idle, idle_time = get_cpu_idle_time_us(cpu, NULL); 48 u64 idle, idle_time = -1ULL;
49
50 if (cpu_online(cpu))
51 idle_time = get_cpu_idle_time_us(cpu, NULL);
49 52
50 if (idle_time == -1ULL) 53 if (idle_time == -1ULL)
51 /* !NO_HZ so we can rely on cpustat.idle */ 54 /* !NO_HZ or cpu offline so we can rely on cpustat.idle */
52 idle = kcpustat_cpu(cpu).cpustat[CPUTIME_IDLE]; 55 idle = kcpustat_cpu(cpu).cpustat[CPUTIME_IDLE];
53 else 56 else
54 idle = usecs_to_cputime64(idle_time); 57 idle = usecs_to_cputime64(idle_time);
@@ -58,10 +61,13 @@ static u64 get_idle_time(int cpu)
58 61
59static u64 get_iowait_time(int cpu) 62static u64 get_iowait_time(int cpu)
60{ 63{
61 u64 iowait, iowait_time = get_cpu_iowait_time_us(cpu, NULL); 64 u64 iowait, iowait_time = -1ULL;
65
66 if (cpu_online(cpu))
67 iowait_time = get_cpu_iowait_time_us(cpu, NULL);
62 68
63 if (iowait_time == -1ULL) 69 if (iowait_time == -1ULL)
64 /* !NO_HZ so we can rely on cpustat.iowait */ 70 /* !NO_HZ or cpu offline so we can rely on cpustat.iowait */
65 iowait = kcpustat_cpu(cpu).cpustat[CPUTIME_IOWAIT]; 71 iowait = kcpustat_cpu(cpu).cpustat[CPUTIME_IOWAIT];
66 else 72 else
67 iowait = usecs_to_cputime64(iowait_time); 73 iowait = usecs_to_cputime64(iowait_time);
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 79827ce03e3b..90c63f9392a5 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -90,10 +90,55 @@ static void pad_len_spaces(struct seq_file *m, int len)
90 seq_printf(m, "%*c", len, ' '); 90 seq_printf(m, "%*c", len, ' ');
91} 91}
92 92
93#ifdef CONFIG_NUMA
94/*
95 * These functions are for numa_maps but called in generic **maps seq_file
96 * ->start(), ->stop() ops.
97 *
98 * numa_maps scans all vmas under mmap_sem and checks their mempolicy.
99 * Each mempolicy object is controlled by reference counting. The problem here
100 * is how to avoid accessing dead mempolicy object.
101 *
102 * Because we're holding mmap_sem while reading seq_file, it's safe to access
103 * each vma's mempolicy: no vma object will ever drop its ref to mempolicy.
104 *
105 * A task's mempolicy (task->mempolicy) has different behavior. task->mempolicy
106 * is set and replaced under mmap_sem but unrefed and cleared under task_lock().
107 * So, without task_lock(), we cannot trust get_vma_policy() because we cannot
108 * guarantee the task never exits under us. But taking task_lock() around
109 * get_vma_policy() causes a lock ordering problem.
110 *
111 * To access task->mempolicy without the lock, we take a reference on the
112 * object pointed to by task->mempolicy and remember it. This guarantees
113 * that task->mempolicy points to a live object or NULL in numa_maps accesses.
114 */
115static void hold_task_mempolicy(struct proc_maps_private *priv)
116{
117 struct task_struct *task = priv->task;
118
119 task_lock(task);
120 priv->task_mempolicy = task->mempolicy;
121 mpol_get(priv->task_mempolicy);
122 task_unlock(task);
123}
124static void release_task_mempolicy(struct proc_maps_private *priv)
125{
126 mpol_put(priv->task_mempolicy);
127}
128#else
129static void hold_task_mempolicy(struct proc_maps_private *priv)
130{
131}
132static void release_task_mempolicy(struct proc_maps_private *priv)
133{
134}
135#endif
136
93static void vma_stop(struct proc_maps_private *priv, struct vm_area_struct *vma) 137static void vma_stop(struct proc_maps_private *priv, struct vm_area_struct *vma)
94{ 138{
95 if (vma && vma != priv->tail_vma) { 139 if (vma && vma != priv->tail_vma) {
96 struct mm_struct *mm = vma->vm_mm; 140 struct mm_struct *mm = vma->vm_mm;
141 release_task_mempolicy(priv);
97 up_read(&mm->mmap_sem); 142 up_read(&mm->mmap_sem);
98 mmput(mm); 143 mmput(mm);
99 } 144 }
@@ -132,7 +177,7 @@ static void *m_start(struct seq_file *m, loff_t *pos)
132 177
133 tail_vma = get_gate_vma(priv->task->mm); 178 tail_vma = get_gate_vma(priv->task->mm);
134 priv->tail_vma = tail_vma; 179 priv->tail_vma = tail_vma;
135 180 hold_task_mempolicy(priv);
136 /* Start with last addr hint */ 181 /* Start with last addr hint */
137 vma = find_vma(mm, last_addr); 182 vma = find_vma(mm, last_addr);
138 if (last_addr && vma) { 183 if (last_addr && vma) {
@@ -159,6 +204,7 @@ out:
159 if (vma) 204 if (vma)
160 return vma; 205 return vma;
161 206
207 release_task_mempolicy(priv);
162 /* End of vmas has been reached */ 208 /* End of vmas has been reached */
163 m->version = (tail_vma != NULL)? 0: -1UL; 209 m->version = (tail_vma != NULL)? 0: -1UL;
164 up_read(&mm->mmap_sem); 210 up_read(&mm->mmap_sem);
@@ -1158,6 +1204,7 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid)
1158 struct vm_area_struct *vma = v; 1204 struct vm_area_struct *vma = v;
1159 struct numa_maps *md = &numa_priv->md; 1205 struct numa_maps *md = &numa_priv->md;
1160 struct file *file = vma->vm_file; 1206 struct file *file = vma->vm_file;
1207 struct task_struct *task = proc_priv->task;
1161 struct mm_struct *mm = vma->vm_mm; 1208 struct mm_struct *mm = vma->vm_mm;
1162 struct mm_walk walk = {}; 1209 struct mm_walk walk = {};
1163 struct mempolicy *pol; 1210 struct mempolicy *pol;
@@ -1177,7 +1224,7 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid)
1177 walk.private = md; 1224 walk.private = md;
1178 walk.mm = mm; 1225 walk.mm = mm;
1179 1226
1180 pol = get_vma_policy(proc_priv->task, vma, vma->vm_start); 1227 pol = get_vma_policy(task, vma, vma->vm_start);
1181 mpol_to_str(buffer, sizeof(buffer), pol, 0); 1228 mpol_to_str(buffer, sizeof(buffer), pol, 0);
1182 mpol_cond_put(pol); 1229 mpol_cond_put(pol);
1183 1230
@@ -1189,7 +1236,7 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid)
1189 } else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) { 1236 } else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) {
1190 seq_printf(m, " heap"); 1237 seq_printf(m, " heap");
1191 } else { 1238 } else {
1192 pid_t tid = vm_is_stack(proc_priv->task, vma, is_pid); 1239 pid_t tid = vm_is_stack(task, vma, is_pid);
1193 if (tid != 0) { 1240 if (tid != 0) {
1194 /* 1241 /*
1195 * Thread stack in /proc/PID/task/TID/maps or 1242 * Thread stack in /proc/PID/task/TID/maps or
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index a40da07e93d6..947fbe06c3b1 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -161,6 +161,7 @@ static void pstore_console_write(struct console *con, const char *s, unsigned c)
161 161
162 while (s < e) { 162 while (s < e) {
163 unsigned long flags; 163 unsigned long flags;
164 u64 id;
164 165
165 if (c > psinfo->bufsize) 166 if (c > psinfo->bufsize)
166 c = psinfo->bufsize; 167 c = psinfo->bufsize;
@@ -172,7 +173,7 @@ static void pstore_console_write(struct console *con, const char *s, unsigned c)
172 spin_lock_irqsave(&psinfo->buf_lock, flags); 173 spin_lock_irqsave(&psinfo->buf_lock, flags);
173 } 174 }
174 memcpy(psinfo->buf, s, c); 175 memcpy(psinfo->buf, s, c);
175 psinfo->write(PSTORE_TYPE_CONSOLE, 0, NULL, 0, c, psinfo); 176 psinfo->write(PSTORE_TYPE_CONSOLE, 0, &id, 0, c, psinfo);
176 spin_unlock_irqrestore(&psinfo->buf_lock, flags); 177 spin_unlock_irqrestore(&psinfo->buf_lock, flags);
177 s += c; 178 s += c;
178 c = e - s; 179 c = e - s;
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 557a9c20a215..05ae3c97f7a5 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -1160,6 +1160,8 @@ static int need_print_warning(struct dquot_warn *warn)
1160 return uid_eq(current_fsuid(), warn->w_dq_id.uid); 1160 return uid_eq(current_fsuid(), warn->w_dq_id.uid);
1161 case GRPQUOTA: 1161 case GRPQUOTA:
1162 return in_group_p(warn->w_dq_id.gid); 1162 return in_group_p(warn->w_dq_id.gid);
1163 case PRJQUOTA: /* Never taken... Just make gcc happy */
1164 return 0;
1163 } 1165 }
1164 return 0; 1166 return 0;
1165} 1167}
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index f27f01a98aa2..d83736fbc26c 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -1782,8 +1782,9 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
1782 1782
1783 BUG_ON(!th->t_trans_id); 1783 BUG_ON(!th->t_trans_id);
1784 1784
1785 dquot_initialize(inode); 1785 reiserfs_write_unlock(inode->i_sb);
1786 err = dquot_alloc_inode(inode); 1786 err = dquot_alloc_inode(inode);
1787 reiserfs_write_lock(inode->i_sb);
1787 if (err) 1788 if (err)
1788 goto out_end_trans; 1789 goto out_end_trans;
1789 if (!dir->i_nlink) { 1790 if (!dir->i_nlink) {
@@ -1979,8 +1980,10 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
1979 1980
1980 out_end_trans: 1981 out_end_trans:
1981 journal_end(th, th->t_super, th->t_blocks_allocated); 1982 journal_end(th, th->t_super, th->t_blocks_allocated);
1983 reiserfs_write_unlock(inode->i_sb);
1982 /* Drop can be outside and it needs more credits so it's better to have it outside */ 1984 /* Drop can be outside and it needs more credits so it's better to have it outside */
1983 dquot_drop(inode); 1985 dquot_drop(inode);
1986 reiserfs_write_lock(inode->i_sb);
1984 inode->i_flags |= S_NOQUOTA; 1987 inode->i_flags |= S_NOQUOTA;
1985 make_bad_inode(inode); 1988 make_bad_inode(inode);
1986 1989
@@ -3103,10 +3106,9 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
3103 /* must be turned off for recursive notify_change calls */ 3106 /* must be turned off for recursive notify_change calls */
3104 ia_valid = attr->ia_valid &= ~(ATTR_KILL_SUID|ATTR_KILL_SGID); 3107 ia_valid = attr->ia_valid &= ~(ATTR_KILL_SUID|ATTR_KILL_SGID);
3105 3108
3106 depth = reiserfs_write_lock_once(inode->i_sb);
3107 if (is_quota_modification(inode, attr)) 3109 if (is_quota_modification(inode, attr))
3108 dquot_initialize(inode); 3110 dquot_initialize(inode);
3109 3111 depth = reiserfs_write_lock_once(inode->i_sb);
3110 if (attr->ia_valid & ATTR_SIZE) { 3112 if (attr->ia_valid & ATTR_SIZE) {
3111 /* version 2 items will be caught by the s_maxbytes check 3113 /* version 2 items will be caught by the s_maxbytes check
3112 ** done for us in vmtruncate 3114 ** done for us in vmtruncate
@@ -3170,7 +3172,9 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
3170 error = journal_begin(&th, inode->i_sb, jbegin_count); 3172 error = journal_begin(&th, inode->i_sb, jbegin_count);
3171 if (error) 3173 if (error)
3172 goto out; 3174 goto out;
3175 reiserfs_write_unlock_once(inode->i_sb, depth);
3173 error = dquot_transfer(inode, attr); 3176 error = dquot_transfer(inode, attr);
3177 depth = reiserfs_write_lock_once(inode->i_sb);
3174 if (error) { 3178 if (error) {
3175 journal_end(&th, inode->i_sb, jbegin_count); 3179 journal_end(&th, inode->i_sb, jbegin_count);
3176 goto out; 3180 goto out;
diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c
index f8afa4b162b8..2f40a4c70a4d 100644
--- a/fs/reiserfs/stree.c
+++ b/fs/reiserfs/stree.c
@@ -1968,7 +1968,9 @@ int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th, struct tree
1968 key2type(&(key->on_disk_key))); 1968 key2type(&(key->on_disk_key)));
1969#endif 1969#endif
1970 1970
1971 reiserfs_write_unlock(inode->i_sb);
1971 retval = dquot_alloc_space_nodirty(inode, pasted_size); 1972 retval = dquot_alloc_space_nodirty(inode, pasted_size);
1973 reiserfs_write_lock(inode->i_sb);
1972 if (retval) { 1974 if (retval) {
1973 pathrelse(search_path); 1975 pathrelse(search_path);
1974 return retval; 1976 return retval;
@@ -2061,9 +2063,11 @@ int reiserfs_insert_item(struct reiserfs_transaction_handle *th,
2061 "reiserquota insert_item(): allocating %u id=%u type=%c", 2063 "reiserquota insert_item(): allocating %u id=%u type=%c",
2062 quota_bytes, inode->i_uid, head2type(ih)); 2064 quota_bytes, inode->i_uid, head2type(ih));
2063#endif 2065#endif
2066 reiserfs_write_unlock(inode->i_sb);
2064 /* We can't dirty inode here. It would be immediately written but 2067 /* We can't dirty inode here. It would be immediately written but
2065 * appropriate stat item isn't inserted yet... */ 2068 * appropriate stat item isn't inserted yet... */
2066 retval = dquot_alloc_space_nodirty(inode, quota_bytes); 2069 retval = dquot_alloc_space_nodirty(inode, quota_bytes);
2070 reiserfs_write_lock(inode->i_sb);
2067 if (retval) { 2071 if (retval) {
2068 pathrelse(path); 2072 pathrelse(path);
2069 return retval; 2073 return retval;
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 1078ae179993..418bdc3a57da 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -298,7 +298,9 @@ static int finish_unfinished(struct super_block *s)
298 retval = remove_save_link_only(s, &save_link_key, 0); 298 retval = remove_save_link_only(s, &save_link_key, 0);
299 continue; 299 continue;
300 } 300 }
301 reiserfs_write_unlock(s);
301 dquot_initialize(inode); 302 dquot_initialize(inode);
303 reiserfs_write_lock(s);
302 304
303 if (truncate && S_ISDIR(inode->i_mode)) { 305 if (truncate && S_ISDIR(inode->i_mode)) {
304 /* We got a truncate request for a dir which is impossible. 306 /* We got a truncate request for a dir which is impossible.
@@ -1335,7 +1337,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
1335 kfree(qf_names[i]); 1337 kfree(qf_names[i]);
1336#endif 1338#endif
1337 err = -EINVAL; 1339 err = -EINVAL;
1338 goto out_err; 1340 goto out_unlock;
1339 } 1341 }
1340#ifdef CONFIG_QUOTA 1342#ifdef CONFIG_QUOTA
1341 handle_quota_files(s, qf_names, &qfmt); 1343 handle_quota_files(s, qf_names, &qfmt);
@@ -1379,7 +1381,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
1379 if (blocks) { 1381 if (blocks) {
1380 err = reiserfs_resize(s, blocks); 1382 err = reiserfs_resize(s, blocks);
1381 if (err != 0) 1383 if (err != 0)
1382 goto out_err; 1384 goto out_unlock;
1383 } 1385 }
1384 1386
1385 if (*mount_flags & MS_RDONLY) { 1387 if (*mount_flags & MS_RDONLY) {
@@ -1389,9 +1391,15 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
1389 /* it is read-only already */ 1391 /* it is read-only already */
1390 goto out_ok; 1392 goto out_ok;
1391 1393
1394 /*
1395 * Drop write lock. Quota will retake it when needed and lock
1396 * ordering requires calling dquot_suspend() without it.
1397 */
1398 reiserfs_write_unlock(s);
1392 err = dquot_suspend(s, -1); 1399 err = dquot_suspend(s, -1);
1393 if (err < 0) 1400 if (err < 0)
1394 goto out_err; 1401 goto out_err;
1402 reiserfs_write_lock(s);
1395 1403
1396 /* try to remount file system with read-only permissions */ 1404 /* try to remount file system with read-only permissions */
1397 if (sb_umount_state(rs) == REISERFS_VALID_FS 1405 if (sb_umount_state(rs) == REISERFS_VALID_FS
@@ -1401,7 +1409,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
1401 1409
1402 err = journal_begin(&th, s, 10); 1410 err = journal_begin(&th, s, 10);
1403 if (err) 1411 if (err)
1404 goto out_err; 1412 goto out_unlock;
1405 1413
1406 /* Mounting a rw partition read-only. */ 1414 /* Mounting a rw partition read-only. */
1407 reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1); 1415 reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1);
@@ -1416,7 +1424,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
1416 1424
1417 if (reiserfs_is_journal_aborted(journal)) { 1425 if (reiserfs_is_journal_aborted(journal)) {
1418 err = journal->j_errno; 1426 err = journal->j_errno;
1419 goto out_err; 1427 goto out_unlock;
1420 } 1428 }
1421 1429
1422 handle_data_mode(s, mount_options); 1430 handle_data_mode(s, mount_options);
@@ -1425,7 +1433,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
1425 s->s_flags &= ~MS_RDONLY; /* now it is safe to call journal_begin */ 1433 s->s_flags &= ~MS_RDONLY; /* now it is safe to call journal_begin */
1426 err = journal_begin(&th, s, 10); 1434 err = journal_begin(&th, s, 10);
1427 if (err) 1435 if (err)
1428 goto out_err; 1436 goto out_unlock;
1429 1437
1430 /* Mount a partition which is read-only, read-write */ 1438 /* Mount a partition which is read-only, read-write */
1431 reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1); 1439 reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1);
@@ -1442,10 +1450,16 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
1442 SB_JOURNAL(s)->j_must_wait = 1; 1450 SB_JOURNAL(s)->j_must_wait = 1;
1443 err = journal_end(&th, s, 10); 1451 err = journal_end(&th, s, 10);
1444 if (err) 1452 if (err)
1445 goto out_err; 1453 goto out_unlock;
1446 1454
1447 if (!(*mount_flags & MS_RDONLY)) { 1455 if (!(*mount_flags & MS_RDONLY)) {
1456 /*
1457 * Drop write lock. Quota will retake it when needed and lock
1458 * ordering requires calling dquot_resume() without it.
1459 */
1460 reiserfs_write_unlock(s);
1448 dquot_resume(s, -1); 1461 dquot_resume(s, -1);
1462 reiserfs_write_lock(s);
1449 finish_unfinished(s); 1463 finish_unfinished(s);
1450 reiserfs_xattr_init(s, *mount_flags); 1464 reiserfs_xattr_init(s, *mount_flags);
1451 } 1465 }
@@ -1455,9 +1469,10 @@ out_ok:
1455 reiserfs_write_unlock(s); 1469 reiserfs_write_unlock(s);
1456 return 0; 1470 return 0;
1457 1471
1472out_unlock:
1473 reiserfs_write_unlock(s);
1458out_err: 1474out_err:
1459 kfree(new_opts); 1475 kfree(new_opts);
1460 reiserfs_write_unlock(s);
1461 return err; 1476 return err;
1462} 1477}
1463 1478
@@ -2095,13 +2110,15 @@ static int reiserfs_write_dquot(struct dquot *dquot)
2095 REISERFS_QUOTA_TRANS_BLOCKS(dquot->dq_sb)); 2110 REISERFS_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
2096 if (ret) 2111 if (ret)
2097 goto out; 2112 goto out;
2113 reiserfs_write_unlock(dquot->dq_sb);
2098 ret = dquot_commit(dquot); 2114 ret = dquot_commit(dquot);
2115 reiserfs_write_lock(dquot->dq_sb);
2099 err = 2116 err =
2100 journal_end(&th, dquot->dq_sb, 2117 journal_end(&th, dquot->dq_sb,
2101 REISERFS_QUOTA_TRANS_BLOCKS(dquot->dq_sb)); 2118 REISERFS_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
2102 if (!ret && err) 2119 if (!ret && err)
2103 ret = err; 2120 ret = err;
2104 out: 2121out:
2105 reiserfs_write_unlock(dquot->dq_sb); 2122 reiserfs_write_unlock(dquot->dq_sb);
2106 return ret; 2123 return ret;
2107} 2124}
@@ -2117,13 +2134,15 @@ static int reiserfs_acquire_dquot(struct dquot *dquot)
2117 REISERFS_QUOTA_INIT_BLOCKS(dquot->dq_sb)); 2134 REISERFS_QUOTA_INIT_BLOCKS(dquot->dq_sb));
2118 if (ret) 2135 if (ret)
2119 goto out; 2136 goto out;
2137 reiserfs_write_unlock(dquot->dq_sb);
2120 ret = dquot_acquire(dquot); 2138 ret = dquot_acquire(dquot);
2139 reiserfs_write_lock(dquot->dq_sb);
2121 err = 2140 err =
2122 journal_end(&th, dquot->dq_sb, 2141 journal_end(&th, dquot->dq_sb,
2123 REISERFS_QUOTA_INIT_BLOCKS(dquot->dq_sb)); 2142 REISERFS_QUOTA_INIT_BLOCKS(dquot->dq_sb));
2124 if (!ret && err) 2143 if (!ret && err)
2125 ret = err; 2144 ret = err;
2126 out: 2145out:
2127 reiserfs_write_unlock(dquot->dq_sb); 2146 reiserfs_write_unlock(dquot->dq_sb);
2128 return ret; 2147 return ret;
2129} 2148}
@@ -2137,19 +2156,21 @@ static int reiserfs_release_dquot(struct dquot *dquot)
2137 ret = 2156 ret =
2138 journal_begin(&th, dquot->dq_sb, 2157 journal_begin(&th, dquot->dq_sb,
2139 REISERFS_QUOTA_DEL_BLOCKS(dquot->dq_sb)); 2158 REISERFS_QUOTA_DEL_BLOCKS(dquot->dq_sb));
2159 reiserfs_write_unlock(dquot->dq_sb);
2140 if (ret) { 2160 if (ret) {
2141 /* Release dquot anyway to avoid endless cycle in dqput() */ 2161 /* Release dquot anyway to avoid endless cycle in dqput() */
2142 dquot_release(dquot); 2162 dquot_release(dquot);
2143 goto out; 2163 goto out;
2144 } 2164 }
2145 ret = dquot_release(dquot); 2165 ret = dquot_release(dquot);
2166 reiserfs_write_lock(dquot->dq_sb);
2146 err = 2167 err =
2147 journal_end(&th, dquot->dq_sb, 2168 journal_end(&th, dquot->dq_sb,
2148 REISERFS_QUOTA_DEL_BLOCKS(dquot->dq_sb)); 2169 REISERFS_QUOTA_DEL_BLOCKS(dquot->dq_sb));
2149 if (!ret && err) 2170 if (!ret && err)
2150 ret = err; 2171 ret = err;
2151 out:
2152 reiserfs_write_unlock(dquot->dq_sb); 2172 reiserfs_write_unlock(dquot->dq_sb);
2173out:
2153 return ret; 2174 return ret;
2154} 2175}
2155 2176
@@ -2174,11 +2195,13 @@ static int reiserfs_write_info(struct super_block *sb, int type)
2174 ret = journal_begin(&th, sb, 2); 2195 ret = journal_begin(&th, sb, 2);
2175 if (ret) 2196 if (ret)
2176 goto out; 2197 goto out;
2198 reiserfs_write_unlock(sb);
2177 ret = dquot_commit_info(sb, type); 2199 ret = dquot_commit_info(sb, type);
2200 reiserfs_write_lock(sb);
2178 err = journal_end(&th, sb, 2); 2201 err = journal_end(&th, sb, 2);
2179 if (!ret && err) 2202 if (!ret && err)
2180 ret = err; 2203 ret = err;
2181 out: 2204out:
2182 reiserfs_write_unlock(sb); 2205 reiserfs_write_unlock(sb);
2183 return ret; 2206 return ret;
2184} 2207}
@@ -2203,8 +2226,11 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
2203 struct reiserfs_transaction_handle th; 2226 struct reiserfs_transaction_handle th;
2204 int opt = type == USRQUOTA ? REISERFS_USRQUOTA : REISERFS_GRPQUOTA; 2227 int opt = type == USRQUOTA ? REISERFS_USRQUOTA : REISERFS_GRPQUOTA;
2205 2228
2206 if (!(REISERFS_SB(sb)->s_mount_opt & (1 << opt))) 2229 reiserfs_write_lock(sb);
2207 return -EINVAL; 2230 if (!(REISERFS_SB(sb)->s_mount_opt & (1 << opt))) {
2231 err = -EINVAL;
2232 goto out;
2233 }
2208 2234
2209 /* Quotafile not on the same filesystem? */ 2235 /* Quotafile not on the same filesystem? */
2210 if (path->dentry->d_sb != sb) { 2236 if (path->dentry->d_sb != sb) {
@@ -2246,8 +2272,10 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
2246 if (err) 2272 if (err)
2247 goto out; 2273 goto out;
2248 } 2274 }
2249 err = dquot_quota_on(sb, type, format_id, path); 2275 reiserfs_write_unlock(sb);
2276 return dquot_quota_on(sb, type, format_id, path);
2250out: 2277out:
2278 reiserfs_write_unlock(sb);
2251 return err; 2279 return err;
2252} 2280}
2253 2281
@@ -2320,7 +2348,9 @@ static ssize_t reiserfs_quota_write(struct super_block *sb, int type,
2320 tocopy = sb->s_blocksize - offset < towrite ? 2348 tocopy = sb->s_blocksize - offset < towrite ?
2321 sb->s_blocksize - offset : towrite; 2349 sb->s_blocksize - offset : towrite;
2322 tmp_bh.b_state = 0; 2350 tmp_bh.b_state = 0;
2351 reiserfs_write_lock(sb);
2323 err = reiserfs_get_block(inode, blk, &tmp_bh, GET_BLOCK_CREATE); 2352 err = reiserfs_get_block(inode, blk, &tmp_bh, GET_BLOCK_CREATE);
2353 reiserfs_write_unlock(sb);
2324 if (err) 2354 if (err)
2325 goto out; 2355 goto out;
2326 if (offset || tocopy != sb->s_blocksize) 2356 if (offset || tocopy != sb->s_blocksize)
@@ -2336,10 +2366,12 @@ static ssize_t reiserfs_quota_write(struct super_block *sb, int type,
2336 flush_dcache_page(bh->b_page); 2366 flush_dcache_page(bh->b_page);
2337 set_buffer_uptodate(bh); 2367 set_buffer_uptodate(bh);
2338 unlock_buffer(bh); 2368 unlock_buffer(bh);
2369 reiserfs_write_lock(sb);
2339 reiserfs_prepare_for_journal(sb, bh, 1); 2370 reiserfs_prepare_for_journal(sb, bh, 1);
2340 journal_mark_dirty(current->journal_info, sb, bh); 2371 journal_mark_dirty(current->journal_info, sb, bh);
2341 if (!journal_quota) 2372 if (!journal_quota)
2342 reiserfs_add_ordered_list(inode, bh); 2373 reiserfs_add_ordered_list(inode, bh);
2374 reiserfs_write_unlock(sb);
2343 brelse(bh); 2375 brelse(bh);
2344 offset = 0; 2376 offset = 0;
2345 towrite -= tocopy; 2377 towrite -= tocopy;
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 6b0bb00d4d2b..2fbdff6be25c 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -485,20 +485,18 @@ int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
485/** 485/**
486 * sysfs_pathname - return full path to sysfs dirent 486 * sysfs_pathname - return full path to sysfs dirent
487 * @sd: sysfs_dirent whose path we want 487 * @sd: sysfs_dirent whose path we want
488 * @path: caller allocated buffer 488 * @path: caller allocated buffer of size PATH_MAX
489 * 489 *
490 * Gives the name "/" to the sysfs_root entry; any path returned 490 * Gives the name "/" to the sysfs_root entry; any path returned
491 * is relative to wherever sysfs is mounted. 491 * is relative to wherever sysfs is mounted.
492 *
493 * XXX: does no error checking on @path size
494 */ 492 */
495static char *sysfs_pathname(struct sysfs_dirent *sd, char *path) 493static char *sysfs_pathname(struct sysfs_dirent *sd, char *path)
496{ 494{
497 if (sd->s_parent) { 495 if (sd->s_parent) {
498 sysfs_pathname(sd->s_parent, path); 496 sysfs_pathname(sd->s_parent, path);
499 strcat(path, "/"); 497 strlcat(path, "/", PATH_MAX);
500 } 498 }
501 strcat(path, sd->s_name); 499 strlcat(path, sd->s_name, PATH_MAX);
502 return path; 500 return path;
503} 501}
504 502
@@ -531,9 +529,11 @@ int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
531 char *path = kzalloc(PATH_MAX, GFP_KERNEL); 529 char *path = kzalloc(PATH_MAX, GFP_KERNEL);
532 WARN(1, KERN_WARNING 530 WARN(1, KERN_WARNING
533 "sysfs: cannot create duplicate filename '%s'\n", 531 "sysfs: cannot create duplicate filename '%s'\n",
534 (path == NULL) ? sd->s_name : 532 (path == NULL) ? sd->s_name
535 strcat(strcat(sysfs_pathname(acxt->parent_sd, path), "/"), 533 : (sysfs_pathname(acxt->parent_sd, path),
536 sd->s_name)); 534 strlcat(path, "/", PATH_MAX),
535 strlcat(path, sd->s_name, PATH_MAX),
536 path));
537 kfree(path); 537 kfree(path);
538 } 538 }
539 539
diff --git a/fs/ubifs/find.c b/fs/ubifs/find.c
index 28ec13af28d9..2dcf3d473fec 100644
--- a/fs/ubifs/find.c
+++ b/fs/ubifs/find.c
@@ -681,8 +681,16 @@ int ubifs_find_free_leb_for_idx(struct ubifs_info *c)
681 if (!lprops) { 681 if (!lprops) {
682 lprops = ubifs_fast_find_freeable(c); 682 lprops = ubifs_fast_find_freeable(c);
683 if (!lprops) { 683 if (!lprops) {
684 ubifs_assert(c->freeable_cnt == 0); 684 /*
685 if (c->lst.empty_lebs - c->lst.taken_empty_lebs > 0) { 685 * The first condition means the following: go scan the
686 * LPT if there are uncategorized lprops, which means
687 * there may be freeable LEBs there (UBIFS does not
688 * store the information about freeable LEBs in the
689 * master node).
690 */
691 if (c->in_a_category_cnt != c->main_lebs ||
692 c->lst.empty_lebs - c->lst.taken_empty_lebs > 0) {
693 ubifs_assert(c->freeable_cnt == 0);
686 lprops = scan_for_leb_for_idx(c); 694 lprops = scan_for_leb_for_idx(c);
687 if (IS_ERR(lprops)) { 695 if (IS_ERR(lprops)) {
688 err = PTR_ERR(lprops); 696 err = PTR_ERR(lprops);
diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c
index e5a2a35a46dc..46190a7c42a6 100644
--- a/fs/ubifs/lprops.c
+++ b/fs/ubifs/lprops.c
@@ -300,8 +300,11 @@ void ubifs_add_to_cat(struct ubifs_info *c, struct ubifs_lprops *lprops,
300 default: 300 default:
301 ubifs_assert(0); 301 ubifs_assert(0);
302 } 302 }
303
303 lprops->flags &= ~LPROPS_CAT_MASK; 304 lprops->flags &= ~LPROPS_CAT_MASK;
304 lprops->flags |= cat; 305 lprops->flags |= cat;
306 c->in_a_category_cnt += 1;
307 ubifs_assert(c->in_a_category_cnt <= c->main_lebs);
305} 308}
306 309
307/** 310/**
@@ -334,6 +337,9 @@ static void ubifs_remove_from_cat(struct ubifs_info *c,
334 default: 337 default:
335 ubifs_assert(0); 338 ubifs_assert(0);
336 } 339 }
340
341 c->in_a_category_cnt -= 1;
342 ubifs_assert(c->in_a_category_cnt >= 0);
337} 343}
338 344
339/** 345/**
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index 5486346d0a3f..d133c276fe05 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -1183,6 +1183,8 @@ struct ubifs_debug_info;
1183 * @freeable_list: list of freeable non-index LEBs (free + dirty == @leb_size) 1183 * @freeable_list: list of freeable non-index LEBs (free + dirty == @leb_size)
1184 * @frdi_idx_list: list of freeable index LEBs (free + dirty == @leb_size) 1184 * @frdi_idx_list: list of freeable index LEBs (free + dirty == @leb_size)
1185 * @freeable_cnt: number of freeable LEBs in @freeable_list 1185 * @freeable_cnt: number of freeable LEBs in @freeable_list
1186 * @in_a_category_cnt: count of lprops which are in a certain category, which
1187 * basically means that they were loaded from the flash
1186 * 1188 *
1187 * @ltab_lnum: LEB number of LPT's own lprops table 1189 * @ltab_lnum: LEB number of LPT's own lprops table
1188 * @ltab_offs: offset of LPT's own lprops table 1190 * @ltab_offs: offset of LPT's own lprops table
@@ -1412,6 +1414,7 @@ struct ubifs_info {
1412 struct list_head freeable_list; 1414 struct list_head freeable_list;
1413 struct list_head frdi_idx_list; 1415 struct list_head frdi_idx_list;
1414 int freeable_cnt; 1416 int freeable_cnt;
1417 int in_a_category_cnt;
1415 1418
1416 int ltab_lnum; 1419 int ltab_lnum;
1417 int ltab_offs; 1420 int ltab_offs;
diff --git a/fs/xattr.c b/fs/xattr.c
index e164dddb8e96..e21c119f4f99 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -846,7 +846,7 @@ static int __simple_xattr_set(struct simple_xattrs *xattrs, const char *name,
846 const void *value, size_t size, int flags) 846 const void *value, size_t size, int flags)
847{ 847{
848 struct simple_xattr *xattr; 848 struct simple_xattr *xattr;
849 struct simple_xattr *uninitialized_var(new_xattr); 849 struct simple_xattr *new_xattr = NULL;
850 int err = 0; 850 int err = 0;
851 851
852 /* value == NULL means remove */ 852 /* value == NULL means remove */
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index 4f33c32affe3..335206a9c698 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -1866,6 +1866,7 @@ xfs_alloc_fix_freelist(
1866 /* 1866 /*
1867 * Initialize the args structure. 1867 * Initialize the args structure.
1868 */ 1868 */
1869 memset(&targs, 0, sizeof(targs));
1869 targs.tp = tp; 1870 targs.tp = tp;
1870 targs.mp = mp; 1871 targs.mp = mp;
1871 targs.agbp = agbp; 1872 targs.agbp = agbp;
@@ -2207,7 +2208,7 @@ xfs_alloc_read_agf(
2207 * group or loop over the allocation groups to find the result. 2208 * group or loop over the allocation groups to find the result.
2208 */ 2209 */
2209int /* error */ 2210int /* error */
2210__xfs_alloc_vextent( 2211xfs_alloc_vextent(
2211 xfs_alloc_arg_t *args) /* allocation argument structure */ 2212 xfs_alloc_arg_t *args) /* allocation argument structure */
2212{ 2213{
2213 xfs_agblock_t agsize; /* allocation group size */ 2214 xfs_agblock_t agsize; /* allocation group size */
@@ -2417,46 +2418,6 @@ error0:
2417 return error; 2418 return error;
2418} 2419}
2419 2420
2420static void
2421xfs_alloc_vextent_worker(
2422 struct work_struct *work)
2423{
2424 struct xfs_alloc_arg *args = container_of(work,
2425 struct xfs_alloc_arg, work);
2426 unsigned long pflags;
2427
2428 /* we are in a transaction context here */
2429 current_set_flags_nested(&pflags, PF_FSTRANS);
2430
2431 args->result = __xfs_alloc_vextent(args);
2432 complete(args->done);
2433
2434 current_restore_flags_nested(&pflags, PF_FSTRANS);
2435}
2436
2437/*
2438 * Data allocation requests often come in with little stack to work on. Push
2439 * them off to a worker thread so there is lots of stack to use. Metadata
2440 * requests, OTOH, are generally from low stack usage paths, so avoid the
2441 * context switch overhead here.
2442 */
2443int
2444xfs_alloc_vextent(
2445 struct xfs_alloc_arg *args)
2446{
2447 DECLARE_COMPLETION_ONSTACK(done);
2448
2449 if (!args->userdata)
2450 return __xfs_alloc_vextent(args);
2451
2452
2453 args->done = &done;
2454 INIT_WORK_ONSTACK(&args->work, xfs_alloc_vextent_worker);
2455 queue_work(xfs_alloc_wq, &args->work);
2456 wait_for_completion(&done);
2457 return args->result;
2458}
2459
2460/* 2421/*
2461 * Free an extent. 2422 * Free an extent.
2462 * Just break up the extent address and hand off to xfs_free_ag_extent 2423 * Just break up the extent address and hand off to xfs_free_ag_extent
diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/xfs_alloc.h
index 93be4a667ca1..feacb061bab7 100644
--- a/fs/xfs/xfs_alloc.h
+++ b/fs/xfs/xfs_alloc.h
@@ -120,9 +120,6 @@ typedef struct xfs_alloc_arg {
120 char isfl; /* set if is freelist blocks - !acctg */ 120 char isfl; /* set if is freelist blocks - !acctg */
121 char userdata; /* set if this is user data */ 121 char userdata; /* set if this is user data */
122 xfs_fsblock_t firstblock; /* io first block allocated */ 122 xfs_fsblock_t firstblock; /* io first block allocated */
123 struct completion *done;
124 struct work_struct work;
125 int result;
126} xfs_alloc_arg_t; 123} xfs_alloc_arg_t;
127 124
128/* 125/*
diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c
index f1647caace8f..f7876c6d6165 100644
--- a/fs/xfs/xfs_alloc_btree.c
+++ b/fs/xfs/xfs_alloc_btree.c
@@ -121,6 +121,8 @@ xfs_allocbt_free_block(
121 xfs_extent_busy_insert(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1, 121 xfs_extent_busy_insert(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1,
122 XFS_EXTENT_BUSY_SKIP_DISCARD); 122 XFS_EXTENT_BUSY_SKIP_DISCARD);
123 xfs_trans_agbtree_delta(cur->bc_tp, -1); 123 xfs_trans_agbtree_delta(cur->bc_tp, -1);
124
125 xfs_trans_binval(cur->bc_tp, bp);
124 return 0; 126 return 0;
125} 127}
126 128
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index e562dd43f41f..e57e2daa357c 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -481,11 +481,17 @@ static inline int bio_add_buffer(struct bio *bio, struct buffer_head *bh)
481 * 481 *
482 * The fix is two passes across the ioend list - one to start writeback on the 482 * The fix is two passes across the ioend list - one to start writeback on the
483 * buffer_heads, and then submit them for I/O on the second pass. 483 * buffer_heads, and then submit them for I/O on the second pass.
484 *
485 * If @fail is non-zero, it means that we have a situation where some part of
1486 * the submission process has failed after we have marked pages for writeback
487 * and unlocked them. In this situation, we need to fail the ioend chain rather
488 * than submit it to IO. This typically only happens on a filesystem shutdown.
484 */ 489 */
485STATIC void 490STATIC void
486xfs_submit_ioend( 491xfs_submit_ioend(
487 struct writeback_control *wbc, 492 struct writeback_control *wbc,
488 xfs_ioend_t *ioend) 493 xfs_ioend_t *ioend,
494 int fail)
489{ 495{
490 xfs_ioend_t *head = ioend; 496 xfs_ioend_t *head = ioend;
491 xfs_ioend_t *next; 497 xfs_ioend_t *next;
@@ -506,6 +512,18 @@ xfs_submit_ioend(
506 next = ioend->io_list; 512 next = ioend->io_list;
507 bio = NULL; 513 bio = NULL;
508 514
515 /*
516 * If we are failing the IO now, just mark the ioend with an
517 * error and finish it. This will run IO completion immediately
518 * as there is only one reference to the ioend at this point in
519 * time.
520 */
521 if (fail) {
522 ioend->io_error = -fail;
523 xfs_finish_ioend(ioend);
524 continue;
525 }
526
509 for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) { 527 for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) {
510 528
511 if (!bio) { 529 if (!bio) {
@@ -1060,7 +1078,18 @@ xfs_vm_writepage(
1060 1078
1061 xfs_start_page_writeback(page, 1, count); 1079 xfs_start_page_writeback(page, 1, count);
1062 1080
1063 if (ioend && imap_valid) { 1081 /* if there is no IO to be submitted for this page, we are done */
1082 if (!ioend)
1083 return 0;
1084
1085 ASSERT(iohead);
1086
1087 /*
1088 * Any errors from this point onwards need to be reported through the IO
1089 * completion path as we have marked the initial page as under writeback
1090 * and unlocked it.
1091 */
1092 if (imap_valid) {
1064 xfs_off_t end_index; 1093 xfs_off_t end_index;
1065 1094
1066 end_index = imap.br_startoff + imap.br_blockcount; 1095 end_index = imap.br_startoff + imap.br_blockcount;
@@ -1079,20 +1108,15 @@ xfs_vm_writepage(
1079 wbc, end_index); 1108 wbc, end_index);
1080 } 1109 }
1081 1110
1082 if (iohead) {
1083 /*
1084 * Reserve log space if we might write beyond the on-disk
1085 * inode size.
1086 */
1087 if (ioend->io_type != XFS_IO_UNWRITTEN &&
1088 xfs_ioend_is_append(ioend)) {
1089 err = xfs_setfilesize_trans_alloc(ioend);
1090 if (err)
1091 goto error;
1092 }
1093 1111
1094 xfs_submit_ioend(wbc, iohead); 1112 /*
1095 } 1113 * Reserve log space if we might write beyond the on-disk inode size.
1114 */
1115 err = 0;
1116 if (ioend->io_type != XFS_IO_UNWRITTEN && xfs_ioend_is_append(ioend))
1117 err = xfs_setfilesize_trans_alloc(ioend);
1118
1119 xfs_submit_ioend(wbc, iohead, err);
1096 1120
1097 return 0; 1121 return 0;
1098 1122
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index d330111ca738..70eec1829776 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -1291,6 +1291,7 @@ xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
1291 leaf2 = blk2->bp->b_addr; 1291 leaf2 = blk2->bp->b_addr;
1292 ASSERT(leaf1->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); 1292 ASSERT(leaf1->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
1293 ASSERT(leaf2->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); 1293 ASSERT(leaf2->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
1294 ASSERT(leaf2->hdr.count == 0);
1294 args = state->args; 1295 args = state->args;
1295 1296
1296 trace_xfs_attr_leaf_rebalance(args); 1297 trace_xfs_attr_leaf_rebalance(args);
@@ -1361,6 +1362,7 @@ xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
1361 * I assert that since all callers pass in an empty 1362 * I assert that since all callers pass in an empty
1362 * second buffer, this code should never execute. 1363 * second buffer, this code should never execute.
1363 */ 1364 */
1365 ASSERT(0);
1364 1366
1365 /* 1367 /*
1366 * Figure the total bytes to be added to the destination leaf. 1368 * Figure the total bytes to be added to the destination leaf.
@@ -1422,10 +1424,24 @@ xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
1422 args->index2 = 0; 1424 args->index2 = 0;
1423 args->blkno2 = blk2->blkno; 1425 args->blkno2 = blk2->blkno;
1424 } else { 1426 } else {
1427 /*
1428 * On a double leaf split, the original attr location
1429 * is already stored in blkno2/index2, so don't
1430 * overwrite it, otherwise we corrupt the tree.
1431 */
1425 blk2->index = blk1->index 1432 blk2->index = blk1->index
1426 - be16_to_cpu(leaf1->hdr.count); 1433 - be16_to_cpu(leaf1->hdr.count);
1427 args->index = args->index2 = blk2->index; 1434 args->index = blk2->index;
1428 args->blkno = args->blkno2 = blk2->blkno; 1435 args->blkno = blk2->blkno;
1436 if (!state->extravalid) {
1437 /*
1438 * set the new attr location to match the old
1439 * one and let the higher level split code
1440 * decide where in the leaf to place it.
1441 */
1442 args->index2 = blk2->index;
1443 args->blkno2 = blk2->blkno;
1444 }
1429 } 1445 }
1430 } else { 1446 } else {
1431 ASSERT(state->inleaf == 1); 1447 ASSERT(state->inleaf == 1);
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 848ffa77707b..83d0cf3df930 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -2437,6 +2437,7 @@ xfs_bmap_btalloc(
2437 * Normal allocation, done through xfs_alloc_vextent. 2437 * Normal allocation, done through xfs_alloc_vextent.
2438 */ 2438 */
2439 tryagain = isaligned = 0; 2439 tryagain = isaligned = 0;
2440 memset(&args, 0, sizeof(args));
2440 args.tp = ap->tp; 2441 args.tp = ap->tp;
2441 args.mp = mp; 2442 args.mp = mp;
2442 args.fsbno = ap->blkno; 2443 args.fsbno = ap->blkno;
@@ -3082,6 +3083,7 @@ xfs_bmap_extents_to_btree(
3082 * Convert to a btree with two levels, one record in root. 3083 * Convert to a btree with two levels, one record in root.
3083 */ 3084 */
3084 XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_BTREE); 3085 XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_BTREE);
3086 memset(&args, 0, sizeof(args));
3085 args.tp = tp; 3087 args.tp = tp;
3086 args.mp = mp; 3088 args.mp = mp;
3087 args.firstblock = *firstblock; 3089 args.firstblock = *firstblock;
@@ -3237,6 +3239,7 @@ xfs_bmap_local_to_extents(
3237 xfs_buf_t *bp; /* buffer for extent block */ 3239 xfs_buf_t *bp; /* buffer for extent block */
3238 xfs_bmbt_rec_host_t *ep;/* extent record pointer */ 3240 xfs_bmbt_rec_host_t *ep;/* extent record pointer */
3239 3241
3242 memset(&args, 0, sizeof(args));
3240 args.tp = tp; 3243 args.tp = tp;
3241 args.mp = ip->i_mount; 3244 args.mp = ip->i_mount;
3242 args.firstblock = *firstblock; 3245 args.firstblock = *firstblock;
@@ -4616,12 +4619,11 @@ xfs_bmapi_delay(
4616 4619
4617 4620
4618STATIC int 4621STATIC int
4619xfs_bmapi_allocate( 4622__xfs_bmapi_allocate(
4620 struct xfs_bmalloca *bma, 4623 struct xfs_bmalloca *bma)
4621 int flags)
4622{ 4624{
4623 struct xfs_mount *mp = bma->ip->i_mount; 4625 struct xfs_mount *mp = bma->ip->i_mount;
4624 int whichfork = (flags & XFS_BMAPI_ATTRFORK) ? 4626 int whichfork = (bma->flags & XFS_BMAPI_ATTRFORK) ?
4625 XFS_ATTR_FORK : XFS_DATA_FORK; 4627 XFS_ATTR_FORK : XFS_DATA_FORK;
4626 struct xfs_ifork *ifp = XFS_IFORK_PTR(bma->ip, whichfork); 4628 struct xfs_ifork *ifp = XFS_IFORK_PTR(bma->ip, whichfork);
4627 int tmp_logflags = 0; 4629 int tmp_logflags = 0;
@@ -4654,24 +4656,27 @@ xfs_bmapi_allocate(
4654 * Indicate if this is the first user data in the file, or just any 4656 * Indicate if this is the first user data in the file, or just any
4655 * user data. 4657 * user data.
4656 */ 4658 */
4657 if (!(flags & XFS_BMAPI_METADATA)) { 4659 if (!(bma->flags & XFS_BMAPI_METADATA)) {
4658 bma->userdata = (bma->offset == 0) ? 4660 bma->userdata = (bma->offset == 0) ?
4659 XFS_ALLOC_INITIAL_USER_DATA : XFS_ALLOC_USERDATA; 4661 XFS_ALLOC_INITIAL_USER_DATA : XFS_ALLOC_USERDATA;
4660 } 4662 }
4661 4663
4662 bma->minlen = (flags & XFS_BMAPI_CONTIG) ? bma->length : 1; 4664 bma->minlen = (bma->flags & XFS_BMAPI_CONTIG) ? bma->length : 1;
4663 4665
4664 /* 4666 /*
4665 * Only want to do the alignment at the eof if it is userdata and 4667 * Only want to do the alignment at the eof if it is userdata and
4666 * allocation length is larger than a stripe unit. 4668 * allocation length is larger than a stripe unit.
4667 */ 4669 */
4668 if (mp->m_dalign && bma->length >= mp->m_dalign && 4670 if (mp->m_dalign && bma->length >= mp->m_dalign &&
4669 !(flags & XFS_BMAPI_METADATA) && whichfork == XFS_DATA_FORK) { 4671 !(bma->flags & XFS_BMAPI_METADATA) && whichfork == XFS_DATA_FORK) {
4670 error = xfs_bmap_isaeof(bma, whichfork); 4672 error = xfs_bmap_isaeof(bma, whichfork);
4671 if (error) 4673 if (error)
4672 return error; 4674 return error;
4673 } 4675 }
4674 4676
4677 if (bma->flags & XFS_BMAPI_STACK_SWITCH)
4678 bma->stack_switch = 1;
4679
4675 error = xfs_bmap_alloc(bma); 4680 error = xfs_bmap_alloc(bma);
4676 if (error) 4681 if (error)
4677 return error; 4682 return error;
@@ -4706,7 +4711,7 @@ xfs_bmapi_allocate(
4706 * A wasdelay extent has been initialized, so shouldn't be flagged 4711 * A wasdelay extent has been initialized, so shouldn't be flagged
4707 * as unwritten. 4712 * as unwritten.
4708 */ 4713 */
4709 if (!bma->wasdel && (flags & XFS_BMAPI_PREALLOC) && 4714 if (!bma->wasdel && (bma->flags & XFS_BMAPI_PREALLOC) &&
4710 xfs_sb_version_hasextflgbit(&mp->m_sb)) 4715 xfs_sb_version_hasextflgbit(&mp->m_sb))
4711 bma->got.br_state = XFS_EXT_UNWRITTEN; 4716 bma->got.br_state = XFS_EXT_UNWRITTEN;
4712 4717
@@ -4734,6 +4739,45 @@ xfs_bmapi_allocate(
4734 return 0; 4739 return 0;
4735} 4740}
4736 4741
4742static void
4743xfs_bmapi_allocate_worker(
4744 struct work_struct *work)
4745{
4746 struct xfs_bmalloca *args = container_of(work,
4747 struct xfs_bmalloca, work);
4748 unsigned long pflags;
4749
4750 /* we are in a transaction context here */
4751 current_set_flags_nested(&pflags, PF_FSTRANS);
4752
4753 args->result = __xfs_bmapi_allocate(args);
4754 complete(args->done);
4755
4756 current_restore_flags_nested(&pflags, PF_FSTRANS);
4757}
4758
4759/*
4760 * Some allocation requests often come in with little stack to work on. Push
4761 * them off to a worker thread so there is lots of stack to use. Otherwise just
4762 * call directly to avoid the context switch overhead here.
4763 */
4764int
4765xfs_bmapi_allocate(
4766 struct xfs_bmalloca *args)
4767{
4768 DECLARE_COMPLETION_ONSTACK(done);
4769
4770 if (!args->stack_switch)
4771 return __xfs_bmapi_allocate(args);
4772
4773
4774 args->done = &done;
4775 INIT_WORK_ONSTACK(&args->work, xfs_bmapi_allocate_worker);
4776 queue_work(xfs_alloc_wq, &args->work);
4777 wait_for_completion(&done);
4778 return args->result;
4779}
4780
4737STATIC int 4781STATIC int
4738xfs_bmapi_convert_unwritten( 4782xfs_bmapi_convert_unwritten(
4739 struct xfs_bmalloca *bma, 4783 struct xfs_bmalloca *bma,
@@ -4919,6 +4963,7 @@ xfs_bmapi_write(
4919 bma.conv = !!(flags & XFS_BMAPI_CONVERT); 4963 bma.conv = !!(flags & XFS_BMAPI_CONVERT);
4920 bma.wasdel = wasdelay; 4964 bma.wasdel = wasdelay;
4921 bma.offset = bno; 4965 bma.offset = bno;
4966 bma.flags = flags;
4922 4967
4923 /* 4968 /*
4924 * There's a 32/64 bit type mismatch between the 4969 * There's a 32/64 bit type mismatch between the
@@ -4934,7 +4979,7 @@ xfs_bmapi_write(
4934 4979
4935 ASSERT(len > 0); 4980 ASSERT(len > 0);
4936 ASSERT(bma.length > 0); 4981 ASSERT(bma.length > 0);
4937 error = xfs_bmapi_allocate(&bma, flags); 4982 error = xfs_bmapi_allocate(&bma);
4938 if (error) 4983 if (error)
4939 goto error0; 4984 goto error0;
4940 if (bma.blkno == NULLFSBLOCK) 4985 if (bma.blkno == NULLFSBLOCK)
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h
index 803b56d7ce16..5f469c3516eb 100644
--- a/fs/xfs/xfs_bmap.h
+++ b/fs/xfs/xfs_bmap.h
@@ -77,6 +77,7 @@ typedef struct xfs_bmap_free
77 * from written to unwritten, otherwise convert from unwritten to written. 77 * from written to unwritten, otherwise convert from unwritten to written.
78 */ 78 */
79#define XFS_BMAPI_CONVERT 0x040 79#define XFS_BMAPI_CONVERT 0x040
80#define XFS_BMAPI_STACK_SWITCH 0x080
80 81
81#define XFS_BMAPI_FLAGS \ 82#define XFS_BMAPI_FLAGS \
82 { XFS_BMAPI_ENTIRE, "ENTIRE" }, \ 83 { XFS_BMAPI_ENTIRE, "ENTIRE" }, \
@@ -85,7 +86,8 @@ typedef struct xfs_bmap_free
85 { XFS_BMAPI_PREALLOC, "PREALLOC" }, \ 86 { XFS_BMAPI_PREALLOC, "PREALLOC" }, \
86 { XFS_BMAPI_IGSTATE, "IGSTATE" }, \ 87 { XFS_BMAPI_IGSTATE, "IGSTATE" }, \
87 { XFS_BMAPI_CONTIG, "CONTIG" }, \ 88 { XFS_BMAPI_CONTIG, "CONTIG" }, \
88 { XFS_BMAPI_CONVERT, "CONVERT" } 89 { XFS_BMAPI_CONVERT, "CONVERT" }, \
90 { XFS_BMAPI_STACK_SWITCH, "STACK_SWITCH" }
89 91
90 92
91static inline int xfs_bmapi_aflag(int w) 93static inline int xfs_bmapi_aflag(int w)
@@ -133,6 +135,11 @@ typedef struct xfs_bmalloca {
133 char userdata;/* set if is user data */ 135 char userdata;/* set if is user data */
134 char aeof; /* allocated space at eof */ 136 char aeof; /* allocated space at eof */
135 char conv; /* overwriting unwritten extents */ 137 char conv; /* overwriting unwritten extents */
138 char stack_switch;
139 int flags;
140 struct completion *done;
141 struct work_struct work;
142 int result;
136} xfs_bmalloca_t; 143} xfs_bmalloca_t;
137 144
138/* 145/*
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 933b7930b863..4b0b8dd1b7b0 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1197,9 +1197,14 @@ xfs_buf_bio_end_io(
1197{ 1197{
1198 xfs_buf_t *bp = (xfs_buf_t *)bio->bi_private; 1198 xfs_buf_t *bp = (xfs_buf_t *)bio->bi_private;
1199 1199
1200 xfs_buf_ioerror(bp, -error); 1200 /*
1201 * don't overwrite existing errors - otherwise we can lose errors on
1202 * buffers that require multiple bios to complete.
1203 */
1204 if (!bp->b_error)
1205 xfs_buf_ioerror(bp, -error);
1201 1206
1202 if (!error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ)) 1207 if (!bp->b_error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ))
1203 invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp)); 1208 invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp));
1204 1209
1205 _xfs_buf_ioend(bp, 1); 1210 _xfs_buf_ioend(bp, 1);
@@ -1279,6 +1284,11 @@ next_chunk:
1279 if (size) 1284 if (size)
1280 goto next_chunk; 1285 goto next_chunk;
1281 } else { 1286 } else {
1287 /*
1288 * This is guaranteed not to be the last io reference count
1289 * because the caller (xfs_buf_iorequest) holds a count itself.
1290 */
1291 atomic_dec(&bp->b_io_remaining);
1282 xfs_buf_ioerror(bp, EIO); 1292 xfs_buf_ioerror(bp, EIO);
1283 bio_put(bio); 1293 bio_put(bio);
1284 } 1294 }
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index a8d0ed911196..becf4a97efc6 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -526,7 +526,25 @@ xfs_buf_item_unpin(
526 } 526 }
527 xfs_buf_relse(bp); 527 xfs_buf_relse(bp);
528 } else if (freed && remove) { 528 } else if (freed && remove) {
529 /*
530 * There are currently two references to the buffer - the active
531 * LRU reference and the buf log item. What we are about to do
532 * here - simulate a failed IO completion - requires 3
533 * references.
534 *
535 * The LRU reference is removed by the xfs_buf_stale() call. The
536 * buf item reference is removed by the xfs_buf_iodone()
537 * callback that is run by xfs_buf_do_callbacks() during ioend
538 * processing (via the bp->b_iodone callback), and then finally
539 * the ioend processing will drop the IO reference if the buffer
540 * is marked XBF_ASYNC.
541 *
542 * Hence we need to take an additional reference here so that IO
543 * completion processing doesn't free the buffer prematurely.
544 */
529 xfs_buf_lock(bp); 545 xfs_buf_lock(bp);
546 xfs_buf_hold(bp);
547 bp->b_flags |= XBF_ASYNC;
530 xfs_buf_ioerror(bp, EIO); 548 xfs_buf_ioerror(bp, EIO);
531 XFS_BUF_UNDONE(bp); 549 XFS_BUF_UNDONE(bp);
532 xfs_buf_stale(bp); 550 xfs_buf_stale(bp);
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index c25b094efbf7..4beaede43277 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -399,9 +399,26 @@ xfs_growfs_data_private(
399 399
400 /* update secondary superblocks. */ 400 /* update secondary superblocks. */
401 for (agno = 1; agno < nagcount; agno++) { 401 for (agno = 1; agno < nagcount; agno++) {
402 error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, 402 error = 0;
403 /*
404 * new secondary superblocks need to be zeroed, not read from
405 * disk as the contents of the new area we are growing into is
406 * completely unknown.
407 */
408 if (agno < oagcount) {
409 error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
403 XFS_AGB_TO_DADDR(mp, agno, XFS_SB_BLOCK(mp)), 410 XFS_AGB_TO_DADDR(mp, agno, XFS_SB_BLOCK(mp)),
404 XFS_FSS_TO_BB(mp, 1), 0, &bp); 411 XFS_FSS_TO_BB(mp, 1), 0, &bp);
412 } else {
413 bp = xfs_trans_get_buf(NULL, mp->m_ddev_targp,
414 XFS_AGB_TO_DADDR(mp, agno, XFS_SB_BLOCK(mp)),
415 XFS_FSS_TO_BB(mp, 1), 0);
416 if (bp)
417 xfs_buf_zero(bp, 0, BBTOB(bp->b_length));
418 else
419 error = ENOMEM;
420 }
421
405 if (error) { 422 if (error) {
406 xfs_warn(mp, 423 xfs_warn(mp,
407 "error %d reading secondary superblock for ag %d", 424 "error %d reading secondary superblock for ag %d",
@@ -423,7 +440,7 @@ xfs_growfs_data_private(
423 break; /* no point in continuing */ 440 break; /* no point in continuing */
424 } 441 }
425 } 442 }
426 return 0; 443 return error;
427 444
428 error0: 445 error0:
429 xfs_trans_cancel(tp, XFS_TRANS_ABORT); 446 xfs_trans_cancel(tp, XFS_TRANS_ABORT);
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index 445bf1aef31c..c5c4ef4f2bdb 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -250,6 +250,7 @@ xfs_ialloc_ag_alloc(
250 /* boundary */ 250 /* boundary */
251 struct xfs_perag *pag; 251 struct xfs_perag *pag;
252 252
253 memset(&args, 0, sizeof(args));
253 args.tp = tp; 254 args.tp = tp;
254 args.mp = tp->t_mountp; 255 args.mp = tp->t_mountp;
255 256
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 2778258fcfa2..1938b41ee9f5 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1509,7 +1509,8 @@ xfs_ifree_cluster(
1509 * to mark all the active inodes on the buffer stale. 1509 * to mark all the active inodes on the buffer stale.
1510 */ 1510 */
1511 bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno, 1511 bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno,
1512 mp->m_bsize * blks_per_cluster, 0); 1512 mp->m_bsize * blks_per_cluster,
1513 XBF_UNMAPPED);
1513 1514
1514 if (!bp) 1515 if (!bp)
1515 return ENOMEM; 1516 return ENOMEM;
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 8305f2ac6773..c1df3c623de2 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -70,7 +70,7 @@ xfs_find_handle(
70 int hsize; 70 int hsize;
71 xfs_handle_t handle; 71 xfs_handle_t handle;
72 struct inode *inode; 72 struct inode *inode;
73 struct fd f; 73 struct fd f = {0};
74 struct path path; 74 struct path path;
75 int error; 75 int error;
76 struct xfs_inode *ip; 76 struct xfs_inode *ip;
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 973dff6ad935..7f537663365b 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -584,7 +584,9 @@ xfs_iomap_write_allocate(
584 * pointer that the caller gave to us. 584 * pointer that the caller gave to us.
585 */ 585 */
586 error = xfs_bmapi_write(tp, ip, map_start_fsb, 586 error = xfs_bmapi_write(tp, ip, map_start_fsb,
587 count_fsb, 0, &first_block, 1, 587 count_fsb,
588 XFS_BMAPI_STACK_SWITCH,
589 &first_block, 1,
588 imap, &nimaps, &free_list); 590 imap, &nimaps, &free_list);
589 if (error) 591 if (error)
590 goto trans_cancel; 592 goto trans_cancel;
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 7f4f9370d0e7..4dad756962d0 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -2387,14 +2387,27 @@ xlog_state_do_callback(
2387 2387
2388 2388
2389 /* 2389 /*
2390 * update the last_sync_lsn before we drop the 2390 * Completion of an iclog IO does not imply that
2391 * a transaction has completed, as transactions
2392 * can be large enough to span many iclogs. We
2393 * cannot change the tail of the log half way
2394 * through a transaction as this may be the only
2395 * transaction in the log and moving the tail to
2396 * point to the middle of it will prevent
2397 * recovery from finding the start of the
2398 * transaction. Hence we should only update the
2399 * last_sync_lsn if this iclog contains
2400 * transaction completion callbacks on it.
2401 *
2402 * We have to do this before we drop the
2391 * icloglock to ensure we are the only one that 2403 * icloglock to ensure we are the only one that
2392 * can update it. 2404 * can update it.
2393 */ 2405 */
2394 ASSERT(XFS_LSN_CMP(atomic64_read(&log->l_last_sync_lsn), 2406 ASSERT(XFS_LSN_CMP(atomic64_read(&log->l_last_sync_lsn),
2395 be64_to_cpu(iclog->ic_header.h_lsn)) <= 0); 2407 be64_to_cpu(iclog->ic_header.h_lsn)) <= 0);
2396 atomic64_set(&log->l_last_sync_lsn, 2408 if (iclog->ic_callback)
2397 be64_to_cpu(iclog->ic_header.h_lsn)); 2409 atomic64_set(&log->l_last_sync_lsn,
2410 be64_to_cpu(iclog->ic_header.h_lsn));
2398 2411
2399 } else 2412 } else
2400 ioerrors++; 2413 ioerrors++;
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 5da3ace352bf..d308749fabf1 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -3541,7 +3541,7 @@ xlog_do_recovery_pass(
3541 * - order is important. 3541 * - order is important.
3542 */ 3542 */
3543 error = xlog_bread_offset(log, 0, 3543 error = xlog_bread_offset(log, 0,
3544 bblks - split_bblks, hbp, 3544 bblks - split_bblks, dbp,
3545 offset + BBTOB(split_bblks)); 3545 offset + BBTOB(split_bblks));
3546 if (error) 3546 if (error)
3547 goto bread_err2; 3547 goto bread_err2;