aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/afs/write.c1
-rw-r--r--fs/aio.c52
-rw-r--r--fs/bio.c4
-rw-r--r--fs/block_dev.c43
-rw-r--r--fs/btrfs/ctree.h3
-rw-r--r--fs/btrfs/extent-tree.c9
-rw-r--r--fs/btrfs/extent_io.c138
-rw-r--r--fs/btrfs/extent_io.h2
-rw-r--r--fs/btrfs/inode.c126
-rw-r--r--fs/btrfs/ioctl.c7
-rw-r--r--fs/btrfs/lzo.c21
-rw-r--r--fs/btrfs/relocation.c13
-rw-r--r--fs/btrfs/super.c7
-rw-r--r--fs/btrfs/volumes.c13
-rw-r--r--fs/eventpoll.c95
-rw-r--r--fs/fuse/dir.c7
-rw-r--r--fs/fuse/file.c52
-rw-r--r--fs/fuse/fuse_i.h6
-rw-r--r--fs/gfs2/main.c9
-rw-r--r--fs/inode.c31
-rw-r--r--fs/internal.h2
-rw-r--r--fs/namespace.c2
-rw-r--r--fs/nilfs2/btnode.c6
-rw-r--r--fs/nilfs2/btnode.h1
-rw-r--r--fs/nilfs2/mdt.c4
-rw-r--r--fs/nilfs2/page.c13
-rw-r--r--fs/nilfs2/page.h1
-rw-r--r--fs/nilfs2/super.c2
-rw-r--r--fs/ocfs2/journal.h6
-rw-r--r--fs/ocfs2/refcounttree.c7
-rw-r--r--fs/ocfs2/super.c28
-rw-r--r--fs/partitions/ldm.c5
-rw-r--r--fs/xfs/linux-2.6/xfs_discard.c2
-rw-r--r--fs/xfs/xfs_fsops.c3
34 files changed, 568 insertions, 153 deletions
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 15690bb1d3b5..789b3afb3423 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -140,6 +140,7 @@ int afs_write_begin(struct file *file, struct address_space *mapping,
140 candidate->first = candidate->last = index; 140 candidate->first = candidate->last = index;
141 candidate->offset_first = from; 141 candidate->offset_first = from;
142 candidate->to_last = to; 142 candidate->to_last = to;
143 INIT_LIST_HEAD(&candidate->link);
143 candidate->usage = 1; 144 candidate->usage = 1;
144 candidate->state = AFS_WBACK_PENDING; 145 candidate->state = AFS_WBACK_PENDING;
145 init_waitqueue_head(&candidate->waitq); 146 init_waitqueue_head(&candidate->waitq);
diff --git a/fs/aio.c b/fs/aio.c
index 020de5cb4a67..a936b7fe4f69 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -228,15 +228,23 @@ static void __put_ioctx(struct kioctx *ctx)
228 call_rcu(&ctx->rcu_head, ctx_rcu_free); 228 call_rcu(&ctx->rcu_head, ctx_rcu_free);
229} 229}
230 230
231#define get_ioctx(kioctx) do { \ 231static inline void get_ioctx(struct kioctx *kioctx)
232 BUG_ON(atomic_read(&(kioctx)->users) <= 0); \ 232{
233 atomic_inc(&(kioctx)->users); \ 233 BUG_ON(atomic_read(&kioctx->users) <= 0);
234} while (0) 234 atomic_inc(&kioctx->users);
235#define put_ioctx(kioctx) do { \ 235}
236 BUG_ON(atomic_read(&(kioctx)->users) <= 0); \ 236
237 if (unlikely(atomic_dec_and_test(&(kioctx)->users))) \ 237static inline int try_get_ioctx(struct kioctx *kioctx)
238 __put_ioctx(kioctx); \ 238{
239} while (0) 239 return atomic_inc_not_zero(&kioctx->users);
240}
241
242static inline void put_ioctx(struct kioctx *kioctx)
243{
244 BUG_ON(atomic_read(&kioctx->users) <= 0);
245 if (unlikely(atomic_dec_and_test(&kioctx->users)))
246 __put_ioctx(kioctx);
247}
240 248
241/* ioctx_alloc 249/* ioctx_alloc
242 * Allocates and initializes an ioctx. Returns an ERR_PTR if it failed. 250 * Allocates and initializes an ioctx. Returns an ERR_PTR if it failed.
@@ -590,8 +598,13 @@ static struct kioctx *lookup_ioctx(unsigned long ctx_id)
590 rcu_read_lock(); 598 rcu_read_lock();
591 599
592 hlist_for_each_entry_rcu(ctx, n, &mm->ioctx_list, list) { 600 hlist_for_each_entry_rcu(ctx, n, &mm->ioctx_list, list) {
593 if (ctx->user_id == ctx_id && !ctx->dead) { 601 /*
594 get_ioctx(ctx); 602 * RCU protects us against accessing freed memory but
603 * we have to be careful not to get a reference when the
604 * reference count already dropped to 0 (ctx->dead test
605 * is unreliable because of races).
606 */
607 if (ctx->user_id == ctx_id && !ctx->dead && try_get_ioctx(ctx)){
595 ret = ctx; 608 ret = ctx;
596 break; 609 break;
597 } 610 }
@@ -1569,6 +1582,23 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
1569 goto out_put_req; 1582 goto out_put_req;
1570 1583
1571 spin_lock_irq(&ctx->ctx_lock); 1584 spin_lock_irq(&ctx->ctx_lock);
1585 /*
1586 * We could have raced with io_destroy() and are currently holding a
1587 * reference to ctx which should be destroyed. We cannot submit IO
1588 * since ctx gets freed as soon as io_submit() puts its reference. The
1589 * check here is reliable: io_destroy() sets ctx->dead before waiting
1590 * for outstanding IO and the barrier between these two is realized by
1591 * unlock of mm->ioctx_lock and lock of ctx->ctx_lock. Analogously we
1592 * increment ctx->reqs_active before checking for ctx->dead and the
1593 * barrier is realized by unlock and lock of ctx->ctx_lock. Thus if we
1594 * don't see ctx->dead set here, io_destroy() waits for our IO to
1595 * finish.
1596 */
1597 if (ctx->dead) {
1598 spin_unlock_irq(&ctx->ctx_lock);
1599 ret = -EINVAL;
1600 goto out_put_req;
1601 }
1572 aio_run_iocb(req); 1602 aio_run_iocb(req);
1573 if (!list_empty(&ctx->run_list)) { 1603 if (!list_empty(&ctx->run_list)) {
1574 /* drain the run list */ 1604 /* drain the run list */
diff --git a/fs/bio.c b/fs/bio.c
index 4bd454fa844e..5694b756ed01 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -43,7 +43,7 @@ static mempool_t *bio_split_pool __read_mostly;
43 * unsigned short 43 * unsigned short
44 */ 44 */
45#define BV(x) { .nr_vecs = x, .name = "biovec-"__stringify(x) } 45#define BV(x) { .nr_vecs = x, .name = "biovec-"__stringify(x) }
46struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly = { 46static struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly = {
47 BV(1), BV(4), BV(16), BV(64), BV(128), BV(BIO_MAX_PAGES), 47 BV(1), BV(4), BV(16), BV(64), BV(128), BV(BIO_MAX_PAGES),
48}; 48};
49#undef BV 49#undef BV
@@ -1656,12 +1656,10 @@ static void __init biovec_init_slabs(void)
1656 int size; 1656 int size;
1657 struct biovec_slab *bvs = bvec_slabs + i; 1657 struct biovec_slab *bvs = bvec_slabs + i;
1658 1658
1659#ifndef CONFIG_BLK_DEV_INTEGRITY
1660 if (bvs->nr_vecs <= BIO_INLINE_VECS) { 1659 if (bvs->nr_vecs <= BIO_INLINE_VECS) {
1661 bvs->slab = NULL; 1660 bvs->slab = NULL;
1662 continue; 1661 continue;
1663 } 1662 }
1664#endif
1665 1663
1666 size = bvs->nr_vecs * sizeof(struct bio_vec); 1664 size = bvs->nr_vecs * sizeof(struct bio_vec);
1667 bvs->slab = kmem_cache_create(bvs->name, size, 0, 1665 bvs->slab = kmem_cache_create(bvs->name, size, 0,
diff --git a/fs/block_dev.c b/fs/block_dev.c
index fffc2c672396..fbe05cbdd692 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -873,6 +873,11 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
873 ret = add_symlink(bdev->bd_part->holder_dir, &disk_to_dev(disk)->kobj); 873 ret = add_symlink(bdev->bd_part->holder_dir, &disk_to_dev(disk)->kobj);
874 if (ret) 874 if (ret)
875 goto out_del; 875 goto out_del;
876 /*
877 * bdev could be deleted beneath us which would implicitly destroy
878 * the holder directory. Hold on to it.
879 */
880 kobject_get(bdev->bd_part->holder_dir);
876 881
877 list_add(&holder->list, &bdev->bd_holder_disks); 882 list_add(&holder->list, &bdev->bd_holder_disks);
878 goto out_unlock; 883 goto out_unlock;
@@ -909,6 +914,7 @@ void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk)
909 del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj); 914 del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
910 del_symlink(bdev->bd_part->holder_dir, 915 del_symlink(bdev->bd_part->holder_dir,
911 &disk_to_dev(disk)->kobj); 916 &disk_to_dev(disk)->kobj);
917 kobject_put(bdev->bd_part->holder_dir);
912 list_del_init(&holder->list); 918 list_del_init(&holder->list);
913 kfree(holder); 919 kfree(holder);
914 } 920 }
@@ -922,14 +928,15 @@ EXPORT_SYMBOL_GPL(bd_unlink_disk_holder);
922 * flush_disk - invalidates all buffer-cache entries on a disk 928 * flush_disk - invalidates all buffer-cache entries on a disk
923 * 929 *
924 * @bdev: struct block device to be flushed 930 * @bdev: struct block device to be flushed
931 * @kill_dirty: flag to guide handling of dirty inodes
925 * 932 *
926 * Invalidates all buffer-cache entries on a disk. It should be called 933 * Invalidates all buffer-cache entries on a disk. It should be called
927 * when a disk has been changed -- either by a media change or online 934 * when a disk has been changed -- either by a media change or online
928 * resize. 935 * resize.
929 */ 936 */
930static void flush_disk(struct block_device *bdev) 937static void flush_disk(struct block_device *bdev, bool kill_dirty)
931{ 938{
932 if (__invalidate_device(bdev)) { 939 if (__invalidate_device(bdev, kill_dirty)) {
933 char name[BDEVNAME_SIZE] = ""; 940 char name[BDEVNAME_SIZE] = "";
934 941
935 if (bdev->bd_disk) 942 if (bdev->bd_disk)
@@ -966,7 +973,7 @@ void check_disk_size_change(struct gendisk *disk, struct block_device *bdev)
966 "%s: detected capacity change from %lld to %lld\n", 973 "%s: detected capacity change from %lld to %lld\n",
967 name, bdev_size, disk_size); 974 name, bdev_size, disk_size);
968 i_size_write(bdev->bd_inode, disk_size); 975 i_size_write(bdev->bd_inode, disk_size);
969 flush_disk(bdev); 976 flush_disk(bdev, false);
970 } 977 }
971} 978}
972EXPORT_SYMBOL(check_disk_size_change); 979EXPORT_SYMBOL(check_disk_size_change);
@@ -1019,7 +1026,7 @@ int check_disk_change(struct block_device *bdev)
1019 if (!(events & DISK_EVENT_MEDIA_CHANGE)) 1026 if (!(events & DISK_EVENT_MEDIA_CHANGE))
1020 return 0; 1027 return 0;
1021 1028
1022 flush_disk(bdev); 1029 flush_disk(bdev, true);
1023 if (bdops->revalidate_disk) 1030 if (bdops->revalidate_disk)
1024 bdops->revalidate_disk(bdev->bd_disk); 1031 bdops->revalidate_disk(bdev->bd_disk);
1025 return 1; 1032 return 1;
@@ -1080,6 +1087,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
1080 if (!disk) 1087 if (!disk)
1081 goto out; 1088 goto out;
1082 1089
1090 disk_block_events(disk);
1083 mutex_lock_nested(&bdev->bd_mutex, for_part); 1091 mutex_lock_nested(&bdev->bd_mutex, for_part);
1084 if (!bdev->bd_openers) { 1092 if (!bdev->bd_openers) {
1085 bdev->bd_disk = disk; 1093 bdev->bd_disk = disk;
@@ -1101,10 +1109,11 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
1101 */ 1109 */
1102 disk_put_part(bdev->bd_part); 1110 disk_put_part(bdev->bd_part);
1103 bdev->bd_part = NULL; 1111 bdev->bd_part = NULL;
1104 module_put(disk->fops->owner);
1105 put_disk(disk);
1106 bdev->bd_disk = NULL; 1112 bdev->bd_disk = NULL;
1107 mutex_unlock(&bdev->bd_mutex); 1113 mutex_unlock(&bdev->bd_mutex);
1114 disk_unblock_events(disk);
1115 module_put(disk->fops->owner);
1116 put_disk(disk);
1108 goto restart; 1117 goto restart;
1109 } 1118 }
1110 if (ret) 1119 if (ret)
@@ -1141,9 +1150,6 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
1141 bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9); 1150 bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9);
1142 } 1151 }
1143 } else { 1152 } else {
1144 module_put(disk->fops->owner);
1145 put_disk(disk);
1146 disk = NULL;
1147 if (bdev->bd_contains == bdev) { 1153 if (bdev->bd_contains == bdev) {
1148 if (bdev->bd_disk->fops->open) { 1154 if (bdev->bd_disk->fops->open) {
1149 ret = bdev->bd_disk->fops->open(bdev, mode); 1155 ret = bdev->bd_disk->fops->open(bdev, mode);
@@ -1153,11 +1159,15 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
1153 if (bdev->bd_invalidated) 1159 if (bdev->bd_invalidated)
1154 rescan_partitions(bdev->bd_disk, bdev); 1160 rescan_partitions(bdev->bd_disk, bdev);
1155 } 1161 }
1162 /* only one opener holds refs to the module and disk */
1163 module_put(disk->fops->owner);
1164 put_disk(disk);
1156 } 1165 }
1157 bdev->bd_openers++; 1166 bdev->bd_openers++;
1158 if (for_part) 1167 if (for_part)
1159 bdev->bd_part_count++; 1168 bdev->bd_part_count++;
1160 mutex_unlock(&bdev->bd_mutex); 1169 mutex_unlock(&bdev->bd_mutex);
1170 disk_unblock_events(disk);
1161 return 0; 1171 return 0;
1162 1172
1163 out_clear: 1173 out_clear:
@@ -1170,9 +1180,9 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
1170 bdev->bd_contains = NULL; 1180 bdev->bd_contains = NULL;
1171 out_unlock_bdev: 1181 out_unlock_bdev:
1172 mutex_unlock(&bdev->bd_mutex); 1182 mutex_unlock(&bdev->bd_mutex);
1183 disk_unblock_events(disk);
1173 out: 1184 out:
1174 if (disk) 1185 module_put(disk->fops->owner);
1175 module_put(disk->fops->owner);
1176 put_disk(disk); 1186 put_disk(disk);
1177 bdput(bdev); 1187 bdput(bdev);
1178 1188
@@ -1439,14 +1449,13 @@ int blkdev_put(struct block_device *bdev, fmode_t mode)
1439 if (bdev_free) { 1449 if (bdev_free) {
1440 if (bdev->bd_write_holder) { 1450 if (bdev->bd_write_holder) {
1441 disk_unblock_events(bdev->bd_disk); 1451 disk_unblock_events(bdev->bd_disk);
1442 bdev->bd_write_holder = false;
1443 } else
1444 disk_check_events(bdev->bd_disk); 1452 disk_check_events(bdev->bd_disk);
1453 bdev->bd_write_holder = false;
1454 }
1445 } 1455 }
1446 1456
1447 mutex_unlock(&bdev->bd_mutex); 1457 mutex_unlock(&bdev->bd_mutex);
1448 } else 1458 }
1449 disk_check_events(bdev->bd_disk);
1450 1459
1451 return __blkdev_put(bdev, mode, 0); 1460 return __blkdev_put(bdev, mode, 0);
1452} 1461}
@@ -1599,7 +1608,7 @@ fail:
1599} 1608}
1600EXPORT_SYMBOL(lookup_bdev); 1609EXPORT_SYMBOL(lookup_bdev);
1601 1610
1602int __invalidate_device(struct block_device *bdev) 1611int __invalidate_device(struct block_device *bdev, bool kill_dirty)
1603{ 1612{
1604 struct super_block *sb = get_super(bdev); 1613 struct super_block *sb = get_super(bdev);
1605 int res = 0; 1614 int res = 0;
@@ -1612,7 +1621,7 @@ int __invalidate_device(struct block_device *bdev)
1612 * hold). 1621 * hold).
1613 */ 1622 */
1614 shrink_dcache_sb(sb); 1623 shrink_dcache_sb(sb);
1615 res = invalidate_inodes(sb); 1624 res = invalidate_inodes(sb, kill_dirty);
1616 drop_super(sb); 1625 drop_super(sb);
1617 } 1626 }
1618 invalidate_bdev(bdev); 1627 invalidate_bdev(bdev);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 2c98b3af6052..6f820fa23df4 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1254,6 +1254,7 @@ struct btrfs_root {
1254#define BTRFS_MOUNT_SPACE_CACHE (1 << 12) 1254#define BTRFS_MOUNT_SPACE_CACHE (1 << 12)
1255#define BTRFS_MOUNT_CLEAR_CACHE (1 << 13) 1255#define BTRFS_MOUNT_CLEAR_CACHE (1 << 13)
1256#define BTRFS_MOUNT_USER_SUBVOL_RM_ALLOWED (1 << 14) 1256#define BTRFS_MOUNT_USER_SUBVOL_RM_ALLOWED (1 << 14)
1257#define BTRFS_MOUNT_ENOSPC_DEBUG (1 << 15)
1257 1258
1258#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) 1259#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt)
1259#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) 1260#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt)
@@ -2218,6 +2219,8 @@ int btrfs_error_unpin_extent_range(struct btrfs_root *root,
2218 u64 start, u64 end); 2219 u64 start, u64 end);
2219int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr, 2220int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr,
2220 u64 num_bytes); 2221 u64 num_bytes);
2222int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
2223 struct btrfs_root *root, u64 type);
2221 2224
2222/* ctree.c */ 2225/* ctree.c */
2223int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, 2226int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index f3c96fc01439..588ff9849873 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -5376,7 +5376,7 @@ again:
5376 num_bytes, data, 1); 5376 num_bytes, data, 1);
5377 goto again; 5377 goto again;
5378 } 5378 }
5379 if (ret == -ENOSPC) { 5379 if (ret == -ENOSPC && btrfs_test_opt(root, ENOSPC_DEBUG)) {
5380 struct btrfs_space_info *sinfo; 5380 struct btrfs_space_info *sinfo;
5381 5381
5382 sinfo = __find_space_info(root->fs_info, data); 5382 sinfo = __find_space_info(root->fs_info, data);
@@ -8065,6 +8065,13 @@ out:
8065 return ret; 8065 return ret;
8066} 8066}
8067 8067
8068int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
8069 struct btrfs_root *root, u64 type)
8070{
8071 u64 alloc_flags = get_alloc_profile(root, type);
8072 return do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1);
8073}
8074
8068/* 8075/*
8069 * helper to account the unused space of all the readonly block group in the 8076 * helper to account the unused space of all the readonly block group in the
8070 * list. takes mirrors into account. 8077 * list. takes mirrors into account.
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index b76f7cd47401..00497d551e51 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1433,12 +1433,13 @@ int extent_clear_unlock_delalloc(struct inode *inode,
1433 */ 1433 */
1434u64 count_range_bits(struct extent_io_tree *tree, 1434u64 count_range_bits(struct extent_io_tree *tree,
1435 u64 *start, u64 search_end, u64 max_bytes, 1435 u64 *start, u64 search_end, u64 max_bytes,
1436 unsigned long bits) 1436 unsigned long bits, int contig)
1437{ 1437{
1438 struct rb_node *node; 1438 struct rb_node *node;
1439 struct extent_state *state; 1439 struct extent_state *state;
1440 u64 cur_start = *start; 1440 u64 cur_start = *start;
1441 u64 total_bytes = 0; 1441 u64 total_bytes = 0;
1442 u64 last = 0;
1442 int found = 0; 1443 int found = 0;
1443 1444
1444 if (search_end <= cur_start) { 1445 if (search_end <= cur_start) {
@@ -1463,7 +1464,9 @@ u64 count_range_bits(struct extent_io_tree *tree,
1463 state = rb_entry(node, struct extent_state, rb_node); 1464 state = rb_entry(node, struct extent_state, rb_node);
1464 if (state->start > search_end) 1465 if (state->start > search_end)
1465 break; 1466 break;
1466 if (state->end >= cur_start && (state->state & bits)) { 1467 if (contig && found && state->start > last + 1)
1468 break;
1469 if (state->end >= cur_start && (state->state & bits) == bits) {
1467 total_bytes += min(search_end, state->end) + 1 - 1470 total_bytes += min(search_end, state->end) + 1 -
1468 max(cur_start, state->start); 1471 max(cur_start, state->start);
1469 if (total_bytes >= max_bytes) 1472 if (total_bytes >= max_bytes)
@@ -1472,6 +1475,9 @@ u64 count_range_bits(struct extent_io_tree *tree,
1472 *start = state->start; 1475 *start = state->start;
1473 found = 1; 1476 found = 1;
1474 } 1477 }
1478 last = state->end;
1479 } else if (contig && found) {
1480 break;
1475 } 1481 }
1476 node = rb_next(node); 1482 node = rb_next(node);
1477 if (!node) 1483 if (!node)
@@ -2912,6 +2918,46 @@ out:
2912 return sector; 2918 return sector;
2913} 2919}
2914 2920
2921/*
2922 * helper function for fiemap, which doesn't want to see any holes.
2923 * This maps until we find something past 'last'
2924 */
2925static struct extent_map *get_extent_skip_holes(struct inode *inode,
2926 u64 offset,
2927 u64 last,
2928 get_extent_t *get_extent)
2929{
2930 u64 sectorsize = BTRFS_I(inode)->root->sectorsize;
2931 struct extent_map *em;
2932 u64 len;
2933
2934 if (offset >= last)
2935 return NULL;
2936
2937 while(1) {
2938 len = last - offset;
2939 if (len == 0)
2940 break;
2941 len = (len + sectorsize - 1) & ~(sectorsize - 1);
2942 em = get_extent(inode, NULL, 0, offset, len, 0);
2943 if (!em || IS_ERR(em))
2944 return em;
2945
2946 /* if this isn't a hole return it */
2947 if (!test_bit(EXTENT_FLAG_VACANCY, &em->flags) &&
2948 em->block_start != EXTENT_MAP_HOLE) {
2949 return em;
2950 }
2951
2952 /* this is a hole, advance to the next extent */
2953 offset = extent_map_end(em);
2954 free_extent_map(em);
2955 if (offset >= last)
2956 break;
2957 }
2958 return NULL;
2959}
2960
2915int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 2961int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2916 __u64 start, __u64 len, get_extent_t *get_extent) 2962 __u64 start, __u64 len, get_extent_t *get_extent)
2917{ 2963{
@@ -2921,16 +2967,19 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2921 u32 flags = 0; 2967 u32 flags = 0;
2922 u32 found_type; 2968 u32 found_type;
2923 u64 last; 2969 u64 last;
2970 u64 last_for_get_extent = 0;
2924 u64 disko = 0; 2971 u64 disko = 0;
2972 u64 isize = i_size_read(inode);
2925 struct btrfs_key found_key; 2973 struct btrfs_key found_key;
2926 struct extent_map *em = NULL; 2974 struct extent_map *em = NULL;
2927 struct extent_state *cached_state = NULL; 2975 struct extent_state *cached_state = NULL;
2928 struct btrfs_path *path; 2976 struct btrfs_path *path;
2929 struct btrfs_file_extent_item *item; 2977 struct btrfs_file_extent_item *item;
2930 int end = 0; 2978 int end = 0;
2931 u64 em_start = 0, em_len = 0; 2979 u64 em_start = 0;
2980 u64 em_len = 0;
2981 u64 em_end = 0;
2932 unsigned long emflags; 2982 unsigned long emflags;
2933 int hole = 0;
2934 2983
2935 if (len == 0) 2984 if (len == 0)
2936 return -EINVAL; 2985 return -EINVAL;
@@ -2940,6 +2989,10 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2940 return -ENOMEM; 2989 return -ENOMEM;
2941 path->leave_spinning = 1; 2990 path->leave_spinning = 1;
2942 2991
2992 /*
2993 * lookup the last file extent. We're not using i_size here
2994 * because there might be preallocation past i_size
2995 */
2943 ret = btrfs_lookup_file_extent(NULL, BTRFS_I(inode)->root, 2996 ret = btrfs_lookup_file_extent(NULL, BTRFS_I(inode)->root,
2944 path, inode->i_ino, -1, 0); 2997 path, inode->i_ino, -1, 0);
2945 if (ret < 0) { 2998 if (ret < 0) {
@@ -2953,18 +3006,38 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2953 btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]); 3006 btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]);
2954 found_type = btrfs_key_type(&found_key); 3007 found_type = btrfs_key_type(&found_key);
2955 3008
2956 /* No extents, just return */ 3009 /* No extents, but there might be delalloc bits */
2957 if (found_key.objectid != inode->i_ino || 3010 if (found_key.objectid != inode->i_ino ||
2958 found_type != BTRFS_EXTENT_DATA_KEY) { 3011 found_type != BTRFS_EXTENT_DATA_KEY) {
2959 btrfs_free_path(path); 3012 /* have to trust i_size as the end */
2960 return 0; 3013 last = (u64)-1;
3014 last_for_get_extent = isize;
3015 } else {
3016 /*
3017 * remember the start of the last extent. There are a
3018 * bunch of different factors that go into the length of the
3019 * extent, so its much less complex to remember where it started
3020 */
3021 last = found_key.offset;
3022 last_for_get_extent = last + 1;
2961 } 3023 }
2962 last = found_key.offset;
2963 btrfs_free_path(path); 3024 btrfs_free_path(path);
2964 3025
3026 /*
3027 * we might have some extents allocated but more delalloc past those
3028 * extents. so, we trust isize unless the start of the last extent is
3029 * beyond isize
3030 */
3031 if (last < isize) {
3032 last = (u64)-1;
3033 last_for_get_extent = isize;
3034 }
3035
2965 lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0, 3036 lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0,
2966 &cached_state, GFP_NOFS); 3037 &cached_state, GFP_NOFS);
2967 em = get_extent(inode, NULL, 0, off, max - off, 0); 3038
3039 em = get_extent_skip_holes(inode, off, last_for_get_extent,
3040 get_extent);
2968 if (!em) 3041 if (!em)
2969 goto out; 3042 goto out;
2970 if (IS_ERR(em)) { 3043 if (IS_ERR(em)) {
@@ -2973,19 +3046,14 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2973 } 3046 }
2974 3047
2975 while (!end) { 3048 while (!end) {
2976 hole = 0; 3049 off = extent_map_end(em);
2977 off = em->start + em->len;
2978 if (off >= max) 3050 if (off >= max)
2979 end = 1; 3051 end = 1;
2980 3052
2981 if (em->block_start == EXTENT_MAP_HOLE) {
2982 hole = 1;
2983 goto next;
2984 }
2985
2986 em_start = em->start; 3053 em_start = em->start;
2987 em_len = em->len; 3054 em_len = em->len;
2988 3055 em_end = extent_map_end(em);
3056 emflags = em->flags;
2989 disko = 0; 3057 disko = 0;
2990 flags = 0; 3058 flags = 0;
2991 3059
@@ -3004,37 +3072,29 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
3004 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) 3072 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
3005 flags |= FIEMAP_EXTENT_ENCODED; 3073 flags |= FIEMAP_EXTENT_ENCODED;
3006 3074
3007next:
3008 emflags = em->flags;
3009 free_extent_map(em); 3075 free_extent_map(em);
3010 em = NULL; 3076 em = NULL;
3011 if (!end) { 3077 if ((em_start >= last) || em_len == (u64)-1 ||
3012 em = get_extent(inode, NULL, 0, off, max - off, 0); 3078 (last == (u64)-1 && isize <= em_end)) {
3013 if (!em)
3014 goto out;
3015 if (IS_ERR(em)) {
3016 ret = PTR_ERR(em);
3017 goto out;
3018 }
3019 emflags = em->flags;
3020 }
3021
3022 if (test_bit(EXTENT_FLAG_VACANCY, &emflags)) {
3023 flags |= FIEMAP_EXTENT_LAST; 3079 flags |= FIEMAP_EXTENT_LAST;
3024 end = 1; 3080 end = 1;
3025 } 3081 }
3026 3082
3027 if (em_start == last) { 3083 /* now scan forward to see if this is really the last extent. */
3084 em = get_extent_skip_holes(inode, off, last_for_get_extent,
3085 get_extent);
3086 if (IS_ERR(em)) {
3087 ret = PTR_ERR(em);
3088 goto out;
3089 }
3090 if (!em) {
3028 flags |= FIEMAP_EXTENT_LAST; 3091 flags |= FIEMAP_EXTENT_LAST;
3029 end = 1; 3092 end = 1;
3030 } 3093 }
3031 3094 ret = fiemap_fill_next_extent(fieinfo, em_start, disko,
3032 if (!hole) { 3095 em_len, flags);
3033 ret = fiemap_fill_next_extent(fieinfo, em_start, disko, 3096 if (ret)
3034 em_len, flags); 3097 goto out_free;
3035 if (ret)
3036 goto out_free;
3037 }
3038 } 3098 }
3039out_free: 3099out_free:
3040 free_extent_map(em); 3100 free_extent_map(em);
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 7083cfafd061..9318dfefd59c 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -191,7 +191,7 @@ void extent_io_exit(void);
191 191
192u64 count_range_bits(struct extent_io_tree *tree, 192u64 count_range_bits(struct extent_io_tree *tree,
193 u64 *start, u64 search_end, 193 u64 *start, u64 search_end,
194 u64 max_bytes, unsigned long bits); 194 u64 max_bytes, unsigned long bits, int contig);
195 195
196void free_extent_state(struct extent_state *state); 196void free_extent_state(struct extent_state *state);
197int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, 197int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 462e08e724b0..02438c917923 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1913,7 +1913,7 @@ static int btrfs_clean_io_failures(struct inode *inode, u64 start)
1913 1913
1914 private = 0; 1914 private = 0;
1915 if (count_range_bits(&BTRFS_I(inode)->io_failure_tree, &private, 1915 if (count_range_bits(&BTRFS_I(inode)->io_failure_tree, &private,
1916 (u64)-1, 1, EXTENT_DIRTY)) { 1916 (u64)-1, 1, EXTENT_DIRTY, 0)) {
1917 ret = get_state_private(&BTRFS_I(inode)->io_failure_tree, 1917 ret = get_state_private(&BTRFS_I(inode)->io_failure_tree,
1918 start, &private_failure); 1918 start, &private_failure);
1919 if (ret == 0) { 1919 if (ret == 0) {
@@ -5280,6 +5280,128 @@ out:
5280 return em; 5280 return em;
5281} 5281}
5282 5282
5283struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *page,
5284 size_t pg_offset, u64 start, u64 len,
5285 int create)
5286{
5287 struct extent_map *em;
5288 struct extent_map *hole_em = NULL;
5289 u64 range_start = start;
5290 u64 end;
5291 u64 found;
5292 u64 found_end;
5293 int err = 0;
5294
5295 em = btrfs_get_extent(inode, page, pg_offset, start, len, create);
5296 if (IS_ERR(em))
5297 return em;
5298 if (em) {
5299 /*
5300 * if our em maps to a hole, there might
5301 * actually be delalloc bytes behind it
5302 */
5303 if (em->block_start != EXTENT_MAP_HOLE)
5304 return em;
5305 else
5306 hole_em = em;
5307 }
5308
5309 /* check to see if we've wrapped (len == -1 or similar) */
5310 end = start + len;
5311 if (end < start)
5312 end = (u64)-1;
5313 else
5314 end -= 1;
5315
5316 em = NULL;
5317
5318 /* ok, we didn't find anything, lets look for delalloc */
5319 found = count_range_bits(&BTRFS_I(inode)->io_tree, &range_start,
5320 end, len, EXTENT_DELALLOC, 1);
5321 found_end = range_start + found;
5322 if (found_end < range_start)
5323 found_end = (u64)-1;
5324
5325 /*
5326 * we didn't find anything useful, return
5327 * the original results from get_extent()
5328 */
5329 if (range_start > end || found_end <= start) {
5330 em = hole_em;
5331 hole_em = NULL;
5332 goto out;
5333 }
5334
5335 /* adjust the range_start to make sure it doesn't
5336 * go backwards from the start they passed in
5337 */
5338 range_start = max(start,range_start);
5339 found = found_end - range_start;
5340
5341 if (found > 0) {
5342 u64 hole_start = start;
5343 u64 hole_len = len;
5344
5345 em = alloc_extent_map(GFP_NOFS);
5346 if (!em) {
5347 err = -ENOMEM;
5348 goto out;
5349 }
5350 /*
5351 * when btrfs_get_extent can't find anything it
5352 * returns one huge hole
5353 *
5354 * make sure what it found really fits our range, and
5355 * adjust to make sure it is based on the start from
5356 * the caller
5357 */
5358 if (hole_em) {
5359 u64 calc_end = extent_map_end(hole_em);
5360
5361 if (calc_end <= start || (hole_em->start > end)) {
5362 free_extent_map(hole_em);
5363 hole_em = NULL;
5364 } else {
5365 hole_start = max(hole_em->start, start);
5366 hole_len = calc_end - hole_start;
5367 }
5368 }
5369 em->bdev = NULL;
5370 if (hole_em && range_start > hole_start) {
5371 /* our hole starts before our delalloc, so we
5372 * have to return just the parts of the hole
5373 * that go until the delalloc starts
5374 */
5375 em->len = min(hole_len,
5376 range_start - hole_start);
5377 em->start = hole_start;
5378 em->orig_start = hole_start;
5379 /*
5380 * don't adjust block start at all,
5381 * it is fixed at EXTENT_MAP_HOLE
5382 */
5383 em->block_start = hole_em->block_start;
5384 em->block_len = hole_len;
5385 } else {
5386 em->start = range_start;
5387 em->len = found;
5388 em->orig_start = range_start;
5389 em->block_start = EXTENT_MAP_DELALLOC;
5390 em->block_len = found;
5391 }
5392 } else if (hole_em) {
5393 return hole_em;
5394 }
5395out:
5396
5397 free_extent_map(hole_em);
5398 if (err) {
5399 free_extent_map(em);
5400 return ERR_PTR(err);
5401 }
5402 return em;
5403}
5404
5283static struct extent_map *btrfs_new_extent_direct(struct inode *inode, 5405static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
5284 u64 start, u64 len) 5406 u64 start, u64 len)
5285{ 5407{
@@ -6102,7 +6224,7 @@ out:
6102static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 6224static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
6103 __u64 start, __u64 len) 6225 __u64 start, __u64 len)
6104{ 6226{
6105 return extent_fiemap(inode, fieinfo, start, len, btrfs_get_extent); 6227 return extent_fiemap(inode, fieinfo, start, len, btrfs_get_extent_fiemap);
6106} 6228}
6107 6229
6108int btrfs_readpage(struct file *file, struct page *page) 6230int btrfs_readpage(struct file *file, struct page *page)
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index be2d4f6aaa5e..5fdb2abc4fa7 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1071,12 +1071,15 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
1071 if (copy_from_user(&flags, arg, sizeof(flags))) 1071 if (copy_from_user(&flags, arg, sizeof(flags)))
1072 return -EFAULT; 1072 return -EFAULT;
1073 1073
1074 if (flags & ~BTRFS_SUBVOL_CREATE_ASYNC) 1074 if (flags & BTRFS_SUBVOL_CREATE_ASYNC)
1075 return -EINVAL; 1075 return -EINVAL;
1076 1076
1077 if (flags & ~BTRFS_SUBVOL_RDONLY) 1077 if (flags & ~BTRFS_SUBVOL_RDONLY)
1078 return -EOPNOTSUPP; 1078 return -EOPNOTSUPP;
1079 1079
1080 if (!is_owner_or_cap(inode))
1081 return -EACCES;
1082
1080 down_write(&root->fs_info->subvol_sem); 1083 down_write(&root->fs_info->subvol_sem);
1081 1084
1082 /* nothing to do */ 1085 /* nothing to do */
@@ -1097,7 +1100,7 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
1097 goto out_reset; 1100 goto out_reset;
1098 } 1101 }
1099 1102
1100 ret = btrfs_update_root(trans, root, 1103 ret = btrfs_update_root(trans, root->fs_info->tree_root,
1101 &root->root_key, &root->root_item); 1104 &root->root_key, &root->root_item);
1102 1105
1103 btrfs_commit_transaction(trans, root); 1106 btrfs_commit_transaction(trans, root);
diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c
index cc9b450399df..a178f5ebea78 100644
--- a/fs/btrfs/lzo.c
+++ b/fs/btrfs/lzo.c
@@ -280,6 +280,7 @@ static int lzo_decompress_biovec(struct list_head *ws,
280 unsigned long tot_out; 280 unsigned long tot_out;
281 unsigned long tot_len; 281 unsigned long tot_len;
282 char *buf; 282 char *buf;
283 bool may_late_unmap, need_unmap;
283 284
284 data_in = kmap(pages_in[0]); 285 data_in = kmap(pages_in[0]);
285 tot_len = read_compress_length(data_in); 286 tot_len = read_compress_length(data_in);
@@ -300,11 +301,13 @@ static int lzo_decompress_biovec(struct list_head *ws,
300 301
301 tot_in += in_len; 302 tot_in += in_len;
302 working_bytes = in_len; 303 working_bytes = in_len;
304 may_late_unmap = need_unmap = false;
303 305
304 /* fast path: avoid using the working buffer */ 306 /* fast path: avoid using the working buffer */
305 if (in_page_bytes_left >= in_len) { 307 if (in_page_bytes_left >= in_len) {
306 buf = data_in + in_offset; 308 buf = data_in + in_offset;
307 bytes = in_len; 309 bytes = in_len;
310 may_late_unmap = true;
308 goto cont; 311 goto cont;
309 } 312 }
310 313
@@ -329,14 +332,17 @@ cont:
329 if (working_bytes == 0 && tot_in >= tot_len) 332 if (working_bytes == 0 && tot_in >= tot_len)
330 break; 333 break;
331 334
332 kunmap(pages_in[page_in_index]); 335 if (page_in_index + 1 >= total_pages_in) {
333 page_in_index++;
334 if (page_in_index >= total_pages_in) {
335 ret = -1; 336 ret = -1;
336 data_in = NULL;
337 goto done; 337 goto done;
338 } 338 }
339 data_in = kmap(pages_in[page_in_index]); 339
340 if (may_late_unmap)
341 need_unmap = true;
342 else
343 kunmap(pages_in[page_in_index]);
344
345 data_in = kmap(pages_in[++page_in_index]);
340 346
341 in_page_bytes_left = PAGE_CACHE_SIZE; 347 in_page_bytes_left = PAGE_CACHE_SIZE;
342 in_offset = 0; 348 in_offset = 0;
@@ -346,6 +352,8 @@ cont:
346 out_len = lzo1x_worst_compress(PAGE_CACHE_SIZE); 352 out_len = lzo1x_worst_compress(PAGE_CACHE_SIZE);
347 ret = lzo1x_decompress_safe(buf, in_len, workspace->buf, 353 ret = lzo1x_decompress_safe(buf, in_len, workspace->buf,
348 &out_len); 354 &out_len);
355 if (need_unmap)
356 kunmap(pages_in[page_in_index - 1]);
349 if (ret != LZO_E_OK) { 357 if (ret != LZO_E_OK) {
350 printk(KERN_WARNING "btrfs decompress failed\n"); 358 printk(KERN_WARNING "btrfs decompress failed\n");
351 ret = -1; 359 ret = -1;
@@ -363,8 +371,7 @@ cont:
363 break; 371 break;
364 } 372 }
365done: 373done:
366 if (data_in) 374 kunmap(pages_in[page_in_index]);
367 kunmap(pages_in[page_in_index]);
368 return ret; 375 return ret;
369} 376}
370 377
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 0825e4ed9447..31ade5802ae8 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -3654,6 +3654,7 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
3654 u32 item_size; 3654 u32 item_size;
3655 int ret; 3655 int ret;
3656 int err = 0; 3656 int err = 0;
3657 int progress = 0;
3657 3658
3658 path = btrfs_alloc_path(); 3659 path = btrfs_alloc_path();
3659 if (!path) 3660 if (!path)
@@ -3666,9 +3667,10 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
3666 } 3667 }
3667 3668
3668 while (1) { 3669 while (1) {
3670 progress++;
3669 trans = btrfs_start_transaction(rc->extent_root, 0); 3671 trans = btrfs_start_transaction(rc->extent_root, 0);
3670 BUG_ON(IS_ERR(trans)); 3672 BUG_ON(IS_ERR(trans));
3671 3673restart:
3672 if (update_backref_cache(trans, &rc->backref_cache)) { 3674 if (update_backref_cache(trans, &rc->backref_cache)) {
3673 btrfs_end_transaction(trans, rc->extent_root); 3675 btrfs_end_transaction(trans, rc->extent_root);
3674 continue; 3676 continue;
@@ -3781,6 +3783,15 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
3781 } 3783 }
3782 } 3784 }
3783 } 3785 }
3786 if (trans && progress && err == -ENOSPC) {
3787 ret = btrfs_force_chunk_alloc(trans, rc->extent_root,
3788 rc->block_group->flags);
3789 if (ret == 0) {
3790 err = 0;
3791 progress = 0;
3792 goto restart;
3793 }
3794 }
3784 3795
3785 btrfs_release_path(rc->extent_root, path); 3796 btrfs_release_path(rc->extent_root, path);
3786 clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY, 3797 clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY,
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index a004008f7d28..d39a9895d932 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -155,7 +155,8 @@ enum {
155 Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress, 155 Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress,
156 Opt_compress_type, Opt_compress_force, Opt_compress_force_type, 156 Opt_compress_type, Opt_compress_force, Opt_compress_force_type,
157 Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard, 157 Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard,
158 Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed, Opt_err, 158 Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed,
159 Opt_enospc_debug, Opt_err,
159}; 160};
160 161
161static match_table_t tokens = { 162static match_table_t tokens = {
@@ -184,6 +185,7 @@ static match_table_t tokens = {
184 {Opt_space_cache, "space_cache"}, 185 {Opt_space_cache, "space_cache"},
185 {Opt_clear_cache, "clear_cache"}, 186 {Opt_clear_cache, "clear_cache"},
186 {Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"}, 187 {Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"},
188 {Opt_enospc_debug, "enospc_debug"},
187 {Opt_err, NULL}, 189 {Opt_err, NULL},
188}; 190};
189 191
@@ -358,6 +360,9 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
358 case Opt_user_subvol_rm_allowed: 360 case Opt_user_subvol_rm_allowed:
359 btrfs_set_opt(info->mount_opt, USER_SUBVOL_RM_ALLOWED); 361 btrfs_set_opt(info->mount_opt, USER_SUBVOL_RM_ALLOWED);
360 break; 362 break;
363 case Opt_enospc_debug:
364 btrfs_set_opt(info->mount_opt, ENOSPC_DEBUG);
365 break;
361 case Opt_err: 366 case Opt_err:
362 printk(KERN_INFO "btrfs: unrecognized mount option " 367 printk(KERN_INFO "btrfs: unrecognized mount option "
363 "'%s'\n", p); 368 "'%s'\n", p);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 6e0e82a1b188..9d554e8e6583 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1294,11 +1294,11 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
1294 1294
1295 ret = btrfs_shrink_device(device, 0); 1295 ret = btrfs_shrink_device(device, 0);
1296 if (ret) 1296 if (ret)
1297 goto error_brelse; 1297 goto error_undo;
1298 1298
1299 ret = btrfs_rm_dev_item(root->fs_info->chunk_root, device); 1299 ret = btrfs_rm_dev_item(root->fs_info->chunk_root, device);
1300 if (ret) 1300 if (ret)
1301 goto error_brelse; 1301 goto error_undo;
1302 1302
1303 device->in_fs_metadata = 0; 1303 device->in_fs_metadata = 0;
1304 1304
@@ -1372,6 +1372,13 @@ out:
1372 mutex_unlock(&root->fs_info->volume_mutex); 1372 mutex_unlock(&root->fs_info->volume_mutex);
1373 mutex_unlock(&uuid_mutex); 1373 mutex_unlock(&uuid_mutex);
1374 return ret; 1374 return ret;
1375error_undo:
1376 if (device->writeable) {
1377 list_add(&device->dev_alloc_list,
1378 &root->fs_info->fs_devices->alloc_list);
1379 root->fs_info->fs_devices->rw_devices++;
1380 }
1381 goto error_brelse;
1375} 1382}
1376 1383
1377/* 1384/*
@@ -1589,7 +1596,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
1589 device->dev_root = root->fs_info->dev_root; 1596 device->dev_root = root->fs_info->dev_root;
1590 device->bdev = bdev; 1597 device->bdev = bdev;
1591 device->in_fs_metadata = 1; 1598 device->in_fs_metadata = 1;
1592 device->mode = 0; 1599 device->mode = FMODE_EXCL;
1593 set_blocksize(device->bdev, 4096); 1600 set_blocksize(device->bdev, 4096);
1594 1601
1595 if (seeding_dev) { 1602 if (seeding_dev) {
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 267d0ada4541..4a09af9e9a63 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -63,6 +63,13 @@
63 * cleanup path and it is also acquired by eventpoll_release_file() 63 * cleanup path and it is also acquired by eventpoll_release_file()
64 * if a file has been pushed inside an epoll set and it is then 64 * if a file has been pushed inside an epoll set and it is then
65 * close()d without a previous call toepoll_ctl(EPOLL_CTL_DEL). 65 * close()d without a previous call toepoll_ctl(EPOLL_CTL_DEL).
66 * It is also acquired when inserting an epoll fd onto another epoll
67 * fd. We do this so that we walk the epoll tree and ensure that this
68 * insertion does not create a cycle of epoll file descriptors, which
69 * could lead to deadlock. We need a global mutex to prevent two
70 * simultaneous inserts (A into B and B into A) from racing and
71 * constructing a cycle without either insert observing that it is
72 * going to.
66 * It is possible to drop the "ep->mtx" and to use the global 73 * It is possible to drop the "ep->mtx" and to use the global
67 * mutex "epmutex" (together with "ep->lock") to have it working, 74 * mutex "epmutex" (together with "ep->lock") to have it working,
68 * but having "ep->mtx" will make the interface more scalable. 75 * but having "ep->mtx" will make the interface more scalable.
@@ -224,6 +231,9 @@ static long max_user_watches __read_mostly;
224 */ 231 */
225static DEFINE_MUTEX(epmutex); 232static DEFINE_MUTEX(epmutex);
226 233
234/* Used to check for epoll file descriptor inclusion loops */
235static struct nested_calls poll_loop_ncalls;
236
227/* Used for safe wake up implementation */ 237/* Used for safe wake up implementation */
228static struct nested_calls poll_safewake_ncalls; 238static struct nested_calls poll_safewake_ncalls;
229 239
@@ -1198,6 +1208,62 @@ retry:
1198 return res; 1208 return res;
1199} 1209}
1200 1210
1211/**
1212 * ep_loop_check_proc - Callback function to be passed to the @ep_call_nested()
1213 * API, to verify that adding an epoll file inside another
1214 * epoll structure, does not violate the constraints, in
1215 * terms of closed loops, or too deep chains (which can
1216 * result in excessive stack usage).
1217 *
1218 * @priv: Pointer to the epoll file to be currently checked.
1219 * @cookie: Original cookie for this call. This is the top-of-the-chain epoll
1220 * data structure pointer.
1221 * @call_nests: Current dept of the @ep_call_nested() call stack.
1222 *
1223 * Returns: Returns zero if adding the epoll @file inside current epoll
1224 * structure @ep does not violate the constraints, or -1 otherwise.
1225 */
1226static int ep_loop_check_proc(void *priv, void *cookie, int call_nests)
1227{
1228 int error = 0;
1229 struct file *file = priv;
1230 struct eventpoll *ep = file->private_data;
1231 struct rb_node *rbp;
1232 struct epitem *epi;
1233
1234 mutex_lock(&ep->mtx);
1235 for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) {
1236 epi = rb_entry(rbp, struct epitem, rbn);
1237 if (unlikely(is_file_epoll(epi->ffd.file))) {
1238 error = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
1239 ep_loop_check_proc, epi->ffd.file,
1240 epi->ffd.file->private_data, current);
1241 if (error != 0)
1242 break;
1243 }
1244 }
1245 mutex_unlock(&ep->mtx);
1246
1247 return error;
1248}
1249
1250/**
1251 * ep_loop_check - Performs a check to verify that adding an epoll file (@file)
1252 * another epoll file (represented by @ep) does not create
1253 * closed loops or too deep chains.
1254 *
1255 * @ep: Pointer to the epoll private data structure.
1256 * @file: Pointer to the epoll file to be checked.
1257 *
1258 * Returns: Returns zero if adding the epoll @file inside current epoll
1259 * structure @ep does not violate the constraints, or -1 otherwise.
1260 */
1261static int ep_loop_check(struct eventpoll *ep, struct file *file)
1262{
1263 return ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
1264 ep_loop_check_proc, file, ep, current);
1265}
1266
1201/* 1267/*
1202 * Open an eventpoll file descriptor. 1268 * Open an eventpoll file descriptor.
1203 */ 1269 */
@@ -1246,6 +1312,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
1246 struct epoll_event __user *, event) 1312 struct epoll_event __user *, event)
1247{ 1313{
1248 int error; 1314 int error;
1315 int did_lock_epmutex = 0;
1249 struct file *file, *tfile; 1316 struct file *file, *tfile;
1250 struct eventpoll *ep; 1317 struct eventpoll *ep;
1251 struct epitem *epi; 1318 struct epitem *epi;
@@ -1287,6 +1354,25 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
1287 */ 1354 */
1288 ep = file->private_data; 1355 ep = file->private_data;
1289 1356
1357 /*
1358 * When we insert an epoll file descriptor, inside another epoll file
1359 * descriptor, there is the change of creating closed loops, which are
1360 * better be handled here, than in more critical paths.
1361 *
1362 * We hold epmutex across the loop check and the insert in this case, in
1363 * order to prevent two separate inserts from racing and each doing the
1364 * insert "at the same time" such that ep_loop_check passes on both
1365 * before either one does the insert, thereby creating a cycle.
1366 */
1367 if (unlikely(is_file_epoll(tfile) && op == EPOLL_CTL_ADD)) {
1368 mutex_lock(&epmutex);
1369 did_lock_epmutex = 1;
1370 error = -ELOOP;
1371 if (ep_loop_check(ep, tfile) != 0)
1372 goto error_tgt_fput;
1373 }
1374
1375
1290 mutex_lock(&ep->mtx); 1376 mutex_lock(&ep->mtx);
1291 1377
1292 /* 1378 /*
@@ -1322,6 +1408,9 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
1322 mutex_unlock(&ep->mtx); 1408 mutex_unlock(&ep->mtx);
1323 1409
1324error_tgt_fput: 1410error_tgt_fput:
1411 if (unlikely(did_lock_epmutex))
1412 mutex_unlock(&epmutex);
1413
1325 fput(tfile); 1414 fput(tfile);
1326error_fput: 1415error_fput:
1327 fput(file); 1416 fput(file);
@@ -1441,6 +1530,12 @@ static int __init eventpoll_init(void)
1441 EP_ITEM_COST; 1530 EP_ITEM_COST;
1442 BUG_ON(max_user_watches < 0); 1531 BUG_ON(max_user_watches < 0);
1443 1532
1533 /*
1534 * Initialize the structure used to perform epoll file descriptor
1535 * inclusion loops checks.
1536 */
1537 ep_nested_calls_init(&poll_loop_ncalls);
1538
1444 /* Initialize the structure used to perform safe poll wait head wake ups */ 1539 /* Initialize the structure used to perform safe poll wait head wake ups */
1445 ep_nested_calls_init(&poll_safewake_ncalls); 1540 ep_nested_calls_init(&poll_safewake_ncalls);
1446 1541
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index bfed8447ed80..83543b5ff941 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -1283,8 +1283,11 @@ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr,
1283 if (err) 1283 if (err)
1284 return err; 1284 return err;
1285 1285
1286 if ((attr->ia_valid & ATTR_OPEN) && fc->atomic_o_trunc) 1286 if (attr->ia_valid & ATTR_OPEN) {
1287 return 0; 1287 if (fc->atomic_o_trunc)
1288 return 0;
1289 file = NULL;
1290 }
1288 1291
1289 if (attr->ia_valid & ATTR_SIZE) 1292 if (attr->ia_valid & ATTR_SIZE)
1290 is_truncate = true; 1293 is_truncate = true;
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 95da1bc1c826..9e0832dbb1e3 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -86,18 +86,52 @@ struct fuse_file *fuse_file_get(struct fuse_file *ff)
86 return ff; 86 return ff;
87} 87}
88 88
89static void fuse_release_async(struct work_struct *work)
90{
91 struct fuse_req *req;
92 struct fuse_conn *fc;
93 struct path path;
94
95 req = container_of(work, struct fuse_req, misc.release.work);
96 path = req->misc.release.path;
97 fc = get_fuse_conn(path.dentry->d_inode);
98
99 fuse_put_request(fc, req);
100 path_put(&path);
101}
102
89static void fuse_release_end(struct fuse_conn *fc, struct fuse_req *req) 103static void fuse_release_end(struct fuse_conn *fc, struct fuse_req *req)
90{ 104{
91 path_put(&req->misc.release.path); 105 if (fc->destroy_req) {
106 /*
107 * If this is a fuseblk mount, then it's possible that
108 * releasing the path will result in releasing the
109 * super block and sending the DESTROY request. If
110 * the server is single threaded, this would hang.
111 * For this reason do the path_put() in a separate
112 * thread.
113 */
114 atomic_inc(&req->count);
115 INIT_WORK(&req->misc.release.work, fuse_release_async);
116 schedule_work(&req->misc.release.work);
117 } else {
118 path_put(&req->misc.release.path);
119 }
92} 120}
93 121
94static void fuse_file_put(struct fuse_file *ff) 122static void fuse_file_put(struct fuse_file *ff, bool sync)
95{ 123{
96 if (atomic_dec_and_test(&ff->count)) { 124 if (atomic_dec_and_test(&ff->count)) {
97 struct fuse_req *req = ff->reserved_req; 125 struct fuse_req *req = ff->reserved_req;
98 126
99 req->end = fuse_release_end; 127 if (sync) {
100 fuse_request_send_background(ff->fc, req); 128 fuse_request_send(ff->fc, req);
129 path_put(&req->misc.release.path);
130 fuse_put_request(ff->fc, req);
131 } else {
132 req->end = fuse_release_end;
133 fuse_request_send_background(ff->fc, req);
134 }
101 kfree(ff); 135 kfree(ff);
102 } 136 }
103} 137}
@@ -219,8 +253,12 @@ void fuse_release_common(struct file *file, int opcode)
219 * Normally this will send the RELEASE request, however if 253 * Normally this will send the RELEASE request, however if
220 * some asynchronous READ or WRITE requests are outstanding, 254 * some asynchronous READ or WRITE requests are outstanding,
221 * the sending will be delayed. 255 * the sending will be delayed.
256 *
257 * Make the release synchronous if this is a fuseblk mount,
258 * synchronous RELEASE is allowed (and desirable) in this case
259 * because the server can be trusted not to screw up.
222 */ 260 */
223 fuse_file_put(ff); 261 fuse_file_put(ff, ff->fc->destroy_req != NULL);
224} 262}
225 263
226static int fuse_open(struct inode *inode, struct file *file) 264static int fuse_open(struct inode *inode, struct file *file)
@@ -558,7 +596,7 @@ static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_req *req)
558 page_cache_release(page); 596 page_cache_release(page);
559 } 597 }
560 if (req->ff) 598 if (req->ff)
561 fuse_file_put(req->ff); 599 fuse_file_put(req->ff, false);
562} 600}
563 601
564static void fuse_send_readpages(struct fuse_req *req, struct file *file) 602static void fuse_send_readpages(struct fuse_req *req, struct file *file)
@@ -1137,7 +1175,7 @@ static ssize_t fuse_direct_write(struct file *file, const char __user *buf,
1137static void fuse_writepage_free(struct fuse_conn *fc, struct fuse_req *req) 1175static void fuse_writepage_free(struct fuse_conn *fc, struct fuse_req *req)
1138{ 1176{
1139 __free_page(req->pages[0]); 1177 __free_page(req->pages[0]);
1140 fuse_file_put(req->ff); 1178 fuse_file_put(req->ff, false);
1141} 1179}
1142 1180
1143static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req) 1181static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req)
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index ae5744a2f9e9..d4286947bc2c 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -21,6 +21,7 @@
21#include <linux/rwsem.h> 21#include <linux/rwsem.h>
22#include <linux/rbtree.h> 22#include <linux/rbtree.h>
23#include <linux/poll.h> 23#include <linux/poll.h>
24#include <linux/workqueue.h>
24 25
25/** Max number of pages that can be used in a single read request */ 26/** Max number of pages that can be used in a single read request */
26#define FUSE_MAX_PAGES_PER_REQ 32 27#define FUSE_MAX_PAGES_PER_REQ 32
@@ -262,7 +263,10 @@ struct fuse_req {
262 /** Data for asynchronous requests */ 263 /** Data for asynchronous requests */
263 union { 264 union {
264 struct { 265 struct {
265 struct fuse_release_in in; 266 union {
267 struct fuse_release_in in;
268 struct work_struct work;
269 };
266 struct path path; 270 struct path path;
267 } release; 271 } release;
268 struct fuse_init_in init_in; 272 struct fuse_init_in init_in;
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index 85ba027d1c4d..72c31a315d96 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -59,14 +59,7 @@ static void gfs2_init_gl_aspace_once(void *foo)
59 struct address_space *mapping = (struct address_space *)(gl + 1); 59 struct address_space *mapping = (struct address_space *)(gl + 1);
60 60
61 gfs2_init_glock_once(gl); 61 gfs2_init_glock_once(gl);
62 memset(mapping, 0, sizeof(*mapping)); 62 address_space_init_once(mapping);
63 INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC);
64 spin_lock_init(&mapping->tree_lock);
65 spin_lock_init(&mapping->i_mmap_lock);
66 INIT_LIST_HEAD(&mapping->private_list);
67 spin_lock_init(&mapping->private_lock);
68 INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap);
69 INIT_LIST_HEAD(&mapping->i_mmap_nonlinear);
70} 63}
71 64
72/** 65/**
diff --git a/fs/inode.c b/fs/inode.c
index da85e56378f3..0647d80accf6 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -295,6 +295,20 @@ static void destroy_inode(struct inode *inode)
295 call_rcu(&inode->i_rcu, i_callback); 295 call_rcu(&inode->i_rcu, i_callback);
296} 296}
297 297
298void address_space_init_once(struct address_space *mapping)
299{
300 memset(mapping, 0, sizeof(*mapping));
301 INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC);
302 spin_lock_init(&mapping->tree_lock);
303 spin_lock_init(&mapping->i_mmap_lock);
304 INIT_LIST_HEAD(&mapping->private_list);
305 spin_lock_init(&mapping->private_lock);
306 INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap);
307 INIT_LIST_HEAD(&mapping->i_mmap_nonlinear);
308 mutex_init(&mapping->unmap_mutex);
309}
310EXPORT_SYMBOL(address_space_init_once);
311
298/* 312/*
299 * These are initializations that only need to be done 313 * These are initializations that only need to be done
300 * once, because the fields are idempotent across use 314 * once, because the fields are idempotent across use
@@ -308,13 +322,7 @@ void inode_init_once(struct inode *inode)
308 INIT_LIST_HEAD(&inode->i_devices); 322 INIT_LIST_HEAD(&inode->i_devices);
309 INIT_LIST_HEAD(&inode->i_wb_list); 323 INIT_LIST_HEAD(&inode->i_wb_list);
310 INIT_LIST_HEAD(&inode->i_lru); 324 INIT_LIST_HEAD(&inode->i_lru);
311 INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC); 325 address_space_init_once(&inode->i_data);
312 spin_lock_init(&inode->i_data.tree_lock);
313 spin_lock_init(&inode->i_data.i_mmap_lock);
314 INIT_LIST_HEAD(&inode->i_data.private_list);
315 spin_lock_init(&inode->i_data.private_lock);
316 INIT_RAW_PRIO_TREE_ROOT(&inode->i_data.i_mmap);
317 INIT_LIST_HEAD(&inode->i_data.i_mmap_nonlinear);
318 i_size_ordered_init(inode); 326 i_size_ordered_init(inode);
319#ifdef CONFIG_FSNOTIFY 327#ifdef CONFIG_FSNOTIFY
320 INIT_HLIST_HEAD(&inode->i_fsnotify_marks); 328 INIT_HLIST_HEAD(&inode->i_fsnotify_marks);
@@ -540,11 +548,14 @@ void evict_inodes(struct super_block *sb)
540/** 548/**
541 * invalidate_inodes - attempt to free all inodes on a superblock 549 * invalidate_inodes - attempt to free all inodes on a superblock
542 * @sb: superblock to operate on 550 * @sb: superblock to operate on
551 * @kill_dirty: flag to guide handling of dirty inodes
543 * 552 *
544 * Attempts to free all inodes for a given superblock. If there were any 553 * Attempts to free all inodes for a given superblock. If there were any
545 * busy inodes return a non-zero value, else zero. 554 * busy inodes return a non-zero value, else zero.
555 * If @kill_dirty is set, discard dirty inodes too, otherwise treat
556 * them as busy.
546 */ 557 */
547int invalidate_inodes(struct super_block *sb) 558int invalidate_inodes(struct super_block *sb, bool kill_dirty)
548{ 559{
549 int busy = 0; 560 int busy = 0;
550 struct inode *inode, *next; 561 struct inode *inode, *next;
@@ -556,6 +567,10 @@ int invalidate_inodes(struct super_block *sb)
556 list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) { 567 list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
557 if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) 568 if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE))
558 continue; 569 continue;
570 if (inode->i_state & I_DIRTY && !kill_dirty) {
571 busy = 1;
572 continue;
573 }
559 if (atomic_read(&inode->i_count)) { 574 if (atomic_read(&inode->i_count)) {
560 busy = 1; 575 busy = 1;
561 continue; 576 continue;
diff --git a/fs/internal.h b/fs/internal.h
index 0663568b1247..9b976b57d7fe 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -112,4 +112,4 @@ extern void release_open_intent(struct nameidata *);
112 */ 112 */
113extern int get_nr_dirty_inodes(void); 113extern int get_nr_dirty_inodes(void);
114extern void evict_inodes(struct super_block *); 114extern void evict_inodes(struct super_block *);
115extern int invalidate_inodes(struct super_block *); 115extern int invalidate_inodes(struct super_block *, bool);
diff --git a/fs/namespace.c b/fs/namespace.c
index 7b0b95371696..d1edf26025dc 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1244,7 +1244,7 @@ static int do_umount(struct vfsmount *mnt, int flags)
1244 */ 1244 */
1245 br_write_lock(vfsmount_lock); 1245 br_write_lock(vfsmount_lock);
1246 if (mnt_get_count(mnt) != 2) { 1246 if (mnt_get_count(mnt) != 2) {
1247 br_write_lock(vfsmount_lock); 1247 br_write_unlock(vfsmount_lock);
1248 return -EBUSY; 1248 return -EBUSY;
1249 } 1249 }
1250 br_write_unlock(vfsmount_lock); 1250 br_write_unlock(vfsmount_lock);
diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c
index f4f1c08807ed..609cd223eea8 100644
--- a/fs/nilfs2/btnode.c
+++ b/fs/nilfs2/btnode.c
@@ -34,12 +34,6 @@
34#include "page.h" 34#include "page.h"
35#include "btnode.h" 35#include "btnode.h"
36 36
37
38void nilfs_btnode_cache_init_once(struct address_space *btnc)
39{
40 nilfs_mapping_init_once(btnc);
41}
42
43void nilfs_btnode_cache_init(struct address_space *btnc, 37void nilfs_btnode_cache_init(struct address_space *btnc,
44 struct backing_dev_info *bdi) 38 struct backing_dev_info *bdi)
45{ 39{
diff --git a/fs/nilfs2/btnode.h b/fs/nilfs2/btnode.h
index 79037494f1e0..1b8ebd888c28 100644
--- a/fs/nilfs2/btnode.h
+++ b/fs/nilfs2/btnode.h
@@ -37,7 +37,6 @@ struct nilfs_btnode_chkey_ctxt {
37 struct buffer_head *newbh; 37 struct buffer_head *newbh;
38}; 38};
39 39
40void nilfs_btnode_cache_init_once(struct address_space *);
41void nilfs_btnode_cache_init(struct address_space *, struct backing_dev_info *); 40void nilfs_btnode_cache_init(struct address_space *, struct backing_dev_info *);
42void nilfs_btnode_cache_clear(struct address_space *); 41void nilfs_btnode_cache_clear(struct address_space *);
43struct buffer_head *nilfs_btnode_create_block(struct address_space *btnc, 42struct buffer_head *nilfs_btnode_create_block(struct address_space *btnc,
diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c
index 3fdb61d79c9a..a649b05f7069 100644
--- a/fs/nilfs2/mdt.c
+++ b/fs/nilfs2/mdt.c
@@ -449,9 +449,9 @@ int nilfs_mdt_setup_shadow_map(struct inode *inode,
449 struct backing_dev_info *bdi = inode->i_sb->s_bdi; 449 struct backing_dev_info *bdi = inode->i_sb->s_bdi;
450 450
451 INIT_LIST_HEAD(&shadow->frozen_buffers); 451 INIT_LIST_HEAD(&shadow->frozen_buffers);
452 nilfs_mapping_init_once(&shadow->frozen_data); 452 address_space_init_once(&shadow->frozen_data);
453 nilfs_mapping_init(&shadow->frozen_data, bdi); 453 nilfs_mapping_init(&shadow->frozen_data, bdi);
454 nilfs_mapping_init_once(&shadow->frozen_btnodes); 454 address_space_init_once(&shadow->frozen_btnodes);
455 nilfs_mapping_init(&shadow->frozen_btnodes, bdi); 455 nilfs_mapping_init(&shadow->frozen_btnodes, bdi);
456 mi->mi_shadow = shadow; 456 mi->mi_shadow = shadow;
457 return 0; 457 return 0;
diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c
index 3da37cc5de34..4d2a1ee0eb47 100644
--- a/fs/nilfs2/page.c
+++ b/fs/nilfs2/page.c
@@ -492,19 +492,6 @@ unsigned nilfs_page_count_clean_buffers(struct page *page,
492 return nc; 492 return nc;
493} 493}
494 494
495void nilfs_mapping_init_once(struct address_space *mapping)
496{
497 memset(mapping, 0, sizeof(*mapping));
498 INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC);
499 spin_lock_init(&mapping->tree_lock);
500 INIT_LIST_HEAD(&mapping->private_list);
501 spin_lock_init(&mapping->private_lock);
502
503 spin_lock_init(&mapping->i_mmap_lock);
504 INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap);
505 INIT_LIST_HEAD(&mapping->i_mmap_nonlinear);
506}
507
508void nilfs_mapping_init(struct address_space *mapping, 495void nilfs_mapping_init(struct address_space *mapping,
509 struct backing_dev_info *bdi) 496 struct backing_dev_info *bdi)
510{ 497{
diff --git a/fs/nilfs2/page.h b/fs/nilfs2/page.h
index ba4d6fd40b04..f06b79ad7493 100644
--- a/fs/nilfs2/page.h
+++ b/fs/nilfs2/page.h
@@ -61,7 +61,6 @@ void nilfs_free_private_page(struct page *);
61int nilfs_copy_dirty_pages(struct address_space *, struct address_space *); 61int nilfs_copy_dirty_pages(struct address_space *, struct address_space *);
62void nilfs_copy_back_pages(struct address_space *, struct address_space *); 62void nilfs_copy_back_pages(struct address_space *, struct address_space *);
63void nilfs_clear_dirty_pages(struct address_space *); 63void nilfs_clear_dirty_pages(struct address_space *);
64void nilfs_mapping_init_once(struct address_space *mapping);
65void nilfs_mapping_init(struct address_space *mapping, 64void nilfs_mapping_init(struct address_space *mapping,
66 struct backing_dev_info *bdi); 65 struct backing_dev_info *bdi);
67unsigned nilfs_page_count_clean_buffers(struct page *, unsigned, unsigned); 66unsigned nilfs_page_count_clean_buffers(struct page *, unsigned, unsigned);
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 58fd707174e1..1673b3d99842 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -1279,7 +1279,7 @@ static void nilfs_inode_init_once(void *obj)
1279#ifdef CONFIG_NILFS_XATTR 1279#ifdef CONFIG_NILFS_XATTR
1280 init_rwsem(&ii->xattr_sem); 1280 init_rwsem(&ii->xattr_sem);
1281#endif 1281#endif
1282 nilfs_btnode_cache_init_once(&ii->i_btnode_cache); 1282 address_space_init_once(&ii->i_btnode_cache);
1283 ii->i_bmap = &ii->i_bmap_data; 1283 ii->i_bmap = &ii->i_bmap_data;
1284 inode_init_once(&ii->vfs_inode); 1284 inode_init_once(&ii->vfs_inode);
1285} 1285}
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index 43e56b97f9c0..6180da1e37e6 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -405,9 +405,9 @@ static inline int ocfs2_remove_extent_credits(struct super_block *sb)
405 ocfs2_quota_trans_credits(sb); 405 ocfs2_quota_trans_credits(sb);
406} 406}
407 407
408/* data block for new dir/symlink, 2 for bitmap updates (bitmap fe + 408/* data block for new dir/symlink, allocation of directory block, dx_root
409 * bitmap block for the new bit) dx_root update for free list */ 409 * update for free list */
410#define OCFS2_DIR_LINK_ADDITIONAL_CREDITS (1 + 2 + 1) 410#define OCFS2_DIR_LINK_ADDITIONAL_CREDITS (1 + OCFS2_SUBALLOC_ALLOC + 1)
411 411
412static inline int ocfs2_add_dir_index_credits(struct super_block *sb) 412static inline int ocfs2_add_dir_index_credits(struct super_block *sb)
413{ 413{
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index b5f9160e93e9..19ebc5aad391 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -3228,7 +3228,7 @@ static int ocfs2_make_clusters_writable(struct super_block *sb,
3228 u32 num_clusters, unsigned int e_flags) 3228 u32 num_clusters, unsigned int e_flags)
3229{ 3229{
3230 int ret, delete, index, credits = 0; 3230 int ret, delete, index, credits = 0;
3231 u32 new_bit, new_len; 3231 u32 new_bit, new_len, orig_num_clusters;
3232 unsigned int set_len; 3232 unsigned int set_len;
3233 struct ocfs2_super *osb = OCFS2_SB(sb); 3233 struct ocfs2_super *osb = OCFS2_SB(sb);
3234 handle_t *handle; 3234 handle_t *handle;
@@ -3261,6 +3261,8 @@ static int ocfs2_make_clusters_writable(struct super_block *sb,
3261 goto out; 3261 goto out;
3262 } 3262 }
3263 3263
3264 orig_num_clusters = num_clusters;
3265
3264 while (num_clusters) { 3266 while (num_clusters) {
3265 ret = ocfs2_get_refcount_rec(ref_ci, context->ref_root_bh, 3267 ret = ocfs2_get_refcount_rec(ref_ci, context->ref_root_bh,
3266 p_cluster, num_clusters, 3268 p_cluster, num_clusters,
@@ -3348,7 +3350,8 @@ static int ocfs2_make_clusters_writable(struct super_block *sb,
3348 * in write-back mode. 3350 * in write-back mode.
3349 */ 3351 */
3350 if (context->get_clusters == ocfs2_di_get_clusters) { 3352 if (context->get_clusters == ocfs2_di_get_clusters) {
3351 ret = ocfs2_cow_sync_writeback(sb, context, cpos, num_clusters); 3353 ret = ocfs2_cow_sync_writeback(sb, context, cpos,
3354 orig_num_clusters);
3352 if (ret) 3355 if (ret)
3353 mlog_errno(ret); 3356 mlog_errno(ret);
3354 } 3357 }
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 38f986d2447e..36c423fb0635 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1316,7 +1316,7 @@ static int ocfs2_parse_options(struct super_block *sb,
1316 struct mount_options *mopt, 1316 struct mount_options *mopt,
1317 int is_remount) 1317 int is_remount)
1318{ 1318{
1319 int status; 1319 int status, user_stack = 0;
1320 char *p; 1320 char *p;
1321 u32 tmp; 1321 u32 tmp;
1322 1322
@@ -1459,6 +1459,15 @@ static int ocfs2_parse_options(struct super_block *sb,
1459 memcpy(mopt->cluster_stack, args[0].from, 1459 memcpy(mopt->cluster_stack, args[0].from,
1460 OCFS2_STACK_LABEL_LEN); 1460 OCFS2_STACK_LABEL_LEN);
1461 mopt->cluster_stack[OCFS2_STACK_LABEL_LEN] = '\0'; 1461 mopt->cluster_stack[OCFS2_STACK_LABEL_LEN] = '\0';
1462 /*
1463 * Open code the memcmp here as we don't have
1464 * an osb to pass to
1465 * ocfs2_userspace_stack().
1466 */
1467 if (memcmp(mopt->cluster_stack,
1468 OCFS2_CLASSIC_CLUSTER_STACK,
1469 OCFS2_STACK_LABEL_LEN))
1470 user_stack = 1;
1462 break; 1471 break;
1463 case Opt_inode64: 1472 case Opt_inode64:
1464 mopt->mount_opt |= OCFS2_MOUNT_INODE64; 1473 mopt->mount_opt |= OCFS2_MOUNT_INODE64;
@@ -1514,13 +1523,16 @@ static int ocfs2_parse_options(struct super_block *sb,
1514 } 1523 }
1515 } 1524 }
1516 1525
1517 /* Ensure only one heartbeat mode */ 1526 if (user_stack == 0) {
1518 tmp = mopt->mount_opt & (OCFS2_MOUNT_HB_LOCAL | OCFS2_MOUNT_HB_GLOBAL | 1527 /* Ensure only one heartbeat mode */
1519 OCFS2_MOUNT_HB_NONE); 1528 tmp = mopt->mount_opt & (OCFS2_MOUNT_HB_LOCAL |
1520 if (hweight32(tmp) != 1) { 1529 OCFS2_MOUNT_HB_GLOBAL |
1521 mlog(ML_ERROR, "Invalid heartbeat mount options\n"); 1530 OCFS2_MOUNT_HB_NONE);
1522 status = 0; 1531 if (hweight32(tmp) != 1) {
1523 goto bail; 1532 mlog(ML_ERROR, "Invalid heartbeat mount options\n");
1533 status = 0;
1534 goto bail;
1535 }
1524 } 1536 }
1525 1537
1526 status = 1; 1538 status = 1;
diff --git a/fs/partitions/ldm.c b/fs/partitions/ldm.c
index 789c625c7aa5..b10e3540d5b7 100644
--- a/fs/partitions/ldm.c
+++ b/fs/partitions/ldm.c
@@ -251,6 +251,11 @@ static bool ldm_parse_vmdb (const u8 *data, struct vmdb *vm)
251 } 251 }
252 252
253 vm->vblk_size = get_unaligned_be32(data + 0x08); 253 vm->vblk_size = get_unaligned_be32(data + 0x08);
254 if (vm->vblk_size == 0) {
255 ldm_error ("Illegal VBLK size");
256 return false;
257 }
258
254 vm->vblk_offset = get_unaligned_be32(data + 0x0C); 259 vm->vblk_offset = get_unaligned_be32(data + 0x0C);
255 vm->last_vblk_seq = get_unaligned_be32(data + 0x04); 260 vm->last_vblk_seq = get_unaligned_be32(data + 0x04);
256 261
diff --git a/fs/xfs/linux-2.6/xfs_discard.c b/fs/xfs/linux-2.6/xfs_discard.c
index 05201ae719e5..d61611c88012 100644
--- a/fs/xfs/linux-2.6/xfs_discard.c
+++ b/fs/xfs/linux-2.6/xfs_discard.c
@@ -152,6 +152,8 @@ xfs_ioc_trim(
152 152
153 if (!capable(CAP_SYS_ADMIN)) 153 if (!capable(CAP_SYS_ADMIN))
154 return -XFS_ERROR(EPERM); 154 return -XFS_ERROR(EPERM);
155 if (!blk_queue_discard(q))
156 return -XFS_ERROR(EOPNOTSUPP);
155 if (copy_from_user(&range, urange, sizeof(range))) 157 if (copy_from_user(&range, urange, sizeof(range)))
156 return -XFS_ERROR(EFAULT); 158 return -XFS_ERROR(EFAULT);
157 159
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index cec89dd5d7d2..85668efb3e3e 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -53,6 +53,9 @@ xfs_fs_geometry(
53 xfs_fsop_geom_t *geo, 53 xfs_fsop_geom_t *geo,
54 int new_version) 54 int new_version)
55{ 55{
56
57 memset(geo, 0, sizeof(*geo));
58
56 geo->blocksize = mp->m_sb.sb_blocksize; 59 geo->blocksize = mp->m_sb.sb_blocksize;
57 geo->rtextsize = mp->m_sb.sb_rextsize; 60 geo->rtextsize = mp->m_sb.sb_rextsize;
58 geo->agblocks = mp->m_sb.sb_agblocks; 61 geo->agblocks = mp->m_sb.sb_agblocks;