aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/afs/write.c1
-rw-r--r--fs/aio.c52
-rw-r--r--fs/block_dev.c30
-rw-r--r--fs/btrfs/acl.c6
-rw-r--r--fs/btrfs/compression.c27
-rw-r--r--fs/btrfs/ctree.h3
-rw-r--r--fs/btrfs/disk-io.c15
-rw-r--r--fs/btrfs/export.c2
-rw-r--r--fs/btrfs/extent-tree.c109
-rw-r--r--fs/btrfs/extent_io.c192
-rw-r--r--fs/btrfs/extent_io.h2
-rw-r--r--fs/btrfs/extent_map.c4
-rw-r--r--fs/btrfs/file-item.c5
-rw-r--r--fs/btrfs/file.c21
-rw-r--r--fs/btrfs/free-space-cache.c162
-rw-r--r--fs/btrfs/inode.c164
-rw-r--r--fs/btrfs/ioctl.c36
-rw-r--r--fs/btrfs/lzo.c21
-rw-r--r--fs/btrfs/ordered-data.c2
-rw-r--r--fs/btrfs/print-tree.c1
-rw-r--r--fs/btrfs/relocation.c43
-rw-r--r--fs/btrfs/super.c16
-rw-r--r--fs/btrfs/transaction.c5
-rw-r--r--fs/btrfs/tree-log.c35
-rw-r--r--fs/btrfs/volumes.c34
-rw-r--r--fs/ceph/dir.c25
-rw-r--r--fs/ceph/inode.c2
-rw-r--r--fs/ceph/snap.c14
-rw-r--r--fs/cifs/cifsacl.c4
-rw-r--r--fs/cifs/cifsfs.h2
-rw-r--r--fs/cifs/cifsglob.h4
-rw-r--r--fs/cifs/cifssmb.c5
-rw-r--r--fs/cifs/connect.c47
-rw-r--r--fs/cifs/file.c11
-rw-r--r--fs/cifs/netmisc.c8
-rw-r--r--fs/cifs/sess.c8
-rw-r--r--fs/cifs/transport.c7
-rw-r--r--fs/dlm/lowcomms.c6
-rw-r--r--fs/ecryptfs/dentry.c22
-rw-r--r--fs/ecryptfs/ecryptfs_kernel.h3
-rw-r--r--fs/ecryptfs/file.c1
-rw-r--r--fs/ecryptfs/inode.c138
-rw-r--r--fs/eventfd.c12
-rw-r--r--fs/eventpoll.c111
-rw-r--r--fs/exec.c4
-rw-r--r--fs/exofs/inode.c2
-rw-r--r--fs/exofs/namei.c8
-rw-r--r--fs/ext2/namei.c9
-rw-r--r--fs/ext4/ext4.h10
-rw-r--r--fs/ext4/extents.c10
-rw-r--r--fs/ext4/file.c60
-rw-r--r--fs/ext4/mballoc.c100
-rw-r--r--fs/ext4/page-io.c36
-rw-r--r--fs/ext4/super.c66
-rw-r--r--fs/fcntl.c2
-rw-r--r--fs/file_table.c2
-rw-r--r--fs/fuse/dir.c7
-rw-r--r--fs/fuse/file.c52
-rw-r--r--fs/fuse/fuse_i.h6
-rw-r--r--fs/gfs2/glock.c4
-rw-r--r--fs/gfs2/main.c11
-rw-r--r--fs/hfs/dir.c50
-rw-r--r--fs/hfsplus/extents.c4
-rw-r--r--fs/hfsplus/part_tbl.c4
-rw-r--r--fs/hfsplus/super.c106
-rw-r--r--fs/hfsplus/wrapper.c4
-rw-r--r--fs/inode.c31
-rw-r--r--fs/internal.h2
-rw-r--r--fs/ioctl.c7
-rw-r--r--fs/jbd2/journal.c9
-rw-r--r--fs/jbd2/transaction.c21
-rw-r--r--fs/minix/namei.c8
-rw-r--r--fs/namei.c151
-rw-r--r--fs/namespace.c2
-rw-r--r--fs/nfs/nfs4proc.c44
-rw-r--r--fs/nfsd/nfs4callback.c6
-rw-r--r--fs/nfsd/nfs4state.c186
-rw-r--r--fs/nfsd/nfs4xdr.c8
-rw-r--r--fs/nfsd/state.h5
-rw-r--r--fs/nfsd/vfs.c21
-rw-r--r--fs/nilfs2/btnode.c5
-rw-r--r--fs/nilfs2/btnode.h1
-rw-r--r--fs/nilfs2/mdt.c4
-rw-r--r--fs/nilfs2/namei.c8
-rw-r--r--fs/nilfs2/page.c13
-rw-r--r--fs/nilfs2/page.h1
-rw-r--r--fs/nilfs2/segment.c3
-rw-r--r--fs/nilfs2/super.c2
-rw-r--r--fs/ocfs2/journal.h6
-rw-r--r--fs/ocfs2/refcounttree.c7
-rw-r--r--fs/ocfs2/super.c28
-rw-r--r--fs/open.c2
-rw-r--r--fs/partitions/ldm.c5
-rw-r--r--fs/partitions/mac.c17
-rw-r--r--fs/proc/array.c3
-rw-r--r--fs/proc/proc_devtree.c2
-rw-r--r--fs/reiserfs/namei.c2
-rw-r--r--fs/super.c5
-rw-r--r--fs/sysv/namei.c8
-rw-r--r--fs/udf/namei.c11
-rw-r--r--fs/ufs/namei.c9
-rw-r--r--fs/xfs/linux-2.6/xfs_discard.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl.c11
-rw-r--r--fs/xfs/xfs_fsops.c3
104 files changed, 1743 insertions, 891 deletions
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 15690bb1d3b5..789b3afb3423 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -140,6 +140,7 @@ int afs_write_begin(struct file *file, struct address_space *mapping,
140 candidate->first = candidate->last = index; 140 candidate->first = candidate->last = index;
141 candidate->offset_first = from; 141 candidate->offset_first = from;
142 candidate->to_last = to; 142 candidate->to_last = to;
143 INIT_LIST_HEAD(&candidate->link);
143 candidate->usage = 1; 144 candidate->usage = 1;
144 candidate->state = AFS_WBACK_PENDING; 145 candidate->state = AFS_WBACK_PENDING;
145 init_waitqueue_head(&candidate->waitq); 146 init_waitqueue_head(&candidate->waitq);
diff --git a/fs/aio.c b/fs/aio.c
index fc557a3be0a9..26869cde3953 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -239,15 +239,23 @@ static void __put_ioctx(struct kioctx *ctx)
239 call_rcu(&ctx->rcu_head, ctx_rcu_free); 239 call_rcu(&ctx->rcu_head, ctx_rcu_free);
240} 240}
241 241
242#define get_ioctx(kioctx) do { \ 242static inline void get_ioctx(struct kioctx *kioctx)
243 BUG_ON(atomic_read(&(kioctx)->users) <= 0); \ 243{
244 atomic_inc(&(kioctx)->users); \ 244 BUG_ON(atomic_read(&kioctx->users) <= 0);
245} while (0) 245 atomic_inc(&kioctx->users);
246#define put_ioctx(kioctx) do { \ 246}
247 BUG_ON(atomic_read(&(kioctx)->users) <= 0); \ 247
248 if (unlikely(atomic_dec_and_test(&(kioctx)->users))) \ 248static inline int try_get_ioctx(struct kioctx *kioctx)
249 __put_ioctx(kioctx); \ 249{
250} while (0) 250 return atomic_inc_not_zero(&kioctx->users);
251}
252
253static inline void put_ioctx(struct kioctx *kioctx)
254{
255 BUG_ON(atomic_read(&kioctx->users) <= 0);
256 if (unlikely(atomic_dec_and_test(&kioctx->users)))
257 __put_ioctx(kioctx);
258}
251 259
252/* ioctx_alloc 260/* ioctx_alloc
253 * Allocates and initializes an ioctx. Returns an ERR_PTR if it failed. 261 * Allocates and initializes an ioctx. Returns an ERR_PTR if it failed.
@@ -601,8 +609,13 @@ static struct kioctx *lookup_ioctx(unsigned long ctx_id)
601 rcu_read_lock(); 609 rcu_read_lock();
602 610
603 hlist_for_each_entry_rcu(ctx, n, &mm->ioctx_list, list) { 611 hlist_for_each_entry_rcu(ctx, n, &mm->ioctx_list, list) {
604 if (ctx->user_id == ctx_id && !ctx->dead) { 612 /*
605 get_ioctx(ctx); 613 * RCU protects us against accessing freed memory but
614 * we have to be careful not to get a reference when the
615 * reference count already dropped to 0 (ctx->dead test
616 * is unreliable because of races).
617 */
618 if (ctx->user_id == ctx_id && !ctx->dead && try_get_ioctx(ctx)){
606 ret = ctx; 619 ret = ctx;
607 break; 620 break;
608 } 621 }
@@ -1629,6 +1642,23 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
1629 goto out_put_req; 1642 goto out_put_req;
1630 1643
1631 spin_lock_irq(&ctx->ctx_lock); 1644 spin_lock_irq(&ctx->ctx_lock);
1645 /*
1646 * We could have raced with io_destroy() and are currently holding a
1647 * reference to ctx which should be destroyed. We cannot submit IO
1648 * since ctx gets freed as soon as io_submit() puts its reference. The
1649 * check here is reliable: io_destroy() sets ctx->dead before waiting
1650 * for outstanding IO and the barrier between these two is realized by
1651 * unlock of mm->ioctx_lock and lock of ctx->ctx_lock. Analogously we
1652 * increment ctx->reqs_active before checking for ctx->dead and the
1653 * barrier is realized by unlock and lock of ctx->ctx_lock. Thus if we
1654 * don't see ctx->dead set here, io_destroy() waits for our IO to
1655 * finish.
1656 */
1657 if (ctx->dead) {
1658 spin_unlock_irq(&ctx->ctx_lock);
1659 ret = -EINVAL;
1660 goto out_put_req;
1661 }
1632 aio_run_iocb(req); 1662 aio_run_iocb(req);
1633 if (!list_empty(&ctx->run_list)) { 1663 if (!list_empty(&ctx->run_list)) {
1634 /* drain the run list */ 1664 /* drain the run list */
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 333a7bb4cb9c..889287019599 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -873,6 +873,11 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
873 ret = add_symlink(bdev->bd_part->holder_dir, &disk_to_dev(disk)->kobj); 873 ret = add_symlink(bdev->bd_part->holder_dir, &disk_to_dev(disk)->kobj);
874 if (ret) 874 if (ret)
875 goto out_del; 875 goto out_del;
876 /*
877 * bdev could be deleted beneath us which would implicitly destroy
878 * the holder directory. Hold on to it.
879 */
880 kobject_get(bdev->bd_part->holder_dir);
876 881
877 list_add(&holder->list, &bdev->bd_holder_disks); 882 list_add(&holder->list, &bdev->bd_holder_disks);
878 goto out_unlock; 883 goto out_unlock;
@@ -909,6 +914,7 @@ void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk)
909 del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj); 914 del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
910 del_symlink(bdev->bd_part->holder_dir, 915 del_symlink(bdev->bd_part->holder_dir,
911 &disk_to_dev(disk)->kobj); 916 &disk_to_dev(disk)->kobj);
917 kobject_put(bdev->bd_part->holder_dir);
912 list_del_init(&holder->list); 918 list_del_init(&holder->list);
913 kfree(holder); 919 kfree(holder);
914 } 920 }
@@ -922,14 +928,15 @@ EXPORT_SYMBOL_GPL(bd_unlink_disk_holder);
922 * flush_disk - invalidates all buffer-cache entries on a disk 928 * flush_disk - invalidates all buffer-cache entries on a disk
923 * 929 *
924 * @bdev: struct block device to be flushed 930 * @bdev: struct block device to be flushed
931 * @kill_dirty: flag to guide handling of dirty inodes
925 * 932 *
926 * Invalidates all buffer-cache entries on a disk. It should be called 933 * Invalidates all buffer-cache entries on a disk. It should be called
927 * when a disk has been changed -- either by a media change or online 934 * when a disk has been changed -- either by a media change or online
928 * resize. 935 * resize.
929 */ 936 */
930static void flush_disk(struct block_device *bdev) 937static void flush_disk(struct block_device *bdev, bool kill_dirty)
931{ 938{
932 if (__invalidate_device(bdev)) { 939 if (__invalidate_device(bdev, kill_dirty)) {
933 char name[BDEVNAME_SIZE] = ""; 940 char name[BDEVNAME_SIZE] = "";
934 941
935 if (bdev->bd_disk) 942 if (bdev->bd_disk)
@@ -966,7 +973,7 @@ void check_disk_size_change(struct gendisk *disk, struct block_device *bdev)
966 "%s: detected capacity change from %lld to %lld\n", 973 "%s: detected capacity change from %lld to %lld\n",
967 name, bdev_size, disk_size); 974 name, bdev_size, disk_size);
968 i_size_write(bdev->bd_inode, disk_size); 975 i_size_write(bdev->bd_inode, disk_size);
969 flush_disk(bdev); 976 flush_disk(bdev, false);
970 } 977 }
971} 978}
972EXPORT_SYMBOL(check_disk_size_change); 979EXPORT_SYMBOL(check_disk_size_change);
@@ -1019,7 +1026,7 @@ int check_disk_change(struct block_device *bdev)
1019 if (!(events & DISK_EVENT_MEDIA_CHANGE)) 1026 if (!(events & DISK_EVENT_MEDIA_CHANGE))
1020 return 0; 1027 return 0;
1021 1028
1022 flush_disk(bdev); 1029 flush_disk(bdev, true);
1023 if (bdops->revalidate_disk) 1030 if (bdops->revalidate_disk)
1024 bdops->revalidate_disk(bdev->bd_disk); 1031 bdops->revalidate_disk(bdev->bd_disk);
1025 return 1; 1032 return 1;
@@ -1215,12 +1222,6 @@ int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder)
1215 1222
1216 res = __blkdev_get(bdev, mode, 0); 1223 res = __blkdev_get(bdev, mode, 0);
1217 1224
1218 /* __blkdev_get() may alter read only status, check it afterwards */
1219 if (!res && (mode & FMODE_WRITE) && bdev_read_only(bdev)) {
1220 __blkdev_put(bdev, mode, 0);
1221 res = -EACCES;
1222 }
1223
1224 if (whole) { 1225 if (whole) {
1225 /* finish claiming */ 1226 /* finish claiming */
1226 mutex_lock(&bdev->bd_mutex); 1227 mutex_lock(&bdev->bd_mutex);
@@ -1298,6 +1299,11 @@ struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
1298 if (err) 1299 if (err)
1299 return ERR_PTR(err); 1300 return ERR_PTR(err);
1300 1301
1302 if ((mode & FMODE_WRITE) && bdev_read_only(bdev)) {
1303 blkdev_put(bdev, mode);
1304 return ERR_PTR(-EACCES);
1305 }
1306
1301 return bdev; 1307 return bdev;
1302} 1308}
1303EXPORT_SYMBOL(blkdev_get_by_path); 1309EXPORT_SYMBOL(blkdev_get_by_path);
@@ -1601,7 +1607,7 @@ fail:
1601} 1607}
1602EXPORT_SYMBOL(lookup_bdev); 1608EXPORT_SYMBOL(lookup_bdev);
1603 1609
1604int __invalidate_device(struct block_device *bdev) 1610int __invalidate_device(struct block_device *bdev, bool kill_dirty)
1605{ 1611{
1606 struct super_block *sb = get_super(bdev); 1612 struct super_block *sb = get_super(bdev);
1607 int res = 0; 1613 int res = 0;
@@ -1614,7 +1620,7 @@ int __invalidate_device(struct block_device *bdev)
1614 * hold). 1620 * hold).
1615 */ 1621 */
1616 shrink_dcache_sb(sb); 1622 shrink_dcache_sb(sb);
1617 res = invalidate_inodes(sb); 1623 res = invalidate_inodes(sb, kill_dirty);
1618 drop_super(sb); 1624 drop_super(sb);
1619 } 1625 }
1620 invalidate_bdev(bdev); 1626 invalidate_bdev(bdev);
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 15b5ca2a2606..9c949348510b 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -37,6 +37,9 @@ static struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
37 char *value = NULL; 37 char *value = NULL;
38 struct posix_acl *acl; 38 struct posix_acl *acl;
39 39
40 if (!IS_POSIXACL(inode))
41 return NULL;
42
40 acl = get_cached_acl(inode, type); 43 acl = get_cached_acl(inode, type);
41 if (acl != ACL_NOT_CACHED) 44 if (acl != ACL_NOT_CACHED)
42 return acl; 45 return acl;
@@ -84,6 +87,9 @@ static int btrfs_xattr_acl_get(struct dentry *dentry, const char *name,
84 struct posix_acl *acl; 87 struct posix_acl *acl;
85 int ret = 0; 88 int ret = 0;
86 89
90 if (!IS_POSIXACL(dentry->d_inode))
91 return -EOPNOTSUPP;
92
87 acl = btrfs_get_acl(dentry->d_inode, type); 93 acl = btrfs_get_acl(dentry->d_inode, type);
88 94
89 if (IS_ERR(acl)) 95 if (IS_ERR(acl))
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index f745287fbf2e..4d2110eafe29 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -562,7 +562,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
562 u64 em_len; 562 u64 em_len;
563 u64 em_start; 563 u64 em_start;
564 struct extent_map *em; 564 struct extent_map *em;
565 int ret; 565 int ret = -ENOMEM;
566 u32 *sums; 566 u32 *sums;
567 567
568 tree = &BTRFS_I(inode)->io_tree; 568 tree = &BTRFS_I(inode)->io_tree;
@@ -577,6 +577,9 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
577 577
578 compressed_len = em->block_len; 578 compressed_len = em->block_len;
579 cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS); 579 cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS);
580 if (!cb)
581 goto out;
582
580 atomic_set(&cb->pending_bios, 0); 583 atomic_set(&cb->pending_bios, 0);
581 cb->errors = 0; 584 cb->errors = 0;
582 cb->inode = inode; 585 cb->inode = inode;
@@ -597,13 +600,18 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
597 600
598 nr_pages = (compressed_len + PAGE_CACHE_SIZE - 1) / 601 nr_pages = (compressed_len + PAGE_CACHE_SIZE - 1) /
599 PAGE_CACHE_SIZE; 602 PAGE_CACHE_SIZE;
600 cb->compressed_pages = kmalloc(sizeof(struct page *) * nr_pages, 603 cb->compressed_pages = kzalloc(sizeof(struct page *) * nr_pages,
601 GFP_NOFS); 604 GFP_NOFS);
605 if (!cb->compressed_pages)
606 goto fail1;
607
602 bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; 608 bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
603 609
604 for (page_index = 0; page_index < nr_pages; page_index++) { 610 for (page_index = 0; page_index < nr_pages; page_index++) {
605 cb->compressed_pages[page_index] = alloc_page(GFP_NOFS | 611 cb->compressed_pages[page_index] = alloc_page(GFP_NOFS |
606 __GFP_HIGHMEM); 612 __GFP_HIGHMEM);
613 if (!cb->compressed_pages[page_index])
614 goto fail2;
607 } 615 }
608 cb->nr_pages = nr_pages; 616 cb->nr_pages = nr_pages;
609 617
@@ -614,6 +622,8 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
614 cb->len = uncompressed_len; 622 cb->len = uncompressed_len;
615 623
616 comp_bio = compressed_bio_alloc(bdev, cur_disk_byte, GFP_NOFS); 624 comp_bio = compressed_bio_alloc(bdev, cur_disk_byte, GFP_NOFS);
625 if (!comp_bio)
626 goto fail2;
617 comp_bio->bi_private = cb; 627 comp_bio->bi_private = cb;
618 comp_bio->bi_end_io = end_compressed_bio_read; 628 comp_bio->bi_end_io = end_compressed_bio_read;
619 atomic_inc(&cb->pending_bios); 629 atomic_inc(&cb->pending_bios);
@@ -681,6 +691,17 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
681 691
682 bio_put(comp_bio); 692 bio_put(comp_bio);
683 return 0; 693 return 0;
694
695fail2:
696 for (page_index = 0; page_index < nr_pages; page_index++)
697 free_page((unsigned long)cb->compressed_pages[page_index]);
698
699 kfree(cb->compressed_pages);
700fail1:
701 kfree(cb);
702out:
703 free_extent_map(em);
704 return ret;
684} 705}
685 706
686static struct list_head comp_idle_workspace[BTRFS_COMPRESS_TYPES]; 707static struct list_head comp_idle_workspace[BTRFS_COMPRESS_TYPES];
@@ -900,7 +921,7 @@ int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page,
900 return ret; 921 return ret;
901} 922}
902 923
903void __exit btrfs_exit_compress(void) 924void btrfs_exit_compress(void)
904{ 925{
905 free_workspaces(); 926 free_workspaces();
906} 927}
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 2c98b3af6052..6f820fa23df4 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1254,6 +1254,7 @@ struct btrfs_root {
1254#define BTRFS_MOUNT_SPACE_CACHE (1 << 12) 1254#define BTRFS_MOUNT_SPACE_CACHE (1 << 12)
1255#define BTRFS_MOUNT_CLEAR_CACHE (1 << 13) 1255#define BTRFS_MOUNT_CLEAR_CACHE (1 << 13)
1256#define BTRFS_MOUNT_USER_SUBVOL_RM_ALLOWED (1 << 14) 1256#define BTRFS_MOUNT_USER_SUBVOL_RM_ALLOWED (1 << 14)
1257#define BTRFS_MOUNT_ENOSPC_DEBUG (1 << 15)
1257 1258
1258#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) 1259#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt)
1259#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) 1260#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt)
@@ -2218,6 +2219,8 @@ int btrfs_error_unpin_extent_range(struct btrfs_root *root,
2218 u64 start, u64 end); 2219 u64 start, u64 end);
2219int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr, 2220int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr,
2220 u64 num_bytes); 2221 u64 num_bytes);
2222int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
2223 struct btrfs_root *root, u64 type);
2221 2224
2222/* ctree.c */ 2225/* ctree.c */
2223int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, 2226int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index b531c36455d8..e1aa8d607bc7 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -359,10 +359,14 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
359 359
360 tree = &BTRFS_I(page->mapping->host)->io_tree; 360 tree = &BTRFS_I(page->mapping->host)->io_tree;
361 361
362 if (page->private == EXTENT_PAGE_PRIVATE) 362 if (page->private == EXTENT_PAGE_PRIVATE) {
363 WARN_ON(1);
363 goto out; 364 goto out;
364 if (!page->private) 365 }
366 if (!page->private) {
367 WARN_ON(1);
365 goto out; 368 goto out;
369 }
366 len = page->private >> 2; 370 len = page->private >> 2;
367 WARN_ON(len == 0); 371 WARN_ON(len == 0);
368 372
@@ -1550,6 +1554,7 @@ static int transaction_kthread(void *arg)
1550 spin_unlock(&root->fs_info->new_trans_lock); 1554 spin_unlock(&root->fs_info->new_trans_lock);
1551 1555
1552 trans = btrfs_join_transaction(root, 1); 1556 trans = btrfs_join_transaction(root, 1);
1557 BUG_ON(IS_ERR(trans));
1553 if (transid == trans->transid) { 1558 if (transid == trans->transid) {
1554 ret = btrfs_commit_transaction(trans, root); 1559 ret = btrfs_commit_transaction(trans, root);
1555 BUG_ON(ret); 1560 BUG_ON(ret);
@@ -2453,10 +2458,14 @@ int btrfs_commit_super(struct btrfs_root *root)
2453 up_write(&root->fs_info->cleanup_work_sem); 2458 up_write(&root->fs_info->cleanup_work_sem);
2454 2459
2455 trans = btrfs_join_transaction(root, 1); 2460 trans = btrfs_join_transaction(root, 1);
2461 if (IS_ERR(trans))
2462 return PTR_ERR(trans);
2456 ret = btrfs_commit_transaction(trans, root); 2463 ret = btrfs_commit_transaction(trans, root);
2457 BUG_ON(ret); 2464 BUG_ON(ret);
2458 /* run commit again to drop the original snapshot */ 2465 /* run commit again to drop the original snapshot */
2459 trans = btrfs_join_transaction(root, 1); 2466 trans = btrfs_join_transaction(root, 1);
2467 if (IS_ERR(trans))
2468 return PTR_ERR(trans);
2460 btrfs_commit_transaction(trans, root); 2469 btrfs_commit_transaction(trans, root);
2461 ret = btrfs_write_and_wait_transaction(NULL, root); 2470 ret = btrfs_write_and_wait_transaction(NULL, root);
2462 BUG_ON(ret); 2471 BUG_ON(ret);
@@ -2554,6 +2563,8 @@ int close_ctree(struct btrfs_root *root)
2554 kfree(fs_info->chunk_root); 2563 kfree(fs_info->chunk_root);
2555 kfree(fs_info->dev_root); 2564 kfree(fs_info->dev_root);
2556 kfree(fs_info->csum_root); 2565 kfree(fs_info->csum_root);
2566 kfree(fs_info);
2567
2557 return 0; 2568 return 0;
2558} 2569}
2559 2570
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c
index 9786963b07e5..ff27d7a477b2 100644
--- a/fs/btrfs/export.c
+++ b/fs/btrfs/export.c
@@ -171,6 +171,8 @@ static struct dentry *btrfs_get_parent(struct dentry *child)
171 int ret; 171 int ret;
172 172
173 path = btrfs_alloc_path(); 173 path = btrfs_alloc_path();
174 if (!path)
175 return ERR_PTR(-ENOMEM);
174 176
175 if (dir->i_ino == BTRFS_FIRST_FREE_OBJECTID) { 177 if (dir->i_ino == BTRFS_FIRST_FREE_OBJECTID) {
176 key.objectid = root->root_key.objectid; 178 key.objectid = root->root_key.objectid;
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index b55269340cec..588ff9849873 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -320,11 +320,6 @@ static int caching_kthread(void *data)
320 if (!path) 320 if (!path)
321 return -ENOMEM; 321 return -ENOMEM;
322 322
323 exclude_super_stripes(extent_root, block_group);
324 spin_lock(&block_group->space_info->lock);
325 block_group->space_info->bytes_readonly += block_group->bytes_super;
326 spin_unlock(&block_group->space_info->lock);
327
328 last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET); 323 last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
329 324
330 /* 325 /*
@@ -467,8 +462,10 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
467 cache->cached = BTRFS_CACHE_NO; 462 cache->cached = BTRFS_CACHE_NO;
468 } 463 }
469 spin_unlock(&cache->lock); 464 spin_unlock(&cache->lock);
470 if (ret == 1) 465 if (ret == 1) {
466 free_excluded_extents(fs_info->extent_root, cache);
471 return 0; 467 return 0;
468 }
472 } 469 }
473 470
474 if (load_cache_only) 471 if (load_cache_only)
@@ -3344,8 +3341,10 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
3344 u64 reserved; 3341 u64 reserved;
3345 u64 max_reclaim; 3342 u64 max_reclaim;
3346 u64 reclaimed = 0; 3343 u64 reclaimed = 0;
3344 long time_left;
3347 int pause = 1; 3345 int pause = 1;
3348 int nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT; 3346 int nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT;
3347 int loops = 0;
3349 3348
3350 block_rsv = &root->fs_info->delalloc_block_rsv; 3349 block_rsv = &root->fs_info->delalloc_block_rsv;
3351 space_info = block_rsv->space_info; 3350 space_info = block_rsv->space_info;
@@ -3358,7 +3357,7 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
3358 3357
3359 max_reclaim = min(reserved, to_reclaim); 3358 max_reclaim = min(reserved, to_reclaim);
3360 3359
3361 while (1) { 3360 while (loops < 1024) {
3362 /* have the flusher threads jump in and do some IO */ 3361 /* have the flusher threads jump in and do some IO */
3363 smp_mb(); 3362 smp_mb();
3364 nr_pages = min_t(unsigned long, nr_pages, 3363 nr_pages = min_t(unsigned long, nr_pages,
@@ -3366,8 +3365,12 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
3366 writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages); 3365 writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages);
3367 3366
3368 spin_lock(&space_info->lock); 3367 spin_lock(&space_info->lock);
3369 if (reserved > space_info->bytes_reserved) 3368 if (reserved > space_info->bytes_reserved) {
3369 loops = 0;
3370 reclaimed += reserved - space_info->bytes_reserved; 3370 reclaimed += reserved - space_info->bytes_reserved;
3371 } else {
3372 loops++;
3373 }
3371 reserved = space_info->bytes_reserved; 3374 reserved = space_info->bytes_reserved;
3372 spin_unlock(&space_info->lock); 3375 spin_unlock(&space_info->lock);
3373 3376
@@ -3378,7 +3381,12 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
3378 return -EAGAIN; 3381 return -EAGAIN;
3379 3382
3380 __set_current_state(TASK_INTERRUPTIBLE); 3383 __set_current_state(TASK_INTERRUPTIBLE);
3381 schedule_timeout(pause); 3384 time_left = schedule_timeout(pause);
3385
3386 /* We were interrupted, exit */
3387 if (time_left)
3388 break;
3389
3382 pause <<= 1; 3390 pause <<= 1;
3383 if (pause > HZ / 10) 3391 if (pause > HZ / 10)
3384 pause = HZ / 10; 3392 pause = HZ / 10;
@@ -3588,8 +3596,20 @@ void block_rsv_release_bytes(struct btrfs_block_rsv *block_rsv,
3588 3596
3589 if (num_bytes > 0) { 3597 if (num_bytes > 0) {
3590 if (dest) { 3598 if (dest) {
3591 block_rsv_add_bytes(dest, num_bytes, 0); 3599 spin_lock(&dest->lock);
3592 } else { 3600 if (!dest->full) {
3601 u64 bytes_to_add;
3602
3603 bytes_to_add = dest->size - dest->reserved;
3604 bytes_to_add = min(num_bytes, bytes_to_add);
3605 dest->reserved += bytes_to_add;
3606 if (dest->reserved >= dest->size)
3607 dest->full = 1;
3608 num_bytes -= bytes_to_add;
3609 }
3610 spin_unlock(&dest->lock);
3611 }
3612 if (num_bytes) {
3593 spin_lock(&space_info->lock); 3613 spin_lock(&space_info->lock);
3594 space_info->bytes_reserved -= num_bytes; 3614 space_info->bytes_reserved -= num_bytes;
3595 spin_unlock(&space_info->lock); 3615 spin_unlock(&space_info->lock);
@@ -4012,6 +4032,7 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
4012 4032
4013 num_bytes = ALIGN(num_bytes, root->sectorsize); 4033 num_bytes = ALIGN(num_bytes, root->sectorsize);
4014 atomic_dec(&BTRFS_I(inode)->outstanding_extents); 4034 atomic_dec(&BTRFS_I(inode)->outstanding_extents);
4035 WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents) < 0);
4015 4036
4016 spin_lock(&BTRFS_I(inode)->accounting_lock); 4037 spin_lock(&BTRFS_I(inode)->accounting_lock);
4017 nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents); 4038 nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents);
@@ -5355,7 +5376,7 @@ again:
5355 num_bytes, data, 1); 5376 num_bytes, data, 1);
5356 goto again; 5377 goto again;
5357 } 5378 }
5358 if (ret == -ENOSPC) { 5379 if (ret == -ENOSPC && btrfs_test_opt(root, ENOSPC_DEBUG)) {
5359 struct btrfs_space_info *sinfo; 5380 struct btrfs_space_info *sinfo;
5360 5381
5361 sinfo = __find_space_info(root->fs_info, data); 5382 sinfo = __find_space_info(root->fs_info, data);
@@ -5633,6 +5654,7 @@ use_block_rsv(struct btrfs_trans_handle *trans,
5633 struct btrfs_root *root, u32 blocksize) 5654 struct btrfs_root *root, u32 blocksize)
5634{ 5655{
5635 struct btrfs_block_rsv *block_rsv; 5656 struct btrfs_block_rsv *block_rsv;
5657 struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
5636 int ret; 5658 int ret;
5637 5659
5638 block_rsv = get_block_rsv(trans, root); 5660 block_rsv = get_block_rsv(trans, root);
@@ -5640,14 +5662,39 @@ use_block_rsv(struct btrfs_trans_handle *trans,
5640 if (block_rsv->size == 0) { 5662 if (block_rsv->size == 0) {
5641 ret = reserve_metadata_bytes(trans, root, block_rsv, 5663 ret = reserve_metadata_bytes(trans, root, block_rsv,
5642 blocksize, 0); 5664 blocksize, 0);
5643 if (ret) 5665 /*
5666 * If we couldn't reserve metadata bytes try and use some from
5667 * the global reserve.
5668 */
5669 if (ret && block_rsv != global_rsv) {
5670 ret = block_rsv_use_bytes(global_rsv, blocksize);
5671 if (!ret)
5672 return global_rsv;
5673 return ERR_PTR(ret);
5674 } else if (ret) {
5644 return ERR_PTR(ret); 5675 return ERR_PTR(ret);
5676 }
5645 return block_rsv; 5677 return block_rsv;
5646 } 5678 }
5647 5679
5648 ret = block_rsv_use_bytes(block_rsv, blocksize); 5680 ret = block_rsv_use_bytes(block_rsv, blocksize);
5649 if (!ret) 5681 if (!ret)
5650 return block_rsv; 5682 return block_rsv;
5683 if (ret) {
5684 WARN_ON(1);
5685 ret = reserve_metadata_bytes(trans, root, block_rsv, blocksize,
5686 0);
5687 if (!ret) {
5688 spin_lock(&block_rsv->lock);
5689 block_rsv->size += blocksize;
5690 spin_unlock(&block_rsv->lock);
5691 return block_rsv;
5692 } else if (ret && block_rsv != global_rsv) {
5693 ret = block_rsv_use_bytes(global_rsv, blocksize);
5694 if (!ret)
5695 return global_rsv;
5696 }
5697 }
5651 5698
5652 return ERR_PTR(-ENOSPC); 5699 return ERR_PTR(-ENOSPC);
5653} 5700}
@@ -6221,6 +6268,8 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
6221 BUG_ON(!wc); 6268 BUG_ON(!wc);
6222 6269
6223 trans = btrfs_start_transaction(tree_root, 0); 6270 trans = btrfs_start_transaction(tree_root, 0);
6271 BUG_ON(IS_ERR(trans));
6272
6224 if (block_rsv) 6273 if (block_rsv)
6225 trans->block_rsv = block_rsv; 6274 trans->block_rsv = block_rsv;
6226 6275
@@ -6318,6 +6367,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
6318 6367
6319 btrfs_end_transaction_throttle(trans, tree_root); 6368 btrfs_end_transaction_throttle(trans, tree_root);
6320 trans = btrfs_start_transaction(tree_root, 0); 6369 trans = btrfs_start_transaction(tree_root, 0);
6370 BUG_ON(IS_ERR(trans));
6321 if (block_rsv) 6371 if (block_rsv)
6322 trans->block_rsv = block_rsv; 6372 trans->block_rsv = block_rsv;
6323 } 6373 }
@@ -6446,6 +6496,8 @@ static noinline int relocate_inode_pages(struct inode *inode, u64 start,
6446 int ret = 0; 6496 int ret = 0;
6447 6497
6448 ra = kzalloc(sizeof(*ra), GFP_NOFS); 6498 ra = kzalloc(sizeof(*ra), GFP_NOFS);
6499 if (!ra)
6500 return -ENOMEM;
6449 6501
6450 mutex_lock(&inode->i_mutex); 6502 mutex_lock(&inode->i_mutex);
6451 first_index = start >> PAGE_CACHE_SHIFT; 6503 first_index = start >> PAGE_CACHE_SHIFT;
@@ -6531,7 +6583,7 @@ static noinline int relocate_data_extent(struct inode *reloc_inode,
6531 u64 end = start + extent_key->offset - 1; 6583 u64 end = start + extent_key->offset - 1;
6532 6584
6533 em = alloc_extent_map(GFP_NOFS); 6585 em = alloc_extent_map(GFP_NOFS);
6534 BUG_ON(!em || IS_ERR(em)); 6586 BUG_ON(!em);
6535 6587
6536 em->start = start; 6588 em->start = start;
6537 em->len = extent_key->offset; 6589 em->len = extent_key->offset;
@@ -7477,7 +7529,7 @@ int btrfs_drop_dead_reloc_roots(struct btrfs_root *root)
7477 BUG_ON(reloc_root->commit_root != NULL); 7529 BUG_ON(reloc_root->commit_root != NULL);
7478 while (1) { 7530 while (1) {
7479 trans = btrfs_join_transaction(root, 1); 7531 trans = btrfs_join_transaction(root, 1);
7480 BUG_ON(!trans); 7532 BUG_ON(IS_ERR(trans));
7481 7533
7482 mutex_lock(&root->fs_info->drop_mutex); 7534 mutex_lock(&root->fs_info->drop_mutex);
7483 ret = btrfs_drop_snapshot(trans, reloc_root); 7535 ret = btrfs_drop_snapshot(trans, reloc_root);
@@ -7535,7 +7587,7 @@ int btrfs_cleanup_reloc_trees(struct btrfs_root *root)
7535 7587
7536 if (found) { 7588 if (found) {
7537 trans = btrfs_start_transaction(root, 1); 7589 trans = btrfs_start_transaction(root, 1);
7538 BUG_ON(!trans); 7590 BUG_ON(IS_ERR(trans));
7539 ret = btrfs_commit_transaction(trans, root); 7591 ret = btrfs_commit_transaction(trans, root);
7540 BUG_ON(ret); 7592 BUG_ON(ret);
7541 } 7593 }
@@ -7779,7 +7831,7 @@ static noinline int relocate_one_extent(struct btrfs_root *extent_root,
7779 7831
7780 7832
7781 trans = btrfs_start_transaction(extent_root, 1); 7833 trans = btrfs_start_transaction(extent_root, 1);
7782 BUG_ON(!trans); 7834 BUG_ON(IS_ERR(trans));
7783 7835
7784 if (extent_key->objectid == 0) { 7836 if (extent_key->objectid == 0) {
7785 ret = del_extent_zero(trans, extent_root, path, extent_key); 7837 ret = del_extent_zero(trans, extent_root, path, extent_key);
@@ -8013,6 +8065,13 @@ out:
8013 return ret; 8065 return ret;
8014} 8066}
8015 8067
8068int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
8069 struct btrfs_root *root, u64 type)
8070{
8071 u64 alloc_flags = get_alloc_profile(root, type);
8072 return do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1);
8073}
8074
8016/* 8075/*
8017 * helper to account the unused space of all the readonly block group in the 8076 * helper to account the unused space of all the readonly block group in the
8018 * list. takes mirrors into account. 8077 * list. takes mirrors into account.
@@ -8270,6 +8329,13 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
8270 if (block_group->cached == BTRFS_CACHE_STARTED) 8329 if (block_group->cached == BTRFS_CACHE_STARTED)
8271 wait_block_group_cache_done(block_group); 8330 wait_block_group_cache_done(block_group);
8272 8331
8332 /*
8333 * We haven't cached this block group, which means we could
8334 * possibly have excluded extents on this block group.
8335 */
8336 if (block_group->cached == BTRFS_CACHE_NO)
8337 free_excluded_extents(info->extent_root, block_group);
8338
8273 btrfs_remove_free_space_cache(block_group); 8339 btrfs_remove_free_space_cache(block_group);
8274 btrfs_put_block_group(block_group); 8340 btrfs_put_block_group(block_group);
8275 8341
@@ -8385,6 +8451,13 @@ int btrfs_read_block_groups(struct btrfs_root *root)
8385 cache->sectorsize = root->sectorsize; 8451 cache->sectorsize = root->sectorsize;
8386 8452
8387 /* 8453 /*
8454 * We need to exclude the super stripes now so that the space
8455 * info has super bytes accounted for, otherwise we'll think
8456 * we have more space than we actually do.
8457 */
8458 exclude_super_stripes(root, cache);
8459
8460 /*
8388 * check for two cases, either we are full, and therefore 8461 * check for two cases, either we are full, and therefore
8389 * don't need to bother with the caching work since we won't 8462 * don't need to bother with the caching work since we won't
8390 * find any space, or we are empty, and we can just add all 8463 * find any space, or we are empty, and we can just add all
@@ -8392,12 +8465,10 @@ int btrfs_read_block_groups(struct btrfs_root *root)
8392 * time, particularly in the full case. 8465 * time, particularly in the full case.
8393 */ 8466 */
8394 if (found_key.offset == btrfs_block_group_used(&cache->item)) { 8467 if (found_key.offset == btrfs_block_group_used(&cache->item)) {
8395 exclude_super_stripes(root, cache);
8396 cache->last_byte_to_unpin = (u64)-1; 8468 cache->last_byte_to_unpin = (u64)-1;
8397 cache->cached = BTRFS_CACHE_FINISHED; 8469 cache->cached = BTRFS_CACHE_FINISHED;
8398 free_excluded_extents(root, cache); 8470 free_excluded_extents(root, cache);
8399 } else if (btrfs_block_group_used(&cache->item) == 0) { 8471 } else if (btrfs_block_group_used(&cache->item) == 0) {
8400 exclude_super_stripes(root, cache);
8401 cache->last_byte_to_unpin = (u64)-1; 8472 cache->last_byte_to_unpin = (u64)-1;
8402 cache->cached = BTRFS_CACHE_FINISHED; 8473 cache->cached = BTRFS_CACHE_FINISHED;
8403 add_new_free_space(cache, root->fs_info, 8474 add_new_free_space(cache, root->fs_info,
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 2e993cf1766e..fd3f172e94e6 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1433,12 +1433,13 @@ int extent_clear_unlock_delalloc(struct inode *inode,
1433 */ 1433 */
1434u64 count_range_bits(struct extent_io_tree *tree, 1434u64 count_range_bits(struct extent_io_tree *tree,
1435 u64 *start, u64 search_end, u64 max_bytes, 1435 u64 *start, u64 search_end, u64 max_bytes,
1436 unsigned long bits) 1436 unsigned long bits, int contig)
1437{ 1437{
1438 struct rb_node *node; 1438 struct rb_node *node;
1439 struct extent_state *state; 1439 struct extent_state *state;
1440 u64 cur_start = *start; 1440 u64 cur_start = *start;
1441 u64 total_bytes = 0; 1441 u64 total_bytes = 0;
1442 u64 last = 0;
1442 int found = 0; 1443 int found = 0;
1443 1444
1444 if (search_end <= cur_start) { 1445 if (search_end <= cur_start) {
@@ -1463,7 +1464,9 @@ u64 count_range_bits(struct extent_io_tree *tree,
1463 state = rb_entry(node, struct extent_state, rb_node); 1464 state = rb_entry(node, struct extent_state, rb_node);
1464 if (state->start > search_end) 1465 if (state->start > search_end)
1465 break; 1466 break;
1466 if (state->end >= cur_start && (state->state & bits)) { 1467 if (contig && found && state->start > last + 1)
1468 break;
1469 if (state->end >= cur_start && (state->state & bits) == bits) {
1467 total_bytes += min(search_end, state->end) + 1 - 1470 total_bytes += min(search_end, state->end) + 1 -
1468 max(cur_start, state->start); 1471 max(cur_start, state->start);
1469 if (total_bytes >= max_bytes) 1472 if (total_bytes >= max_bytes)
@@ -1472,6 +1475,9 @@ u64 count_range_bits(struct extent_io_tree *tree,
1472 *start = state->start; 1475 *start = state->start;
1473 found = 1; 1476 found = 1;
1474 } 1477 }
1478 last = state->end;
1479 } else if (contig && found) {
1480 break;
1475 } 1481 }
1476 node = rb_next(node); 1482 node = rb_next(node);
1477 if (!node) 1483 if (!node)
@@ -1865,7 +1871,7 @@ static int submit_one_bio(int rw, struct bio *bio, int mirror_num,
1865 bio_get(bio); 1871 bio_get(bio);
1866 1872
1867 if (tree->ops && tree->ops->submit_bio_hook) 1873 if (tree->ops && tree->ops->submit_bio_hook)
1868 tree->ops->submit_bio_hook(page->mapping->host, rw, bio, 1874 ret = tree->ops->submit_bio_hook(page->mapping->host, rw, bio,
1869 mirror_num, bio_flags, start); 1875 mirror_num, bio_flags, start);
1870 else 1876 else
1871 submit_bio(rw, bio); 1877 submit_bio(rw, bio);
@@ -1920,6 +1926,8 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
1920 nr = bio_get_nr_vecs(bdev); 1926 nr = bio_get_nr_vecs(bdev);
1921 1927
1922 bio = btrfs_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH); 1928 bio = btrfs_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH);
1929 if (!bio)
1930 return -ENOMEM;
1923 1931
1924 bio_add_page(bio, page, page_size, offset); 1932 bio_add_page(bio, page, page_size, offset);
1925 bio->bi_end_io = end_io_func; 1933 bio->bi_end_io = end_io_func;
@@ -1944,6 +1952,7 @@ void set_page_extent_mapped(struct page *page)
1944 1952
1945static void set_page_extent_head(struct page *page, unsigned long len) 1953static void set_page_extent_head(struct page *page, unsigned long len)
1946{ 1954{
1955 WARN_ON(!PagePrivate(page));
1947 set_page_private(page, EXTENT_PAGE_PRIVATE_FIRST_PAGE | len << 2); 1956 set_page_private(page, EXTENT_PAGE_PRIVATE_FIRST_PAGE | len << 2);
1948} 1957}
1949 1958
@@ -2126,7 +2135,7 @@ int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
2126 ret = __extent_read_full_page(tree, page, get_extent, &bio, 0, 2135 ret = __extent_read_full_page(tree, page, get_extent, &bio, 0,
2127 &bio_flags); 2136 &bio_flags);
2128 if (bio) 2137 if (bio)
2129 submit_one_bio(READ, bio, 0, bio_flags); 2138 ret = submit_one_bio(READ, bio, 0, bio_flags);
2130 return ret; 2139 return ret;
2131} 2140}
2132 2141
@@ -2819,9 +2828,17 @@ int try_release_extent_state(struct extent_map_tree *map,
2819 * at this point we can safely clear everything except the 2828 * at this point we can safely clear everything except the
2820 * locked bit and the nodatasum bit 2829 * locked bit and the nodatasum bit
2821 */ 2830 */
2822 clear_extent_bit(tree, start, end, 2831 ret = clear_extent_bit(tree, start, end,
2823 ~(EXTENT_LOCKED | EXTENT_NODATASUM), 2832 ~(EXTENT_LOCKED | EXTENT_NODATASUM),
2824 0, 0, NULL, mask); 2833 0, 0, NULL, mask);
2834
2835 /* if clear_extent_bit failed for enomem reasons,
2836 * we can't allow the release to continue.
2837 */
2838 if (ret < 0)
2839 ret = 0;
2840 else
2841 ret = 1;
2825 } 2842 }
2826 return ret; 2843 return ret;
2827} 2844}
@@ -2901,6 +2918,46 @@ out:
2901 return sector; 2918 return sector;
2902} 2919}
2903 2920
2921/*
2922 * helper function for fiemap, which doesn't want to see any holes.
2923 * This maps until we find something past 'last'
2924 */
2925static struct extent_map *get_extent_skip_holes(struct inode *inode,
2926 u64 offset,
2927 u64 last,
2928 get_extent_t *get_extent)
2929{
2930 u64 sectorsize = BTRFS_I(inode)->root->sectorsize;
2931 struct extent_map *em;
2932 u64 len;
2933
2934 if (offset >= last)
2935 return NULL;
2936
2937 while(1) {
2938 len = last - offset;
2939 if (len == 0)
2940 break;
2941 len = (len + sectorsize - 1) & ~(sectorsize - 1);
2942 em = get_extent(inode, NULL, 0, offset, len, 0);
2943 if (!em || IS_ERR(em))
2944 return em;
2945
2946 /* if this isn't a hole return it */
2947 if (!test_bit(EXTENT_FLAG_VACANCY, &em->flags) &&
2948 em->block_start != EXTENT_MAP_HOLE) {
2949 return em;
2950 }
2951
2952 /* this is a hole, advance to the next extent */
2953 offset = extent_map_end(em);
2954 free_extent_map(em);
2955 if (offset >= last)
2956 break;
2957 }
2958 return NULL;
2959}
2960
2904int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 2961int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2905 __u64 start, __u64 len, get_extent_t *get_extent) 2962 __u64 start, __u64 len, get_extent_t *get_extent)
2906{ 2963{
@@ -2910,16 +2967,19 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2910 u32 flags = 0; 2967 u32 flags = 0;
2911 u32 found_type; 2968 u32 found_type;
2912 u64 last; 2969 u64 last;
2970 u64 last_for_get_extent = 0;
2913 u64 disko = 0; 2971 u64 disko = 0;
2972 u64 isize = i_size_read(inode);
2914 struct btrfs_key found_key; 2973 struct btrfs_key found_key;
2915 struct extent_map *em = NULL; 2974 struct extent_map *em = NULL;
2916 struct extent_state *cached_state = NULL; 2975 struct extent_state *cached_state = NULL;
2917 struct btrfs_path *path; 2976 struct btrfs_path *path;
2918 struct btrfs_file_extent_item *item; 2977 struct btrfs_file_extent_item *item;
2919 int end = 0; 2978 int end = 0;
2920 u64 em_start = 0, em_len = 0; 2979 u64 em_start = 0;
2980 u64 em_len = 0;
2981 u64 em_end = 0;
2921 unsigned long emflags; 2982 unsigned long emflags;
2922 int hole = 0;
2923 2983
2924 if (len == 0) 2984 if (len == 0)
2925 return -EINVAL; 2985 return -EINVAL;
@@ -2929,6 +2989,10 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2929 return -ENOMEM; 2989 return -ENOMEM;
2930 path->leave_spinning = 1; 2990 path->leave_spinning = 1;
2931 2991
2992 /*
2993 * lookup the last file extent. We're not using i_size here
2994 * because there might be preallocation past i_size
2995 */
2932 ret = btrfs_lookup_file_extent(NULL, BTRFS_I(inode)->root, 2996 ret = btrfs_lookup_file_extent(NULL, BTRFS_I(inode)->root,
2933 path, inode->i_ino, -1, 0); 2997 path, inode->i_ino, -1, 0);
2934 if (ret < 0) { 2998 if (ret < 0) {
@@ -2942,18 +3006,38 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2942 btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]); 3006 btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]);
2943 found_type = btrfs_key_type(&found_key); 3007 found_type = btrfs_key_type(&found_key);
2944 3008
2945 /* No extents, just return */ 3009 /* No extents, but there might be delalloc bits */
2946 if (found_key.objectid != inode->i_ino || 3010 if (found_key.objectid != inode->i_ino ||
2947 found_type != BTRFS_EXTENT_DATA_KEY) { 3011 found_type != BTRFS_EXTENT_DATA_KEY) {
2948 btrfs_free_path(path); 3012 /* have to trust i_size as the end */
2949 return 0; 3013 last = (u64)-1;
3014 last_for_get_extent = isize;
3015 } else {
3016 /*
3017 * remember the start of the last extent. There are a
3018 * bunch of different factors that go into the length of the
3019 * extent, so it's much less complex to remember where it started
3020 */
3021 last = found_key.offset;
3022 last_for_get_extent = last + 1;
2950 } 3023 }
2951 last = found_key.offset;
2952 btrfs_free_path(path); 3024 btrfs_free_path(path);
2953 3025
3026 /*
3027 * we might have some extents allocated but more delalloc past those
3028 * extents. so, we trust isize unless the start of the last extent is
3029 * beyond isize
3030 */
3031 if (last < isize) {
3032 last = (u64)-1;
3033 last_for_get_extent = isize;
3034 }
3035
2954 lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0, 3036 lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0,
2955 &cached_state, GFP_NOFS); 3037 &cached_state, GFP_NOFS);
2956 em = get_extent(inode, NULL, 0, off, max - off, 0); 3038
3039 em = get_extent_skip_holes(inode, off, last_for_get_extent,
3040 get_extent);
2957 if (!em) 3041 if (!em)
2958 goto out; 3042 goto out;
2959 if (IS_ERR(em)) { 3043 if (IS_ERR(em)) {
@@ -2962,19 +3046,14 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2962 } 3046 }
2963 3047
2964 while (!end) { 3048 while (!end) {
2965 hole = 0; 3049 off = extent_map_end(em);
2966 off = em->start + em->len;
2967 if (off >= max) 3050 if (off >= max)
2968 end = 1; 3051 end = 1;
2969 3052
2970 if (em->block_start == EXTENT_MAP_HOLE) {
2971 hole = 1;
2972 goto next;
2973 }
2974
2975 em_start = em->start; 3053 em_start = em->start;
2976 em_len = em->len; 3054 em_len = em->len;
2977 3055 em_end = extent_map_end(em);
3056 emflags = em->flags;
2978 disko = 0; 3057 disko = 0;
2979 flags = 0; 3058 flags = 0;
2980 3059
@@ -2993,37 +3072,29 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2993 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) 3072 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
2994 flags |= FIEMAP_EXTENT_ENCODED; 3073 flags |= FIEMAP_EXTENT_ENCODED;
2995 3074
2996next:
2997 emflags = em->flags;
2998 free_extent_map(em); 3075 free_extent_map(em);
2999 em = NULL; 3076 em = NULL;
3000 if (!end) { 3077 if ((em_start >= last) || em_len == (u64)-1 ||
3001 em = get_extent(inode, NULL, 0, off, max - off, 0); 3078 (last == (u64)-1 && isize <= em_end)) {
3002 if (!em)
3003 goto out;
3004 if (IS_ERR(em)) {
3005 ret = PTR_ERR(em);
3006 goto out;
3007 }
3008 emflags = em->flags;
3009 }
3010
3011 if (test_bit(EXTENT_FLAG_VACANCY, &emflags)) {
3012 flags |= FIEMAP_EXTENT_LAST; 3079 flags |= FIEMAP_EXTENT_LAST;
3013 end = 1; 3080 end = 1;
3014 } 3081 }
3015 3082
3016 if (em_start == last) { 3083 /* now scan forward to see if this is really the last extent. */
3084 em = get_extent_skip_holes(inode, off, last_for_get_extent,
3085 get_extent);
3086 if (IS_ERR(em)) {
3087 ret = PTR_ERR(em);
3088 goto out;
3089 }
3090 if (!em) {
3017 flags |= FIEMAP_EXTENT_LAST; 3091 flags |= FIEMAP_EXTENT_LAST;
3018 end = 1; 3092 end = 1;
3019 } 3093 }
3020 3094 ret = fiemap_fill_next_extent(fieinfo, em_start, disko,
3021 if (!hole) { 3095 em_len, flags);
3022 ret = fiemap_fill_next_extent(fieinfo, em_start, disko, 3096 if (ret)
3023 em_len, flags); 3097 goto out_free;
3024 if (ret)
3025 goto out_free;
3026 }
3027 } 3098 }
3028out_free: 3099out_free:
3029 free_extent_map(em); 3100 free_extent_map(em);
@@ -3192,7 +3263,13 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
3192 } 3263 }
3193 if (!PageUptodate(p)) 3264 if (!PageUptodate(p))
3194 uptodate = 0; 3265 uptodate = 0;
3195 unlock_page(p); 3266
3267 /*
3268 * see below about how we avoid a nasty race with release page
3269 * and why we unlock later
3270 */
3271 if (i != 0)
3272 unlock_page(p);
3196 } 3273 }
3197 if (uptodate) 3274 if (uptodate)
3198 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); 3275 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
@@ -3216,9 +3293,26 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
3216 atomic_inc(&eb->refs); 3293 atomic_inc(&eb->refs);
3217 spin_unlock(&tree->buffer_lock); 3294 spin_unlock(&tree->buffer_lock);
3218 radix_tree_preload_end(); 3295 radix_tree_preload_end();
3296
3297 /*
3298 * there is a race where release page may have
3299 * tried to find this extent buffer in the radix
3300 * but failed. It will tell the VM it is safe to
3301 * reclaim the page, and it will clear the page private bit.
3302 * We must make sure to set the page private bit properly
3303 * after the extent buffer is in the radix tree so
3304 * it doesn't get lost
3305 */
3306 set_page_extent_mapped(eb->first_page);
3307 set_page_extent_head(eb->first_page, eb->len);
3308 if (!page0)
3309 unlock_page(eb->first_page);
3219 return eb; 3310 return eb;
3220 3311
3221free_eb: 3312free_eb:
3313 if (eb->first_page && !page0)
3314 unlock_page(eb->first_page);
3315
3222 if (!atomic_dec_and_test(&eb->refs)) 3316 if (!atomic_dec_and_test(&eb->refs))
3223 return exists; 3317 return exists;
3224 btrfs_release_extent_buffer(eb); 3318 btrfs_release_extent_buffer(eb);
@@ -3269,10 +3363,11 @@ int clear_extent_buffer_dirty(struct extent_io_tree *tree,
3269 continue; 3363 continue;
3270 3364
3271 lock_page(page); 3365 lock_page(page);
3366 WARN_ON(!PagePrivate(page));
3367
3368 set_page_extent_mapped(page);
3272 if (i == 0) 3369 if (i == 0)
3273 set_page_extent_head(page, eb->len); 3370 set_page_extent_head(page, eb->len);
3274 else
3275 set_page_private(page, EXTENT_PAGE_PRIVATE);
3276 3371
3277 clear_page_dirty_for_io(page); 3372 clear_page_dirty_for_io(page);
3278 spin_lock_irq(&page->mapping->tree_lock); 3373 spin_lock_irq(&page->mapping->tree_lock);
@@ -3462,6 +3557,13 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
3462 3557
3463 for (i = start_i; i < num_pages; i++) { 3558 for (i = start_i; i < num_pages; i++) {
3464 page = extent_buffer_page(eb, i); 3559 page = extent_buffer_page(eb, i);
3560
3561 WARN_ON(!PagePrivate(page));
3562
3563 set_page_extent_mapped(page);
3564 if (i == 0)
3565 set_page_extent_head(page, eb->len);
3566
3465 if (inc_all_pages) 3567 if (inc_all_pages)
3466 page_cache_get(page); 3568 page_cache_get(page);
3467 if (!PageUptodate(page)) { 3569 if (!PageUptodate(page)) {
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 7083cfafd061..9318dfefd59c 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -191,7 +191,7 @@ void extent_io_exit(void);
191 191
192u64 count_range_bits(struct extent_io_tree *tree, 192u64 count_range_bits(struct extent_io_tree *tree,
193 u64 *start, u64 search_end, 193 u64 *start, u64 search_end,
194 u64 max_bytes, unsigned long bits); 194 u64 max_bytes, unsigned long bits, int contig);
195 195
196void free_extent_state(struct extent_state *state); 196void free_extent_state(struct extent_state *state);
197int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, 197int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index b0e1fce12530..2b6c12e983b3 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -51,8 +51,8 @@ struct extent_map *alloc_extent_map(gfp_t mask)
51{ 51{
52 struct extent_map *em; 52 struct extent_map *em;
53 em = kmem_cache_alloc(extent_map_cache, mask); 53 em = kmem_cache_alloc(extent_map_cache, mask);
54 if (!em || IS_ERR(em)) 54 if (!em)
55 return em; 55 return NULL;
56 em->in_tree = 0; 56 em->in_tree = 0;
57 em->flags = 0; 57 em->flags = 0;
58 em->compress_type = BTRFS_COMPRESS_NONE; 58 em->compress_type = BTRFS_COMPRESS_NONE;
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index a562a250ae77..4f19a3e1bf32 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -536,6 +536,8 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans,
536 root = root->fs_info->csum_root; 536 root = root->fs_info->csum_root;
537 537
538 path = btrfs_alloc_path(); 538 path = btrfs_alloc_path();
539 if (!path)
540 return -ENOMEM;
539 541
540 while (1) { 542 while (1) {
541 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID; 543 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
@@ -548,7 +550,10 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans,
548 if (path->slots[0] == 0) 550 if (path->slots[0] == 0)
549 goto out; 551 goto out;
550 path->slots[0]--; 552 path->slots[0]--;
553 } else if (ret < 0) {
554 goto out;
551 } 555 }
556
552 leaf = path->nodes[0]; 557 leaf = path->nodes[0];
553 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 558 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
554 559
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index c800d58f3013..7084140d5940 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -186,6 +186,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
186 split = alloc_extent_map(GFP_NOFS); 186 split = alloc_extent_map(GFP_NOFS);
187 if (!split2) 187 if (!split2)
188 split2 = alloc_extent_map(GFP_NOFS); 188 split2 = alloc_extent_map(GFP_NOFS);
189 BUG_ON(!split || !split2);
189 190
190 write_lock(&em_tree->lock); 191 write_lock(&em_tree->lock);
191 em = lookup_extent_mapping(em_tree, start, len); 192 em = lookup_extent_mapping(em_tree, start, len);
@@ -793,8 +794,12 @@ again:
793 for (i = 0; i < num_pages; i++) { 794 for (i = 0; i < num_pages; i++) {
794 pages[i] = grab_cache_page(inode->i_mapping, index + i); 795 pages[i] = grab_cache_page(inode->i_mapping, index + i);
795 if (!pages[i]) { 796 if (!pages[i]) {
796 err = -ENOMEM; 797 int c;
797 BUG_ON(1); 798 for (c = i - 1; c >= 0; c--) {
799 unlock_page(pages[c]);
800 page_cache_release(pages[c]);
801 }
802 return -ENOMEM;
798 } 803 }
799 wait_on_page_writeback(pages[i]); 804 wait_on_page_writeback(pages[i]);
800 } 805 }
@@ -946,6 +951,10 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
946 PAGE_CACHE_SIZE, PAGE_CACHE_SIZE / 951 PAGE_CACHE_SIZE, PAGE_CACHE_SIZE /
947 (sizeof(struct page *))); 952 (sizeof(struct page *)));
948 pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL); 953 pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL);
954 if (!pages) {
955 ret = -ENOMEM;
956 goto out;
957 }
949 958
950 /* generic_write_checks can change our pos */ 959 /* generic_write_checks can change our pos */
951 start_pos = pos; 960 start_pos = pos;
@@ -984,8 +993,8 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
984 size_t write_bytes = min(iov_iter_count(&i), 993 size_t write_bytes = min(iov_iter_count(&i),
985 nrptrs * (size_t)PAGE_CACHE_SIZE - 994 nrptrs * (size_t)PAGE_CACHE_SIZE -
986 offset); 995 offset);
987 size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >> 996 size_t num_pages = (write_bytes + offset +
988 PAGE_CACHE_SHIFT; 997 PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
989 998
990 WARN_ON(num_pages > nrptrs); 999 WARN_ON(num_pages > nrptrs);
991 memset(pages, 0, sizeof(struct page *) * nrptrs); 1000 memset(pages, 0, sizeof(struct page *) * nrptrs);
@@ -1015,8 +1024,8 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
1015 1024
1016 copied = btrfs_copy_from_user(pos, num_pages, 1025 copied = btrfs_copy_from_user(pos, num_pages,
1017 write_bytes, pages, &i); 1026 write_bytes, pages, &i);
1018 dirty_pages = (copied + PAGE_CACHE_SIZE - 1) >> 1027 dirty_pages = (copied + offset + PAGE_CACHE_SIZE - 1) >>
1019 PAGE_CACHE_SHIFT; 1028 PAGE_CACHE_SHIFT;
1020 1029
1021 if (num_pages > dirty_pages) { 1030 if (num_pages > dirty_pages) {
1022 if (copied > 0) 1031 if (copied > 0)
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 60d684266959..a0390657451b 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -987,11 +987,18 @@ tree_search_offset(struct btrfs_block_group_cache *block_group,
987 return entry; 987 return entry;
988} 988}
989 989
990static void unlink_free_space(struct btrfs_block_group_cache *block_group, 990static inline void
991 struct btrfs_free_space *info) 991__unlink_free_space(struct btrfs_block_group_cache *block_group,
992 struct btrfs_free_space *info)
992{ 993{
993 rb_erase(&info->offset_index, &block_group->free_space_offset); 994 rb_erase(&info->offset_index, &block_group->free_space_offset);
994 block_group->free_extents--; 995 block_group->free_extents--;
996}
997
998static void unlink_free_space(struct btrfs_block_group_cache *block_group,
999 struct btrfs_free_space *info)
1000{
1001 __unlink_free_space(block_group, info);
995 block_group->free_space -= info->bytes; 1002 block_group->free_space -= info->bytes;
996} 1003}
997 1004
@@ -1016,14 +1023,18 @@ static void recalculate_thresholds(struct btrfs_block_group_cache *block_group)
1016 u64 max_bytes; 1023 u64 max_bytes;
1017 u64 bitmap_bytes; 1024 u64 bitmap_bytes;
1018 u64 extent_bytes; 1025 u64 extent_bytes;
1026 u64 size = block_group->key.offset;
1019 1027
1020 /* 1028 /*
1021 * The goal is to keep the total amount of memory used per 1gb of space 1029 * The goal is to keep the total amount of memory used per 1gb of space
1022 * at or below 32k, so we need to adjust how much memory we allow to be 1030 * at or below 32k, so we need to adjust how much memory we allow to be
1023 * used by extent based free space tracking 1031 * used by extent based free space tracking
1024 */ 1032 */
1025 max_bytes = MAX_CACHE_BYTES_PER_GIG * 1033 if (size < 1024 * 1024 * 1024)
1026 (div64_u64(block_group->key.offset, 1024 * 1024 * 1024)); 1034 max_bytes = MAX_CACHE_BYTES_PER_GIG;
1035 else
1036 max_bytes = MAX_CACHE_BYTES_PER_GIG *
1037 div64_u64(size, 1024 * 1024 * 1024);
1027 1038
1028 /* 1039 /*
1029 * we want to account for 1 more bitmap than what we have so we can make 1040 * we want to account for 1 more bitmap than what we have so we can make
@@ -1171,6 +1182,16 @@ static void add_new_bitmap(struct btrfs_block_group_cache *block_group,
1171 recalculate_thresholds(block_group); 1182 recalculate_thresholds(block_group);
1172} 1183}
1173 1184
1185static void free_bitmap(struct btrfs_block_group_cache *block_group,
1186 struct btrfs_free_space *bitmap_info)
1187{
1188 unlink_free_space(block_group, bitmap_info);
1189 kfree(bitmap_info->bitmap);
1190 kfree(bitmap_info);
1191 block_group->total_bitmaps--;
1192 recalculate_thresholds(block_group);
1193}
1194
1174static noinline int remove_from_bitmap(struct btrfs_block_group_cache *block_group, 1195static noinline int remove_from_bitmap(struct btrfs_block_group_cache *block_group,
1175 struct btrfs_free_space *bitmap_info, 1196 struct btrfs_free_space *bitmap_info,
1176 u64 *offset, u64 *bytes) 1197 u64 *offset, u64 *bytes)
@@ -1195,6 +1216,7 @@ again:
1195 */ 1216 */
1196 search_start = *offset; 1217 search_start = *offset;
1197 search_bytes = *bytes; 1218 search_bytes = *bytes;
1219 search_bytes = min(search_bytes, end - search_start + 1);
1198 ret = search_bitmap(block_group, bitmap_info, &search_start, 1220 ret = search_bitmap(block_group, bitmap_info, &search_start,
1199 &search_bytes); 1221 &search_bytes);
1200 BUG_ON(ret < 0 || search_start != *offset); 1222 BUG_ON(ret < 0 || search_start != *offset);
@@ -1211,13 +1233,8 @@ again:
1211 1233
1212 if (*bytes) { 1234 if (*bytes) {
1213 struct rb_node *next = rb_next(&bitmap_info->offset_index); 1235 struct rb_node *next = rb_next(&bitmap_info->offset_index);
1214 if (!bitmap_info->bytes) { 1236 if (!bitmap_info->bytes)
1215 unlink_free_space(block_group, bitmap_info); 1237 free_bitmap(block_group, bitmap_info);
1216 kfree(bitmap_info->bitmap);
1217 kfree(bitmap_info);
1218 block_group->total_bitmaps--;
1219 recalculate_thresholds(block_group);
1220 }
1221 1238
1222 /* 1239 /*
1223 * no entry after this bitmap, but we still have bytes to 1240 * no entry after this bitmap, but we still have bytes to
@@ -1250,13 +1267,8 @@ again:
1250 return -EAGAIN; 1267 return -EAGAIN;
1251 1268
1252 goto again; 1269 goto again;
1253 } else if (!bitmap_info->bytes) { 1270 } else if (!bitmap_info->bytes)
1254 unlink_free_space(block_group, bitmap_info); 1271 free_bitmap(block_group, bitmap_info);
1255 kfree(bitmap_info->bitmap);
1256 kfree(bitmap_info);
1257 block_group->total_bitmaps--;
1258 recalculate_thresholds(block_group);
1259 }
1260 1272
1261 return 0; 1273 return 0;
1262} 1274}
@@ -1359,22 +1371,14 @@ out:
1359 return ret; 1371 return ret;
1360} 1372}
1361 1373
1362int btrfs_add_free_space(struct btrfs_block_group_cache *block_group, 1374bool try_merge_free_space(struct btrfs_block_group_cache *block_group,
1363 u64 offset, u64 bytes) 1375 struct btrfs_free_space *info, bool update_stat)
1364{ 1376{
1365 struct btrfs_free_space *right_info = NULL; 1377 struct btrfs_free_space *left_info;
1366 struct btrfs_free_space *left_info = NULL; 1378 struct btrfs_free_space *right_info;
1367 struct btrfs_free_space *info = NULL; 1379 bool merged = false;
1368 int ret = 0; 1380 u64 offset = info->offset;
1369 1381 u64 bytes = info->bytes;
1370 info = kzalloc(sizeof(struct btrfs_free_space), GFP_NOFS);
1371 if (!info)
1372 return -ENOMEM;
1373
1374 info->offset = offset;
1375 info->bytes = bytes;
1376
1377 spin_lock(&block_group->tree_lock);
1378 1382
1379 /* 1383 /*
1380 * first we want to see if there is free space adjacent to the range we 1384 * first we want to see if there is free space adjacent to the range we
@@ -1388,37 +1392,62 @@ int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
1388 else 1392 else
1389 left_info = tree_search_offset(block_group, offset - 1, 0, 0); 1393 left_info = tree_search_offset(block_group, offset - 1, 0, 0);
1390 1394
1391 /*
1392 * If there was no extent directly to the left or right of this new
1393 * extent then we know we're going to have to allocate a new extent, so
1394 * before we do that see if we need to drop this into a bitmap
1395 */
1396 if ((!left_info || left_info->bitmap) &&
1397 (!right_info || right_info->bitmap)) {
1398 ret = insert_into_bitmap(block_group, info);
1399
1400 if (ret < 0) {
1401 goto out;
1402 } else if (ret) {
1403 ret = 0;
1404 goto out;
1405 }
1406 }
1407
1408 if (right_info && !right_info->bitmap) { 1395 if (right_info && !right_info->bitmap) {
1409 unlink_free_space(block_group, right_info); 1396 if (update_stat)
1397 unlink_free_space(block_group, right_info);
1398 else
1399 __unlink_free_space(block_group, right_info);
1410 info->bytes += right_info->bytes; 1400 info->bytes += right_info->bytes;
1411 kfree(right_info); 1401 kfree(right_info);
1402 merged = true;
1412 } 1403 }
1413 1404
1414 if (left_info && !left_info->bitmap && 1405 if (left_info && !left_info->bitmap &&
1415 left_info->offset + left_info->bytes == offset) { 1406 left_info->offset + left_info->bytes == offset) {
1416 unlink_free_space(block_group, left_info); 1407 if (update_stat)
1408 unlink_free_space(block_group, left_info);
1409 else
1410 __unlink_free_space(block_group, left_info);
1417 info->offset = left_info->offset; 1411 info->offset = left_info->offset;
1418 info->bytes += left_info->bytes; 1412 info->bytes += left_info->bytes;
1419 kfree(left_info); 1413 kfree(left_info);
1414 merged = true;
1420 } 1415 }
1421 1416
1417 return merged;
1418}
1419
1420int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
1421 u64 offset, u64 bytes)
1422{
1423 struct btrfs_free_space *info;
1424 int ret = 0;
1425
1426 info = kzalloc(sizeof(struct btrfs_free_space), GFP_NOFS);
1427 if (!info)
1428 return -ENOMEM;
1429
1430 info->offset = offset;
1431 info->bytes = bytes;
1432
1433 spin_lock(&block_group->tree_lock);
1434
1435 if (try_merge_free_space(block_group, info, true))
1436 goto link;
1437
1438 /*
1439 * There was no extent directly to the left or right of this new
1440 * extent, so we know we're going to have to allocate a new extent.
1441 * Before we do that, see if we need to drop this into a bitmap.
1442 */
1443 ret = insert_into_bitmap(block_group, info);
1444 if (ret < 0) {
1445 goto out;
1446 } else if (ret) {
1447 ret = 0;
1448 goto out;
1449 }
1450link:
1422 ret = link_free_space(block_group, info); 1451 ret = link_free_space(block_group, info);
1423 if (ret) 1452 if (ret)
1424 kfree(info); 1453 kfree(info);
@@ -1621,6 +1650,7 @@ __btrfs_return_cluster_to_free_space(
1621 node = rb_next(&entry->offset_index); 1650 node = rb_next(&entry->offset_index);
1622 rb_erase(&entry->offset_index, &cluster->root); 1651 rb_erase(&entry->offset_index, &cluster->root);
1623 BUG_ON(entry->bitmap); 1652 BUG_ON(entry->bitmap);
1653 try_merge_free_space(block_group, entry, false);
1624 tree_insert_offset(&block_group->free_space_offset, 1654 tree_insert_offset(&block_group->free_space_offset,
1625 entry->offset, &entry->offset_index, 0); 1655 entry->offset, &entry->offset_index, 0);
1626 } 1656 }
@@ -1685,13 +1715,8 @@ u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
1685 ret = offset; 1715 ret = offset;
1686 if (entry->bitmap) { 1716 if (entry->bitmap) {
1687 bitmap_clear_bits(block_group, entry, offset, bytes); 1717 bitmap_clear_bits(block_group, entry, offset, bytes);
1688 if (!entry->bytes) { 1718 if (!entry->bytes)
1689 unlink_free_space(block_group, entry); 1719 free_bitmap(block_group, entry);
1690 kfree(entry->bitmap);
1691 kfree(entry);
1692 block_group->total_bitmaps--;
1693 recalculate_thresholds(block_group);
1694 }
1695 } else { 1720 } else {
1696 unlink_free_space(block_group, entry); 1721 unlink_free_space(block_group, entry);
1697 entry->offset += bytes; 1722 entry->offset += bytes;
@@ -1789,6 +1814,8 @@ static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
1789 1814
1790 ret = search_start; 1815 ret = search_start;
1791 bitmap_clear_bits(block_group, entry, ret, bytes); 1816 bitmap_clear_bits(block_group, entry, ret, bytes);
1817 if (entry->bytes == 0)
1818 free_bitmap(block_group, entry);
1792out: 1819out:
1793 spin_unlock(&cluster->lock); 1820 spin_unlock(&cluster->lock);
1794 spin_unlock(&block_group->tree_lock); 1821 spin_unlock(&block_group->tree_lock);
@@ -1842,15 +1869,26 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
1842 entry->offset += bytes; 1869 entry->offset += bytes;
1843 entry->bytes -= bytes; 1870 entry->bytes -= bytes;
1844 1871
1845 if (entry->bytes == 0) { 1872 if (entry->bytes == 0)
1846 rb_erase(&entry->offset_index, &cluster->root); 1873 rb_erase(&entry->offset_index, &cluster->root);
1847 kfree(entry);
1848 }
1849 break; 1874 break;
1850 } 1875 }
1851out: 1876out:
1852 spin_unlock(&cluster->lock); 1877 spin_unlock(&cluster->lock);
1853 1878
1879 if (!ret)
1880 return 0;
1881
1882 spin_lock(&block_group->tree_lock);
1883
1884 block_group->free_space -= bytes;
1885 if (entry->bytes == 0) {
1886 block_group->free_extents--;
1887 kfree(entry);
1888 }
1889
1890 spin_unlock(&block_group->tree_lock);
1891
1854 return ret; 1892 return ret;
1855} 1893}
1856 1894
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 160b55b3e132..0efdb65953c5 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -416,7 +416,7 @@ again:
416 } 416 }
417 if (start == 0) { 417 if (start == 0) {
418 trans = btrfs_join_transaction(root, 1); 418 trans = btrfs_join_transaction(root, 1);
419 BUG_ON(!trans); 419 BUG_ON(IS_ERR(trans));
420 btrfs_set_trans_block_group(trans, inode); 420 btrfs_set_trans_block_group(trans, inode);
421 trans->block_rsv = &root->fs_info->delalloc_block_rsv; 421 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
422 422
@@ -612,6 +612,7 @@ retry:
612 GFP_NOFS); 612 GFP_NOFS);
613 613
614 trans = btrfs_join_transaction(root, 1); 614 trans = btrfs_join_transaction(root, 1);
615 BUG_ON(IS_ERR(trans));
615 ret = btrfs_reserve_extent(trans, root, 616 ret = btrfs_reserve_extent(trans, root,
616 async_extent->compressed_size, 617 async_extent->compressed_size,
617 async_extent->compressed_size, 618 async_extent->compressed_size,
@@ -643,6 +644,7 @@ retry:
643 async_extent->ram_size - 1, 0); 644 async_extent->ram_size - 1, 0);
644 645
645 em = alloc_extent_map(GFP_NOFS); 646 em = alloc_extent_map(GFP_NOFS);
647 BUG_ON(!em);
646 em->start = async_extent->start; 648 em->start = async_extent->start;
647 em->len = async_extent->ram_size; 649 em->len = async_extent->ram_size;
648 em->orig_start = em->start; 650 em->orig_start = em->start;
@@ -771,7 +773,7 @@ static noinline int cow_file_range(struct inode *inode,
771 773
772 BUG_ON(root == root->fs_info->tree_root); 774 BUG_ON(root == root->fs_info->tree_root);
773 trans = btrfs_join_transaction(root, 1); 775 trans = btrfs_join_transaction(root, 1);
774 BUG_ON(!trans); 776 BUG_ON(IS_ERR(trans));
775 btrfs_set_trans_block_group(trans, inode); 777 btrfs_set_trans_block_group(trans, inode);
776 trans->block_rsv = &root->fs_info->delalloc_block_rsv; 778 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
777 779
@@ -819,6 +821,7 @@ static noinline int cow_file_range(struct inode *inode,
819 BUG_ON(ret); 821 BUG_ON(ret);
820 822
821 em = alloc_extent_map(GFP_NOFS); 823 em = alloc_extent_map(GFP_NOFS);
824 BUG_ON(!em);
822 em->start = start; 825 em->start = start;
823 em->orig_start = em->start; 826 em->orig_start = em->start;
824 ram_size = ins.offset; 827 ram_size = ins.offset;
@@ -1049,7 +1052,7 @@ static noinline int run_delalloc_nocow(struct inode *inode,
1049 } else { 1052 } else {
1050 trans = btrfs_join_transaction(root, 1); 1053 trans = btrfs_join_transaction(root, 1);
1051 } 1054 }
1052 BUG_ON(!trans); 1055 BUG_ON(IS_ERR(trans));
1053 1056
1054 cow_start = (u64)-1; 1057 cow_start = (u64)-1;
1055 cur_offset = start; 1058 cur_offset = start;
@@ -1168,6 +1171,7 @@ out_check:
1168 struct extent_map_tree *em_tree; 1171 struct extent_map_tree *em_tree;
1169 em_tree = &BTRFS_I(inode)->extent_tree; 1172 em_tree = &BTRFS_I(inode)->extent_tree;
1170 em = alloc_extent_map(GFP_NOFS); 1173 em = alloc_extent_map(GFP_NOFS);
1174 BUG_ON(!em);
1171 em->start = cur_offset; 1175 em->start = cur_offset;
1172 em->orig_start = em->start; 1176 em->orig_start = em->start;
1173 em->len = num_bytes; 1177 em->len = num_bytes;
@@ -1557,6 +1561,7 @@ out:
1557out_page: 1561out_page:
1558 unlock_page(page); 1562 unlock_page(page);
1559 page_cache_release(page); 1563 page_cache_release(page);
1564 kfree(fixup);
1560} 1565}
1561 1566
1562/* 1567/*
@@ -1703,7 +1708,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1703 trans = btrfs_join_transaction_nolock(root, 1); 1708 trans = btrfs_join_transaction_nolock(root, 1);
1704 else 1709 else
1705 trans = btrfs_join_transaction(root, 1); 1710 trans = btrfs_join_transaction(root, 1);
1706 BUG_ON(!trans); 1711 BUG_ON(IS_ERR(trans));
1707 btrfs_set_trans_block_group(trans, inode); 1712 btrfs_set_trans_block_group(trans, inode);
1708 trans->block_rsv = &root->fs_info->delalloc_block_rsv; 1713 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
1709 ret = btrfs_update_inode(trans, root, inode); 1714 ret = btrfs_update_inode(trans, root, inode);
@@ -1720,6 +1725,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1720 trans = btrfs_join_transaction_nolock(root, 1); 1725 trans = btrfs_join_transaction_nolock(root, 1);
1721 else 1726 else
1722 trans = btrfs_join_transaction(root, 1); 1727 trans = btrfs_join_transaction(root, 1);
1728 BUG_ON(IS_ERR(trans));
1723 btrfs_set_trans_block_group(trans, inode); 1729 btrfs_set_trans_block_group(trans, inode);
1724 trans->block_rsv = &root->fs_info->delalloc_block_rsv; 1730 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
1725 1731
@@ -1907,7 +1913,7 @@ static int btrfs_clean_io_failures(struct inode *inode, u64 start)
1907 1913
1908 private = 0; 1914 private = 0;
1909 if (count_range_bits(&BTRFS_I(inode)->io_failure_tree, &private, 1915 if (count_range_bits(&BTRFS_I(inode)->io_failure_tree, &private,
1910 (u64)-1, 1, EXTENT_DIRTY)) { 1916 (u64)-1, 1, EXTENT_DIRTY, 0)) {
1911 ret = get_state_private(&BTRFS_I(inode)->io_failure_tree, 1917 ret = get_state_private(&BTRFS_I(inode)->io_failure_tree,
1912 start, &private_failure); 1918 start, &private_failure);
1913 if (ret == 0) { 1919 if (ret == 0) {
@@ -2354,6 +2360,7 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
2354 */ 2360 */
2355 if (is_bad_inode(inode)) { 2361 if (is_bad_inode(inode)) {
2356 trans = btrfs_start_transaction(root, 0); 2362 trans = btrfs_start_transaction(root, 0);
2363 BUG_ON(IS_ERR(trans));
2357 btrfs_orphan_del(trans, inode); 2364 btrfs_orphan_del(trans, inode);
2358 btrfs_end_transaction(trans, root); 2365 btrfs_end_transaction(trans, root);
2359 iput(inode); 2366 iput(inode);
@@ -2381,6 +2388,7 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
2381 2388
2382 if (root->orphan_block_rsv || root->orphan_item_inserted) { 2389 if (root->orphan_block_rsv || root->orphan_item_inserted) {
2383 trans = btrfs_join_transaction(root, 1); 2390 trans = btrfs_join_transaction(root, 1);
2391 BUG_ON(IS_ERR(trans));
2384 btrfs_end_transaction(trans, root); 2392 btrfs_end_transaction(trans, root);
2385 } 2393 }
2386 2394
@@ -2641,7 +2649,7 @@ int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
2641 path = btrfs_alloc_path(); 2649 path = btrfs_alloc_path();
2642 if (!path) { 2650 if (!path) {
2643 ret = -ENOMEM; 2651 ret = -ENOMEM;
2644 goto err; 2652 goto out;
2645 } 2653 }
2646 2654
2647 path->leave_spinning = 1; 2655 path->leave_spinning = 1;
@@ -2714,9 +2722,10 @@ static int check_path_shared(struct btrfs_root *root,
2714 struct extent_buffer *eb; 2722 struct extent_buffer *eb;
2715 int level; 2723 int level;
2716 u64 refs = 1; 2724 u64 refs = 1;
2717 int uninitialized_var(ret);
2718 2725
2719 for (level = 0; level < BTRFS_MAX_LEVEL; level++) { 2726 for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
2727 int ret;
2728
2720 if (!path->nodes[level]) 2729 if (!path->nodes[level])
2721 break; 2730 break;
2722 eb = path->nodes[level]; 2731 eb = path->nodes[level];
@@ -2727,7 +2736,7 @@ static int check_path_shared(struct btrfs_root *root,
2727 if (refs > 1) 2736 if (refs > 1)
2728 return 1; 2737 return 1;
2729 } 2738 }
2730 return ret; /* XXX callers? */ 2739 return 0;
2731} 2740}
2732 2741
2733/* 2742/*
@@ -4134,7 +4143,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
4134 } 4143 }
4135 srcu_read_unlock(&root->fs_info->subvol_srcu, index); 4144 srcu_read_unlock(&root->fs_info->subvol_srcu, index);
4136 4145
4137 if (root != sub_root) { 4146 if (!IS_ERR(inode) && root != sub_root) {
4138 down_read(&root->fs_info->cleanup_work_sem); 4147 down_read(&root->fs_info->cleanup_work_sem);
4139 if (!(inode->i_sb->s_flags & MS_RDONLY)) 4148 if (!(inode->i_sb->s_flags & MS_RDONLY))
4140 btrfs_orphan_cleanup(sub_root); 4149 btrfs_orphan_cleanup(sub_root);
@@ -4347,6 +4356,8 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc)
4347 trans = btrfs_join_transaction_nolock(root, 1); 4356 trans = btrfs_join_transaction_nolock(root, 1);
4348 else 4357 else
4349 trans = btrfs_join_transaction(root, 1); 4358 trans = btrfs_join_transaction(root, 1);
4359 if (IS_ERR(trans))
4360 return PTR_ERR(trans);
4350 btrfs_set_trans_block_group(trans, inode); 4361 btrfs_set_trans_block_group(trans, inode);
4351 if (nolock) 4362 if (nolock)
4352 ret = btrfs_end_transaction_nolock(trans, root); 4363 ret = btrfs_end_transaction_nolock(trans, root);
@@ -4372,6 +4383,7 @@ void btrfs_dirty_inode(struct inode *inode)
4372 return; 4383 return;
4373 4384
4374 trans = btrfs_join_transaction(root, 1); 4385 trans = btrfs_join_transaction(root, 1);
4386 BUG_ON(IS_ERR(trans));
4375 btrfs_set_trans_block_group(trans, inode); 4387 btrfs_set_trans_block_group(trans, inode);
4376 4388
4377 ret = btrfs_update_inode(trans, root, inode); 4389 ret = btrfs_update_inode(trans, root, inode);
@@ -5176,6 +5188,8 @@ again:
5176 em = NULL; 5188 em = NULL;
5177 btrfs_release_path(root, path); 5189 btrfs_release_path(root, path);
5178 trans = btrfs_join_transaction(root, 1); 5190 trans = btrfs_join_transaction(root, 1);
5191 if (IS_ERR(trans))
5192 return ERR_CAST(trans);
5179 goto again; 5193 goto again;
5180 } 5194 }
5181 map = kmap(page); 5195 map = kmap(page);
@@ -5266,6 +5280,128 @@ out:
5266 return em; 5280 return em;
5267} 5281}
5268 5282
5283struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *page,
5284 size_t pg_offset, u64 start, u64 len,
5285 int create)
5286{
5287 struct extent_map *em;
5288 struct extent_map *hole_em = NULL;
5289 u64 range_start = start;
5290 u64 end;
5291 u64 found;
5292 u64 found_end;
5293 int err = 0;
5294
5295 em = btrfs_get_extent(inode, page, pg_offset, start, len, create);
5296 if (IS_ERR(em))
5297 return em;
5298 if (em) {
5299 /*
5300 * if our em maps to a hole, there might
5301 * actually be delalloc bytes behind it
5302 */
5303 if (em->block_start != EXTENT_MAP_HOLE)
5304 return em;
5305 else
5306 hole_em = em;
5307 }
5308
5309 /* check to see if we've wrapped (len == -1 or similar) */
5310 end = start + len;
5311 if (end < start)
5312 end = (u64)-1;
5313 else
5314 end -= 1;
5315
5316 em = NULL;
5317
5318 /* ok, we didn't find anything, lets look for delalloc */
5319 found = count_range_bits(&BTRFS_I(inode)->io_tree, &range_start,
5320 end, len, EXTENT_DELALLOC, 1);
5321 found_end = range_start + found;
5322 if (found_end < range_start)
5323 found_end = (u64)-1;
5324
5325 /*
5326 * we didn't find anything useful, return
5327 * the original results from get_extent()
5328 */
5329 if (range_start > end || found_end <= start) {
5330 em = hole_em;
5331 hole_em = NULL;
5332 goto out;
5333 }
5334
5335 /* adjust the range_start to make sure it doesn't
5336 * go backwards from the start they passed in
5337 */
5338 range_start = max(start,range_start);
5339 found = found_end - range_start;
5340
5341 if (found > 0) {
5342 u64 hole_start = start;
5343 u64 hole_len = len;
5344
5345 em = alloc_extent_map(GFP_NOFS);
5346 if (!em) {
5347 err = -ENOMEM;
5348 goto out;
5349 }
5350 /*
5351 * when btrfs_get_extent can't find anything it
5352 * returns one huge hole
5353 *
5354 * make sure what it found really fits our range, and
5355 * adjust to make sure it is based on the start from
5356 * the caller
5357 */
5358 if (hole_em) {
5359 u64 calc_end = extent_map_end(hole_em);
5360
5361 if (calc_end <= start || (hole_em->start > end)) {
5362 free_extent_map(hole_em);
5363 hole_em = NULL;
5364 } else {
5365 hole_start = max(hole_em->start, start);
5366 hole_len = calc_end - hole_start;
5367 }
5368 }
5369 em->bdev = NULL;
5370 if (hole_em && range_start > hole_start) {
5371 /* our hole starts before our delalloc, so we
5372 * have to return just the parts of the hole
5373 * that go until the delalloc starts
5374 */
5375 em->len = min(hole_len,
5376 range_start - hole_start);
5377 em->start = hole_start;
5378 em->orig_start = hole_start;
5379 /*
5380 * don't adjust block start at all,
5381 * it is fixed at EXTENT_MAP_HOLE
5382 */
5383 em->block_start = hole_em->block_start;
5384 em->block_len = hole_len;
5385 } else {
5386 em->start = range_start;
5387 em->len = found;
5388 em->orig_start = range_start;
5389 em->block_start = EXTENT_MAP_DELALLOC;
5390 em->block_len = found;
5391 }
5392 } else if (hole_em) {
5393 return hole_em;
5394 }
5395out:
5396
5397 free_extent_map(hole_em);
5398 if (err) {
5399 free_extent_map(em);
5400 return ERR_PTR(err);
5401 }
5402 return em;
5403}
5404
5269static struct extent_map *btrfs_new_extent_direct(struct inode *inode, 5405static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
5270 u64 start, u64 len) 5406 u64 start, u64 len)
5271{ 5407{
@@ -5280,8 +5416,8 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
5280 btrfs_drop_extent_cache(inode, start, start + len - 1, 0); 5416 btrfs_drop_extent_cache(inode, start, start + len - 1, 0);
5281 5417
5282 trans = btrfs_join_transaction(root, 0); 5418 trans = btrfs_join_transaction(root, 0);
5283 if (!trans) 5419 if (IS_ERR(trans))
5284 return ERR_PTR(-ENOMEM); 5420 return ERR_CAST(trans);
5285 5421
5286 trans->block_rsv = &root->fs_info->delalloc_block_rsv; 5422 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
5287 5423
@@ -5505,7 +5641,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
5505 * while we look for nocow cross refs 5641 * while we look for nocow cross refs
5506 */ 5642 */
5507 trans = btrfs_join_transaction(root, 0); 5643 trans = btrfs_join_transaction(root, 0);
5508 if (!trans) 5644 if (IS_ERR(trans))
5509 goto must_cow; 5645 goto must_cow;
5510 5646
5511 if (can_nocow_odirect(trans, inode, start, len) == 1) { 5647 if (can_nocow_odirect(trans, inode, start, len) == 1) {
@@ -5640,7 +5776,7 @@ again:
5640 BUG_ON(!ordered); 5776 BUG_ON(!ordered);
5641 5777
5642 trans = btrfs_join_transaction(root, 1); 5778 trans = btrfs_join_transaction(root, 1);
5643 if (!trans) { 5779 if (IS_ERR(trans)) {
5644 err = -ENOMEM; 5780 err = -ENOMEM;
5645 goto out; 5781 goto out;
5646 } 5782 }
@@ -6088,7 +6224,7 @@ out:
6088static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 6224static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
6089 __u64 start, __u64 len) 6225 __u64 start, __u64 len)
6090{ 6226{
6091 return extent_fiemap(inode, fieinfo, start, len, btrfs_get_extent); 6227 return extent_fiemap(inode, fieinfo, start, len, btrfs_get_extent_fiemap);
6092} 6228}
6093 6229
6094int btrfs_readpage(struct file *file, struct page *page) 6230int btrfs_readpage(struct file *file, struct page *page)
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index a506a22b522a..5fdb2abc4fa7 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -203,7 +203,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
203 203
204 204
205 trans = btrfs_join_transaction(root, 1); 205 trans = btrfs_join_transaction(root, 1);
206 BUG_ON(!trans); 206 BUG_ON(IS_ERR(trans));
207 207
208 ret = btrfs_update_inode(trans, root, inode); 208 ret = btrfs_update_inode(trans, root, inode);
209 BUG_ON(ret); 209 BUG_ON(ret);
@@ -907,6 +907,10 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
907 907
908 if (new_size > old_size) { 908 if (new_size > old_size) {
909 trans = btrfs_start_transaction(root, 0); 909 trans = btrfs_start_transaction(root, 0);
910 if (IS_ERR(trans)) {
911 ret = PTR_ERR(trans);
912 goto out_unlock;
913 }
910 ret = btrfs_grow_device(trans, device, new_size); 914 ret = btrfs_grow_device(trans, device, new_size);
911 btrfs_commit_transaction(trans, root); 915 btrfs_commit_transaction(trans, root);
912 } else { 916 } else {
@@ -1067,12 +1071,15 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
1067 if (copy_from_user(&flags, arg, sizeof(flags))) 1071 if (copy_from_user(&flags, arg, sizeof(flags)))
1068 return -EFAULT; 1072 return -EFAULT;
1069 1073
1070 if (flags & ~BTRFS_SUBVOL_CREATE_ASYNC) 1074 if (flags & BTRFS_SUBVOL_CREATE_ASYNC)
1071 return -EINVAL; 1075 return -EINVAL;
1072 1076
1073 if (flags & ~BTRFS_SUBVOL_RDONLY) 1077 if (flags & ~BTRFS_SUBVOL_RDONLY)
1074 return -EOPNOTSUPP; 1078 return -EOPNOTSUPP;
1075 1079
1080 if (!is_owner_or_cap(inode))
1081 return -EACCES;
1082
1076 down_write(&root->fs_info->subvol_sem); 1083 down_write(&root->fs_info->subvol_sem);
1077 1084
1078 /* nothing to do */ 1085 /* nothing to do */
@@ -1093,7 +1100,7 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
1093 goto out_reset; 1100 goto out_reset;
1094 } 1101 }
1095 1102
1096 ret = btrfs_update_root(trans, root, 1103 ret = btrfs_update_root(trans, root->fs_info->tree_root,
1097 &root->root_key, &root->root_item); 1104 &root->root_key, &root->root_item);
1098 1105
1099 btrfs_commit_transaction(trans, root); 1106 btrfs_commit_transaction(trans, root);
@@ -1898,7 +1905,10 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
1898 1905
1899 memcpy(&new_key, &key, sizeof(new_key)); 1906 memcpy(&new_key, &key, sizeof(new_key));
1900 new_key.objectid = inode->i_ino; 1907 new_key.objectid = inode->i_ino;
1901 new_key.offset = key.offset + destoff - off; 1908 if (off <= key.offset)
1909 new_key.offset = key.offset + destoff - off;
1910 else
1911 new_key.offset = destoff;
1902 1912
1903 trans = btrfs_start_transaction(root, 1); 1913 trans = btrfs_start_transaction(root, 1);
1904 if (IS_ERR(trans)) { 1914 if (IS_ERR(trans)) {
@@ -2082,7 +2092,7 @@ static long btrfs_ioctl_trans_start(struct file *file)
2082 2092
2083 ret = -ENOMEM; 2093 ret = -ENOMEM;
2084 trans = btrfs_start_ioctl_transaction(root, 0); 2094 trans = btrfs_start_ioctl_transaction(root, 0);
2085 if (!trans) 2095 if (IS_ERR(trans))
2086 goto out_drop; 2096 goto out_drop;
2087 2097
2088 file->private_data = trans; 2098 file->private_data = trans;
@@ -2138,9 +2148,9 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)
2138 path->leave_spinning = 1; 2148 path->leave_spinning = 1;
2139 2149
2140 trans = btrfs_start_transaction(root, 1); 2150 trans = btrfs_start_transaction(root, 1);
2141 if (!trans) { 2151 if (IS_ERR(trans)) {
2142 btrfs_free_path(path); 2152 btrfs_free_path(path);
2143 return -ENOMEM; 2153 return PTR_ERR(trans);
2144 } 2154 }
2145 2155
2146 dir_id = btrfs_super_root_dir(&root->fs_info->super_copy); 2156 dir_id = btrfs_super_root_dir(&root->fs_info->super_copy);
@@ -2201,7 +2211,7 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
2201 int num_types = 4; 2211 int num_types = 4;
2202 int alloc_size; 2212 int alloc_size;
2203 int ret = 0; 2213 int ret = 0;
2204 int slot_count = 0; 2214 u64 slot_count = 0;
2205 int i, c; 2215 int i, c;
2206 2216
2207 if (copy_from_user(&space_args, 2217 if (copy_from_user(&space_args,
@@ -2240,7 +2250,7 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
2240 goto out; 2250 goto out;
2241 } 2251 }
2242 2252
2243 slot_count = min_t(int, space_args.space_slots, slot_count); 2253 slot_count = min_t(u64, space_args.space_slots, slot_count);
2244 2254
2245 alloc_size = sizeof(*dest) * slot_count; 2255 alloc_size = sizeof(*dest) * slot_count;
2246 2256
@@ -2260,6 +2270,9 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
2260 for (i = 0; i < num_types; i++) { 2270 for (i = 0; i < num_types; i++) {
2261 struct btrfs_space_info *tmp; 2271 struct btrfs_space_info *tmp;
2262 2272
2273 if (!slot_count)
2274 break;
2275
2263 info = NULL; 2276 info = NULL;
2264 rcu_read_lock(); 2277 rcu_read_lock();
2265 list_for_each_entry_rcu(tmp, &root->fs_info->space_info, 2278 list_for_each_entry_rcu(tmp, &root->fs_info->space_info,
@@ -2281,7 +2294,10 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
2281 memcpy(dest, &space, sizeof(space)); 2294 memcpy(dest, &space, sizeof(space));
2282 dest++; 2295 dest++;
2283 space_args.total_spaces++; 2296 space_args.total_spaces++;
2297 slot_count--;
2284 } 2298 }
2299 if (!slot_count)
2300 break;
2285 } 2301 }
2286 up_read(&info->groups_sem); 2302 up_read(&info->groups_sem);
2287 } 2303 }
@@ -2334,6 +2350,8 @@ static noinline long btrfs_ioctl_start_sync(struct file *file, void __user *argp
2334 u64 transid; 2350 u64 transid;
2335 2351
2336 trans = btrfs_start_transaction(root, 0); 2352 trans = btrfs_start_transaction(root, 0);
2353 if (IS_ERR(trans))
2354 return PTR_ERR(trans);
2337 transid = trans->transid; 2355 transid = trans->transid;
2338 btrfs_commit_transaction_async(trans, root, 0); 2356 btrfs_commit_transaction_async(trans, root, 0);
2339 2357
diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c
index cc9b450399df..a178f5ebea78 100644
--- a/fs/btrfs/lzo.c
+++ b/fs/btrfs/lzo.c
@@ -280,6 +280,7 @@ static int lzo_decompress_biovec(struct list_head *ws,
280 unsigned long tot_out; 280 unsigned long tot_out;
281 unsigned long tot_len; 281 unsigned long tot_len;
282 char *buf; 282 char *buf;
283 bool may_late_unmap, need_unmap;
283 284
284 data_in = kmap(pages_in[0]); 285 data_in = kmap(pages_in[0]);
285 tot_len = read_compress_length(data_in); 286 tot_len = read_compress_length(data_in);
@@ -300,11 +301,13 @@ static int lzo_decompress_biovec(struct list_head *ws,
300 301
301 tot_in += in_len; 302 tot_in += in_len;
302 working_bytes = in_len; 303 working_bytes = in_len;
304 may_late_unmap = need_unmap = false;
303 305
304 /* fast path: avoid using the working buffer */ 306 /* fast path: avoid using the working buffer */
305 if (in_page_bytes_left >= in_len) { 307 if (in_page_bytes_left >= in_len) {
306 buf = data_in + in_offset; 308 buf = data_in + in_offset;
307 bytes = in_len; 309 bytes = in_len;
310 may_late_unmap = true;
308 goto cont; 311 goto cont;
309 } 312 }
310 313
@@ -329,14 +332,17 @@ cont:
329 if (working_bytes == 0 && tot_in >= tot_len) 332 if (working_bytes == 0 && tot_in >= tot_len)
330 break; 333 break;
331 334
332 kunmap(pages_in[page_in_index]); 335 if (page_in_index + 1 >= total_pages_in) {
333 page_in_index++;
334 if (page_in_index >= total_pages_in) {
335 ret = -1; 336 ret = -1;
336 data_in = NULL;
337 goto done; 337 goto done;
338 } 338 }
339 data_in = kmap(pages_in[page_in_index]); 339
340 if (may_late_unmap)
341 need_unmap = true;
342 else
343 kunmap(pages_in[page_in_index]);
344
345 data_in = kmap(pages_in[++page_in_index]);
340 346
341 in_page_bytes_left = PAGE_CACHE_SIZE; 347 in_page_bytes_left = PAGE_CACHE_SIZE;
342 in_offset = 0; 348 in_offset = 0;
@@ -346,6 +352,8 @@ cont:
346 out_len = lzo1x_worst_compress(PAGE_CACHE_SIZE); 352 out_len = lzo1x_worst_compress(PAGE_CACHE_SIZE);
347 ret = lzo1x_decompress_safe(buf, in_len, workspace->buf, 353 ret = lzo1x_decompress_safe(buf, in_len, workspace->buf,
348 &out_len); 354 &out_len);
355 if (need_unmap)
356 kunmap(pages_in[page_in_index - 1]);
349 if (ret != LZO_E_OK) { 357 if (ret != LZO_E_OK) {
350 printk(KERN_WARNING "btrfs decompress failed\n"); 358 printk(KERN_WARNING "btrfs decompress failed\n");
351 ret = -1; 359 ret = -1;
@@ -363,8 +371,7 @@ cont:
363 break; 371 break;
364 } 372 }
365done: 373done:
366 if (data_in) 374 kunmap(pages_in[page_in_index]);
367 kunmap(pages_in[page_in_index]);
368 return ret; 375 return ret;
369} 376}
370 377
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 2b61e1ddcd99..083a55477375 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -141,7 +141,7 @@ static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree,
141 u64 file_offset) 141 u64 file_offset)
142{ 142{
143 struct rb_root *root = &tree->tree; 143 struct rb_root *root = &tree->tree;
144 struct rb_node *prev; 144 struct rb_node *prev = NULL;
145 struct rb_node *ret; 145 struct rb_node *ret;
146 struct btrfs_ordered_extent *entry; 146 struct btrfs_ordered_extent *entry;
147 147
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c
index 0d126be22b63..fb2605d998e9 100644
--- a/fs/btrfs/print-tree.c
+++ b/fs/btrfs/print-tree.c
@@ -260,6 +260,7 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l)
260#else 260#else
261 BUG(); 261 BUG();
262#endif 262#endif
263 break;
263 case BTRFS_BLOCK_GROUP_ITEM_KEY: 264 case BTRFS_BLOCK_GROUP_ITEM_KEY:
264 bi = btrfs_item_ptr(l, i, 265 bi = btrfs_item_ptr(l, i,
265 struct btrfs_block_group_item); 266 struct btrfs_block_group_item);
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 045c9c2b2d7e..31ade5802ae8 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -1157,6 +1157,7 @@ static int clone_backref_node(struct btrfs_trans_handle *trans,
1157 new_node->bytenr = dest->node->start; 1157 new_node->bytenr = dest->node->start;
1158 new_node->level = node->level; 1158 new_node->level = node->level;
1159 new_node->lowest = node->lowest; 1159 new_node->lowest = node->lowest;
1160 new_node->checked = 1;
1160 new_node->root = dest; 1161 new_node->root = dest;
1161 1162
1162 if (!node->lowest) { 1163 if (!node->lowest) {
@@ -2028,6 +2029,7 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
2028 2029
2029 while (1) { 2030 while (1) {
2030 trans = btrfs_start_transaction(root, 0); 2031 trans = btrfs_start_transaction(root, 0);
2032 BUG_ON(IS_ERR(trans));
2031 trans->block_rsv = rc->block_rsv; 2033 trans->block_rsv = rc->block_rsv;
2032 2034
2033 ret = btrfs_block_rsv_check(trans, root, rc->block_rsv, 2035 ret = btrfs_block_rsv_check(trans, root, rc->block_rsv,
@@ -2147,6 +2149,12 @@ again:
2147 } 2149 }
2148 2150
2149 trans = btrfs_join_transaction(rc->extent_root, 1); 2151 trans = btrfs_join_transaction(rc->extent_root, 1);
2152 if (IS_ERR(trans)) {
2153 if (!err)
2154 btrfs_block_rsv_release(rc->extent_root,
2155 rc->block_rsv, num_bytes);
2156 return PTR_ERR(trans);
2157 }
2150 2158
2151 if (!err) { 2159 if (!err) {
2152 if (num_bytes != rc->merging_rsv_size) { 2160 if (num_bytes != rc->merging_rsv_size) {
@@ -3222,6 +3230,7 @@ truncate:
3222 trans = btrfs_join_transaction(root, 0); 3230 trans = btrfs_join_transaction(root, 0);
3223 if (IS_ERR(trans)) { 3231 if (IS_ERR(trans)) {
3224 btrfs_free_path(path); 3232 btrfs_free_path(path);
3233 ret = PTR_ERR(trans);
3225 goto out; 3234 goto out;
3226 } 3235 }
3227 3236
@@ -3628,6 +3637,7 @@ int prepare_to_relocate(struct reloc_control *rc)
3628 set_reloc_control(rc); 3637 set_reloc_control(rc);
3629 3638
3630 trans = btrfs_join_transaction(rc->extent_root, 1); 3639 trans = btrfs_join_transaction(rc->extent_root, 1);
3640 BUG_ON(IS_ERR(trans));
3631 btrfs_commit_transaction(trans, rc->extent_root); 3641 btrfs_commit_transaction(trans, rc->extent_root);
3632 return 0; 3642 return 0;
3633} 3643}
@@ -3644,6 +3654,7 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
3644 u32 item_size; 3654 u32 item_size;
3645 int ret; 3655 int ret;
3646 int err = 0; 3656 int err = 0;
3657 int progress = 0;
3647 3658
3648 path = btrfs_alloc_path(); 3659 path = btrfs_alloc_path();
3649 if (!path) 3660 if (!path)
@@ -3656,8 +3667,10 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
3656 } 3667 }
3657 3668
3658 while (1) { 3669 while (1) {
3670 progress++;
3659 trans = btrfs_start_transaction(rc->extent_root, 0); 3671 trans = btrfs_start_transaction(rc->extent_root, 0);
3660 3672 BUG_ON(IS_ERR(trans));
3673restart:
3661 if (update_backref_cache(trans, &rc->backref_cache)) { 3674 if (update_backref_cache(trans, &rc->backref_cache)) {
3662 btrfs_end_transaction(trans, rc->extent_root); 3675 btrfs_end_transaction(trans, rc->extent_root);
3663 continue; 3676 continue;
@@ -3770,6 +3783,15 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
3770 } 3783 }
3771 } 3784 }
3772 } 3785 }
3786 if (trans && progress && err == -ENOSPC) {
3787 ret = btrfs_force_chunk_alloc(trans, rc->extent_root,
3788 rc->block_group->flags);
3789 if (ret == 0) {
3790 err = 0;
3791 progress = 0;
3792 goto restart;
3793 }
3794 }
3773 3795
3774 btrfs_release_path(rc->extent_root, path); 3796 btrfs_release_path(rc->extent_root, path);
3775 clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY, 3797 clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY,
@@ -3804,7 +3826,10 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
3804 3826
3805 /* get rid of pinned extents */ 3827 /* get rid of pinned extents */
3806 trans = btrfs_join_transaction(rc->extent_root, 1); 3828 trans = btrfs_join_transaction(rc->extent_root, 1);
3807 btrfs_commit_transaction(trans, rc->extent_root); 3829 if (IS_ERR(trans))
3830 err = PTR_ERR(trans);
3831 else
3832 btrfs_commit_transaction(trans, rc->extent_root);
3808out_free: 3833out_free:
3809 btrfs_free_block_rsv(rc->extent_root, rc->block_rsv); 3834 btrfs_free_block_rsv(rc->extent_root, rc->block_rsv);
3810 btrfs_free_path(path); 3835 btrfs_free_path(path);
@@ -4022,6 +4047,7 @@ static noinline_for_stack int mark_garbage_root(struct btrfs_root *root)
4022 int ret; 4047 int ret;
4023 4048
4024 trans = btrfs_start_transaction(root->fs_info->tree_root, 0); 4049 trans = btrfs_start_transaction(root->fs_info->tree_root, 0);
4050 BUG_ON(IS_ERR(trans));
4025 4051
4026 memset(&root->root_item.drop_progress, 0, 4052 memset(&root->root_item.drop_progress, 0,
4027 sizeof(root->root_item.drop_progress)); 4053 sizeof(root->root_item.drop_progress));
@@ -4125,6 +4151,11 @@ int btrfs_recover_relocation(struct btrfs_root *root)
4125 set_reloc_control(rc); 4151 set_reloc_control(rc);
4126 4152
4127 trans = btrfs_join_transaction(rc->extent_root, 1); 4153 trans = btrfs_join_transaction(rc->extent_root, 1);
4154 if (IS_ERR(trans)) {
4155 unset_reloc_control(rc);
4156 err = PTR_ERR(trans);
4157 goto out_free;
4158 }
4128 4159
4129 rc->merge_reloc_tree = 1; 4160 rc->merge_reloc_tree = 1;
4130 4161
@@ -4154,9 +4185,13 @@ int btrfs_recover_relocation(struct btrfs_root *root)
4154 unset_reloc_control(rc); 4185 unset_reloc_control(rc);
4155 4186
4156 trans = btrfs_join_transaction(rc->extent_root, 1); 4187 trans = btrfs_join_transaction(rc->extent_root, 1);
4157 btrfs_commit_transaction(trans, rc->extent_root); 4188 if (IS_ERR(trans))
4158out: 4189 err = PTR_ERR(trans);
4190 else
4191 btrfs_commit_transaction(trans, rc->extent_root);
4192out_free:
4159 kfree(rc); 4193 kfree(rc);
4194out:
4160 while (!list_empty(&reloc_roots)) { 4195 while (!list_empty(&reloc_roots)) {
4161 reloc_root = list_entry(reloc_roots.next, 4196 reloc_root = list_entry(reloc_roots.next,
4162 struct btrfs_root, root_list); 4197 struct btrfs_root, root_list);
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index b2130c46fdb5..d39a9895d932 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -155,7 +155,8 @@ enum {
155 Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress, 155 Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress,
156 Opt_compress_type, Opt_compress_force, Opt_compress_force_type, 156 Opt_compress_type, Opt_compress_force, Opt_compress_force_type,
157 Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard, 157 Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard,
158 Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed, Opt_err, 158 Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed,
159 Opt_enospc_debug, Opt_err,
159}; 160};
160 161
161static match_table_t tokens = { 162static match_table_t tokens = {
@@ -184,6 +185,7 @@ static match_table_t tokens = {
184 {Opt_space_cache, "space_cache"}, 185 {Opt_space_cache, "space_cache"},
185 {Opt_clear_cache, "clear_cache"}, 186 {Opt_clear_cache, "clear_cache"},
186 {Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"}, 187 {Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"},
188 {Opt_enospc_debug, "enospc_debug"},
187 {Opt_err, NULL}, 189 {Opt_err, NULL},
188}; 190};
189 191
@@ -358,6 +360,9 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
358 case Opt_user_subvol_rm_allowed: 360 case Opt_user_subvol_rm_allowed:
359 btrfs_set_opt(info->mount_opt, USER_SUBVOL_RM_ALLOWED); 361 btrfs_set_opt(info->mount_opt, USER_SUBVOL_RM_ALLOWED);
360 break; 362 break;
363 case Opt_enospc_debug:
364 btrfs_set_opt(info->mount_opt, ENOSPC_DEBUG);
365 break;
361 case Opt_err: 366 case Opt_err:
362 printk(KERN_INFO "btrfs: unrecognized mount option " 367 printk(KERN_INFO "btrfs: unrecognized mount option "
363 "'%s'\n", p); 368 "'%s'\n", p);
@@ -383,7 +388,7 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags,
383 struct btrfs_fs_devices **fs_devices) 388 struct btrfs_fs_devices **fs_devices)
384{ 389{
385 substring_t args[MAX_OPT_ARGS]; 390 substring_t args[MAX_OPT_ARGS];
386 char *opts, *p; 391 char *opts, *orig, *p;
387 int error = 0; 392 int error = 0;
388 int intarg; 393 int intarg;
389 394
@@ -397,6 +402,7 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags,
397 opts = kstrdup(options, GFP_KERNEL); 402 opts = kstrdup(options, GFP_KERNEL);
398 if (!opts) 403 if (!opts)
399 return -ENOMEM; 404 return -ENOMEM;
405 orig = opts;
400 406
401 while ((p = strsep(&opts, ",")) != NULL) { 407 while ((p = strsep(&opts, ",")) != NULL) {
402 int token; 408 int token;
@@ -432,7 +438,7 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags,
432 } 438 }
433 439
434 out_free_opts: 440 out_free_opts:
435 kfree(opts); 441 kfree(orig);
436 out: 442 out:
437 /* 443 /*
438 * If no subvolume name is specified we use the default one. Allocate 444 * If no subvolume name is specified we use the default one. Allocate
@@ -623,6 +629,8 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
623 btrfs_wait_ordered_extents(root, 0, 0); 629 btrfs_wait_ordered_extents(root, 0, 0);
624 630
625 trans = btrfs_start_transaction(root, 0); 631 trans = btrfs_start_transaction(root, 0);
632 if (IS_ERR(trans))
633 return PTR_ERR(trans);
626 ret = btrfs_commit_transaction(trans, root); 634 ret = btrfs_commit_transaction(trans, root);
627 return ret; 635 return ret;
628} 636}
@@ -761,6 +769,8 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
761 } 769 }
762 770
763 btrfs_close_devices(fs_devices); 771 btrfs_close_devices(fs_devices);
772 kfree(fs_info);
773 kfree(tree_root);
764 } else { 774 } else {
765 char b[BDEVNAME_SIZE]; 775 char b[BDEVNAME_SIZE];
766 776
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index bae5c7b8bbe2..3d73c8d93bbb 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -1161,6 +1161,11 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
1161 INIT_DELAYED_WORK(&ac->work, do_async_commit); 1161 INIT_DELAYED_WORK(&ac->work, do_async_commit);
1162 ac->root = root; 1162 ac->root = root;
1163 ac->newtrans = btrfs_join_transaction(root, 0); 1163 ac->newtrans = btrfs_join_transaction(root, 0);
1164 if (IS_ERR(ac->newtrans)) {
1165 int err = PTR_ERR(ac->newtrans);
1166 kfree(ac);
1167 return err;
1168 }
1164 1169
1165 /* take transaction reference */ 1170 /* take transaction reference */
1166 mutex_lock(&root->fs_info->trans_mutex); 1171 mutex_lock(&root->fs_info->trans_mutex);
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 054744ac5719..a4bbb854dfd2 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -338,6 +338,12 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans,
338 } 338 }
339 dst_copy = kmalloc(item_size, GFP_NOFS); 339 dst_copy = kmalloc(item_size, GFP_NOFS);
340 src_copy = kmalloc(item_size, GFP_NOFS); 340 src_copy = kmalloc(item_size, GFP_NOFS);
341 if (!dst_copy || !src_copy) {
342 btrfs_release_path(root, path);
343 kfree(dst_copy);
344 kfree(src_copy);
345 return -ENOMEM;
346 }
341 347
342 read_extent_buffer(eb, src_copy, src_ptr, item_size); 348 read_extent_buffer(eb, src_copy, src_ptr, item_size);
343 349
@@ -665,6 +671,9 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans,
665 btrfs_dir_item_key_to_cpu(leaf, di, &location); 671 btrfs_dir_item_key_to_cpu(leaf, di, &location);
666 name_len = btrfs_dir_name_len(leaf, di); 672 name_len = btrfs_dir_name_len(leaf, di);
667 name = kmalloc(name_len, GFP_NOFS); 673 name = kmalloc(name_len, GFP_NOFS);
674 if (!name)
675 return -ENOMEM;
676
668 read_extent_buffer(leaf, name, (unsigned long)(di + 1), name_len); 677 read_extent_buffer(leaf, name, (unsigned long)(di + 1), name_len);
669 btrfs_release_path(root, path); 678 btrfs_release_path(root, path);
670 679
@@ -744,6 +753,9 @@ static noinline int backref_in_log(struct btrfs_root *log,
744 int match = 0; 753 int match = 0;
745 754
746 path = btrfs_alloc_path(); 755 path = btrfs_alloc_path();
756 if (!path)
757 return -ENOMEM;
758
747 ret = btrfs_search_slot(NULL, log, key, path, 0, 0); 759 ret = btrfs_search_slot(NULL, log, key, path, 0, 0);
748 if (ret != 0) 760 if (ret != 0)
749 goto out; 761 goto out;
@@ -967,6 +979,8 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans,
967 key.offset = (u64)-1; 979 key.offset = (u64)-1;
968 980
969 path = btrfs_alloc_path(); 981 path = btrfs_alloc_path();
982 if (!path)
983 return -ENOMEM;
970 984
971 while (1) { 985 while (1) {
972 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 986 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
@@ -1178,6 +1192,9 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
1178 1192
1179 name_len = btrfs_dir_name_len(eb, di); 1193 name_len = btrfs_dir_name_len(eb, di);
1180 name = kmalloc(name_len, GFP_NOFS); 1194 name = kmalloc(name_len, GFP_NOFS);
1195 if (!name)
1196 return -ENOMEM;
1197
1181 log_type = btrfs_dir_type(eb, di); 1198 log_type = btrfs_dir_type(eb, di);
1182 read_extent_buffer(eb, name, (unsigned long)(di + 1), 1199 read_extent_buffer(eb, name, (unsigned long)(di + 1),
1183 name_len); 1200 name_len);
@@ -1692,6 +1709,8 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
1692 root_owner = btrfs_header_owner(parent); 1709 root_owner = btrfs_header_owner(parent);
1693 1710
1694 next = btrfs_find_create_tree_block(root, bytenr, blocksize); 1711 next = btrfs_find_create_tree_block(root, bytenr, blocksize);
1712 if (!next)
1713 return -ENOMEM;
1695 1714
1696 if (*level == 1) { 1715 if (*level == 1) {
1697 wc->process_func(root, next, wc, ptr_gen); 1716 wc->process_func(root, next, wc, ptr_gen);
@@ -2032,6 +2051,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
2032 wait_log_commit(trans, log_root_tree, 2051 wait_log_commit(trans, log_root_tree,
2033 log_root_tree->log_transid); 2052 log_root_tree->log_transid);
2034 mutex_unlock(&log_root_tree->log_mutex); 2053 mutex_unlock(&log_root_tree->log_mutex);
2054 ret = 0;
2035 goto out; 2055 goto out;
2036 } 2056 }
2037 atomic_set(&log_root_tree->log_commit[index2], 1); 2057 atomic_set(&log_root_tree->log_commit[index2], 1);
@@ -2096,7 +2116,7 @@ out:
2096 smp_mb(); 2116 smp_mb();
2097 if (waitqueue_active(&root->log_commit_wait[index1])) 2117 if (waitqueue_active(&root->log_commit_wait[index1]))
2098 wake_up(&root->log_commit_wait[index1]); 2118 wake_up(&root->log_commit_wait[index1]);
2099 return 0; 2119 return ret;
2100} 2120}
2101 2121
2102static void free_log_tree(struct btrfs_trans_handle *trans, 2122static void free_log_tree(struct btrfs_trans_handle *trans,
@@ -2194,6 +2214,9 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
2194 2214
2195 log = root->log_root; 2215 log = root->log_root;
2196 path = btrfs_alloc_path(); 2216 path = btrfs_alloc_path();
2217 if (!path)
2218 return -ENOMEM;
2219
2197 di = btrfs_lookup_dir_item(trans, log, path, dir->i_ino, 2220 di = btrfs_lookup_dir_item(trans, log, path, dir->i_ino,
2198 name, name_len, -1); 2221 name, name_len, -1);
2199 if (IS_ERR(di)) { 2222 if (IS_ERR(di)) {
@@ -2594,6 +2617,9 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
2594 2617
2595 ins_data = kmalloc(nr * sizeof(struct btrfs_key) + 2618 ins_data = kmalloc(nr * sizeof(struct btrfs_key) +
2596 nr * sizeof(u32), GFP_NOFS); 2619 nr * sizeof(u32), GFP_NOFS);
2620 if (!ins_data)
2621 return -ENOMEM;
2622
2597 ins_sizes = (u32 *)ins_data; 2623 ins_sizes = (u32 *)ins_data;
2598 ins_keys = (struct btrfs_key *)(ins_data + nr * sizeof(u32)); 2624 ins_keys = (struct btrfs_key *)(ins_data + nr * sizeof(u32));
2599 2625
@@ -2725,7 +2751,13 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
2725 log = root->log_root; 2751 log = root->log_root;
2726 2752
2727 path = btrfs_alloc_path(); 2753 path = btrfs_alloc_path();
2754 if (!path)
2755 return -ENOMEM;
2728 dst_path = btrfs_alloc_path(); 2756 dst_path = btrfs_alloc_path();
2757 if (!dst_path) {
2758 btrfs_free_path(path);
2759 return -ENOMEM;
2760 }
2729 2761
2730 min_key.objectid = inode->i_ino; 2762 min_key.objectid = inode->i_ino;
2731 min_key.type = BTRFS_INODE_ITEM_KEY; 2763 min_key.type = BTRFS_INODE_ITEM_KEY;
@@ -3080,6 +3112,7 @@ int btrfs_recover_log_trees(struct btrfs_root *log_root_tree)
3080 BUG_ON(!path); 3112 BUG_ON(!path);
3081 3113
3082 trans = btrfs_start_transaction(fs_info->tree_root, 0); 3114 trans = btrfs_start_transaction(fs_info->tree_root, 0);
3115 BUG_ON(IS_ERR(trans));
3083 3116
3084 wc.trans = trans; 3117 wc.trans = trans;
3085 wc.pin = 1; 3118 wc.pin = 1;
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index d158530233b7..dd13eb81ee40 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1213,6 +1213,10 @@ static int btrfs_rm_dev_item(struct btrfs_root *root,
1213 return -ENOMEM; 1213 return -ENOMEM;
1214 1214
1215 trans = btrfs_start_transaction(root, 0); 1215 trans = btrfs_start_transaction(root, 0);
1216 if (IS_ERR(trans)) {
1217 btrfs_free_path(path);
1218 return PTR_ERR(trans);
1219 }
1216 key.objectid = BTRFS_DEV_ITEMS_OBJECTID; 1220 key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
1217 key.type = BTRFS_DEV_ITEM_KEY; 1221 key.type = BTRFS_DEV_ITEM_KEY;
1218 key.offset = device->devid; 1222 key.offset = device->devid;
@@ -1334,11 +1338,11 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
1334 1338
1335 ret = btrfs_shrink_device(device, 0); 1339 ret = btrfs_shrink_device(device, 0);
1336 if (ret) 1340 if (ret)
1337 goto error_brelse; 1341 goto error_undo;
1338 1342
1339 ret = btrfs_rm_dev_item(root->fs_info->chunk_root, device); 1343 ret = btrfs_rm_dev_item(root->fs_info->chunk_root, device);
1340 if (ret) 1344 if (ret)
1341 goto error_brelse; 1345 goto error_undo;
1342 1346
1343 device->in_fs_metadata = 0; 1347 device->in_fs_metadata = 0;
1344 1348
@@ -1412,6 +1416,13 @@ out:
1412 mutex_unlock(&root->fs_info->volume_mutex); 1416 mutex_unlock(&root->fs_info->volume_mutex);
1413 mutex_unlock(&uuid_mutex); 1417 mutex_unlock(&uuid_mutex);
1414 return ret; 1418 return ret;
1419error_undo:
1420 if (device->writeable) {
1421 list_add(&device->dev_alloc_list,
1422 &root->fs_info->fs_devices->alloc_list);
1423 root->fs_info->fs_devices->rw_devices++;
1424 }
1425 goto error_brelse;
1415} 1426}
1416 1427
1417/* 1428/*
@@ -1601,11 +1612,19 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
1601 1612
1602 ret = find_next_devid(root, &device->devid); 1613 ret = find_next_devid(root, &device->devid);
1603 if (ret) { 1614 if (ret) {
1615 kfree(device->name);
1604 kfree(device); 1616 kfree(device);
1605 goto error; 1617 goto error;
1606 } 1618 }
1607 1619
1608 trans = btrfs_start_transaction(root, 0); 1620 trans = btrfs_start_transaction(root, 0);
1621 if (IS_ERR(trans)) {
1622 kfree(device->name);
1623 kfree(device);
1624 ret = PTR_ERR(trans);
1625 goto error;
1626 }
1627
1609 lock_chunks(root); 1628 lock_chunks(root);
1610 1629
1611 device->writeable = 1; 1630 device->writeable = 1;
@@ -1621,7 +1640,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
1621 device->dev_root = root->fs_info->dev_root; 1640 device->dev_root = root->fs_info->dev_root;
1622 device->bdev = bdev; 1641 device->bdev = bdev;
1623 device->in_fs_metadata = 1; 1642 device->in_fs_metadata = 1;
1624 device->mode = 0; 1643 device->mode = FMODE_EXCL;
1625 set_blocksize(device->bdev, 4096); 1644 set_blocksize(device->bdev, 4096);
1626 1645
1627 if (seeding_dev) { 1646 if (seeding_dev) {
@@ -1873,7 +1892,7 @@ static int btrfs_relocate_chunk(struct btrfs_root *root,
1873 return ret; 1892 return ret;
1874 1893
1875 trans = btrfs_start_transaction(root, 0); 1894 trans = btrfs_start_transaction(root, 0);
1876 BUG_ON(!trans); 1895 BUG_ON(IS_ERR(trans));
1877 1896
1878 lock_chunks(root); 1897 lock_chunks(root);
1879 1898
@@ -2047,7 +2066,7 @@ int btrfs_balance(struct btrfs_root *dev_root)
2047 BUG_ON(ret); 2066 BUG_ON(ret);
2048 2067
2049 trans = btrfs_start_transaction(dev_root, 0); 2068 trans = btrfs_start_transaction(dev_root, 0);
2050 BUG_ON(!trans); 2069 BUG_ON(IS_ERR(trans));
2051 2070
2052 ret = btrfs_grow_device(trans, device, old_size); 2071 ret = btrfs_grow_device(trans, device, old_size);
2053 BUG_ON(ret); 2072 BUG_ON(ret);
@@ -2213,6 +2232,11 @@ again:
2213 2232
2214 /* Shrinking succeeded, else we would be at "done". */ 2233 /* Shrinking succeeded, else we would be at "done". */
2215 trans = btrfs_start_transaction(root, 0); 2234 trans = btrfs_start_transaction(root, 0);
2235 if (IS_ERR(trans)) {
2236 ret = PTR_ERR(trans);
2237 goto done;
2238 }
2239
2216 lock_chunks(root); 2240 lock_chunks(root);
2217 2241
2218 device->disk_total_bytes = new_size; 2242 device->disk_total_bytes = new_size;
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 0bc68de8edd7..099a58615b90 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -409,7 +409,7 @@ more:
409 spin_lock(&inode->i_lock); 409 spin_lock(&inode->i_lock);
410 if (ci->i_release_count == fi->dir_release_count) { 410 if (ci->i_release_count == fi->dir_release_count) {
411 dout(" marking %p complete\n", inode); 411 dout(" marking %p complete\n", inode);
412 ci->i_ceph_flags |= CEPH_I_COMPLETE; 412 /* ci->i_ceph_flags |= CEPH_I_COMPLETE; */
413 ci->i_max_offset = filp->f_pos; 413 ci->i_max_offset = filp->f_pos;
414 } 414 }
415 spin_unlock(&inode->i_lock); 415 spin_unlock(&inode->i_lock);
@@ -496,6 +496,7 @@ struct dentry *ceph_finish_lookup(struct ceph_mds_request *req,
496 496
497 /* .snap dir? */ 497 /* .snap dir? */
498 if (err == -ENOENT && 498 if (err == -ENOENT &&
499 ceph_snap(parent) == CEPH_NOSNAP &&
499 strcmp(dentry->d_name.name, 500 strcmp(dentry->d_name.name,
500 fsc->mount_options->snapdir_name) == 0) { 501 fsc->mount_options->snapdir_name) == 0) {
501 struct inode *inode = ceph_get_snapdir(parent); 502 struct inode *inode = ceph_get_snapdir(parent);
@@ -1029,28 +1030,8 @@ out_touch:
1029static void ceph_dentry_release(struct dentry *dentry) 1030static void ceph_dentry_release(struct dentry *dentry)
1030{ 1031{
1031 struct ceph_dentry_info *di = ceph_dentry(dentry); 1032 struct ceph_dentry_info *di = ceph_dentry(dentry);
1032 struct inode *parent_inode = NULL;
1033 u64 snapid = CEPH_NOSNAP;
1034 1033
1035 if (!IS_ROOT(dentry)) { 1034 dout("dentry_release %p\n", dentry);
1036 parent_inode = dentry->d_parent->d_inode;
1037 if (parent_inode)
1038 snapid = ceph_snap(parent_inode);
1039 }
1040 dout("dentry_release %p parent %p\n", dentry, parent_inode);
1041 if (parent_inode && snapid != CEPH_SNAPDIR) {
1042 struct ceph_inode_info *ci = ceph_inode(parent_inode);
1043
1044 spin_lock(&parent_inode->i_lock);
1045 if (ci->i_shared_gen == di->lease_shared_gen ||
1046 snapid <= CEPH_MAXSNAP) {
1047 dout(" clearing %p complete (d_release)\n",
1048 parent_inode);
1049 ci->i_ceph_flags &= ~CEPH_I_COMPLETE;
1050 ci->i_release_count++;
1051 }
1052 spin_unlock(&parent_inode->i_lock);
1053 }
1054 if (di) { 1035 if (di) {
1055 ceph_dentry_lru_del(dentry); 1036 ceph_dentry_lru_del(dentry);
1056 if (di->lease_session) 1037 if (di->lease_session)
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 5625463aa479..193bfa5e9cbd 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -707,7 +707,7 @@ static int fill_inode(struct inode *inode,
707 (issued & CEPH_CAP_FILE_EXCL) == 0 && 707 (issued & CEPH_CAP_FILE_EXCL) == 0 &&
708 (ci->i_ceph_flags & CEPH_I_COMPLETE) == 0) { 708 (ci->i_ceph_flags & CEPH_I_COMPLETE) == 0) {
709 dout(" marking %p complete (empty)\n", inode); 709 dout(" marking %p complete (empty)\n", inode);
710 ci->i_ceph_flags |= CEPH_I_COMPLETE; 710 /* ci->i_ceph_flags |= CEPH_I_COMPLETE; */
711 ci->i_max_offset = 2; 711 ci->i_max_offset = 2;
712 } 712 }
713 break; 713 break;
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index 39c243acd062..f40b9139e437 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -584,10 +584,14 @@ static void queue_realm_cap_snaps(struct ceph_snap_realm *realm)
584 if (lastinode) 584 if (lastinode)
585 iput(lastinode); 585 iput(lastinode);
586 586
587 dout("queue_realm_cap_snaps %p %llx children\n", realm, realm->ino); 587 list_for_each_entry(child, &realm->children, child_item) {
588 list_for_each_entry(child, &realm->children, child_item) 588 dout("queue_realm_cap_snaps %p %llx queue child %p %llx\n",
589 queue_realm_cap_snaps(child); 589 realm, realm->ino, child, child->ino);
590 list_del_init(&child->dirty_item);
591 list_add(&child->dirty_item, &realm->dirty_item);
592 }
590 593
594 list_del_init(&realm->dirty_item);
591 dout("queue_realm_cap_snaps %p %llx done\n", realm, realm->ino); 595 dout("queue_realm_cap_snaps %p %llx done\n", realm, realm->ino);
592} 596}
593 597
@@ -683,7 +687,9 @@ more:
683 * queue cap snaps _after_ we've built the new snap contexts, 687 * queue cap snaps _after_ we've built the new snap contexts,
684 * so that i_head_snapc can be set appropriately. 688 * so that i_head_snapc can be set appropriately.
685 */ 689 */
686 list_for_each_entry(realm, &dirty_realms, dirty_item) { 690 while (!list_empty(&dirty_realms)) {
691 realm = list_first_entry(&dirty_realms, struct ceph_snap_realm,
692 dirty_item);
687 queue_realm_cap_snaps(realm); 693 queue_realm_cap_snaps(realm);
688 } 694 }
689 695
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index 1e7636b145a8..beeebf194234 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -372,6 +372,10 @@ static void parse_dacl(struct cifs_acl *pdacl, char *end_of_acl,
372 372
373 ppace = kmalloc(num_aces * sizeof(struct cifs_ace *), 373 ppace = kmalloc(num_aces * sizeof(struct cifs_ace *),
374 GFP_KERNEL); 374 GFP_KERNEL);
375 if (!ppace) {
376 cERROR(1, "DACL memory allocation error");
377 return;
378 }
375 379
376 for (i = 0; i < num_aces; ++i) { 380 for (i = 0; i < num_aces; ++i) {
377 ppace[i] = (struct cifs_ace *) (acl_base + acl_size); 381 ppace[i] = (struct cifs_ace *) (acl_base + acl_size);
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 4a3330235d55..a9371b6578c0 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -127,5 +127,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
127extern const struct export_operations cifs_export_ops; 127extern const struct export_operations cifs_export_ops;
128#endif /* EXPERIMENTAL */ 128#endif /* EXPERIMENTAL */
129 129
130#define CIFS_VERSION "1.70" 130#define CIFS_VERSION "1.71"
131#endif /* _CIFSFS_H */ 131#endif /* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index edd5b29b53c9..17afb0fbcaed 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -188,6 +188,8 @@ struct TCP_Server_Info {
188 /* multiplexed reads or writes */ 188 /* multiplexed reads or writes */
189 unsigned int maxBuf; /* maxBuf specifies the maximum */ 189 unsigned int maxBuf; /* maxBuf specifies the maximum */
190 /* message size the server can send or receive for non-raw SMBs */ 190 /* message size the server can send or receive for non-raw SMBs */
191 /* maxBuf is returned by SMB NegotiateProtocol so maxBuf is only 0 */
192 /* when socket is setup (and during reconnect) before NegProt sent */
191 unsigned int max_rw; /* maxRw specifies the maximum */ 193 unsigned int max_rw; /* maxRw specifies the maximum */
192 /* message size the server can send or receive for */ 194 /* message size the server can send or receive for */
193 /* SMB_COM_WRITE_RAW or SMB_COM_READ_RAW. */ 195 /* SMB_COM_WRITE_RAW or SMB_COM_READ_RAW. */
@@ -652,7 +654,7 @@ static inline void free_dfs_info_array(struct dfs_info3_param *param,
652#define MID_REQUEST_SUBMITTED 2 654#define MID_REQUEST_SUBMITTED 2
653#define MID_RESPONSE_RECEIVED 4 655#define MID_RESPONSE_RECEIVED 4
654#define MID_RETRY_NEEDED 8 /* session closed while this request out */ 656#define MID_RETRY_NEEDED 8 /* session closed while this request out */
655#define MID_NO_RESP_NEEDED 0x10 657#define MID_RESPONSE_MALFORMED 0x10
656 658
657/* Types of response buffer returned from SendReceive2 */ 659/* Types of response buffer returned from SendReceive2 */
658#define CIFS_NO_BUFFER 0 /* Response buffer not returned */ 660#define CIFS_NO_BUFFER 0 /* Response buffer not returned */
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 46c66ed01af4..904aa47e3515 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -136,9 +136,6 @@ cifs_reconnect_tcon(struct cifsTconInfo *tcon, int smb_command)
136 } 136 }
137 } 137 }
138 138
139 if (ses->status == CifsExiting)
140 return -EIO;
141
142 /* 139 /*
143 * Give demultiplex thread up to 10 seconds to reconnect, should be 140 * Give demultiplex thread up to 10 seconds to reconnect, should be
144 * greater than cifs socket timeout which is 7 seconds 141 * greater than cifs socket timeout which is 7 seconds
@@ -156,7 +153,7 @@ cifs_reconnect_tcon(struct cifsTconInfo *tcon, int smb_command)
156 * retrying until process is killed or server comes 153 * retrying until process is killed or server comes
157 * back on-line 154 * back on-line
158 */ 155 */
159 if (!tcon->retry || ses->status == CifsExiting) { 156 if (!tcon->retry) {
160 cFYI(1, "gave up waiting on reconnect in smb_init"); 157 cFYI(1, "gave up waiting on reconnect in smb_init");
161 return -EHOSTDOWN; 158 return -EHOSTDOWN;
162 } 159 }
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 47d8ff623683..8d6c17ab593d 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -337,8 +337,13 @@ cifs_echo_request(struct work_struct *work)
337 struct TCP_Server_Info *server = container_of(work, 337 struct TCP_Server_Info *server = container_of(work,
338 struct TCP_Server_Info, echo.work); 338 struct TCP_Server_Info, echo.work);
339 339
340 /* no need to ping if we got a response recently */ 340 /*
341 if (time_before(jiffies, server->lstrp + SMB_ECHO_INTERVAL - HZ)) 341 * We cannot send an echo until the NEGOTIATE_PROTOCOL request is
342 * done, which is indicated by maxBuf != 0. Also, no need to ping if
343 * we got a response recently
344 */
345 if (server->maxBuf == 0 ||
346 time_before(jiffies, server->lstrp + SMB_ECHO_INTERVAL - HZ))
342 goto requeue_echo; 347 goto requeue_echo;
343 348
344 rc = CIFSSMBEcho(server); 349 rc = CIFSSMBEcho(server);
@@ -578,14 +583,23 @@ incomplete_rcv:
578 else if (reconnect == 1) 583 else if (reconnect == 1)
579 continue; 584 continue;
580 585
581 length += 4; /* account for rfc1002 hdr */ 586 total_read += 4; /* account for rfc1002 hdr */
582 587
588 dump_smb(smb_buffer, total_read);
583 589
584 dump_smb(smb_buffer, length); 590 /*
585 if (checkSMB(smb_buffer, smb_buffer->Mid, total_read+4)) { 591 * We know that we received enough to get to the MID as we
586 cifs_dump_mem("Bad SMB: ", smb_buffer, 48); 592 * checked the pdu_length earlier. Now check to see
587 continue; 593 * if the rest of the header is OK. We borrow the length
588 } 594 * var for the rest of the loop to avoid a new stack var.
595 *
596 * 48 bytes is enough to display the header and a little bit
597 * into the payload for debugging purposes.
598 */
599 length = checkSMB(smb_buffer, smb_buffer->Mid, total_read);
600 if (length != 0)
601 cifs_dump_mem("Bad SMB: ", smb_buffer,
602 min_t(unsigned int, total_read, 48));
589 603
590 mid_entry = NULL; 604 mid_entry = NULL;
591 server->lstrp = jiffies; 605 server->lstrp = jiffies;
@@ -597,7 +611,8 @@ incomplete_rcv:
597 if ((mid_entry->mid == smb_buffer->Mid) && 611 if ((mid_entry->mid == smb_buffer->Mid) &&
598 (mid_entry->midState == MID_REQUEST_SUBMITTED) && 612 (mid_entry->midState == MID_REQUEST_SUBMITTED) &&
599 (mid_entry->command == smb_buffer->Command)) { 613 (mid_entry->command == smb_buffer->Command)) {
600 if (check2ndT2(smb_buffer,server->maxBuf) > 0) { 614 if (length == 0 &&
615 check2ndT2(smb_buffer, server->maxBuf) > 0) {
601 /* We have a multipart transact2 resp */ 616 /* We have a multipart transact2 resp */
602 isMultiRsp = true; 617 isMultiRsp = true;
603 if (mid_entry->resp_buf) { 618 if (mid_entry->resp_buf) {
@@ -632,12 +647,17 @@ incomplete_rcv:
632 mid_entry->resp_buf = smb_buffer; 647 mid_entry->resp_buf = smb_buffer;
633 mid_entry->largeBuf = isLargeBuf; 648 mid_entry->largeBuf = isLargeBuf;
634multi_t2_fnd: 649multi_t2_fnd:
635 mid_entry->midState = MID_RESPONSE_RECEIVED; 650 if (length == 0)
636 list_del_init(&mid_entry->qhead); 651 mid_entry->midState =
637 mid_entry->callback(mid_entry); 652 MID_RESPONSE_RECEIVED;
653 else
654 mid_entry->midState =
655 MID_RESPONSE_MALFORMED;
638#ifdef CONFIG_CIFS_STATS2 656#ifdef CONFIG_CIFS_STATS2
639 mid_entry->when_received = jiffies; 657 mid_entry->when_received = jiffies;
640#endif 658#endif
659 list_del_init(&mid_entry->qhead);
660 mid_entry->callback(mid_entry);
641 break; 661 break;
642 } 662 }
643 mid_entry = NULL; 663 mid_entry = NULL;
@@ -653,6 +673,9 @@ multi_t2_fnd:
653 else 673 else
654 smallbuf = NULL; 674 smallbuf = NULL;
655 } 675 }
676 } else if (length != 0) {
677 /* response sanity checks failed */
678 continue;
656 } else if (!is_valid_oplock_break(smb_buffer, server) && 679 } else if (!is_valid_oplock_break(smb_buffer, server) &&
657 !isMultiRsp) { 680 !isMultiRsp) {
658 cERROR(1, "No task to wake, unknown frame received! " 681 cERROR(1, "No task to wake, unknown frame received! "
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 74c0a282d012..e964b1cd5dd0 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1662,10 +1662,10 @@ static ssize_t
1662cifs_iovec_write(struct file *file, const struct iovec *iov, 1662cifs_iovec_write(struct file *file, const struct iovec *iov,
1663 unsigned long nr_segs, loff_t *poffset) 1663 unsigned long nr_segs, loff_t *poffset)
1664{ 1664{
1665 size_t total_written = 0; 1665 unsigned int written;
1666 unsigned int written = 0; 1666 unsigned long num_pages, npages, i;
1667 unsigned long num_pages, npages; 1667 size_t copied, len, cur_len;
1668 size_t copied, len, cur_len, i; 1668 ssize_t total_written = 0;
1669 struct kvec *to_send; 1669 struct kvec *to_send;
1670 struct page **pages; 1670 struct page **pages;
1671 struct iov_iter it; 1671 struct iov_iter it;
@@ -1821,7 +1821,8 @@ cifs_iovec_read(struct file *file, const struct iovec *iov,
1821{ 1821{
1822 int rc; 1822 int rc;
1823 int xid; 1823 int xid;
1824 unsigned int total_read, bytes_read = 0; 1824 ssize_t total_read;
1825 unsigned int bytes_read = 0;
1825 size_t len, cur_len; 1826 size_t len, cur_len;
1826 int iov_offset = 0; 1827 int iov_offset = 0;
1827 struct cifs_sb_info *cifs_sb; 1828 struct cifs_sb_info *cifs_sb;
diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c
index 8d9189f64477..79f641eeda30 100644
--- a/fs/cifs/netmisc.c
+++ b/fs/cifs/netmisc.c
@@ -170,7 +170,7 @@ cifs_convert_address(struct sockaddr *dst, const char *src, int len)
170{ 170{
171 int rc, alen, slen; 171 int rc, alen, slen;
172 const char *pct; 172 const char *pct;
173 char *endp, scope_id[13]; 173 char scope_id[13];
174 struct sockaddr_in *s4 = (struct sockaddr_in *) dst; 174 struct sockaddr_in *s4 = (struct sockaddr_in *) dst;
175 struct sockaddr_in6 *s6 = (struct sockaddr_in6 *) dst; 175 struct sockaddr_in6 *s6 = (struct sockaddr_in6 *) dst;
176 176
@@ -197,9 +197,9 @@ cifs_convert_address(struct sockaddr *dst, const char *src, int len)
197 memcpy(scope_id, pct + 1, slen); 197 memcpy(scope_id, pct + 1, slen);
198 scope_id[slen] = '\0'; 198 scope_id[slen] = '\0';
199 199
200 s6->sin6_scope_id = (u32) simple_strtoul(pct, &endp, 0); 200 rc = strict_strtoul(scope_id, 0,
201 if (endp != scope_id + slen) 201 (unsigned long *)&s6->sin6_scope_id);
202 return 0; 202 rc = (rc == 0) ? 1 : 0;
203 } 203 }
204 204
205 return rc; 205 return rc;
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 1adc9625a344..16765703131b 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -656,13 +656,13 @@ ssetup_ntlmssp_authenticate:
656 656
657 if (type == LANMAN) { 657 if (type == LANMAN) {
658#ifdef CONFIG_CIFS_WEAK_PW_HASH 658#ifdef CONFIG_CIFS_WEAK_PW_HASH
659 char lnm_session_key[CIFS_SESS_KEY_SIZE]; 659 char lnm_session_key[CIFS_AUTH_RESP_SIZE];
660 660
661 pSMB->req.hdr.Flags2 &= ~SMBFLG2_UNICODE; 661 pSMB->req.hdr.Flags2 &= ~SMBFLG2_UNICODE;
662 662
663 /* no capabilities flags in old lanman negotiation */ 663 /* no capabilities flags in old lanman negotiation */
664 664
665 pSMB->old_req.PasswordLength = cpu_to_le16(CIFS_SESS_KEY_SIZE); 665 pSMB->old_req.PasswordLength = cpu_to_le16(CIFS_AUTH_RESP_SIZE);
666 666
667 /* Calculate hash with password and copy into bcc_ptr. 667 /* Calculate hash with password and copy into bcc_ptr.
668 * Encryption Key (stored as in cryptkey) gets used if the 668 * Encryption Key (stored as in cryptkey) gets used if the
@@ -675,8 +675,8 @@ ssetup_ntlmssp_authenticate:
675 true : false, lnm_session_key); 675 true : false, lnm_session_key);
676 676
677 ses->flags |= CIFS_SES_LANMAN; 677 ses->flags |= CIFS_SES_LANMAN;
678 memcpy(bcc_ptr, (char *)lnm_session_key, CIFS_SESS_KEY_SIZE); 678 memcpy(bcc_ptr, (char *)lnm_session_key, CIFS_AUTH_RESP_SIZE);
679 bcc_ptr += CIFS_SESS_KEY_SIZE; 679 bcc_ptr += CIFS_AUTH_RESP_SIZE;
680 680
681 /* can not sign if LANMAN negotiated so no need 681 /* can not sign if LANMAN negotiated so no need
682 to calculate signing key? but what if server 682 to calculate signing key? but what if server
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index b8c5e2eb43d0..46d8756f2b24 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -359,6 +359,10 @@ cifs_call_async(struct TCP_Server_Info *server, struct smb_hdr *in_buf,
359 if (rc) 359 if (rc)
360 return rc; 360 return rc;
361 361
362 /* enable signing if server requires it */
363 if (server->secMode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
364 in_buf->Flags2 |= SMBFLG2_SECURITY_SIGNATURE;
365
362 mutex_lock(&server->srv_mutex); 366 mutex_lock(&server->srv_mutex);
363 mid = AllocMidQEntry(in_buf, server); 367 mid = AllocMidQEntry(in_buf, server);
364 if (mid == NULL) { 368 if (mid == NULL) {
@@ -453,6 +457,9 @@ sync_mid_result(struct mid_q_entry *mid, struct TCP_Server_Info *server)
453 case MID_RETRY_NEEDED: 457 case MID_RETRY_NEEDED:
454 rc = -EAGAIN; 458 rc = -EAGAIN;
455 break; 459 break;
460 case MID_RESPONSE_MALFORMED:
461 rc = -EIO;
462 break;
456 default: 463 default:
457 cERROR(1, "%s: invalid mid state mid=%d state=%d", __func__, 464 cERROR(1, "%s: invalid mid state mid=%d state=%d", __func__,
458 mid->mid, mid->midState); 465 mid->mid, mid->midState);
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 9c64ae9e4c1a..2d8c87b951c2 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -1468,15 +1468,13 @@ static void work_stop(void)
1468 1468
1469static int work_start(void) 1469static int work_start(void)
1470{ 1470{
1471 recv_workqueue = alloc_workqueue("dlm_recv", WQ_MEM_RECLAIM | 1471 recv_workqueue = create_singlethread_workqueue("dlm_recv");
1472 WQ_HIGHPRI | WQ_FREEZEABLE, 0);
1473 if (!recv_workqueue) { 1472 if (!recv_workqueue) {
1474 log_print("can't start dlm_recv"); 1473 log_print("can't start dlm_recv");
1475 return -ENOMEM; 1474 return -ENOMEM;
1476 } 1475 }
1477 1476
1478 send_workqueue = alloc_workqueue("dlm_send", WQ_MEM_RECLAIM | 1477 send_workqueue = create_singlethread_workqueue("dlm_send");
1479 WQ_HIGHPRI | WQ_FREEZEABLE, 0);
1480 if (!send_workqueue) { 1478 if (!send_workqueue) {
1481 log_print("can't start dlm_send"); 1479 log_print("can't start dlm_send");
1482 destroy_workqueue(recv_workqueue); 1480 destroy_workqueue(recv_workqueue);
diff --git a/fs/ecryptfs/dentry.c b/fs/ecryptfs/dentry.c
index 6fc4f319b550..534c1d46e69e 100644
--- a/fs/ecryptfs/dentry.c
+++ b/fs/ecryptfs/dentry.c
@@ -46,24 +46,28 @@ static int ecryptfs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
46{ 46{
47 struct dentry *lower_dentry; 47 struct dentry *lower_dentry;
48 struct vfsmount *lower_mnt; 48 struct vfsmount *lower_mnt;
49 struct dentry *dentry_save; 49 struct dentry *dentry_save = NULL;
50 struct vfsmount *vfsmount_save; 50 struct vfsmount *vfsmount_save = NULL;
51 int rc = 1; 51 int rc = 1;
52 52
53 if (nd->flags & LOOKUP_RCU) 53 if (nd && nd->flags & LOOKUP_RCU)
54 return -ECHILD; 54 return -ECHILD;
55 55
56 lower_dentry = ecryptfs_dentry_to_lower(dentry); 56 lower_dentry = ecryptfs_dentry_to_lower(dentry);
57 lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry); 57 lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry);
58 if (!lower_dentry->d_op || !lower_dentry->d_op->d_revalidate) 58 if (!lower_dentry->d_op || !lower_dentry->d_op->d_revalidate)
59 goto out; 59 goto out;
60 dentry_save = nd->path.dentry; 60 if (nd) {
61 vfsmount_save = nd->path.mnt; 61 dentry_save = nd->path.dentry;
62 nd->path.dentry = lower_dentry; 62 vfsmount_save = nd->path.mnt;
63 nd->path.mnt = lower_mnt; 63 nd->path.dentry = lower_dentry;
64 nd->path.mnt = lower_mnt;
65 }
64 rc = lower_dentry->d_op->d_revalidate(lower_dentry, nd); 66 rc = lower_dentry->d_op->d_revalidate(lower_dentry, nd);
65 nd->path.dentry = dentry_save; 67 if (nd) {
66 nd->path.mnt = vfsmount_save; 68 nd->path.dentry = dentry_save;
69 nd->path.mnt = vfsmount_save;
70 }
67 if (dentry->d_inode) { 71 if (dentry->d_inode) {
68 struct inode *lower_inode = 72 struct inode *lower_inode =
69 ecryptfs_inode_to_lower(dentry->d_inode); 73 ecryptfs_inode_to_lower(dentry->d_inode);
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index dbc84ed96336..e00753496e3e 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -632,8 +632,7 @@ int ecryptfs_interpose(struct dentry *hidden_dentry,
632 u32 flags); 632 u32 flags);
633int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry, 633int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry,
634 struct dentry *lower_dentry, 634 struct dentry *lower_dentry,
635 struct inode *ecryptfs_dir_inode, 635 struct inode *ecryptfs_dir_inode);
636 struct nameidata *ecryptfs_nd);
637int ecryptfs_decode_and_decrypt_filename(char **decrypted_name, 636int ecryptfs_decode_and_decrypt_filename(char **decrypted_name,
638 size_t *decrypted_name_size, 637 size_t *decrypted_name_size,
639 struct dentry *ecryptfs_dentry, 638 struct dentry *ecryptfs_dentry,
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index 81e10e6a9443..7d1050e254f9 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -317,6 +317,7 @@ ecryptfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
317 317
318const struct file_operations ecryptfs_dir_fops = { 318const struct file_operations ecryptfs_dir_fops = {
319 .readdir = ecryptfs_readdir, 319 .readdir = ecryptfs_readdir,
320 .read = generic_read_dir,
320 .unlocked_ioctl = ecryptfs_unlocked_ioctl, 321 .unlocked_ioctl = ecryptfs_unlocked_ioctl,
321#ifdef CONFIG_COMPAT 322#ifdef CONFIG_COMPAT
322 .compat_ioctl = ecryptfs_compat_ioctl, 323 .compat_ioctl = ecryptfs_compat_ioctl,
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index bd33f87a1907..b592938a84bc 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -74,16 +74,20 @@ ecryptfs_create_underlying_file(struct inode *lower_dir_inode,
74 unsigned int flags_save; 74 unsigned int flags_save;
75 int rc; 75 int rc;
76 76
77 dentry_save = nd->path.dentry; 77 if (nd) {
78 vfsmount_save = nd->path.mnt; 78 dentry_save = nd->path.dentry;
79 flags_save = nd->flags; 79 vfsmount_save = nd->path.mnt;
80 nd->path.dentry = lower_dentry; 80 flags_save = nd->flags;
81 nd->path.mnt = lower_mnt; 81 nd->path.dentry = lower_dentry;
82 nd->flags &= ~LOOKUP_OPEN; 82 nd->path.mnt = lower_mnt;
83 nd->flags &= ~LOOKUP_OPEN;
84 }
83 rc = vfs_create(lower_dir_inode, lower_dentry, mode, nd); 85 rc = vfs_create(lower_dir_inode, lower_dentry, mode, nd);
84 nd->path.dentry = dentry_save; 86 if (nd) {
85 nd->path.mnt = vfsmount_save; 87 nd->path.dentry = dentry_save;
86 nd->flags = flags_save; 88 nd->path.mnt = vfsmount_save;
89 nd->flags = flags_save;
90 }
87 return rc; 91 return rc;
88} 92}
89 93
@@ -241,8 +245,7 @@ out:
241 */ 245 */
242int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry, 246int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry,
243 struct dentry *lower_dentry, 247 struct dentry *lower_dentry,
244 struct inode *ecryptfs_dir_inode, 248 struct inode *ecryptfs_dir_inode)
245 struct nameidata *ecryptfs_nd)
246{ 249{
247 struct dentry *lower_dir_dentry; 250 struct dentry *lower_dir_dentry;
248 struct vfsmount *lower_mnt; 251 struct vfsmount *lower_mnt;
@@ -290,8 +293,6 @@ int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry,
290 goto out; 293 goto out;
291 if (special_file(lower_inode->i_mode)) 294 if (special_file(lower_inode->i_mode))
292 goto out; 295 goto out;
293 if (!ecryptfs_nd)
294 goto out;
295 /* Released in this function */ 296 /* Released in this function */
296 page_virt = kmem_cache_zalloc(ecryptfs_header_cache_2, GFP_USER); 297 page_virt = kmem_cache_zalloc(ecryptfs_header_cache_2, GFP_USER);
297 if (!page_virt) { 298 if (!page_virt) {
@@ -349,75 +350,6 @@ out:
349} 350}
350 351
351/** 352/**
352 * ecryptfs_new_lower_dentry
353 * @name: The name of the new dentry.
354 * @lower_dir_dentry: Parent directory of the new dentry.
355 * @nd: nameidata from last lookup.
356 *
357 * Create a new dentry or get it from lower parent dir.
358 */
359static struct dentry *
360ecryptfs_new_lower_dentry(struct qstr *name, struct dentry *lower_dir_dentry,
361 struct nameidata *nd)
362{
363 struct dentry *new_dentry;
364 struct dentry *tmp;
365 struct inode *lower_dir_inode;
366
367 lower_dir_inode = lower_dir_dentry->d_inode;
368
369 tmp = d_alloc(lower_dir_dentry, name);
370 if (!tmp)
371 return ERR_PTR(-ENOMEM);
372
373 mutex_lock(&lower_dir_inode->i_mutex);
374 new_dentry = lower_dir_inode->i_op->lookup(lower_dir_inode, tmp, nd);
375 mutex_unlock(&lower_dir_inode->i_mutex);
376
377 if (!new_dentry)
378 new_dentry = tmp;
379 else
380 dput(tmp);
381
382 return new_dentry;
383}
384
385
386/**
387 * ecryptfs_lookup_one_lower
388 * @ecryptfs_dentry: The eCryptfs dentry that we are looking up
389 * @lower_dir_dentry: lower parent directory
390 * @name: lower file name
391 *
392 * Get the lower dentry from vfs. If lower dentry does not exist yet,
393 * create it.
394 */
395static struct dentry *
396ecryptfs_lookup_one_lower(struct dentry *ecryptfs_dentry,
397 struct dentry *lower_dir_dentry, struct qstr *name)
398{
399 struct nameidata nd;
400 struct vfsmount *lower_mnt;
401 int err;
402
403 lower_mnt = mntget(ecryptfs_dentry_to_lower_mnt(
404 ecryptfs_dentry->d_parent));
405 err = vfs_path_lookup(lower_dir_dentry, lower_mnt, name->name , 0, &nd);
406 mntput(lower_mnt);
407
408 if (!err) {
409 /* we dont need the mount */
410 mntput(nd.path.mnt);
411 return nd.path.dentry;
412 }
413 if (err != -ENOENT)
414 return ERR_PTR(err);
415
416 /* create a new lower dentry */
417 return ecryptfs_new_lower_dentry(name, lower_dir_dentry, &nd);
418}
419
420/**
421 * ecryptfs_lookup 353 * ecryptfs_lookup
422 * @ecryptfs_dir_inode: The eCryptfs directory inode 354 * @ecryptfs_dir_inode: The eCryptfs directory inode
423 * @ecryptfs_dentry: The eCryptfs dentry that we are looking up 355 * @ecryptfs_dentry: The eCryptfs dentry that we are looking up
@@ -434,7 +366,6 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
434 size_t encrypted_and_encoded_name_size; 366 size_t encrypted_and_encoded_name_size;
435 struct ecryptfs_mount_crypt_stat *mount_crypt_stat = NULL; 367 struct ecryptfs_mount_crypt_stat *mount_crypt_stat = NULL;
436 struct dentry *lower_dir_dentry, *lower_dentry; 368 struct dentry *lower_dir_dentry, *lower_dentry;
437 struct qstr lower_name;
438 int rc = 0; 369 int rc = 0;
439 370
440 if ((ecryptfs_dentry->d_name.len == 1 371 if ((ecryptfs_dentry->d_name.len == 1
@@ -444,20 +375,14 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
444 goto out_d_drop; 375 goto out_d_drop;
445 } 376 }
446 lower_dir_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry->d_parent); 377 lower_dir_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry->d_parent);
447 lower_name.name = ecryptfs_dentry->d_name.name; 378 mutex_lock(&lower_dir_dentry->d_inode->i_mutex);
448 lower_name.len = ecryptfs_dentry->d_name.len; 379 lower_dentry = lookup_one_len(ecryptfs_dentry->d_name.name,
449 lower_name.hash = ecryptfs_dentry->d_name.hash; 380 lower_dir_dentry,
450 if (lower_dir_dentry->d_op && lower_dir_dentry->d_op->d_hash) { 381 ecryptfs_dentry->d_name.len);
451 rc = lower_dir_dentry->d_op->d_hash(lower_dir_dentry, 382 mutex_unlock(&lower_dir_dentry->d_inode->i_mutex);
452 lower_dir_dentry->d_inode, &lower_name);
453 if (rc < 0)
454 goto out_d_drop;
455 }
456 lower_dentry = ecryptfs_lookup_one_lower(ecryptfs_dentry,
457 lower_dir_dentry, &lower_name);
458 if (IS_ERR(lower_dentry)) { 383 if (IS_ERR(lower_dentry)) {
459 rc = PTR_ERR(lower_dentry); 384 rc = PTR_ERR(lower_dentry);
460 ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_lower() returned " 385 ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_len() returned "
461 "[%d] on lower_dentry = [%s]\n", __func__, rc, 386 "[%d] on lower_dentry = [%s]\n", __func__, rc,
462 encrypted_and_encoded_name); 387 encrypted_and_encoded_name);
463 goto out_d_drop; 388 goto out_d_drop;
@@ -479,28 +404,21 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
479 "filename; rc = [%d]\n", __func__, rc); 404 "filename; rc = [%d]\n", __func__, rc);
480 goto out_d_drop; 405 goto out_d_drop;
481 } 406 }
482 lower_name.name = encrypted_and_encoded_name; 407 mutex_lock(&lower_dir_dentry->d_inode->i_mutex);
483 lower_name.len = encrypted_and_encoded_name_size; 408 lower_dentry = lookup_one_len(encrypted_and_encoded_name,
484 lower_name.hash = full_name_hash(lower_name.name, lower_name.len); 409 lower_dir_dentry,
485 if (lower_dir_dentry->d_op && lower_dir_dentry->d_op->d_hash) { 410 encrypted_and_encoded_name_size);
486 rc = lower_dir_dentry->d_op->d_hash(lower_dir_dentry, 411 mutex_unlock(&lower_dir_dentry->d_inode->i_mutex);
487 lower_dir_dentry->d_inode, &lower_name);
488 if (rc < 0)
489 goto out_d_drop;
490 }
491 lower_dentry = ecryptfs_lookup_one_lower(ecryptfs_dentry,
492 lower_dir_dentry, &lower_name);
493 if (IS_ERR(lower_dentry)) { 412 if (IS_ERR(lower_dentry)) {
494 rc = PTR_ERR(lower_dentry); 413 rc = PTR_ERR(lower_dentry);
495 ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_lower() returned " 414 ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_len() returned "
496 "[%d] on lower_dentry = [%s]\n", __func__, rc, 415 "[%d] on lower_dentry = [%s]\n", __func__, rc,
497 encrypted_and_encoded_name); 416 encrypted_and_encoded_name);
498 goto out_d_drop; 417 goto out_d_drop;
499 } 418 }
500lookup_and_interpose: 419lookup_and_interpose:
501 rc = ecryptfs_lookup_and_interpose_lower(ecryptfs_dentry, lower_dentry, 420 rc = ecryptfs_lookup_and_interpose_lower(ecryptfs_dentry, lower_dentry,
502 ecryptfs_dir_inode, 421 ecryptfs_dir_inode);
503 ecryptfs_nd);
504 goto out; 422 goto out;
505out_d_drop: 423out_d_drop:
506 d_drop(ecryptfs_dentry); 424 d_drop(ecryptfs_dentry);
@@ -1092,6 +1010,8 @@ int ecryptfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
1092 rc = vfs_getattr(ecryptfs_dentry_to_lower_mnt(dentry), 1010 rc = vfs_getattr(ecryptfs_dentry_to_lower_mnt(dentry),
1093 ecryptfs_dentry_to_lower(dentry), &lower_stat); 1011 ecryptfs_dentry_to_lower(dentry), &lower_stat);
1094 if (!rc) { 1012 if (!rc) {
1013 fsstack_copy_attr_all(dentry->d_inode,
1014 ecryptfs_inode_to_lower(dentry->d_inode));
1095 generic_fillattr(dentry->d_inode, stat); 1015 generic_fillattr(dentry->d_inode, stat);
1096 stat->blocks = lower_stat.blocks; 1016 stat->blocks = lower_stat.blocks;
1097 } 1017 }
diff --git a/fs/eventfd.c b/fs/eventfd.c
index e0194b3e14d6..d9a591773919 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -99,7 +99,7 @@ EXPORT_SYMBOL_GPL(eventfd_ctx_get);
99 * @ctx: [in] Pointer to eventfd context. 99 * @ctx: [in] Pointer to eventfd context.
100 * 100 *
101 * The eventfd context reference must have been previously acquired either 101 * The eventfd context reference must have been previously acquired either
102 * with eventfd_ctx_get() or eventfd_ctx_fdget()). 102 * with eventfd_ctx_get() or eventfd_ctx_fdget().
103 */ 103 */
104void eventfd_ctx_put(struct eventfd_ctx *ctx) 104void eventfd_ctx_put(struct eventfd_ctx *ctx)
105{ 105{
@@ -146,9 +146,9 @@ static void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt)
146 * eventfd_ctx_remove_wait_queue - Read the current counter and removes wait queue. 146 * eventfd_ctx_remove_wait_queue - Read the current counter and removes wait queue.
147 * @ctx: [in] Pointer to eventfd context. 147 * @ctx: [in] Pointer to eventfd context.
148 * @wait: [in] Wait queue to be removed. 148 * @wait: [in] Wait queue to be removed.
149 * @cnt: [out] Pointer to the 64bit conter value. 149 * @cnt: [out] Pointer to the 64-bit counter value.
150 * 150 *
151 * Returns zero if successful, or the following error codes: 151 * Returns %0 if successful, or the following error codes:
152 * 152 *
153 * -EAGAIN : The operation would have blocked. 153 * -EAGAIN : The operation would have blocked.
154 * 154 *
@@ -175,11 +175,11 @@ EXPORT_SYMBOL_GPL(eventfd_ctx_remove_wait_queue);
175 * eventfd_ctx_read - Reads the eventfd counter or wait if it is zero. 175 * eventfd_ctx_read - Reads the eventfd counter or wait if it is zero.
176 * @ctx: [in] Pointer to eventfd context. 176 * @ctx: [in] Pointer to eventfd context.
177 * @no_wait: [in] Different from zero if the operation should not block. 177 * @no_wait: [in] Different from zero if the operation should not block.
178 * @cnt: [out] Pointer to the 64bit conter value. 178 * @cnt: [out] Pointer to the 64-bit counter value.
179 * 179 *
180 * Returns zero if successful, or the following error codes: 180 * Returns %0 if successful, or the following error codes:
181 * 181 *
182 * -EAGAIN : The operation would have blocked but @no_wait was nonzero. 182 * -EAGAIN : The operation would have blocked but @no_wait was non-zero.
183 * -ERESTARTSYS : A signal interrupted the wait operation. 183 * -ERESTARTSYS : A signal interrupted the wait operation.
184 * 184 *
185 * If @no_wait is zero, the function might sleep until the eventfd internal 185 * If @no_wait is zero, the function might sleep until the eventfd internal
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index cc8a9b7d6064..4a09af9e9a63 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -63,6 +63,13 @@
63 * cleanup path and it is also acquired by eventpoll_release_file() 63 * cleanup path and it is also acquired by eventpoll_release_file()
64 * if a file has been pushed inside an epoll set and it is then 64 * if a file has been pushed inside an epoll set and it is then
65 * close()d without a previous call toepoll_ctl(EPOLL_CTL_DEL). 65 * close()d without a previous call toepoll_ctl(EPOLL_CTL_DEL).
66 * It is also acquired when inserting an epoll fd onto another epoll
67 * fd. We do this so that we walk the epoll tree and ensure that this
68 * insertion does not create a cycle of epoll file descriptors, which
69 * could lead to deadlock. We need a global mutex to prevent two
70 * simultaneous inserts (A into B and B into A) from racing and
71 * constructing a cycle without either insert observing that it is
72 * going to.
66 * It is possible to drop the "ep->mtx" and to use the global 73 * It is possible to drop the "ep->mtx" and to use the global
67 * mutex "epmutex" (together with "ep->lock") to have it working, 74 * mutex "epmutex" (together with "ep->lock") to have it working,
68 * but having "ep->mtx" will make the interface more scalable. 75 * but having "ep->mtx" will make the interface more scalable.
@@ -224,6 +231,9 @@ static long max_user_watches __read_mostly;
224 */ 231 */
225static DEFINE_MUTEX(epmutex); 232static DEFINE_MUTEX(epmutex);
226 233
234/* Used to check for epoll file descriptor inclusion loops */
235static struct nested_calls poll_loop_ncalls;
236
227/* Used for safe wake up implementation */ 237/* Used for safe wake up implementation */
228static struct nested_calls poll_safewake_ncalls; 238static struct nested_calls poll_safewake_ncalls;
229 239
@@ -1114,6 +1124,17 @@ static int ep_send_events(struct eventpoll *ep,
1114 return ep_scan_ready_list(ep, ep_send_events_proc, &esed); 1124 return ep_scan_ready_list(ep, ep_send_events_proc, &esed);
1115} 1125}
1116 1126
1127static inline struct timespec ep_set_mstimeout(long ms)
1128{
1129 struct timespec now, ts = {
1130 .tv_sec = ms / MSEC_PER_SEC,
1131 .tv_nsec = NSEC_PER_MSEC * (ms % MSEC_PER_SEC),
1132 };
1133
1134 ktime_get_ts(&now);
1135 return timespec_add_safe(now, ts);
1136}
1137
1117static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, 1138static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
1118 int maxevents, long timeout) 1139 int maxevents, long timeout)
1119{ 1140{
@@ -1121,12 +1142,11 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
1121 unsigned long flags; 1142 unsigned long flags;
1122 long slack; 1143 long slack;
1123 wait_queue_t wait; 1144 wait_queue_t wait;
1124 struct timespec end_time;
1125 ktime_t expires, *to = NULL; 1145 ktime_t expires, *to = NULL;
1126 1146
1127 if (timeout > 0) { 1147 if (timeout > 0) {
1128 ktime_get_ts(&end_time); 1148 struct timespec end_time = ep_set_mstimeout(timeout);
1129 timespec_add_ns(&end_time, (u64)timeout * NSEC_PER_MSEC); 1149
1130 slack = select_estimate_accuracy(&end_time); 1150 slack = select_estimate_accuracy(&end_time);
1131 to = &expires; 1151 to = &expires;
1132 *to = timespec_to_ktime(end_time); 1152 *to = timespec_to_ktime(end_time);
@@ -1188,6 +1208,62 @@ retry:
1188 return res; 1208 return res;
1189} 1209}
1190 1210
1211/**
1212 * ep_loop_check_proc - Callback function to be passed to the @ep_call_nested()
1213 * API, to verify that adding an epoll file inside another
1214 * epoll structure, does not violate the constraints, in
1215 * terms of closed loops, or too deep chains (which can
1216 * result in excessive stack usage).
1217 *
1218 * @priv: Pointer to the epoll file to be currently checked.
1219 * @cookie: Original cookie for this call. This is the top-of-the-chain epoll
1220 * data structure pointer.
1221 * @call_nests: Current depth of the @ep_call_nested() call stack.
1222 *
1223 * Returns: Returns zero if adding the epoll @file inside current epoll
1224 * structure @ep does not violate the constraints, or -1 otherwise.
1225 */
1226static int ep_loop_check_proc(void *priv, void *cookie, int call_nests)
1227{
1228 int error = 0;
1229 struct file *file = priv;
1230 struct eventpoll *ep = file->private_data;
1231 struct rb_node *rbp;
1232 struct epitem *epi;
1233
1234 mutex_lock(&ep->mtx);
1235 for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) {
1236 epi = rb_entry(rbp, struct epitem, rbn);
1237 if (unlikely(is_file_epoll(epi->ffd.file))) {
1238 error = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
1239 ep_loop_check_proc, epi->ffd.file,
1240 epi->ffd.file->private_data, current);
1241 if (error != 0)
1242 break;
1243 }
1244 }
1245 mutex_unlock(&ep->mtx);
1246
1247 return error;
1248}
1249
1250/**
1251 * ep_loop_check - Performs a check to verify that adding an epoll file (@file)
1252 * inside another epoll file (represented by @ep) does not create
1253 * closed loops or too deep chains.
1254 *
1255 * @ep: Pointer to the epoll private data structure.
1256 * @file: Pointer to the epoll file to be checked.
1257 *
1258 * Returns: Returns zero if adding the epoll @file inside current epoll
1259 * structure @ep does not violate the constraints, or -1 otherwise.
1260 */
1261static int ep_loop_check(struct eventpoll *ep, struct file *file)
1262{
1263 return ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
1264 ep_loop_check_proc, file, ep, current);
1265}
1266
1191/* 1267/*
1192 * Open an eventpoll file descriptor. 1268 * Open an eventpoll file descriptor.
1193 */ 1269 */
@@ -1236,6 +1312,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
1236 struct epoll_event __user *, event) 1312 struct epoll_event __user *, event)
1237{ 1313{
1238 int error; 1314 int error;
1315 int did_lock_epmutex = 0;
1239 struct file *file, *tfile; 1316 struct file *file, *tfile;
1240 struct eventpoll *ep; 1317 struct eventpoll *ep;
1241 struct epitem *epi; 1318 struct epitem *epi;
@@ -1277,6 +1354,25 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
1277 */ 1354 */
1278 ep = file->private_data; 1355 ep = file->private_data;
1279 1356
1357 /*
1358 * When we insert an epoll file descriptor, inside another epoll file
1359 * descriptor, there is the chance of creating closed loops, which are
1360 * better handled here than in more critical paths.
1361 *
1362 * We hold epmutex across the loop check and the insert in this case, in
1363 * order to prevent two separate inserts from racing and each doing the
1364 * insert "at the same time" such that ep_loop_check passes on both
1365 * before either one does the insert, thereby creating a cycle.
1366 */
1367 if (unlikely(is_file_epoll(tfile) && op == EPOLL_CTL_ADD)) {
1368 mutex_lock(&epmutex);
1369 did_lock_epmutex = 1;
1370 error = -ELOOP;
1371 if (ep_loop_check(ep, tfile) != 0)
1372 goto error_tgt_fput;
1373 }
1374
1375
1280 mutex_lock(&ep->mtx); 1376 mutex_lock(&ep->mtx);
1281 1377
1282 /* 1378 /*
@@ -1312,6 +1408,9 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
1312 mutex_unlock(&ep->mtx); 1408 mutex_unlock(&ep->mtx);
1313 1409
1314error_tgt_fput: 1410error_tgt_fput:
1411 if (unlikely(did_lock_epmutex))
1412 mutex_unlock(&epmutex);
1413
1315 fput(tfile); 1414 fput(tfile);
1316error_fput: 1415error_fput:
1317 fput(file); 1416 fput(file);
@@ -1431,6 +1530,12 @@ static int __init eventpoll_init(void)
1431 EP_ITEM_COST; 1530 EP_ITEM_COST;
1432 BUG_ON(max_user_watches < 0); 1531 BUG_ON(max_user_watches < 0);
1433 1532
1533 /*
1534 * Initialize the structure used to perform epoll file descriptor
1535 * inclusion loop checks.
1536 */
1537 ep_nested_calls_init(&poll_loop_ncalls);
1538
1434 /* Initialize the structure used to perform safe poll wait head wake ups */ 1539 /* Initialize the structure used to perform safe poll wait head wake ups */
1435 ep_nested_calls_init(&poll_safewake_ncalls); 1540 ep_nested_calls_init(&poll_safewake_ncalls);
1436 1541
diff --git a/fs/exec.c b/fs/exec.c
index c62efcb959c7..52a447d9b6ab 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -120,7 +120,7 @@ SYSCALL_DEFINE1(uselib, const char __user *, library)
120 goto out; 120 goto out;
121 121
122 file = do_filp_open(AT_FDCWD, tmp, 122 file = do_filp_open(AT_FDCWD, tmp,
123 O_LARGEFILE | O_RDONLY | FMODE_EXEC, 0, 123 O_LARGEFILE | O_RDONLY | __FMODE_EXEC, 0,
124 MAY_READ | MAY_EXEC | MAY_OPEN); 124 MAY_READ | MAY_EXEC | MAY_OPEN);
125 putname(tmp); 125 putname(tmp);
126 error = PTR_ERR(file); 126 error = PTR_ERR(file);
@@ -723,7 +723,7 @@ struct file *open_exec(const char *name)
723 int err; 723 int err;
724 724
725 file = do_filp_open(AT_FDCWD, name, 725 file = do_filp_open(AT_FDCWD, name,
726 O_LARGEFILE | O_RDONLY | FMODE_EXEC, 0, 726 O_LARGEFILE | O_RDONLY | __FMODE_EXEC, 0,
727 MAY_EXEC | MAY_OPEN); 727 MAY_EXEC | MAY_OPEN);
728 if (IS_ERR(file)) 728 if (IS_ERR(file))
729 goto out; 729 goto out;
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index 42685424817b..a7555238c41a 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -1030,7 +1030,6 @@ struct inode *exofs_iget(struct super_block *sb, unsigned long ino)
1030 memcpy(oi->i_data, fcb.i_data, sizeof(fcb.i_data)); 1030 memcpy(oi->i_data, fcb.i_data, sizeof(fcb.i_data));
1031 } 1031 }
1032 1032
1033 inode->i_mapping->backing_dev_info = sb->s_bdi;
1034 if (S_ISREG(inode->i_mode)) { 1033 if (S_ISREG(inode->i_mode)) {
1035 inode->i_op = &exofs_file_inode_operations; 1034 inode->i_op = &exofs_file_inode_operations;
1036 inode->i_fop = &exofs_file_operations; 1035 inode->i_fop = &exofs_file_operations;
@@ -1131,7 +1130,6 @@ struct inode *exofs_new_inode(struct inode *dir, int mode)
1131 1130
1132 sbi = sb->s_fs_info; 1131 sbi = sb->s_fs_info;
1133 1132
1134 inode->i_mapping->backing_dev_info = sb->s_bdi;
1135 sb->s_dirt = 1; 1133 sb->s_dirt = 1;
1136 inode_init_owner(inode, dir, mode); 1134 inode_init_owner(inode, dir, mode);
1137 inode->i_ino = sbi->s_nextid++; 1135 inode->i_ino = sbi->s_nextid++;
diff --git a/fs/exofs/namei.c b/fs/exofs/namei.c
index 264e95d02830..4d70db110cfc 100644
--- a/fs/exofs/namei.c
+++ b/fs/exofs/namei.c
@@ -272,7 +272,6 @@ static int exofs_rename(struct inode *old_dir, struct dentry *old_dentry,
272 new_de = exofs_find_entry(new_dir, new_dentry, &new_page); 272 new_de = exofs_find_entry(new_dir, new_dentry, &new_page);
273 if (!new_de) 273 if (!new_de)
274 goto out_dir; 274 goto out_dir;
275 inode_inc_link_count(old_inode);
276 err = exofs_set_link(new_dir, new_de, new_page, old_inode); 275 err = exofs_set_link(new_dir, new_de, new_page, old_inode);
277 new_inode->i_ctime = CURRENT_TIME; 276 new_inode->i_ctime = CURRENT_TIME;
278 if (dir_de) 277 if (dir_de)
@@ -286,12 +285,9 @@ static int exofs_rename(struct inode *old_dir, struct dentry *old_dentry,
286 if (new_dir->i_nlink >= EXOFS_LINK_MAX) 285 if (new_dir->i_nlink >= EXOFS_LINK_MAX)
287 goto out_dir; 286 goto out_dir;
288 } 287 }
289 inode_inc_link_count(old_inode);
290 err = exofs_add_link(new_dentry, old_inode); 288 err = exofs_add_link(new_dentry, old_inode);
291 if (err) { 289 if (err)
292 inode_dec_link_count(old_inode);
293 goto out_dir; 290 goto out_dir;
294 }
295 if (dir_de) 291 if (dir_de)
296 inode_inc_link_count(new_dir); 292 inode_inc_link_count(new_dir);
297 } 293 }
@@ -299,7 +295,7 @@ static int exofs_rename(struct inode *old_dir, struct dentry *old_dentry,
299 old_inode->i_ctime = CURRENT_TIME; 295 old_inode->i_ctime = CURRENT_TIME;
300 296
301 exofs_delete_entry(old_de, old_page); 297 exofs_delete_entry(old_de, old_page);
302 inode_dec_link_count(old_inode); 298 mark_inode_dirty(old_inode);
303 299
304 if (dir_de) { 300 if (dir_de) {
305 err = exofs_set_link(old_inode, dir_de, dir_page, new_dir); 301 err = exofs_set_link(old_inode, dir_de, dir_page, new_dir);
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index 2e1d8341d827..adb91855ccd0 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -344,7 +344,6 @@ static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry,
344 new_de = ext2_find_entry (new_dir, &new_dentry->d_name, &new_page); 344 new_de = ext2_find_entry (new_dir, &new_dentry->d_name, &new_page);
345 if (!new_de) 345 if (!new_de)
346 goto out_dir; 346 goto out_dir;
347 inode_inc_link_count(old_inode);
348 ext2_set_link(new_dir, new_de, new_page, old_inode, 1); 347 ext2_set_link(new_dir, new_de, new_page, old_inode, 1);
349 new_inode->i_ctime = CURRENT_TIME_SEC; 348 new_inode->i_ctime = CURRENT_TIME_SEC;
350 if (dir_de) 349 if (dir_de)
@@ -356,12 +355,9 @@ static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry,
356 if (new_dir->i_nlink >= EXT2_LINK_MAX) 355 if (new_dir->i_nlink >= EXT2_LINK_MAX)
357 goto out_dir; 356 goto out_dir;
358 } 357 }
359 inode_inc_link_count(old_inode);
360 err = ext2_add_link(new_dentry, old_inode); 358 err = ext2_add_link(new_dentry, old_inode);
361 if (err) { 359 if (err)
362 inode_dec_link_count(old_inode);
363 goto out_dir; 360 goto out_dir;
364 }
365 if (dir_de) 361 if (dir_de)
366 inode_inc_link_count(new_dir); 362 inode_inc_link_count(new_dir);
367 } 363 }
@@ -369,12 +365,11 @@ static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry,
369 /* 365 /*
370 * Like most other Unix systems, set the ctime for inodes on a 366 * Like most other Unix systems, set the ctime for inodes on a
371 * rename. 367 * rename.
372 * inode_dec_link_count() will mark the inode dirty.
373 */ 368 */
374 old_inode->i_ctime = CURRENT_TIME_SEC; 369 old_inode->i_ctime = CURRENT_TIME_SEC;
370 mark_inode_dirty(old_inode);
375 371
376 ext2_delete_entry (old_de, old_page); 372 ext2_delete_entry (old_de, old_page);
377 inode_dec_link_count(old_inode);
378 373
379 if (dir_de) { 374 if (dir_de) {
380 if (old_dir != new_dir) 375 if (old_dir != new_dir)
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 0c8d97b56f34..3aa0b72b3b94 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -848,6 +848,7 @@ struct ext4_inode_info {
848 atomic_t i_ioend_count; /* Number of outstanding io_end structs */ 848 atomic_t i_ioend_count; /* Number of outstanding io_end structs */
849 /* current io_end structure for async DIO write*/ 849 /* current io_end structure for async DIO write*/
850 ext4_io_end_t *cur_aio_dio; 850 ext4_io_end_t *cur_aio_dio;
851 atomic_t i_aiodio_unwritten; /* Nr. of inflight conversions pending */
851 852
852 spinlock_t i_block_reservation_lock; 853 spinlock_t i_block_reservation_lock;
853 854
@@ -2119,6 +2120,15 @@ static inline void set_bitmap_uptodate(struct buffer_head *bh)
2119 2120
2120#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) 2121#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1)
2121 2122
2123/* For ioend & aio unwritten conversion wait queues */
2124#define EXT4_WQ_HASH_SZ 37
2125#define ext4_ioend_wq(v) (&ext4__ioend_wq[((unsigned long)(v)) %\
2126 EXT4_WQ_HASH_SZ])
2127#define ext4_aio_mutex(v) (&ext4__aio_mutex[((unsigned long)(v)) %\
2128 EXT4_WQ_HASH_SZ])
2129extern wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ];
2130extern struct mutex ext4__aio_mutex[EXT4_WQ_HASH_SZ];
2131
2122#endif /* __KERNEL__ */ 2132#endif /* __KERNEL__ */
2123 2133
2124#endif /* _EXT4_H */ 2134#endif /* _EXT4_H */
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 63a75810b7c3..ccce8a7e94ed 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -3174,9 +3174,10 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
3174 * that this IO needs to convertion to written when IO is 3174 * that this IO needs to convertion to written when IO is
3175 * completed 3175 * completed
3176 */ 3176 */
3177 if (io) 3177 if (io && !(io->flag & EXT4_IO_END_UNWRITTEN)) {
3178 io->flag = EXT4_IO_END_UNWRITTEN; 3178 io->flag = EXT4_IO_END_UNWRITTEN;
3179 else 3179 atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten);
3180 } else
3180 ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN); 3181 ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN);
3181 if (ext4_should_dioread_nolock(inode)) 3182 if (ext4_should_dioread_nolock(inode))
3182 map->m_flags |= EXT4_MAP_UNINIT; 3183 map->m_flags |= EXT4_MAP_UNINIT;
@@ -3463,9 +3464,10 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3463 * that we need to perform convertion when IO is done. 3464 * that we need to perform convertion when IO is done.
3464 */ 3465 */
3465 if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { 3466 if ((flags & EXT4_GET_BLOCKS_PRE_IO)) {
3466 if (io) 3467 if (io && !(io->flag & EXT4_IO_END_UNWRITTEN)) {
3467 io->flag = EXT4_IO_END_UNWRITTEN; 3468 io->flag = EXT4_IO_END_UNWRITTEN;
3468 else 3469 atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten);
3470 } else
3469 ext4_set_inode_state(inode, 3471 ext4_set_inode_state(inode,
3470 EXT4_STATE_DIO_UNWRITTEN); 3472 EXT4_STATE_DIO_UNWRITTEN);
3471 } 3473 }
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 2e8322c8aa88..7b80d543b89e 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -55,11 +55,47 @@ static int ext4_release_file(struct inode *inode, struct file *filp)
55 return 0; 55 return 0;
56} 56}
57 57
58static void ext4_aiodio_wait(struct inode *inode)
59{
60 wait_queue_head_t *wq = ext4_ioend_wq(inode);
61
62 wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_aiodio_unwritten) == 0));
63}
64
65/*
66 * This tests whether the IO in question is block-aligned or not.
67 * Ext4 utilizes unwritten extents when hole-filling during direct IO, and they
68 * are converted to written only after the IO is complete. Until they are
69 * mapped, these blocks appear as holes, so dio_zero_block() will assume that
70 * it needs to zero out portions of the start and/or end block. If 2 AIO
71 * threads are at work on the same unwritten block, they must be synchronized
72 * or one thread will zero the other's data, causing corruption.
73 */
74static int
75ext4_unaligned_aio(struct inode *inode, const struct iovec *iov,
76 unsigned long nr_segs, loff_t pos)
77{
78 struct super_block *sb = inode->i_sb;
79 int blockmask = sb->s_blocksize - 1;
80 size_t count = iov_length(iov, nr_segs);
81 loff_t final_size = pos + count;
82
83 if (pos >= inode->i_size)
84 return 0;
85
86 if ((pos & blockmask) || (final_size & blockmask))
87 return 1;
88
89 return 0;
90}
91
58static ssize_t 92static ssize_t
59ext4_file_write(struct kiocb *iocb, const struct iovec *iov, 93ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
60 unsigned long nr_segs, loff_t pos) 94 unsigned long nr_segs, loff_t pos)
61{ 95{
62 struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode; 96 struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
97 int unaligned_aio = 0;
98 int ret;
63 99
64 /* 100 /*
65 * If we have encountered a bitmap-format file, the size limit 101 * If we have encountered a bitmap-format file, the size limit
@@ -78,9 +114,31 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
78 nr_segs = iov_shorten((struct iovec *)iov, nr_segs, 114 nr_segs = iov_shorten((struct iovec *)iov, nr_segs,
79 sbi->s_bitmap_maxbytes - pos); 115 sbi->s_bitmap_maxbytes - pos);
80 } 116 }
117 } else if (unlikely((iocb->ki_filp->f_flags & O_DIRECT) &&
118 !is_sync_kiocb(iocb))) {
119 unaligned_aio = ext4_unaligned_aio(inode, iov, nr_segs, pos);
81 } 120 }
82 121
83 return generic_file_aio_write(iocb, iov, nr_segs, pos); 122 /* Unaligned direct AIO must be serialized; see comment above */
123 if (unaligned_aio) {
124 static unsigned long unaligned_warn_time;
125
126 /* Warn about this once per day */
127 if (printk_timed_ratelimit(&unaligned_warn_time, 60*60*24*HZ))
128 ext4_msg(inode->i_sb, KERN_WARNING,
129 "Unaligned AIO/DIO on inode %ld by %s; "
130 "performance will be poor.",
131 inode->i_ino, current->comm);
132 mutex_lock(ext4_aio_mutex(inode));
133 ext4_aiodio_wait(inode);
134 }
135
136 ret = generic_file_aio_write(iocb, iov, nr_segs, pos);
137
138 if (unaligned_aio)
139 mutex_unlock(ext4_aio_mutex(inode));
140
141 return ret;
84} 142}
85 143
86static const struct vm_operations_struct ext4_file_vm_ops = { 144static const struct vm_operations_struct ext4_file_vm_ops = {
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 851f49b2f9d2..d1fe09aea73d 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -342,10 +342,15 @@ static struct kmem_cache *ext4_free_ext_cachep;
342/* We create slab caches for groupinfo data structures based on the 342/* We create slab caches for groupinfo data structures based on the
343 * superblock block size. There will be one per mounted filesystem for 343 * superblock block size. There will be one per mounted filesystem for
344 * each unique s_blocksize_bits */ 344 * each unique s_blocksize_bits */
345#define NR_GRPINFO_CACHES \ 345#define NR_GRPINFO_CACHES 8
346 (EXT4_MAX_BLOCK_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE + 1)
347static struct kmem_cache *ext4_groupinfo_caches[NR_GRPINFO_CACHES]; 346static struct kmem_cache *ext4_groupinfo_caches[NR_GRPINFO_CACHES];
348 347
348static const char *ext4_groupinfo_slab_names[NR_GRPINFO_CACHES] = {
349 "ext4_groupinfo_1k", "ext4_groupinfo_2k", "ext4_groupinfo_4k",
350 "ext4_groupinfo_8k", "ext4_groupinfo_16k", "ext4_groupinfo_32k",
351 "ext4_groupinfo_64k", "ext4_groupinfo_128k"
352};
353
349static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, 354static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
350 ext4_group_t group); 355 ext4_group_t group);
351static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, 356static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
@@ -2414,6 +2419,55 @@ err_freesgi:
2414 return -ENOMEM; 2419 return -ENOMEM;
2415} 2420}
2416 2421
2422static void ext4_groupinfo_destroy_slabs(void)
2423{
2424 int i;
2425
2426 for (i = 0; i < NR_GRPINFO_CACHES; i++) {
2427 if (ext4_groupinfo_caches[i])
2428 kmem_cache_destroy(ext4_groupinfo_caches[i]);
2429 ext4_groupinfo_caches[i] = NULL;
2430 }
2431}
2432
2433static int ext4_groupinfo_create_slab(size_t size)
2434{
2435 static DEFINE_MUTEX(ext4_grpinfo_slab_create_mutex);
2436 int slab_size;
2437 int blocksize_bits = order_base_2(size);
2438 int cache_index = blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE;
2439 struct kmem_cache *cachep;
2440
2441 if (cache_index >= NR_GRPINFO_CACHES)
2442 return -EINVAL;
2443
2444 if (unlikely(cache_index < 0))
2445 cache_index = 0;
2446
2447 mutex_lock(&ext4_grpinfo_slab_create_mutex);
2448 if (ext4_groupinfo_caches[cache_index]) {
2449 mutex_unlock(&ext4_grpinfo_slab_create_mutex);
2450 return 0; /* Already created */
2451 }
2452
2453 slab_size = offsetof(struct ext4_group_info,
2454 bb_counters[blocksize_bits + 2]);
2455
2456 cachep = kmem_cache_create(ext4_groupinfo_slab_names[cache_index],
2457 slab_size, 0, SLAB_RECLAIM_ACCOUNT,
2458 NULL);
2459
2460 mutex_unlock(&ext4_grpinfo_slab_create_mutex);
2461 if (!cachep) {
2462 printk(KERN_EMERG "EXT4: no memory for groupinfo slab cache\n");
2463 return -ENOMEM;
2464 }
2465
2466 ext4_groupinfo_caches[cache_index] = cachep;
2467
2468 return 0;
2469}
2470
2417int ext4_mb_init(struct super_block *sb, int needs_recovery) 2471int ext4_mb_init(struct super_block *sb, int needs_recovery)
2418{ 2472{
2419 struct ext4_sb_info *sbi = EXT4_SB(sb); 2473 struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -2421,9 +2475,6 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
2421 unsigned offset; 2475 unsigned offset;
2422 unsigned max; 2476 unsigned max;
2423 int ret; 2477 int ret;
2424 int cache_index;
2425 struct kmem_cache *cachep;
2426 char *namep = NULL;
2427 2478
2428 i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_offsets); 2479 i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_offsets);
2429 2480
@@ -2440,30 +2491,9 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
2440 goto out; 2491 goto out;
2441 } 2492 }
2442 2493
2443 cache_index = sb->s_blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE; 2494 ret = ext4_groupinfo_create_slab(sb->s_blocksize);
2444 cachep = ext4_groupinfo_caches[cache_index]; 2495 if (ret < 0)
2445 if (!cachep) { 2496 goto out;
2446 char name[32];
2447 int len = offsetof(struct ext4_group_info,
2448 bb_counters[sb->s_blocksize_bits + 2]);
2449
2450 sprintf(name, "ext4_groupinfo_%d", sb->s_blocksize_bits);
2451 namep = kstrdup(name, GFP_KERNEL);
2452 if (!namep) {
2453 ret = -ENOMEM;
2454 goto out;
2455 }
2456
2457 /* Need to free the kmem_cache_name() when we
2458 * destroy the slab */
2459 cachep = kmem_cache_create(namep, len, 0,
2460 SLAB_RECLAIM_ACCOUNT, NULL);
2461 if (!cachep) {
2462 ret = -ENOMEM;
2463 goto out;
2464 }
2465 ext4_groupinfo_caches[cache_index] = cachep;
2466 }
2467 2497
2468 /* order 0 is regular bitmap */ 2498 /* order 0 is regular bitmap */
2469 sbi->s_mb_maxs[0] = sb->s_blocksize << 3; 2499 sbi->s_mb_maxs[0] = sb->s_blocksize << 3;
@@ -2520,7 +2550,6 @@ out:
2520 if (ret) { 2550 if (ret) {
2521 kfree(sbi->s_mb_offsets); 2551 kfree(sbi->s_mb_offsets);
2522 kfree(sbi->s_mb_maxs); 2552 kfree(sbi->s_mb_maxs);
2523 kfree(namep);
2524 } 2553 }
2525 return ret; 2554 return ret;
2526} 2555}
@@ -2734,7 +2763,6 @@ int __init ext4_init_mballoc(void)
2734 2763
2735void ext4_exit_mballoc(void) 2764void ext4_exit_mballoc(void)
2736{ 2765{
2737 int i;
2738 /* 2766 /*
2739 * Wait for completion of call_rcu()'s on ext4_pspace_cachep 2767 * Wait for completion of call_rcu()'s on ext4_pspace_cachep
2740 * before destroying the slab cache. 2768 * before destroying the slab cache.
@@ -2743,15 +2771,7 @@ void ext4_exit_mballoc(void)
2743 kmem_cache_destroy(ext4_pspace_cachep); 2771 kmem_cache_destroy(ext4_pspace_cachep);
2744 kmem_cache_destroy(ext4_ac_cachep); 2772 kmem_cache_destroy(ext4_ac_cachep);
2745 kmem_cache_destroy(ext4_free_ext_cachep); 2773 kmem_cache_destroy(ext4_free_ext_cachep);
2746 2774 ext4_groupinfo_destroy_slabs();
2747 for (i = 0; i < NR_GRPINFO_CACHES; i++) {
2748 struct kmem_cache *cachep = ext4_groupinfo_caches[i];
2749 if (cachep) {
2750 char *name = (char *)kmem_cache_name(cachep);
2751 kmem_cache_destroy(cachep);
2752 kfree(name);
2753 }
2754 }
2755 ext4_remove_debugfs_entry(); 2775 ext4_remove_debugfs_entry();
2756} 2776}
2757 2777
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 7270dcfca92a..955cc309142f 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -32,14 +32,8 @@
32 32
33static struct kmem_cache *io_page_cachep, *io_end_cachep; 33static struct kmem_cache *io_page_cachep, *io_end_cachep;
34 34
35#define WQ_HASH_SZ 37
36#define to_ioend_wq(v) (&ioend_wq[((unsigned long)v) % WQ_HASH_SZ])
37static wait_queue_head_t ioend_wq[WQ_HASH_SZ];
38
39int __init ext4_init_pageio(void) 35int __init ext4_init_pageio(void)
40{ 36{
41 int i;
42
43 io_page_cachep = KMEM_CACHE(ext4_io_page, SLAB_RECLAIM_ACCOUNT); 37 io_page_cachep = KMEM_CACHE(ext4_io_page, SLAB_RECLAIM_ACCOUNT);
44 if (io_page_cachep == NULL) 38 if (io_page_cachep == NULL)
45 return -ENOMEM; 39 return -ENOMEM;
@@ -48,9 +42,6 @@ int __init ext4_init_pageio(void)
48 kmem_cache_destroy(io_page_cachep); 42 kmem_cache_destroy(io_page_cachep);
49 return -ENOMEM; 43 return -ENOMEM;
50 } 44 }
51 for (i = 0; i < WQ_HASH_SZ; i++)
52 init_waitqueue_head(&ioend_wq[i]);
53
54 return 0; 45 return 0;
55} 46}
56 47
@@ -62,7 +53,7 @@ void ext4_exit_pageio(void)
62 53
63void ext4_ioend_wait(struct inode *inode) 54void ext4_ioend_wait(struct inode *inode)
64{ 55{
65 wait_queue_head_t *wq = to_ioend_wq(inode); 56 wait_queue_head_t *wq = ext4_ioend_wq(inode);
66 57
67 wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_ioend_count) == 0)); 58 wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_ioend_count) == 0));
68} 59}
@@ -87,7 +78,7 @@ void ext4_free_io_end(ext4_io_end_t *io)
87 for (i = 0; i < io->num_io_pages; i++) 78 for (i = 0; i < io->num_io_pages; i++)
88 put_io_page(io->pages[i]); 79 put_io_page(io->pages[i]);
89 io->num_io_pages = 0; 80 io->num_io_pages = 0;
90 wq = to_ioend_wq(io->inode); 81 wq = ext4_ioend_wq(io->inode);
91 if (atomic_dec_and_test(&EXT4_I(io->inode)->i_ioend_count) && 82 if (atomic_dec_and_test(&EXT4_I(io->inode)->i_ioend_count) &&
92 waitqueue_active(wq)) 83 waitqueue_active(wq))
93 wake_up_all(wq); 84 wake_up_all(wq);
@@ -102,6 +93,7 @@ int ext4_end_io_nolock(ext4_io_end_t *io)
102 struct inode *inode = io->inode; 93 struct inode *inode = io->inode;
103 loff_t offset = io->offset; 94 loff_t offset = io->offset;
104 ssize_t size = io->size; 95 ssize_t size = io->size;
96 wait_queue_head_t *wq;
105 int ret = 0; 97 int ret = 0;
106 98
107 ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p," 99 ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p,"
@@ -126,7 +118,16 @@ int ext4_end_io_nolock(ext4_io_end_t *io)
126 if (io->iocb) 118 if (io->iocb)
127 aio_complete(io->iocb, io->result, 0); 119 aio_complete(io->iocb, io->result, 0);
128 /* clear the DIO AIO unwritten flag */ 120 /* clear the DIO AIO unwritten flag */
129 io->flag &= ~EXT4_IO_END_UNWRITTEN; 121 if (io->flag & EXT4_IO_END_UNWRITTEN) {
122 io->flag &= ~EXT4_IO_END_UNWRITTEN;
123 /* Wake up anyone waiting on unwritten extent conversion */
124 wq = ext4_ioend_wq(io->inode);
125 if (atomic_dec_and_test(&EXT4_I(inode)->i_aiodio_unwritten) &&
126 waitqueue_active(wq)) {
127 wake_up_all(wq);
128 }
129 }
130
130 return ret; 131 return ret;
131} 132}
132 133
@@ -190,6 +191,7 @@ static void ext4_end_bio(struct bio *bio, int error)
190 struct inode *inode; 191 struct inode *inode;
191 unsigned long flags; 192 unsigned long flags;
192 int i; 193 int i;
194 sector_t bi_sector = bio->bi_sector;
193 195
194 BUG_ON(!io_end); 196 BUG_ON(!io_end);
195 bio->bi_private = NULL; 197 bio->bi_private = NULL;
@@ -207,9 +209,7 @@ static void ext4_end_bio(struct bio *bio, int error)
207 if (error) 209 if (error)
208 SetPageError(page); 210 SetPageError(page);
209 BUG_ON(!head); 211 BUG_ON(!head);
210 if (head->b_size == PAGE_CACHE_SIZE) 212 if (head->b_size != PAGE_CACHE_SIZE) {
211 clear_buffer_dirty(head);
212 else {
213 loff_t offset; 213 loff_t offset;
214 loff_t io_end_offset = io_end->offset + io_end->size; 214 loff_t io_end_offset = io_end->offset + io_end->size;
215 215
@@ -221,7 +221,6 @@ static void ext4_end_bio(struct bio *bio, int error)
221 if (error) 221 if (error)
222 buffer_io_error(bh); 222 buffer_io_error(bh);
223 223
224 clear_buffer_dirty(bh);
225 } 224 }
226 if (buffer_delay(bh)) 225 if (buffer_delay(bh))
227 partial_write = 1; 226 partial_write = 1;
@@ -257,7 +256,7 @@ static void ext4_end_bio(struct bio *bio, int error)
257 (unsigned long long) io_end->offset, 256 (unsigned long long) io_end->offset,
258 (long) io_end->size, 257 (long) io_end->size,
259 (unsigned long long) 258 (unsigned long long)
260 bio->bi_sector >> (inode->i_blkbits - 9)); 259 bi_sector >> (inode->i_blkbits - 9));
261 } 260 }
262 261
263 /* Add the io_end to per-inode completed io list*/ 262 /* Add the io_end to per-inode completed io list*/
@@ -380,6 +379,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
380 379
381 blocksize = 1 << inode->i_blkbits; 380 blocksize = 1 << inode->i_blkbits;
382 381
382 BUG_ON(!PageLocked(page));
383 BUG_ON(PageWriteback(page)); 383 BUG_ON(PageWriteback(page));
384 set_page_writeback(page); 384 set_page_writeback(page);
385 ClearPageError(page); 385 ClearPageError(page);
@@ -397,12 +397,14 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
397 for (bh = head = page_buffers(page), block_start = 0; 397 for (bh = head = page_buffers(page), block_start = 0;
398 bh != head || !block_start; 398 bh != head || !block_start;
399 block_start = block_end, bh = bh->b_this_page) { 399 block_start = block_end, bh = bh->b_this_page) {
400
400 block_end = block_start + blocksize; 401 block_end = block_start + blocksize;
401 if (block_start >= len) { 402 if (block_start >= len) {
402 clear_buffer_dirty(bh); 403 clear_buffer_dirty(bh);
403 set_buffer_uptodate(bh); 404 set_buffer_uptodate(bh);
404 continue; 405 continue;
405 } 406 }
407 clear_buffer_dirty(bh);
406 ret = io_submit_add_bh(io, io_page, inode, wbc, bh); 408 ret = io_submit_add_bh(io, io_page, inode, wbc, bh);
407 if (ret) { 409 if (ret) {
408 /* 410 /*
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 48ce561fafac..f6a318f836b2 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -77,6 +77,7 @@ static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags,
77 const char *dev_name, void *data); 77 const char *dev_name, void *data);
78static void ext4_destroy_lazyinit_thread(void); 78static void ext4_destroy_lazyinit_thread(void);
79static void ext4_unregister_li_request(struct super_block *sb); 79static void ext4_unregister_li_request(struct super_block *sb);
80static void ext4_clear_request_list(void);
80 81
81#if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) 82#if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
82static struct file_system_type ext3_fs_type = { 83static struct file_system_type ext3_fs_type = {
@@ -832,6 +833,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
832 ei->i_sync_tid = 0; 833 ei->i_sync_tid = 0;
833 ei->i_datasync_tid = 0; 834 ei->i_datasync_tid = 0;
834 atomic_set(&ei->i_ioend_count, 0); 835 atomic_set(&ei->i_ioend_count, 0);
836 atomic_set(&ei->i_aiodio_unwritten, 0);
835 837
836 return &ei->vfs_inode; 838 return &ei->vfs_inode;
837} 839}
@@ -2716,6 +2718,8 @@ static void ext4_unregister_li_request(struct super_block *sb)
2716 mutex_unlock(&ext4_li_info->li_list_mtx); 2718 mutex_unlock(&ext4_li_info->li_list_mtx);
2717} 2719}
2718 2720
2721static struct task_struct *ext4_lazyinit_task;
2722
2719/* 2723/*
2720 * This is the function where ext4lazyinit thread lives. It walks 2724 * This is the function where ext4lazyinit thread lives. It walks
2721 * through the request list searching for next scheduled filesystem. 2725 * through the request list searching for next scheduled filesystem.
@@ -2784,6 +2788,10 @@ cont_thread:
2784 if (time_before(jiffies, next_wakeup)) 2788 if (time_before(jiffies, next_wakeup))
2785 schedule(); 2789 schedule();
2786 finish_wait(&eli->li_wait_daemon, &wait); 2790 finish_wait(&eli->li_wait_daemon, &wait);
2791 if (kthread_should_stop()) {
2792 ext4_clear_request_list();
2793 goto exit_thread;
2794 }
2787 } 2795 }
2788 2796
2789exit_thread: 2797exit_thread:
@@ -2808,6 +2816,7 @@ exit_thread:
2808 wake_up(&eli->li_wait_task); 2816 wake_up(&eli->li_wait_task);
2809 2817
2810 kfree(ext4_li_info); 2818 kfree(ext4_li_info);
2819 ext4_lazyinit_task = NULL;
2811 ext4_li_info = NULL; 2820 ext4_li_info = NULL;
2812 mutex_unlock(&ext4_li_mtx); 2821 mutex_unlock(&ext4_li_mtx);
2813 2822
@@ -2830,11 +2839,10 @@ static void ext4_clear_request_list(void)
2830 2839
2831static int ext4_run_lazyinit_thread(void) 2840static int ext4_run_lazyinit_thread(void)
2832{ 2841{
2833 struct task_struct *t; 2842 ext4_lazyinit_task = kthread_run(ext4_lazyinit_thread,
2834 2843 ext4_li_info, "ext4lazyinit");
2835 t = kthread_run(ext4_lazyinit_thread, ext4_li_info, "ext4lazyinit"); 2844 if (IS_ERR(ext4_lazyinit_task)) {
2836 if (IS_ERR(t)) { 2845 int err = PTR_ERR(ext4_lazyinit_task);
2837 int err = PTR_ERR(t);
2838 ext4_clear_request_list(); 2846 ext4_clear_request_list();
2839 del_timer_sync(&ext4_li_info->li_timer); 2847 del_timer_sync(&ext4_li_info->li_timer);
2840 kfree(ext4_li_info); 2848 kfree(ext4_li_info);
@@ -2985,16 +2993,10 @@ static void ext4_destroy_lazyinit_thread(void)
2985 * If thread exited earlier 2993 * If thread exited earlier
2986 * there's nothing to be done. 2994 * there's nothing to be done.
2987 */ 2995 */
2988 if (!ext4_li_info) 2996 if (!ext4_li_info || !ext4_lazyinit_task)
2989 return; 2997 return;
2990 2998
2991 ext4_clear_request_list(); 2999 kthread_stop(ext4_lazyinit_task);
2992
2993 while (ext4_li_info->li_task) {
2994 wake_up(&ext4_li_info->li_wait_daemon);
2995 wait_event(ext4_li_info->li_wait_task,
2996 ext4_li_info->li_task == NULL);
2997 }
2998} 3000}
2999 3001
3000static int ext4_fill_super(struct super_block *sb, void *data, int silent) 3002static int ext4_fill_super(struct super_block *sb, void *data, int silent)
@@ -4768,7 +4770,7 @@ static struct file_system_type ext4_fs_type = {
4768 .fs_flags = FS_REQUIRES_DEV, 4770 .fs_flags = FS_REQUIRES_DEV,
4769}; 4771};
4770 4772
4771int __init ext4_init_feat_adverts(void) 4773static int __init ext4_init_feat_adverts(void)
4772{ 4774{
4773 struct ext4_features *ef; 4775 struct ext4_features *ef;
4774 int ret = -ENOMEM; 4776 int ret = -ENOMEM;
@@ -4792,23 +4794,44 @@ out:
4792 return ret; 4794 return ret;
4793} 4795}
4794 4796
4797static void ext4_exit_feat_adverts(void)
4798{
4799 kobject_put(&ext4_feat->f_kobj);
4800 wait_for_completion(&ext4_feat->f_kobj_unregister);
4801 kfree(ext4_feat);
4802}
4803
4804/* Shared across all ext4 file systems */
4805wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ];
4806struct mutex ext4__aio_mutex[EXT4_WQ_HASH_SZ];
4807
4795static int __init ext4_init_fs(void) 4808static int __init ext4_init_fs(void)
4796{ 4809{
4797 int err; 4810 int i, err;
4798 4811
4799 ext4_check_flag_values(); 4812 ext4_check_flag_values();
4813
4814 for (i = 0; i < EXT4_WQ_HASH_SZ; i++) {
4815 mutex_init(&ext4__aio_mutex[i]);
4816 init_waitqueue_head(&ext4__ioend_wq[i]);
4817 }
4818
4800 err = ext4_init_pageio(); 4819 err = ext4_init_pageio();
4801 if (err) 4820 if (err)
4802 return err; 4821 return err;
4803 err = ext4_init_system_zone(); 4822 err = ext4_init_system_zone();
4804 if (err) 4823 if (err)
4805 goto out5; 4824 goto out7;
4806 ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj); 4825 ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj);
4807 if (!ext4_kset) 4826 if (!ext4_kset)
4808 goto out4; 4827 goto out6;
4809 ext4_proc_root = proc_mkdir("fs/ext4", NULL); 4828 ext4_proc_root = proc_mkdir("fs/ext4", NULL);
4829 if (!ext4_proc_root)
4830 goto out5;
4810 4831
4811 err = ext4_init_feat_adverts(); 4832 err = ext4_init_feat_adverts();
4833 if (err)
4834 goto out4;
4812 4835
4813 err = ext4_init_mballoc(); 4836 err = ext4_init_mballoc();
4814 if (err) 4837 if (err)
@@ -4838,12 +4861,14 @@ out1:
4838out2: 4861out2:
4839 ext4_exit_mballoc(); 4862 ext4_exit_mballoc();
4840out3: 4863out3:
4841 kfree(ext4_feat); 4864 ext4_exit_feat_adverts();
4865out4:
4842 remove_proc_entry("fs/ext4", NULL); 4866 remove_proc_entry("fs/ext4", NULL);
4867out5:
4843 kset_unregister(ext4_kset); 4868 kset_unregister(ext4_kset);
4844out4: 4869out6:
4845 ext4_exit_system_zone(); 4870 ext4_exit_system_zone();
4846out5: 4871out7:
4847 ext4_exit_pageio(); 4872 ext4_exit_pageio();
4848 return err; 4873 return err;
4849} 4874}
@@ -4857,6 +4882,7 @@ static void __exit ext4_exit_fs(void)
4857 destroy_inodecache(); 4882 destroy_inodecache();
4858 ext4_exit_xattr(); 4883 ext4_exit_xattr();
4859 ext4_exit_mballoc(); 4884 ext4_exit_mballoc();
4885 ext4_exit_feat_adverts();
4860 remove_proc_entry("fs/ext4", NULL); 4886 remove_proc_entry("fs/ext4", NULL);
4861 kset_unregister(ext4_kset); 4887 kset_unregister(ext4_kset);
4862 ext4_exit_system_zone(); 4888 ext4_exit_system_zone();
diff --git a/fs/fcntl.c b/fs/fcntl.c
index ecc8b3954ed6..cb1026181bdc 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -815,7 +815,7 @@ static int __init fcntl_init(void)
815 __O_SYNC | O_DSYNC | FASYNC | 815 __O_SYNC | O_DSYNC | FASYNC |
816 O_DIRECT | O_LARGEFILE | O_DIRECTORY | 816 O_DIRECT | O_LARGEFILE | O_DIRECTORY |
817 O_NOFOLLOW | O_NOATIME | O_CLOEXEC | 817 O_NOFOLLOW | O_NOATIME | O_CLOEXEC |
818 FMODE_EXEC 818 __FMODE_EXEC
819 )); 819 ));
820 820
821 fasync_cache = kmem_cache_create("fasync_cache", 821 fasync_cache = kmem_cache_create("fasync_cache",
diff --git a/fs/file_table.c b/fs/file_table.c
index c3e89adf53c0..eb36b6b17e26 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -125,13 +125,13 @@ struct file *get_empty_filp(void)
125 goto fail; 125 goto fail;
126 126
127 percpu_counter_inc(&nr_files); 127 percpu_counter_inc(&nr_files);
128 f->f_cred = get_cred(cred);
128 if (security_file_alloc(f)) 129 if (security_file_alloc(f))
129 goto fail_sec; 130 goto fail_sec;
130 131
131 INIT_LIST_HEAD(&f->f_u.fu_list); 132 INIT_LIST_HEAD(&f->f_u.fu_list);
132 atomic_long_set(&f->f_count, 1); 133 atomic_long_set(&f->f_count, 1);
133 rwlock_init(&f->f_owner.lock); 134 rwlock_init(&f->f_owner.lock);
134 f->f_cred = get_cred(cred);
135 spin_lock_init(&f->f_lock); 135 spin_lock_init(&f->f_lock);
136 eventpoll_init_file(f); 136 eventpoll_init_file(f);
137 /* f->f_version: 0 */ 137 /* f->f_version: 0 */
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index bfed8447ed80..83543b5ff941 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -1283,8 +1283,11 @@ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr,
1283 if (err) 1283 if (err)
1284 return err; 1284 return err;
1285 1285
1286 if ((attr->ia_valid & ATTR_OPEN) && fc->atomic_o_trunc) 1286 if (attr->ia_valid & ATTR_OPEN) {
1287 return 0; 1287 if (fc->atomic_o_trunc)
1288 return 0;
1289 file = NULL;
1290 }
1288 1291
1289 if (attr->ia_valid & ATTR_SIZE) 1292 if (attr->ia_valid & ATTR_SIZE)
1290 is_truncate = true; 1293 is_truncate = true;
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 95da1bc1c826..9e0832dbb1e3 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -86,18 +86,52 @@ struct fuse_file *fuse_file_get(struct fuse_file *ff)
86 return ff; 86 return ff;
87} 87}
88 88
89static void fuse_release_async(struct work_struct *work)
90{
91 struct fuse_req *req;
92 struct fuse_conn *fc;
93 struct path path;
94
95 req = container_of(work, struct fuse_req, misc.release.work);
96 path = req->misc.release.path;
97 fc = get_fuse_conn(path.dentry->d_inode);
98
99 fuse_put_request(fc, req);
100 path_put(&path);
101}
102
89static void fuse_release_end(struct fuse_conn *fc, struct fuse_req *req) 103static void fuse_release_end(struct fuse_conn *fc, struct fuse_req *req)
90{ 104{
91 path_put(&req->misc.release.path); 105 if (fc->destroy_req) {
106 /*
107 * If this is a fuseblk mount, then it's possible that
108 * releasing the path will result in releasing the
109 * super block and sending the DESTROY request. If
110 * the server is single threaded, this would hang.
111 * For this reason do the path_put() in a separate
112 * thread.
113 */
114 atomic_inc(&req->count);
115 INIT_WORK(&req->misc.release.work, fuse_release_async);
116 schedule_work(&req->misc.release.work);
117 } else {
118 path_put(&req->misc.release.path);
119 }
92} 120}
93 121
94static void fuse_file_put(struct fuse_file *ff) 122static void fuse_file_put(struct fuse_file *ff, bool sync)
95{ 123{
96 if (atomic_dec_and_test(&ff->count)) { 124 if (atomic_dec_and_test(&ff->count)) {
97 struct fuse_req *req = ff->reserved_req; 125 struct fuse_req *req = ff->reserved_req;
98 126
99 req->end = fuse_release_end; 127 if (sync) {
100 fuse_request_send_background(ff->fc, req); 128 fuse_request_send(ff->fc, req);
129 path_put(&req->misc.release.path);
130 fuse_put_request(ff->fc, req);
131 } else {
132 req->end = fuse_release_end;
133 fuse_request_send_background(ff->fc, req);
134 }
101 kfree(ff); 135 kfree(ff);
102 } 136 }
103} 137}
@@ -219,8 +253,12 @@ void fuse_release_common(struct file *file, int opcode)
219 * Normally this will send the RELEASE request, however if 253 * Normally this will send the RELEASE request, however if
220 * some asynchronous READ or WRITE requests are outstanding, 254 * some asynchronous READ or WRITE requests are outstanding,
221 * the sending will be delayed. 255 * the sending will be delayed.
256 *
257 * Make the release synchronous if this is a fuseblk mount,
258 * synchronous RELEASE is allowed (and desirable) in this case
259 * because the server can be trusted not to screw up.
222 */ 260 */
223 fuse_file_put(ff); 261 fuse_file_put(ff, ff->fc->destroy_req != NULL);
224} 262}
225 263
226static int fuse_open(struct inode *inode, struct file *file) 264static int fuse_open(struct inode *inode, struct file *file)
@@ -558,7 +596,7 @@ static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_req *req)
558 page_cache_release(page); 596 page_cache_release(page);
559 } 597 }
560 if (req->ff) 598 if (req->ff)
561 fuse_file_put(req->ff); 599 fuse_file_put(req->ff, false);
562} 600}
563 601
564static void fuse_send_readpages(struct fuse_req *req, struct file *file) 602static void fuse_send_readpages(struct fuse_req *req, struct file *file)
@@ -1137,7 +1175,7 @@ static ssize_t fuse_direct_write(struct file *file, const char __user *buf,
1137static void fuse_writepage_free(struct fuse_conn *fc, struct fuse_req *req) 1175static void fuse_writepage_free(struct fuse_conn *fc, struct fuse_req *req)
1138{ 1176{
1139 __free_page(req->pages[0]); 1177 __free_page(req->pages[0]);
1140 fuse_file_put(req->ff); 1178 fuse_file_put(req->ff, false);
1141} 1179}
1142 1180
1143static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req) 1181static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req)
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index ae5744a2f9e9..d4286947bc2c 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -21,6 +21,7 @@
21#include <linux/rwsem.h> 21#include <linux/rwsem.h>
22#include <linux/rbtree.h> 22#include <linux/rbtree.h>
23#include <linux/poll.h> 23#include <linux/poll.h>
24#include <linux/workqueue.h>
24 25
25/** Max number of pages that can be used in a single read request */ 26/** Max number of pages that can be used in a single read request */
26#define FUSE_MAX_PAGES_PER_REQ 32 27#define FUSE_MAX_PAGES_PER_REQ 32
@@ -262,7 +263,10 @@ struct fuse_req {
262 /** Data for asynchronous requests */ 263 /** Data for asynchronous requests */
263 union { 264 union {
264 struct { 265 struct {
265 struct fuse_release_in in; 266 union {
267 struct fuse_release_in in;
268 struct work_struct work;
269 };
266 struct path path; 270 struct path path;
267 } release; 271 } release;
268 struct fuse_init_in init_in; 272 struct fuse_init_in init_in;
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 08a8beb152e6..7cd9a5a68d59 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1779,11 +1779,11 @@ int __init gfs2_glock_init(void)
1779#endif 1779#endif
1780 1780
1781 glock_workqueue = alloc_workqueue("glock_workqueue", WQ_MEM_RECLAIM | 1781 glock_workqueue = alloc_workqueue("glock_workqueue", WQ_MEM_RECLAIM |
1782 WQ_HIGHPRI | WQ_FREEZEABLE, 0); 1782 WQ_HIGHPRI | WQ_FREEZABLE, 0);
1783 if (IS_ERR(glock_workqueue)) 1783 if (IS_ERR(glock_workqueue))
1784 return PTR_ERR(glock_workqueue); 1784 return PTR_ERR(glock_workqueue);
1785 gfs2_delete_workqueue = alloc_workqueue("delete_workqueue", 1785 gfs2_delete_workqueue = alloc_workqueue("delete_workqueue",
1786 WQ_MEM_RECLAIM | WQ_FREEZEABLE, 1786 WQ_MEM_RECLAIM | WQ_FREEZABLE,
1787 0); 1787 0);
1788 if (IS_ERR(gfs2_delete_workqueue)) { 1788 if (IS_ERR(gfs2_delete_workqueue)) {
1789 destroy_workqueue(glock_workqueue); 1789 destroy_workqueue(glock_workqueue);
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index ebef7ab6e17e..72c31a315d96 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -59,14 +59,7 @@ static void gfs2_init_gl_aspace_once(void *foo)
59 struct address_space *mapping = (struct address_space *)(gl + 1); 59 struct address_space *mapping = (struct address_space *)(gl + 1);
60 60
61 gfs2_init_glock_once(gl); 61 gfs2_init_glock_once(gl);
62 memset(mapping, 0, sizeof(*mapping)); 62 address_space_init_once(mapping);
63 INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC);
64 spin_lock_init(&mapping->tree_lock);
65 spin_lock_init(&mapping->i_mmap_lock);
66 INIT_LIST_HEAD(&mapping->private_list);
67 spin_lock_init(&mapping->private_lock);
68 INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap);
69 INIT_LIST_HEAD(&mapping->i_mmap_nonlinear);
70} 63}
71 64
72/** 65/**
@@ -144,7 +137,7 @@ static int __init init_gfs2_fs(void)
144 137
145 error = -ENOMEM; 138 error = -ENOMEM;
146 gfs_recovery_wq = alloc_workqueue("gfs_recovery", 139 gfs_recovery_wq = alloc_workqueue("gfs_recovery",
147 WQ_MEM_RECLAIM | WQ_FREEZEABLE, 0); 140 WQ_MEM_RECLAIM | WQ_FREEZABLE, 0);
148 if (!gfs_recovery_wq) 141 if (!gfs_recovery_wq)
149 goto fail_wq; 142 goto fail_wq;
150 143
diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c
index afa66aaa2237..b4d70b13be92 100644
--- a/fs/hfs/dir.c
+++ b/fs/hfs/dir.c
@@ -238,46 +238,22 @@ static int hfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
238} 238}
239 239
240/* 240/*
241 * hfs_unlink() 241 * hfs_remove()
242 * 242 *
243 * This is the unlink() entry in the inode_operations structure for 243 * This serves as both unlink() and rmdir() in the inode_operations
244 * regular HFS directories. The purpose is to delete an existing 244 * structure for regular HFS directories. The purpose is to delete
245 * file, given the inode for the parent directory and the name 245 * an existing child, given the inode for the parent directory and
246 * (and its length) of the existing file. 246 * the name (and its length) of the existing directory.
247 */
248static int hfs_unlink(struct inode *dir, struct dentry *dentry)
249{
250 struct inode *inode;
251 int res;
252
253 inode = dentry->d_inode;
254 res = hfs_cat_delete(inode->i_ino, dir, &dentry->d_name);
255 if (res)
256 return res;
257
258 drop_nlink(inode);
259 hfs_delete_inode(inode);
260 inode->i_ctime = CURRENT_TIME_SEC;
261 mark_inode_dirty(inode);
262
263 return res;
264}
265
266/*
267 * hfs_rmdir()
268 * 247 *
269 * This is the rmdir() entry in the inode_operations structure for 248 * HFS does not have hardlinks, so both rmdir and unlink set the
270 * regular HFS directories. The purpose is to delete an existing 249 * link count to 0. The only difference is the emptiness check.
271 * directory, given the inode for the parent directory and the name
272 * (and its length) of the existing directory.
273 */ 250 */
274static int hfs_rmdir(struct inode *dir, struct dentry *dentry) 251static int hfs_remove(struct inode *dir, struct dentry *dentry)
275{ 252{
276 struct inode *inode; 253 struct inode *inode = dentry->d_inode;
277 int res; 254 int res;
278 255
279 inode = dentry->d_inode; 256 if (S_ISDIR(inode->i_mode) && inode->i_size != 2)
280 if (inode->i_size != 2)
281 return -ENOTEMPTY; 257 return -ENOTEMPTY;
282 res = hfs_cat_delete(inode->i_ino, dir, &dentry->d_name); 258 res = hfs_cat_delete(inode->i_ino, dir, &dentry->d_name);
283 if (res) 259 if (res)
@@ -307,7 +283,7 @@ static int hfs_rename(struct inode *old_dir, struct dentry *old_dentry,
307 283
308 /* Unlink destination if it already exists */ 284 /* Unlink destination if it already exists */
309 if (new_dentry->d_inode) { 285 if (new_dentry->d_inode) {
310 res = hfs_unlink(new_dir, new_dentry); 286 res = hfs_remove(new_dir, new_dentry);
311 if (res) 287 if (res)
312 return res; 288 return res;
313 } 289 }
@@ -332,9 +308,9 @@ const struct file_operations hfs_dir_operations = {
332const struct inode_operations hfs_dir_inode_operations = { 308const struct inode_operations hfs_dir_inode_operations = {
333 .create = hfs_create, 309 .create = hfs_create,
334 .lookup = hfs_lookup, 310 .lookup = hfs_lookup,
335 .unlink = hfs_unlink, 311 .unlink = hfs_remove,
336 .mkdir = hfs_mkdir, 312 .mkdir = hfs_mkdir,
337 .rmdir = hfs_rmdir, 313 .rmdir = hfs_remove,
338 .rename = hfs_rename, 314 .rename = hfs_rename,
339 .setattr = hfs_inode_setattr, 315 .setattr = hfs_inode_setattr,
340}; 316};
diff --git a/fs/hfsplus/extents.c b/fs/hfsplus/extents.c
index 52a0bcaa7b6d..b1991a2a08e0 100644
--- a/fs/hfsplus/extents.c
+++ b/fs/hfsplus/extents.c
@@ -397,8 +397,8 @@ int hfsplus_file_extend(struct inode *inode)
397 u32 start, len, goal; 397 u32 start, len, goal;
398 int res; 398 int res;
399 399
400 if (sbi->total_blocks - sbi->free_blocks + 8 > 400 if (sbi->alloc_file->i_size * 8 <
401 sbi->alloc_file->i_size * 8) { 401 sbi->total_blocks - sbi->free_blocks + 8) {
402 /* extend alloc file */ 402 /* extend alloc file */
403 printk(KERN_ERR "hfs: extend alloc file! " 403 printk(KERN_ERR "hfs: extend alloc file! "
404 "(%llu,%u,%u)\n", 404 "(%llu,%u,%u)\n",
diff --git a/fs/hfsplus/part_tbl.c b/fs/hfsplus/part_tbl.c
index d66ad113b1cc..40ad88c12c64 100644
--- a/fs/hfsplus/part_tbl.c
+++ b/fs/hfsplus/part_tbl.c
@@ -134,7 +134,7 @@ int hfs_part_find(struct super_block *sb,
134 res = hfsplus_submit_bio(sb->s_bdev, *part_start + HFS_PMAP_BLK, 134 res = hfsplus_submit_bio(sb->s_bdev, *part_start + HFS_PMAP_BLK,
135 data, READ); 135 data, READ);
136 if (res) 136 if (res)
137 return res; 137 goto out;
138 138
139 switch (be16_to_cpu(*((__be16 *)data))) { 139 switch (be16_to_cpu(*((__be16 *)data))) {
140 case HFS_OLD_PMAP_MAGIC: 140 case HFS_OLD_PMAP_MAGIC:
@@ -147,7 +147,7 @@ int hfs_part_find(struct super_block *sb,
147 res = -ENOENT; 147 res = -ENOENT;
148 break; 148 break;
149 } 149 }
150 150out:
151 kfree(data); 151 kfree(data);
152 return res; 152 return res;
153} 153}
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 9a3b4795f43c..b49b55584c84 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -338,20 +338,22 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
338 struct inode *root, *inode; 338 struct inode *root, *inode;
339 struct qstr str; 339 struct qstr str;
340 struct nls_table *nls = NULL; 340 struct nls_table *nls = NULL;
341 int err = -EINVAL; 341 int err;
342 342
343 err = -EINVAL;
343 sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); 344 sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
344 if (!sbi) 345 if (!sbi)
345 return -ENOMEM; 346 goto out;
346 347
347 sb->s_fs_info = sbi; 348 sb->s_fs_info = sbi;
348 mutex_init(&sbi->alloc_mutex); 349 mutex_init(&sbi->alloc_mutex);
349 mutex_init(&sbi->vh_mutex); 350 mutex_init(&sbi->vh_mutex);
350 hfsplus_fill_defaults(sbi); 351 hfsplus_fill_defaults(sbi);
352
353 err = -EINVAL;
351 if (!hfsplus_parse_options(data, sbi)) { 354 if (!hfsplus_parse_options(data, sbi)) {
352 printk(KERN_ERR "hfs: unable to parse mount options\n"); 355 printk(KERN_ERR "hfs: unable to parse mount options\n");
353 err = -EINVAL; 356 goto out_unload_nls;
354 goto cleanup;
355 } 357 }
356 358
357 /* temporarily use utf8 to correctly find the hidden dir below */ 359 /* temporarily use utf8 to correctly find the hidden dir below */
@@ -359,16 +361,14 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
359 sbi->nls = load_nls("utf8"); 361 sbi->nls = load_nls("utf8");
360 if (!sbi->nls) { 362 if (!sbi->nls) {
361 printk(KERN_ERR "hfs: unable to load nls for utf8\n"); 363 printk(KERN_ERR "hfs: unable to load nls for utf8\n");
362 err = -EINVAL; 364 goto out_unload_nls;
363 goto cleanup;
364 } 365 }
365 366
366 /* Grab the volume header */ 367 /* Grab the volume header */
367 if (hfsplus_read_wrapper(sb)) { 368 if (hfsplus_read_wrapper(sb)) {
368 if (!silent) 369 if (!silent)
369 printk(KERN_WARNING "hfs: unable to find HFS+ superblock\n"); 370 printk(KERN_WARNING "hfs: unable to find HFS+ superblock\n");
370 err = -EINVAL; 371 goto out_unload_nls;
371 goto cleanup;
372 } 372 }
373 vhdr = sbi->s_vhdr; 373 vhdr = sbi->s_vhdr;
374 374
@@ -377,7 +377,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
377 if (be16_to_cpu(vhdr->version) < HFSPLUS_MIN_VERSION || 377 if (be16_to_cpu(vhdr->version) < HFSPLUS_MIN_VERSION ||
378 be16_to_cpu(vhdr->version) > HFSPLUS_CURRENT_VERSION) { 378 be16_to_cpu(vhdr->version) > HFSPLUS_CURRENT_VERSION) {
379 printk(KERN_ERR "hfs: wrong filesystem version\n"); 379 printk(KERN_ERR "hfs: wrong filesystem version\n");
380 goto cleanup; 380 goto out_free_vhdr;
381 } 381 }
382 sbi->total_blocks = be32_to_cpu(vhdr->total_blocks); 382 sbi->total_blocks = be32_to_cpu(vhdr->total_blocks);
383 sbi->free_blocks = be32_to_cpu(vhdr->free_blocks); 383 sbi->free_blocks = be32_to_cpu(vhdr->free_blocks);
@@ -421,19 +421,19 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
421 sbi->ext_tree = hfs_btree_open(sb, HFSPLUS_EXT_CNID); 421 sbi->ext_tree = hfs_btree_open(sb, HFSPLUS_EXT_CNID);
422 if (!sbi->ext_tree) { 422 if (!sbi->ext_tree) {
423 printk(KERN_ERR "hfs: failed to load extents file\n"); 423 printk(KERN_ERR "hfs: failed to load extents file\n");
424 goto cleanup; 424 goto out_free_vhdr;
425 } 425 }
426 sbi->cat_tree = hfs_btree_open(sb, HFSPLUS_CAT_CNID); 426 sbi->cat_tree = hfs_btree_open(sb, HFSPLUS_CAT_CNID);
427 if (!sbi->cat_tree) { 427 if (!sbi->cat_tree) {
428 printk(KERN_ERR "hfs: failed to load catalog file\n"); 428 printk(KERN_ERR "hfs: failed to load catalog file\n");
429 goto cleanup; 429 goto out_close_ext_tree;
430 } 430 }
431 431
432 inode = hfsplus_iget(sb, HFSPLUS_ALLOC_CNID); 432 inode = hfsplus_iget(sb, HFSPLUS_ALLOC_CNID);
433 if (IS_ERR(inode)) { 433 if (IS_ERR(inode)) {
434 printk(KERN_ERR "hfs: failed to load allocation file\n"); 434 printk(KERN_ERR "hfs: failed to load allocation file\n");
435 err = PTR_ERR(inode); 435 err = PTR_ERR(inode);
436 goto cleanup; 436 goto out_close_cat_tree;
437 } 437 }
438 sbi->alloc_file = inode; 438 sbi->alloc_file = inode;
439 439
@@ -442,14 +442,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
442 if (IS_ERR(root)) { 442 if (IS_ERR(root)) {
443 printk(KERN_ERR "hfs: failed to load root directory\n"); 443 printk(KERN_ERR "hfs: failed to load root directory\n");
444 err = PTR_ERR(root); 444 err = PTR_ERR(root);
445 goto cleanup; 445 goto out_put_alloc_file;
446 }
447 sb->s_d_op = &hfsplus_dentry_operations;
448 sb->s_root = d_alloc_root(root);
449 if (!sb->s_root) {
450 iput(root);
451 err = -ENOMEM;
452 goto cleanup;
453 } 446 }
454 447
455 str.len = sizeof(HFSP_HIDDENDIR_NAME) - 1; 448 str.len = sizeof(HFSP_HIDDENDIR_NAME) - 1;
@@ -459,46 +452,69 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
459 if (!hfs_brec_read(&fd, &entry, sizeof(entry))) { 452 if (!hfs_brec_read(&fd, &entry, sizeof(entry))) {
460 hfs_find_exit(&fd); 453 hfs_find_exit(&fd);
461 if (entry.type != cpu_to_be16(HFSPLUS_FOLDER)) 454 if (entry.type != cpu_to_be16(HFSPLUS_FOLDER))
462 goto cleanup; 455 goto out_put_root;
463 inode = hfsplus_iget(sb, be32_to_cpu(entry.folder.id)); 456 inode = hfsplus_iget(sb, be32_to_cpu(entry.folder.id));
464 if (IS_ERR(inode)) { 457 if (IS_ERR(inode)) {
465 err = PTR_ERR(inode); 458 err = PTR_ERR(inode);
466 goto cleanup; 459 goto out_put_root;
467 } 460 }
468 sbi->hidden_dir = inode; 461 sbi->hidden_dir = inode;
469 } else 462 } else
470 hfs_find_exit(&fd); 463 hfs_find_exit(&fd);
471 464
472 if (sb->s_flags & MS_RDONLY) 465 if (!(sb->s_flags & MS_RDONLY)) {
473 goto out; 466 /*
467 * H+LX == hfsplusutils, H+Lx == this driver, H+lx is unused
468 * all three are registered with Apple for our use
469 */
470 vhdr->last_mount_vers = cpu_to_be32(HFSP_MOUNT_VERSION);
471 vhdr->modify_date = hfsp_now2mt();
472 be32_add_cpu(&vhdr->write_count, 1);
473 vhdr->attributes &= cpu_to_be32(~HFSPLUS_VOL_UNMNT);
474 vhdr->attributes |= cpu_to_be32(HFSPLUS_VOL_INCNSTNT);
475 hfsplus_sync_fs(sb, 1);
474 476
475 /* H+LX == hfsplusutils, H+Lx == this driver, H+lx is unused 477 if (!sbi->hidden_dir) {
476 * all three are registered with Apple for our use 478 mutex_lock(&sbi->vh_mutex);
477 */ 479 sbi->hidden_dir = hfsplus_new_inode(sb, S_IFDIR);
478 vhdr->last_mount_vers = cpu_to_be32(HFSP_MOUNT_VERSION); 480 hfsplus_create_cat(sbi->hidden_dir->i_ino, root, &str,
479 vhdr->modify_date = hfsp_now2mt(); 481 sbi->hidden_dir);
480 be32_add_cpu(&vhdr->write_count, 1); 482 mutex_unlock(&sbi->vh_mutex);
481 vhdr->attributes &= cpu_to_be32(~HFSPLUS_VOL_UNMNT); 483
482 vhdr->attributes |= cpu_to_be32(HFSPLUS_VOL_INCNSTNT); 484 hfsplus_mark_inode_dirty(sbi->hidden_dir,
483 hfsplus_sync_fs(sb, 1); 485 HFSPLUS_I_CAT_DIRTY);
484 486 }
485 if (!sbi->hidden_dir) {
486 mutex_lock(&sbi->vh_mutex);
487 sbi->hidden_dir = hfsplus_new_inode(sb, S_IFDIR);
488 hfsplus_create_cat(sbi->hidden_dir->i_ino, sb->s_root->d_inode,
489 &str, sbi->hidden_dir);
490 mutex_unlock(&sbi->vh_mutex);
491
492 hfsplus_mark_inode_dirty(sbi->hidden_dir, HFSPLUS_I_CAT_DIRTY);
493 } 487 }
494out: 488
489 sb->s_d_op = &hfsplus_dentry_operations;
490 sb->s_root = d_alloc_root(root);
491 if (!sb->s_root) {
492 err = -ENOMEM;
493 goto out_put_hidden_dir;
494 }
495
495 unload_nls(sbi->nls); 496 unload_nls(sbi->nls);
496 sbi->nls = nls; 497 sbi->nls = nls;
497 return 0; 498 return 0;
498 499
499cleanup: 500out_put_hidden_dir:
500 hfsplus_put_super(sb); 501 iput(sbi->hidden_dir);
502out_put_root:
503 iput(sbi->alloc_file);
504out_put_alloc_file:
505 iput(sbi->alloc_file);
506out_close_cat_tree:
507 hfs_btree_close(sbi->cat_tree);
508out_close_ext_tree:
509 hfs_btree_close(sbi->ext_tree);
510out_free_vhdr:
511 kfree(sbi->s_vhdr);
512 kfree(sbi->s_backup_vhdr);
513out_unload_nls:
514 unload_nls(sbi->nls);
501 unload_nls(nls); 515 unload_nls(nls);
516 kfree(sbi);
517out:
502 return err; 518 return err;
503} 519}
504 520
diff --git a/fs/hfsplus/wrapper.c b/fs/hfsplus/wrapper.c
index 196231794f64..3031d81f5f0f 100644
--- a/fs/hfsplus/wrapper.c
+++ b/fs/hfsplus/wrapper.c
@@ -167,7 +167,7 @@ reread:
167 break; 167 break;
168 case cpu_to_be16(HFSP_WRAP_MAGIC): 168 case cpu_to_be16(HFSP_WRAP_MAGIC):
169 if (!hfsplus_read_mdb(sbi->s_vhdr, &wd)) 169 if (!hfsplus_read_mdb(sbi->s_vhdr, &wd))
170 goto out; 170 goto out_free_backup_vhdr;
171 wd.ablk_size >>= HFSPLUS_SECTOR_SHIFT; 171 wd.ablk_size >>= HFSPLUS_SECTOR_SHIFT;
172 part_start += wd.ablk_start + wd.embed_start * wd.ablk_size; 172 part_start += wd.ablk_start + wd.embed_start * wd.ablk_size;
173 part_size = wd.embed_count * wd.ablk_size; 173 part_size = wd.embed_count * wd.ablk_size;
@@ -179,7 +179,7 @@ reread:
179 * (should do this only for cdrom/loop though) 179 * (should do this only for cdrom/loop though)
180 */ 180 */
181 if (hfs_part_find(sb, &part_start, &part_size)) 181 if (hfs_part_find(sb, &part_start, &part_size))
182 goto out; 182 goto out_free_backup_vhdr;
183 goto reread; 183 goto reread;
184 } 184 }
185 185
diff --git a/fs/inode.c b/fs/inode.c
index da85e56378f3..0647d80accf6 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -295,6 +295,20 @@ static void destroy_inode(struct inode *inode)
295 call_rcu(&inode->i_rcu, i_callback); 295 call_rcu(&inode->i_rcu, i_callback);
296} 296}
297 297
298void address_space_init_once(struct address_space *mapping)
299{
300 memset(mapping, 0, sizeof(*mapping));
301 INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC);
302 spin_lock_init(&mapping->tree_lock);
303 spin_lock_init(&mapping->i_mmap_lock);
304 INIT_LIST_HEAD(&mapping->private_list);
305 spin_lock_init(&mapping->private_lock);
306 INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap);
307 INIT_LIST_HEAD(&mapping->i_mmap_nonlinear);
308 mutex_init(&mapping->unmap_mutex);
309}
310EXPORT_SYMBOL(address_space_init_once);
311
298/* 312/*
299 * These are initializations that only need to be done 313 * These are initializations that only need to be done
300 * once, because the fields are idempotent across use 314 * once, because the fields are idempotent across use
@@ -308,13 +322,7 @@ void inode_init_once(struct inode *inode)
308 INIT_LIST_HEAD(&inode->i_devices); 322 INIT_LIST_HEAD(&inode->i_devices);
309 INIT_LIST_HEAD(&inode->i_wb_list); 323 INIT_LIST_HEAD(&inode->i_wb_list);
310 INIT_LIST_HEAD(&inode->i_lru); 324 INIT_LIST_HEAD(&inode->i_lru);
311 INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC); 325 address_space_init_once(&inode->i_data);
312 spin_lock_init(&inode->i_data.tree_lock);
313 spin_lock_init(&inode->i_data.i_mmap_lock);
314 INIT_LIST_HEAD(&inode->i_data.private_list);
315 spin_lock_init(&inode->i_data.private_lock);
316 INIT_RAW_PRIO_TREE_ROOT(&inode->i_data.i_mmap);
317 INIT_LIST_HEAD(&inode->i_data.i_mmap_nonlinear);
318 i_size_ordered_init(inode); 326 i_size_ordered_init(inode);
319#ifdef CONFIG_FSNOTIFY 327#ifdef CONFIG_FSNOTIFY
320 INIT_HLIST_HEAD(&inode->i_fsnotify_marks); 328 INIT_HLIST_HEAD(&inode->i_fsnotify_marks);
@@ -540,11 +548,14 @@ void evict_inodes(struct super_block *sb)
540/** 548/**
541 * invalidate_inodes - attempt to free all inodes on a superblock 549 * invalidate_inodes - attempt to free all inodes on a superblock
542 * @sb: superblock to operate on 550 * @sb: superblock to operate on
551 * @kill_dirty: flag to guide handling of dirty inodes
543 * 552 *
544 * Attempts to free all inodes for a given superblock. If there were any 553 * Attempts to free all inodes for a given superblock. If there were any
545 * busy inodes return a non-zero value, else zero. 554 * busy inodes return a non-zero value, else zero.
555 * If @kill_dirty is set, discard dirty inodes too, otherwise treat
556 * them as busy.
546 */ 557 */
547int invalidate_inodes(struct super_block *sb) 558int invalidate_inodes(struct super_block *sb, bool kill_dirty)
548{ 559{
549 int busy = 0; 560 int busy = 0;
550 struct inode *inode, *next; 561 struct inode *inode, *next;
@@ -556,6 +567,10 @@ int invalidate_inodes(struct super_block *sb)
556 list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) { 567 list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
557 if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) 568 if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE))
558 continue; 569 continue;
570 if (inode->i_state & I_DIRTY && !kill_dirty) {
571 busy = 1;
572 continue;
573 }
559 if (atomic_read(&inode->i_count)) { 574 if (atomic_read(&inode->i_count)) {
560 busy = 1; 575 busy = 1;
561 continue; 576 continue;
diff --git a/fs/internal.h b/fs/internal.h
index 0663568b1247..9b976b57d7fe 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -112,4 +112,4 @@ extern void release_open_intent(struct nameidata *);
112 */ 112 */
113extern int get_nr_dirty_inodes(void); 113extern int get_nr_dirty_inodes(void);
114extern void evict_inodes(struct super_block *); 114extern void evict_inodes(struct super_block *);
115extern int invalidate_inodes(struct super_block *); 115extern int invalidate_inodes(struct super_block *, bool);
diff --git a/fs/ioctl.c b/fs/ioctl.c
index a59635e295fa..1eebeb72b202 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -273,6 +273,13 @@ int __generic_block_fiemap(struct inode *inode,
273 len = isize; 273 len = isize;
274 } 274 }
275 275
276 /*
277 * Some filesystems can't deal with being asked to map less than
278 * blocksize, so make sure our len is at least block length.
279 */
280 if (logical_to_blk(inode, len) == 0)
281 len = blk_to_logical(inode, 1);
282
276 start_blk = logical_to_blk(inode, start); 283 start_blk = logical_to_blk(inode, start);
277 last_blk = logical_to_blk(inode, start + len - 1); 284 last_blk = logical_to_blk(inode, start + len - 1);
278 285
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 9e4686900f18..97e73469b2c4 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -473,7 +473,8 @@ int __jbd2_log_space_left(journal_t *journal)
473} 473}
474 474
475/* 475/*
476 * Called under j_state_lock. Returns true if a transaction commit was started. 476 * Called with j_state_lock locked for writing.
477 * Returns true if a transaction commit was started.
477 */ 478 */
478int __jbd2_log_start_commit(journal_t *journal, tid_t target) 479int __jbd2_log_start_commit(journal_t *journal, tid_t target)
479{ 480{
@@ -520,11 +521,13 @@ int jbd2_journal_force_commit_nested(journal_t *journal)
520{ 521{
521 transaction_t *transaction = NULL; 522 transaction_t *transaction = NULL;
522 tid_t tid; 523 tid_t tid;
524 int need_to_start = 0;
523 525
524 read_lock(&journal->j_state_lock); 526 read_lock(&journal->j_state_lock);
525 if (journal->j_running_transaction && !current->journal_info) { 527 if (journal->j_running_transaction && !current->journal_info) {
526 transaction = journal->j_running_transaction; 528 transaction = journal->j_running_transaction;
527 __jbd2_log_start_commit(journal, transaction->t_tid); 529 if (!tid_geq(journal->j_commit_request, transaction->t_tid))
530 need_to_start = 1;
528 } else if (journal->j_committing_transaction) 531 } else if (journal->j_committing_transaction)
529 transaction = journal->j_committing_transaction; 532 transaction = journal->j_committing_transaction;
530 533
@@ -535,6 +538,8 @@ int jbd2_journal_force_commit_nested(journal_t *journal)
535 538
536 tid = transaction->t_tid; 539 tid = transaction->t_tid;
537 read_unlock(&journal->j_state_lock); 540 read_unlock(&journal->j_state_lock);
541 if (need_to_start)
542 jbd2_log_start_commit(journal, tid);
538 jbd2_log_wait_commit(journal, tid); 543 jbd2_log_wait_commit(journal, tid);
539 return 1; 544 return 1;
540} 545}
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index faad2bd787c7..1d1191050f99 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -117,10 +117,10 @@ static inline void update_t_max_wait(transaction_t *transaction)
117static int start_this_handle(journal_t *journal, handle_t *handle, 117static int start_this_handle(journal_t *journal, handle_t *handle,
118 int gfp_mask) 118 int gfp_mask)
119{ 119{
120 transaction_t *transaction; 120 transaction_t *transaction, *new_transaction = NULL;
121 int needed; 121 tid_t tid;
122 int nblocks = handle->h_buffer_credits; 122 int needed, need_to_start;
123 transaction_t *new_transaction = NULL; 123 int nblocks = handle->h_buffer_credits;
124 124
125 if (nblocks > journal->j_max_transaction_buffers) { 125 if (nblocks > journal->j_max_transaction_buffers) {
126 printk(KERN_ERR "JBD: %s wants too many credits (%d > %d)\n", 126 printk(KERN_ERR "JBD: %s wants too many credits (%d > %d)\n",
@@ -222,8 +222,11 @@ repeat:
222 atomic_sub(nblocks, &transaction->t_outstanding_credits); 222 atomic_sub(nblocks, &transaction->t_outstanding_credits);
223 prepare_to_wait(&journal->j_wait_transaction_locked, &wait, 223 prepare_to_wait(&journal->j_wait_transaction_locked, &wait,
224 TASK_UNINTERRUPTIBLE); 224 TASK_UNINTERRUPTIBLE);
225 __jbd2_log_start_commit(journal, transaction->t_tid); 225 tid = transaction->t_tid;
226 need_to_start = !tid_geq(journal->j_commit_request, tid);
226 read_unlock(&journal->j_state_lock); 227 read_unlock(&journal->j_state_lock);
228 if (need_to_start)
229 jbd2_log_start_commit(journal, tid);
227 schedule(); 230 schedule();
228 finish_wait(&journal->j_wait_transaction_locked, &wait); 231 finish_wait(&journal->j_wait_transaction_locked, &wait);
229 goto repeat; 232 goto repeat;
@@ -442,7 +445,8 @@ int jbd2__journal_restart(handle_t *handle, int nblocks, int gfp_mask)
442{ 445{
443 transaction_t *transaction = handle->h_transaction; 446 transaction_t *transaction = handle->h_transaction;
444 journal_t *journal = transaction->t_journal; 447 journal_t *journal = transaction->t_journal;
445 int ret; 448 tid_t tid;
449 int need_to_start, ret;
446 450
447 /* If we've had an abort of any type, don't even think about 451 /* If we've had an abort of any type, don't even think about
448 * actually doing the restart! */ 452 * actually doing the restart! */
@@ -465,8 +469,11 @@ int jbd2__journal_restart(handle_t *handle, int nblocks, int gfp_mask)
465 spin_unlock(&transaction->t_handle_lock); 469 spin_unlock(&transaction->t_handle_lock);
466 470
467 jbd_debug(2, "restarting handle %p\n", handle); 471 jbd_debug(2, "restarting handle %p\n", handle);
468 __jbd2_log_start_commit(journal, transaction->t_tid); 472 tid = transaction->t_tid;
473 need_to_start = !tid_geq(journal->j_commit_request, tid);
469 read_unlock(&journal->j_state_lock); 474 read_unlock(&journal->j_state_lock);
475 if (need_to_start)
476 jbd2_log_start_commit(journal, tid);
470 477
471 lock_map_release(&handle->h_lockdep_map); 478 lock_map_release(&handle->h_lockdep_map);
472 handle->h_buffer_credits = nblocks; 479 handle->h_buffer_credits = nblocks;
diff --git a/fs/minix/namei.c b/fs/minix/namei.c
index ce7337ddfdbf..6e6777f1b4b2 100644
--- a/fs/minix/namei.c
+++ b/fs/minix/namei.c
@@ -213,7 +213,6 @@ static int minix_rename(struct inode * old_dir, struct dentry *old_dentry,
213 new_de = minix_find_entry(new_dentry, &new_page); 213 new_de = minix_find_entry(new_dentry, &new_page);
214 if (!new_de) 214 if (!new_de)
215 goto out_dir; 215 goto out_dir;
216 inode_inc_link_count(old_inode);
217 minix_set_link(new_de, new_page, old_inode); 216 minix_set_link(new_de, new_page, old_inode);
218 new_inode->i_ctime = CURRENT_TIME_SEC; 217 new_inode->i_ctime = CURRENT_TIME_SEC;
219 if (dir_de) 218 if (dir_de)
@@ -225,18 +224,15 @@ static int minix_rename(struct inode * old_dir, struct dentry *old_dentry,
225 if (new_dir->i_nlink >= info->s_link_max) 224 if (new_dir->i_nlink >= info->s_link_max)
226 goto out_dir; 225 goto out_dir;
227 } 226 }
228 inode_inc_link_count(old_inode);
229 err = minix_add_link(new_dentry, old_inode); 227 err = minix_add_link(new_dentry, old_inode);
230 if (err) { 228 if (err)
231 inode_dec_link_count(old_inode);
232 goto out_dir; 229 goto out_dir;
233 }
234 if (dir_de) 230 if (dir_de)
235 inode_inc_link_count(new_dir); 231 inode_inc_link_count(new_dir);
236 } 232 }
237 233
238 minix_delete_entry(old_de, old_page); 234 minix_delete_entry(old_de, old_page);
239 inode_dec_link_count(old_inode); 235 mark_inode_dirty(old_inode);
240 236
241 if (dir_de) { 237 if (dir_de) {
242 minix_set_link(dir_de, dir_page, new_dir); 238 minix_set_link(dir_de, dir_page, new_dir);
diff --git a/fs/namei.c b/fs/namei.c
index 7d77f24d32a9..0087cf9c2c6b 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -455,14 +455,6 @@ static int nameidata_dentry_drop_rcu(struct nameidata *nd, struct dentry *dentry
455 struct fs_struct *fs = current->fs; 455 struct fs_struct *fs = current->fs;
456 struct dentry *parent = nd->path.dentry; 456 struct dentry *parent = nd->path.dentry;
457 457
458 /*
459 * It can be possible to revalidate the dentry that we started
460 * the path walk with. force_reval_path may also revalidate the
461 * dentry already committed to the nameidata.
462 */
463 if (unlikely(parent == dentry))
464 return nameidata_drop_rcu(nd);
465
466 BUG_ON(!(nd->flags & LOOKUP_RCU)); 458 BUG_ON(!(nd->flags & LOOKUP_RCU));
467 if (nd->root.mnt) { 459 if (nd->root.mnt) {
468 spin_lock(&fs->lock); 460 spin_lock(&fs->lock);
@@ -561,39 +553,25 @@ static inline int nameidata_drop_rcu_last_maybe(struct nameidata *nd)
561 */ 553 */
562void release_open_intent(struct nameidata *nd) 554void release_open_intent(struct nameidata *nd)
563{ 555{
564 if (nd->intent.open.file->f_path.dentry == NULL) 556 struct file *file = nd->intent.open.file;
565 put_filp(nd->intent.open.file);
566 else
567 fput(nd->intent.open.file);
568}
569
570/*
571 * Call d_revalidate and handle filesystems that request rcu-walk
572 * to be dropped. This may be called and return in rcu-walk mode,
573 * regardless of success or error. If -ECHILD is returned, the caller
574 * must return -ECHILD back up the path walk stack so path walk may
575 * be restarted in ref-walk mode.
576 */
577static int d_revalidate(struct dentry *dentry, struct nameidata *nd)
578{
579 int status;
580 557
581 status = dentry->d_op->d_revalidate(dentry, nd); 558 if (file && !IS_ERR(file)) {
582 if (status == -ECHILD) { 559 if (file->f_path.dentry == NULL)
583 if (nameidata_dentry_drop_rcu(nd, dentry)) 560 put_filp(file);
584 return status; 561 else
585 status = dentry->d_op->d_revalidate(dentry, nd); 562 fput(file);
586 } 563 }
564}
587 565
588 return status; 566static inline int d_revalidate(struct dentry *dentry, struct nameidata *nd)
567{
568 return dentry->d_op->d_revalidate(dentry, nd);
589} 569}
590 570
591static inline struct dentry * 571static struct dentry *
592do_revalidate(struct dentry *dentry, struct nameidata *nd) 572do_revalidate(struct dentry *dentry, struct nameidata *nd)
593{ 573{
594 int status; 574 int status = d_revalidate(dentry, nd);
595
596 status = d_revalidate(dentry, nd);
597 if (unlikely(status <= 0)) { 575 if (unlikely(status <= 0)) {
598 /* 576 /*
599 * The dentry failed validation. 577 * The dentry failed validation.
@@ -602,24 +580,39 @@ do_revalidate(struct dentry *dentry, struct nameidata *nd)
602 * to return a fail status. 580 * to return a fail status.
603 */ 581 */
604 if (status < 0) { 582 if (status < 0) {
605 /* If we're in rcu-walk, we don't have a ref */ 583 dput(dentry);
606 if (!(nd->flags & LOOKUP_RCU))
607 dput(dentry);
608 dentry = ERR_PTR(status); 584 dentry = ERR_PTR(status);
609 585 } else if (!d_invalidate(dentry)) {
610 } else { 586 dput(dentry);
611 /* Don't d_invalidate in rcu-walk mode */ 587 dentry = NULL;
612 if (nameidata_dentry_drop_rcu_maybe(nd, dentry))
613 return ERR_PTR(-ECHILD);
614 if (!d_invalidate(dentry)) {
615 dput(dentry);
616 dentry = NULL;
617 }
618 } 588 }
619 } 589 }
620 return dentry; 590 return dentry;
621} 591}
622 592
593static inline struct dentry *
594do_revalidate_rcu(struct dentry *dentry, struct nameidata *nd)
595{
596 int status = d_revalidate(dentry, nd);
597 if (likely(status > 0))
598 return dentry;
599 if (status == -ECHILD) {
600 if (nameidata_dentry_drop_rcu(nd, dentry))
601 return ERR_PTR(-ECHILD);
602 return do_revalidate(dentry, nd);
603 }
604 if (status < 0)
605 return ERR_PTR(status);
606 /* Don't d_invalidate in rcu-walk mode */
607 if (nameidata_dentry_drop_rcu(nd, dentry))
608 return ERR_PTR(-ECHILD);
609 if (!d_invalidate(dentry)) {
610 dput(dentry);
611 dentry = NULL;
612 }
613 return dentry;
614}
615
623static inline int need_reval_dot(struct dentry *dentry) 616static inline int need_reval_dot(struct dentry *dentry)
624{ 617{
625 if (likely(!(dentry->d_flags & DCACHE_OP_REVALIDATE))) 618 if (likely(!(dentry->d_flags & DCACHE_OP_REVALIDATE)))
@@ -664,9 +657,6 @@ force_reval_path(struct path *path, struct nameidata *nd)
664 return 0; 657 return 0;
665 658
666 if (!status) { 659 if (!status) {
667 /* Don't d_invalidate in rcu-walk mode */
668 if (nameidata_drop_rcu(nd))
669 return -ECHILD;
670 d_invalidate(dentry); 660 d_invalidate(dentry);
671 status = -ESTALE; 661 status = -ESTALE;
672 } 662 }
@@ -773,6 +763,8 @@ __do_follow_link(const struct path *link, struct nameidata *nd, void **p)
773 int error; 763 int error;
774 struct dentry *dentry = link->dentry; 764 struct dentry *dentry = link->dentry;
775 765
766 BUG_ON(nd->flags & LOOKUP_RCU);
767
776 touch_atime(link->mnt, dentry); 768 touch_atime(link->mnt, dentry);
777 nd_set_link(nd, NULL); 769 nd_set_link(nd, NULL);
778 770
@@ -803,10 +795,16 @@ __do_follow_link(const struct path *link, struct nameidata *nd, void **p)
803 * Without that kind of total limit, nasty chains of consecutive 795 * Without that kind of total limit, nasty chains of consecutive
804 * symlinks can cause almost arbitrarily long lookups. 796 * symlinks can cause almost arbitrarily long lookups.
805 */ 797 */
806static inline int do_follow_link(struct path *path, struct nameidata *nd) 798static inline int do_follow_link(struct inode *inode, struct path *path, struct nameidata *nd)
807{ 799{
808 void *cookie; 800 void *cookie;
809 int err = -ELOOP; 801 int err = -ELOOP;
802
803 /* We drop rcu-walk here */
804 if (nameidata_dentry_drop_rcu_maybe(nd, path->dentry))
805 return -ECHILD;
806 BUG_ON(inode != path->dentry->d_inode);
807
810 if (current->link_count >= MAX_NESTED_LINKS) 808 if (current->link_count >= MAX_NESTED_LINKS)
811 goto loop; 809 goto loop;
812 if (current->total_link_count >= 40) 810 if (current->total_link_count >= 40)
@@ -1251,9 +1249,15 @@ static int do_lookup(struct nameidata *nd, struct qstr *name,
1251 return -ECHILD; 1249 return -ECHILD;
1252 1250
1253 nd->seq = seq; 1251 nd->seq = seq;
1254 if (dentry->d_flags & DCACHE_OP_REVALIDATE) 1252 if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) {
1255 goto need_revalidate; 1253 dentry = do_revalidate_rcu(dentry, nd);
1256done2: 1254 if (!dentry)
1255 goto need_lookup;
1256 if (IS_ERR(dentry))
1257 goto fail;
1258 if (!(nd->flags & LOOKUP_RCU))
1259 goto done;
1260 }
1257 path->mnt = mnt; 1261 path->mnt = mnt;
1258 path->dentry = dentry; 1262 path->dentry = dentry;
1259 if (likely(__follow_mount_rcu(nd, path, inode, false))) 1263 if (likely(__follow_mount_rcu(nd, path, inode, false)))
@@ -1266,8 +1270,13 @@ done2:
1266 if (!dentry) 1270 if (!dentry)
1267 goto need_lookup; 1271 goto need_lookup;
1268found: 1272found:
1269 if (dentry->d_flags & DCACHE_OP_REVALIDATE) 1273 if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) {
1270 goto need_revalidate; 1274 dentry = do_revalidate(dentry, nd);
1275 if (!dentry)
1276 goto need_lookup;
1277 if (IS_ERR(dentry))
1278 goto fail;
1279 }
1271done: 1280done:
1272 path->mnt = mnt; 1281 path->mnt = mnt;
1273 path->dentry = dentry; 1282 path->dentry = dentry;
@@ -1309,16 +1318,6 @@ need_lookup:
1309 mutex_unlock(&dir->i_mutex); 1318 mutex_unlock(&dir->i_mutex);
1310 goto found; 1319 goto found;
1311 1320
1312need_revalidate:
1313 dentry = do_revalidate(dentry, nd);
1314 if (!dentry)
1315 goto need_lookup;
1316 if (IS_ERR(dentry))
1317 goto fail;
1318 if (nd->flags & LOOKUP_RCU)
1319 goto done2;
1320 goto done;
1321
1322fail: 1321fail:
1323 return PTR_ERR(dentry); 1322 return PTR_ERR(dentry);
1324} 1323}
@@ -1415,11 +1414,7 @@ exec_again:
1415 goto out_dput; 1414 goto out_dput;
1416 1415
1417 if (inode->i_op->follow_link) { 1416 if (inode->i_op->follow_link) {
1418 /* We commonly drop rcu-walk here */ 1417 err = do_follow_link(inode, &next, nd);
1419 if (nameidata_dentry_drop_rcu_maybe(nd, next.dentry))
1420 return -ECHILD;
1421 BUG_ON(inode != next.dentry->d_inode);
1422 err = do_follow_link(&next, nd);
1423 if (err) 1418 if (err)
1424 goto return_err; 1419 goto return_err;
1425 nd->inode = nd->path.dentry->d_inode; 1420 nd->inode = nd->path.dentry->d_inode;
@@ -1463,10 +1458,7 @@ last_component:
1463 break; 1458 break;
1464 if (inode && unlikely(inode->i_op->follow_link) && 1459 if (inode && unlikely(inode->i_op->follow_link) &&
1465 (lookup_flags & LOOKUP_FOLLOW)) { 1460 (lookup_flags & LOOKUP_FOLLOW)) {
1466 if (nameidata_dentry_drop_rcu_maybe(nd, next.dentry)) 1461 err = do_follow_link(inode, &next, nd);
1467 return -ECHILD;
1468 BUG_ON(inode != next.dentry->d_inode);
1469 err = do_follow_link(&next, nd);
1470 if (err) 1462 if (err)
1471 goto return_err; 1463 goto return_err;
1472 nd->inode = nd->path.dentry->d_inode; 1464 nd->inode = nd->path.dentry->d_inode;
@@ -1500,12 +1492,15 @@ return_reval:
1500 * We may need to check the cached dentry for staleness. 1492 * We may need to check the cached dentry for staleness.
1501 */ 1493 */
1502 if (need_reval_dot(nd->path.dentry)) { 1494 if (need_reval_dot(nd->path.dentry)) {
1495 if (nameidata_drop_rcu_last_maybe(nd))
1496 return -ECHILD;
1503 /* Note: we do not d_invalidate() */ 1497 /* Note: we do not d_invalidate() */
1504 err = d_revalidate(nd->path.dentry, nd); 1498 err = d_revalidate(nd->path.dentry, nd);
1505 if (!err) 1499 if (!err)
1506 err = -ESTALE; 1500 err = -ESTALE;
1507 if (err < 0) 1501 if (err < 0)
1508 break; 1502 break;
1503 return 0;
1509 } 1504 }
1510return_base: 1505return_base:
1511 if (nameidata_drop_rcu_last_maybe(nd)) 1506 if (nameidata_drop_rcu_last_maybe(nd))
@@ -2265,8 +2260,6 @@ static struct file *finish_open(struct nameidata *nd,
2265 return filp; 2260 return filp;
2266 2261
2267exit: 2262exit:
2268 if (!IS_ERR(nd->intent.open.file))
2269 release_open_intent(nd);
2270 path_put(&nd->path); 2263 path_put(&nd->path);
2271 return ERR_PTR(error); 2264 return ERR_PTR(error);
2272} 2265}
@@ -2389,8 +2382,6 @@ exit_mutex_unlock:
2389exit_dput: 2382exit_dput:
2390 path_put_conditional(path, nd); 2383 path_put_conditional(path, nd);
2391exit: 2384exit:
2392 if (!IS_ERR(nd->intent.open.file))
2393 release_open_intent(nd);
2394 path_put(&nd->path); 2385 path_put(&nd->path);
2395 return ERR_PTR(error); 2386 return ERR_PTR(error);
2396} 2387}
@@ -2477,6 +2468,7 @@ struct file *do_filp_open(int dfd, const char *pathname,
2477 } 2468 }
2478 audit_inode(pathname, nd.path.dentry); 2469 audit_inode(pathname, nd.path.dentry);
2479 filp = finish_open(&nd, open_flag, acc_mode); 2470 filp = finish_open(&nd, open_flag, acc_mode);
2471 release_open_intent(&nd);
2480 return filp; 2472 return filp;
2481 2473
2482creat: 2474creat:
@@ -2553,6 +2545,7 @@ out:
2553 path_put(&nd.root); 2545 path_put(&nd.root);
2554 if (filp == ERR_PTR(-ESTALE) && !(flags & LOOKUP_REVAL)) 2546 if (filp == ERR_PTR(-ESTALE) && !(flags & LOOKUP_REVAL))
2555 goto reval; 2547 goto reval;
2548 release_open_intent(&nd);
2556 return filp; 2549 return filp;
2557 2550
2558exit_dput: 2551exit_dput:
@@ -2560,8 +2553,6 @@ exit_dput:
2560out_path: 2553out_path:
2561 path_put(&nd.path); 2554 path_put(&nd.path);
2562out_filp: 2555out_filp:
2563 if (!IS_ERR(nd.intent.open.file))
2564 release_open_intent(&nd);
2565 filp = ERR_PTR(error); 2556 filp = ERR_PTR(error);
2566 goto out; 2557 goto out;
2567} 2558}
diff --git a/fs/namespace.c b/fs/namespace.c
index 7b0b95371696..d1edf26025dc 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1244,7 +1244,7 @@ static int do_umount(struct vfsmount *mnt, int flags)
1244 */ 1244 */
1245 br_write_lock(vfsmount_lock); 1245 br_write_lock(vfsmount_lock);
1246 if (mnt_get_count(mnt) != 2) { 1246 if (mnt_get_count(mnt) != 2) {
1247 br_write_lock(vfsmount_lock); 1247 br_write_unlock(vfsmount_lock);
1248 return -EBUSY; 1248 return -EBUSY;
1249 } 1249 }
1250 br_write_unlock(vfsmount_lock); 1250 br_write_unlock(vfsmount_lock);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 78936a8f40ab..1ff76acc7e98 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -51,6 +51,7 @@
51#include <linux/sunrpc/bc_xprt.h> 51#include <linux/sunrpc/bc_xprt.h>
52#include <linux/xattr.h> 52#include <linux/xattr.h>
53#include <linux/utsname.h> 53#include <linux/utsname.h>
54#include <linux/mm.h>
54 55
55#include "nfs4_fs.h" 56#include "nfs4_fs.h"
56#include "delegation.h" 57#include "delegation.h"
@@ -3252,6 +3253,35 @@ static void buf_to_pages(const void *buf, size_t buflen,
3252 } 3253 }
3253} 3254}
3254 3255
3256static int buf_to_pages_noslab(const void *buf, size_t buflen,
3257 struct page **pages, unsigned int *pgbase)
3258{
3259 struct page *newpage, **spages;
3260 int rc = 0;
3261 size_t len;
3262 spages = pages;
3263
3264 do {
3265 len = min(PAGE_CACHE_SIZE, buflen);
3266 newpage = alloc_page(GFP_KERNEL);
3267
3268 if (newpage == NULL)
3269 goto unwind;
3270 memcpy(page_address(newpage), buf, len);
3271 buf += len;
3272 buflen -= len;
3273 *pages++ = newpage;
3274 rc++;
3275 } while (buflen != 0);
3276
3277 return rc;
3278
3279unwind:
3280 for(; rc > 0; rc--)
3281 __free_page(spages[rc-1]);
3282 return -ENOMEM;
3283}
3284
3255struct nfs4_cached_acl { 3285struct nfs4_cached_acl {
3256 int cached; 3286 int cached;
3257 size_t len; 3287 size_t len;
@@ -3420,13 +3450,23 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl
3420 .rpc_argp = &arg, 3450 .rpc_argp = &arg,
3421 .rpc_resp = &res, 3451 .rpc_resp = &res,
3422 }; 3452 };
3423 int ret; 3453 int ret, i;
3424 3454
3425 if (!nfs4_server_supports_acls(server)) 3455 if (!nfs4_server_supports_acls(server))
3426 return -EOPNOTSUPP; 3456 return -EOPNOTSUPP;
3457 i = buf_to_pages_noslab(buf, buflen, arg.acl_pages, &arg.acl_pgbase);
3458 if (i < 0)
3459 return i;
3427 nfs_inode_return_delegation(inode); 3460 nfs_inode_return_delegation(inode);
3428 buf_to_pages(buf, buflen, arg.acl_pages, &arg.acl_pgbase);
3429 ret = nfs4_call_sync(server, &msg, &arg, &res, 1); 3461 ret = nfs4_call_sync(server, &msg, &arg, &res, 1);
3462
3463 /*
3464 * Free each page after tx, so the only ref left is
3465 * held by the network stack
3466 */
3467 for (; i > 0; i--)
3468 put_page(pages[i-1]);
3469
3430 /* 3470 /*
3431 * Acl update can result in inode attribute update. 3471 * Acl update can result in inode attribute update.
3432 * so mark the attribute cache invalid. 3472 * so mark the attribute cache invalid.
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 3be975e18919..cde36cb0f348 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -484,7 +484,7 @@ static int decode_cb_sequence4res(struct xdr_stream *xdr,
484out: 484out:
485 return status; 485 return status;
486out_default: 486out_default:
487 return nfs_cb_stat_to_errno(status); 487 return nfs_cb_stat_to_errno(nfserr);
488} 488}
489 489
490/* 490/*
@@ -564,11 +564,9 @@ static int nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp,
564 if (unlikely(status)) 564 if (unlikely(status))
565 goto out; 565 goto out;
566 if (unlikely(nfserr != NFS4_OK)) 566 if (unlikely(nfserr != NFS4_OK))
567 goto out_default; 567 status = nfs_cb_stat_to_errno(nfserr);
568out: 568out:
569 return status; 569 return status;
570out_default:
571 return nfs_cb_stat_to_errno(status);
572} 570}
573 571
574/* 572/*
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index d98d0213285d..54b60bfceb8d 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -230,9 +230,6 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f
230 dp->dl_client = clp; 230 dp->dl_client = clp;
231 get_nfs4_file(fp); 231 get_nfs4_file(fp);
232 dp->dl_file = fp; 232 dp->dl_file = fp;
233 dp->dl_vfs_file = find_readable_file(fp);
234 get_file(dp->dl_vfs_file);
235 dp->dl_flock = NULL;
236 dp->dl_type = type; 233 dp->dl_type = type;
237 dp->dl_stateid.si_boot = boot_time; 234 dp->dl_stateid.si_boot = boot_time;
238 dp->dl_stateid.si_stateownerid = current_delegid++; 235 dp->dl_stateid.si_stateownerid = current_delegid++;
@@ -241,8 +238,6 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f
241 fh_copy_shallow(&dp->dl_fh, &current_fh->fh_handle); 238 fh_copy_shallow(&dp->dl_fh, &current_fh->fh_handle);
242 dp->dl_time = 0; 239 dp->dl_time = 0;
243 atomic_set(&dp->dl_count, 1); 240 atomic_set(&dp->dl_count, 1);
244 list_add(&dp->dl_perfile, &fp->fi_delegations);
245 list_add(&dp->dl_perclnt, &clp->cl_delegations);
246 INIT_WORK(&dp->dl_recall.cb_work, nfsd4_do_callback_rpc); 241 INIT_WORK(&dp->dl_recall.cb_work, nfsd4_do_callback_rpc);
247 return dp; 242 return dp;
248} 243}
@@ -253,36 +248,30 @@ nfs4_put_delegation(struct nfs4_delegation *dp)
253 if (atomic_dec_and_test(&dp->dl_count)) { 248 if (atomic_dec_and_test(&dp->dl_count)) {
254 dprintk("NFSD: freeing dp %p\n",dp); 249 dprintk("NFSD: freeing dp %p\n",dp);
255 put_nfs4_file(dp->dl_file); 250 put_nfs4_file(dp->dl_file);
256 fput(dp->dl_vfs_file);
257 kmem_cache_free(deleg_slab, dp); 251 kmem_cache_free(deleg_slab, dp);
258 num_delegations--; 252 num_delegations--;
259 } 253 }
260} 254}
261 255
262/* Remove the associated file_lock first, then remove the delegation. 256static void nfs4_put_deleg_lease(struct nfs4_file *fp)
263 * lease_modify() is called to remove the FS_LEASE file_lock from
264 * the i_flock list, eventually calling nfsd's lock_manager
265 * fl_release_callback.
266 */
267static void
268nfs4_close_delegation(struct nfs4_delegation *dp)
269{ 257{
270 dprintk("NFSD: close_delegation dp %p\n",dp); 258 if (atomic_dec_and_test(&fp->fi_delegees)) {
271 /* XXX: do we even need this check?: */ 259 vfs_setlease(fp->fi_deleg_file, F_UNLCK, &fp->fi_lease);
272 if (dp->dl_flock) 260 fp->fi_lease = NULL;
273 vfs_setlease(dp->dl_vfs_file, F_UNLCK, &dp->dl_flock); 261 fp->fi_deleg_file = NULL;
262 }
274} 263}
275 264
276/* Called under the state lock. */ 265/* Called under the state lock. */
277static void 266static void
278unhash_delegation(struct nfs4_delegation *dp) 267unhash_delegation(struct nfs4_delegation *dp)
279{ 268{
280 list_del_init(&dp->dl_perfile);
281 list_del_init(&dp->dl_perclnt); 269 list_del_init(&dp->dl_perclnt);
282 spin_lock(&recall_lock); 270 spin_lock(&recall_lock);
271 list_del_init(&dp->dl_perfile);
283 list_del_init(&dp->dl_recall_lru); 272 list_del_init(&dp->dl_recall_lru);
284 spin_unlock(&recall_lock); 273 spin_unlock(&recall_lock);
285 nfs4_close_delegation(dp); 274 nfs4_put_deleg_lease(dp->dl_file);
286 nfs4_put_delegation(dp); 275 nfs4_put_delegation(dp);
287} 276}
288 277
@@ -958,8 +947,6 @@ expire_client(struct nfs4_client *clp)
958 spin_lock(&recall_lock); 947 spin_lock(&recall_lock);
959 while (!list_empty(&clp->cl_delegations)) { 948 while (!list_empty(&clp->cl_delegations)) {
960 dp = list_entry(clp->cl_delegations.next, struct nfs4_delegation, dl_perclnt); 949 dp = list_entry(clp->cl_delegations.next, struct nfs4_delegation, dl_perclnt);
961 dprintk("NFSD: expire client. dp %p, fp %p\n", dp,
962 dp->dl_flock);
963 list_del_init(&dp->dl_perclnt); 950 list_del_init(&dp->dl_perclnt);
964 list_move(&dp->dl_recall_lru, &reaplist); 951 list_move(&dp->dl_recall_lru, &reaplist);
965 } 952 }
@@ -2078,6 +2065,7 @@ alloc_init_file(struct inode *ino)
2078 fp->fi_inode = igrab(ino); 2065 fp->fi_inode = igrab(ino);
2079 fp->fi_id = current_fileid++; 2066 fp->fi_id = current_fileid++;
2080 fp->fi_had_conflict = false; 2067 fp->fi_had_conflict = false;
2068 fp->fi_lease = NULL;
2081 memset(fp->fi_fds, 0, sizeof(fp->fi_fds)); 2069 memset(fp->fi_fds, 0, sizeof(fp->fi_fds));
2082 memset(fp->fi_access, 0, sizeof(fp->fi_access)); 2070 memset(fp->fi_access, 0, sizeof(fp->fi_access));
2083 spin_lock(&recall_lock); 2071 spin_lock(&recall_lock);
@@ -2329,23 +2317,8 @@ nfs4_file_downgrade(struct nfs4_file *fp, unsigned int share_access)
2329 nfs4_file_put_access(fp, O_RDONLY); 2317 nfs4_file_put_access(fp, O_RDONLY);
2330} 2318}
2331 2319
2332/* 2320static void nfsd_break_one_deleg(struct nfs4_delegation *dp)
2333 * Spawn a thread to perform a recall on the delegation represented
2334 * by the lease (file_lock)
2335 *
2336 * Called from break_lease() with lock_flocks() held.
2337 * Note: we assume break_lease will only call this *once* for any given
2338 * lease.
2339 */
2340static
2341void nfsd_break_deleg_cb(struct file_lock *fl)
2342{ 2321{
2343 struct nfs4_delegation *dp = (struct nfs4_delegation *)fl->fl_owner;
2344
2345 dprintk("NFSD nfsd_break_deleg_cb: dp %p fl %p\n",dp,fl);
2346 if (!dp)
2347 return;
2348
2349 /* We're assuming the state code never drops its reference 2322 /* We're assuming the state code never drops its reference
2350 * without first removing the lease. Since we're in this lease 2323 * without first removing the lease. Since we're in this lease
2351 * callback (and since the lease code is serialized by the kernel 2324 * callback (and since the lease code is serialized by the kernel
@@ -2353,22 +2326,35 @@ void nfsd_break_deleg_cb(struct file_lock *fl)
2353 * it's safe to take a reference: */ 2326 * it's safe to take a reference: */
2354 atomic_inc(&dp->dl_count); 2327 atomic_inc(&dp->dl_count);
2355 2328
2356 spin_lock(&recall_lock);
2357 list_add_tail(&dp->dl_recall_lru, &del_recall_lru); 2329 list_add_tail(&dp->dl_recall_lru, &del_recall_lru);
2358 spin_unlock(&recall_lock);
2359 2330
2360 /* only place dl_time is set. protected by lock_flocks*/ 2331 /* only place dl_time is set. protected by lock_flocks*/
2361 dp->dl_time = get_seconds(); 2332 dp->dl_time = get_seconds();
2362 2333
2334 nfsd4_cb_recall(dp);
2335}
2336
2337/* Called from break_lease() with lock_flocks() held. */
2338static void nfsd_break_deleg_cb(struct file_lock *fl)
2339{
2340 struct nfs4_file *fp = (struct nfs4_file *)fl->fl_owner;
2341 struct nfs4_delegation *dp;
2342
2343 BUG_ON(!fp);
2344 /* We assume break_lease is only called once per lease: */
2345 BUG_ON(fp->fi_had_conflict);
2363 /* 2346 /*
2364 * We don't want the locks code to timeout the lease for us; 2347 * We don't want the locks code to timeout the lease for us;
2365 * we'll remove it ourself if the delegation isn't returned 2348 * we'll remove it ourself if a delegation isn't returned
2366 * in time. 2349 * in time:
2367 */ 2350 */
2368 fl->fl_break_time = 0; 2351 fl->fl_break_time = 0;
2369 2352
2370 dp->dl_file->fi_had_conflict = true; 2353 spin_lock(&recall_lock);
2371 nfsd4_cb_recall(dp); 2354 fp->fi_had_conflict = true;
2355 list_for_each_entry(dp, &fp->fi_delegations, dl_perfile)
2356 nfsd_break_one_deleg(dp);
2357 spin_unlock(&recall_lock);
2372} 2358}
2373 2359
2374static 2360static
@@ -2459,13 +2445,15 @@ nfs4_check_delegmode(struct nfs4_delegation *dp, int flags)
2459static struct nfs4_delegation * 2445static struct nfs4_delegation *
2460find_delegation_file(struct nfs4_file *fp, stateid_t *stid) 2446find_delegation_file(struct nfs4_file *fp, stateid_t *stid)
2461{ 2447{
2462 struct nfs4_delegation *dp; 2448 struct nfs4_delegation *dp = NULL;
2463 2449
2450 spin_lock(&recall_lock);
2464 list_for_each_entry(dp, &fp->fi_delegations, dl_perfile) { 2451 list_for_each_entry(dp, &fp->fi_delegations, dl_perfile) {
2465 if (dp->dl_stateid.si_stateownerid == stid->si_stateownerid) 2452 if (dp->dl_stateid.si_stateownerid == stid->si_stateownerid)
2466 return dp; 2453 break;
2467 } 2454 }
2468 return NULL; 2455 spin_unlock(&recall_lock);
2456 return dp;
2469} 2457}
2470 2458
2471int share_access_to_flags(u32 share_access) 2459int share_access_to_flags(u32 share_access)
@@ -2641,6 +2629,66 @@ static bool nfsd4_cb_channel_good(struct nfs4_client *clp)
2641 return clp->cl_minorversion && clp->cl_cb_state == NFSD4_CB_UNKNOWN; 2629 return clp->cl_minorversion && clp->cl_cb_state == NFSD4_CB_UNKNOWN;
2642} 2630}
2643 2631
2632static struct file_lock *nfs4_alloc_init_lease(struct nfs4_delegation *dp, int flag)
2633{
2634 struct file_lock *fl;
2635
2636 fl = locks_alloc_lock();
2637 if (!fl)
2638 return NULL;
2639 locks_init_lock(fl);
2640 fl->fl_lmops = &nfsd_lease_mng_ops;
2641 fl->fl_flags = FL_LEASE;
2642 fl->fl_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK;
2643 fl->fl_end = OFFSET_MAX;
2644 fl->fl_owner = (fl_owner_t)(dp->dl_file);
2645 fl->fl_pid = current->tgid;
2646 return fl;
2647}
2648
2649static int nfs4_setlease(struct nfs4_delegation *dp, int flag)
2650{
2651 struct nfs4_file *fp = dp->dl_file;
2652 struct file_lock *fl;
2653 int status;
2654
2655 fl = nfs4_alloc_init_lease(dp, flag);
2656 if (!fl)
2657 return -ENOMEM;
2658 fl->fl_file = find_readable_file(fp);
2659 list_add(&dp->dl_perclnt, &dp->dl_client->cl_delegations);
2660 status = vfs_setlease(fl->fl_file, fl->fl_type, &fl);
2661 if (status) {
2662 list_del_init(&dp->dl_perclnt);
2663 locks_free_lock(fl);
2664 return -ENOMEM;
2665 }
2666 fp->fi_lease = fl;
2667 fp->fi_deleg_file = fl->fl_file;
2668 get_file(fp->fi_deleg_file);
2669 atomic_set(&fp->fi_delegees, 1);
2670 list_add(&dp->dl_perfile, &fp->fi_delegations);
2671 return 0;
2672}
2673
2674static int nfs4_set_delegation(struct nfs4_delegation *dp, int flag)
2675{
2676 struct nfs4_file *fp = dp->dl_file;
2677
2678 if (!fp->fi_lease)
2679 return nfs4_setlease(dp, flag);
2680 spin_lock(&recall_lock);
2681 if (fp->fi_had_conflict) {
2682 spin_unlock(&recall_lock);
2683 return -EAGAIN;
2684 }
2685 atomic_inc(&fp->fi_delegees);
2686 list_add(&dp->dl_perfile, &fp->fi_delegations);
2687 spin_unlock(&recall_lock);
2688 list_add(&dp->dl_perclnt, &dp->dl_client->cl_delegations);
2689 return 0;
2690}
2691
2644/* 2692/*
2645 * Attempt to hand out a delegation. 2693 * Attempt to hand out a delegation.
2646 */ 2694 */
@@ -2650,7 +2698,6 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta
2650 struct nfs4_delegation *dp; 2698 struct nfs4_delegation *dp;
2651 struct nfs4_stateowner *sop = stp->st_stateowner; 2699 struct nfs4_stateowner *sop = stp->st_stateowner;
2652 int cb_up; 2700 int cb_up;
2653 struct file_lock *fl;
2654 int status, flag = 0; 2701 int status, flag = 0;
2655 2702
2656 cb_up = nfsd4_cb_channel_good(sop->so_client); 2703 cb_up = nfsd4_cb_channel_good(sop->so_client);
@@ -2681,36 +2728,11 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta
2681 } 2728 }
2682 2729
2683 dp = alloc_init_deleg(sop->so_client, stp, fh, flag); 2730 dp = alloc_init_deleg(sop->so_client, stp, fh, flag);
2684 if (dp == NULL) { 2731 if (dp == NULL)
2685 flag = NFS4_OPEN_DELEGATE_NONE; 2732 goto out_no_deleg;
2686 goto out; 2733 status = nfs4_set_delegation(dp, flag);
2687 } 2734 if (status)
2688 status = -ENOMEM; 2735 goto out_free;
2689 fl = locks_alloc_lock();
2690 if (!fl)
2691 goto out;
2692 locks_init_lock(fl);
2693 fl->fl_lmops = &nfsd_lease_mng_ops;
2694 fl->fl_flags = FL_LEASE;
2695 fl->fl_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK;
2696 fl->fl_end = OFFSET_MAX;
2697 fl->fl_owner = (fl_owner_t)dp;
2698 fl->fl_file = find_readable_file(stp->st_file);
2699 BUG_ON(!fl->fl_file);
2700 fl->fl_pid = current->tgid;
2701 dp->dl_flock = fl;
2702
2703 /* vfs_setlease checks to see if delegation should be handed out.
2704 * the lock_manager callback fl_change is used
2705 */
2706 if ((status = vfs_setlease(fl->fl_file, fl->fl_type, &fl))) {
2707 dprintk("NFSD: setlease failed [%d], no delegation\n", status);
2708 dp->dl_flock = NULL;
2709 locks_free_lock(fl);
2710 unhash_delegation(dp);
2711 flag = NFS4_OPEN_DELEGATE_NONE;
2712 goto out;
2713 }
2714 2736
2715 memcpy(&open->op_delegate_stateid, &dp->dl_stateid, sizeof(dp->dl_stateid)); 2737 memcpy(&open->op_delegate_stateid, &dp->dl_stateid, sizeof(dp->dl_stateid));
2716 2738
@@ -2722,6 +2744,12 @@ out:
2722 && open->op_delegate_type != NFS4_OPEN_DELEGATE_NONE) 2744 && open->op_delegate_type != NFS4_OPEN_DELEGATE_NONE)
2723 dprintk("NFSD: WARNING: refusing delegation reclaim\n"); 2745 dprintk("NFSD: WARNING: refusing delegation reclaim\n");
2724 open->op_delegate_type = flag; 2746 open->op_delegate_type = flag;
2747 return;
2748out_free:
2749 nfs4_put_delegation(dp);
2750out_no_deleg:
2751 flag = NFS4_OPEN_DELEGATE_NONE;
2752 goto out;
2725} 2753}
2726 2754
2727/* 2755/*
@@ -2916,8 +2944,6 @@ nfs4_laundromat(void)
2916 test_val = u; 2944 test_val = u;
2917 break; 2945 break;
2918 } 2946 }
2919 dprintk("NFSD: purging unused delegation dp %p, fp %p\n",
2920 dp, dp->dl_flock);
2921 list_move(&dp->dl_recall_lru, &reaplist); 2947 list_move(&dp->dl_recall_lru, &reaplist);
2922 } 2948 }
2923 spin_unlock(&recall_lock); 2949 spin_unlock(&recall_lock);
@@ -3128,7 +3154,7 @@ nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate,
3128 goto out; 3154 goto out;
3129 renew_client(dp->dl_client); 3155 renew_client(dp->dl_client);
3130 if (filpp) { 3156 if (filpp) {
3131 *filpp = find_readable_file(dp->dl_file); 3157 *filpp = dp->dl_file->fi_deleg_file;
3132 BUG_ON(!*filpp); 3158 BUG_ON(!*filpp);
3133 } 3159 }
3134 } else { /* open or lock stateid */ 3160 } else { /* open or lock stateid */
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 956629b9cdc9..1275b8655070 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -317,8 +317,8 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
317 READ_BUF(dummy32); 317 READ_BUF(dummy32);
318 len += (XDR_QUADLEN(dummy32) << 2); 318 len += (XDR_QUADLEN(dummy32) << 2);
319 READMEM(buf, dummy32); 319 READMEM(buf, dummy32);
320 if ((host_err = nfsd_map_name_to_uid(argp->rqstp, buf, dummy32, &iattr->ia_uid))) 320 if ((status = nfsd_map_name_to_uid(argp->rqstp, buf, dummy32, &iattr->ia_uid)))
321 goto out_nfserr; 321 return status;
322 iattr->ia_valid |= ATTR_UID; 322 iattr->ia_valid |= ATTR_UID;
323 } 323 }
324 if (bmval[1] & FATTR4_WORD1_OWNER_GROUP) { 324 if (bmval[1] & FATTR4_WORD1_OWNER_GROUP) {
@@ -328,8 +328,8 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
328 READ_BUF(dummy32); 328 READ_BUF(dummy32);
329 len += (XDR_QUADLEN(dummy32) << 2); 329 len += (XDR_QUADLEN(dummy32) << 2);
330 READMEM(buf, dummy32); 330 READMEM(buf, dummy32);
331 if ((host_err = nfsd_map_name_to_gid(argp->rqstp, buf, dummy32, &iattr->ia_gid))) 331 if ((status = nfsd_map_name_to_gid(argp->rqstp, buf, dummy32, &iattr->ia_gid)))
332 goto out_nfserr; 332 return status;
333 iattr->ia_valid |= ATTR_GID; 333 iattr->ia_valid |= ATTR_GID;
334 } 334 }
335 if (bmval[1] & FATTR4_WORD1_TIME_ACCESS_SET) { 335 if (bmval[1] & FATTR4_WORD1_TIME_ACCESS_SET) {
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 3074656ba7bf..2d31224b07bf 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -83,8 +83,6 @@ struct nfs4_delegation {
83 atomic_t dl_count; /* ref count */ 83 atomic_t dl_count; /* ref count */
84 struct nfs4_client *dl_client; 84 struct nfs4_client *dl_client;
85 struct nfs4_file *dl_file; 85 struct nfs4_file *dl_file;
86 struct file *dl_vfs_file;
87 struct file_lock *dl_flock;
88 u32 dl_type; 86 u32 dl_type;
89 time_t dl_time; 87 time_t dl_time;
90/* For recall: */ 88/* For recall: */
@@ -379,6 +377,9 @@ struct nfs4_file {
379 */ 377 */
380 atomic_t fi_readers; 378 atomic_t fi_readers;
381 atomic_t fi_writers; 379 atomic_t fi_writers;
380 struct file *fi_deleg_file;
381 struct file_lock *fi_lease;
382 atomic_t fi_delegees;
382 struct inode *fi_inode; 383 struct inode *fi_inode;
383 u32 fi_id; /* used with stateowner->so_id 384 u32 fi_id; /* used with stateowner->so_id
384 * for stateid_hashtbl hash */ 385 * for stateid_hashtbl hash */
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 641117f2188d..da1d9701f8e4 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -808,7 +808,7 @@ nfsd_get_raparms(dev_t dev, ino_t ino)
808 if (ra->p_count == 0) 808 if (ra->p_count == 0)
809 frap = rap; 809 frap = rap;
810 } 810 }
811 depth = nfsdstats.ra_size*11/10; 811 depth = nfsdstats.ra_size;
812 if (!frap) { 812 if (!frap) {
813 spin_unlock(&rab->pb_lock); 813 spin_unlock(&rab->pb_lock);
814 return NULL; 814 return NULL;
@@ -1744,6 +1744,13 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
1744 host_err = nfsd_break_lease(odentry->d_inode); 1744 host_err = nfsd_break_lease(odentry->d_inode);
1745 if (host_err) 1745 if (host_err)
1746 goto out_drop_write; 1746 goto out_drop_write;
1747 if (ndentry->d_inode) {
1748 host_err = nfsd_break_lease(ndentry->d_inode);
1749 if (host_err)
1750 goto out_drop_write;
1751 }
1752 if (host_err)
1753 goto out_drop_write;
1747 host_err = vfs_rename(fdir, odentry, tdir, ndentry); 1754 host_err = vfs_rename(fdir, odentry, tdir, ndentry);
1748 if (!host_err) { 1755 if (!host_err) {
1749 host_err = commit_metadata(tfhp); 1756 host_err = commit_metadata(tfhp);
@@ -1812,22 +1819,22 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
1812 1819
1813 host_err = mnt_want_write(fhp->fh_export->ex_path.mnt); 1820 host_err = mnt_want_write(fhp->fh_export->ex_path.mnt);
1814 if (host_err) 1821 if (host_err)
1815 goto out_nfserr; 1822 goto out_put;
1816 1823
1817 host_err = nfsd_break_lease(rdentry->d_inode); 1824 host_err = nfsd_break_lease(rdentry->d_inode);
1818 if (host_err) 1825 if (host_err)
1819 goto out_put; 1826 goto out_drop_write;
1820 if (type != S_IFDIR) 1827 if (type != S_IFDIR)
1821 host_err = vfs_unlink(dirp, rdentry); 1828 host_err = vfs_unlink(dirp, rdentry);
1822 else 1829 else
1823 host_err = vfs_rmdir(dirp, rdentry); 1830 host_err = vfs_rmdir(dirp, rdentry);
1824out_put:
1825 dput(rdentry);
1826
1827 if (!host_err) 1831 if (!host_err)
1828 host_err = commit_metadata(fhp); 1832 host_err = commit_metadata(fhp);
1829 1833out_drop_write:
1830 mnt_drop_write(fhp->fh_export->ex_path.mnt); 1834 mnt_drop_write(fhp->fh_export->ex_path.mnt);
1835out_put:
1836 dput(rdentry);
1837
1831out_nfserr: 1838out_nfserr:
1832 err = nfserrno(host_err); 1839 err = nfserrno(host_err);
1833out: 1840out:
diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c
index 388e9e8f5286..85f7baa15f5d 100644
--- a/fs/nilfs2/btnode.c
+++ b/fs/nilfs2/btnode.c
@@ -35,11 +35,6 @@
35#include "btnode.h" 35#include "btnode.h"
36 36
37 37
38void nilfs_btnode_cache_init_once(struct address_space *btnc)
39{
40 nilfs_mapping_init_once(btnc);
41}
42
43static const struct address_space_operations def_btnode_aops = { 38static const struct address_space_operations def_btnode_aops = {
44 .sync_page = block_sync_page, 39 .sync_page = block_sync_page,
45}; 40};
diff --git a/fs/nilfs2/btnode.h b/fs/nilfs2/btnode.h
index 79037494f1e0..1b8ebd888c28 100644
--- a/fs/nilfs2/btnode.h
+++ b/fs/nilfs2/btnode.h
@@ -37,7 +37,6 @@ struct nilfs_btnode_chkey_ctxt {
37 struct buffer_head *newbh; 37 struct buffer_head *newbh;
38}; 38};
39 39
40void nilfs_btnode_cache_init_once(struct address_space *);
41void nilfs_btnode_cache_init(struct address_space *, struct backing_dev_info *); 40void nilfs_btnode_cache_init(struct address_space *, struct backing_dev_info *);
42void nilfs_btnode_cache_clear(struct address_space *); 41void nilfs_btnode_cache_clear(struct address_space *);
43struct buffer_head *nilfs_btnode_create_block(struct address_space *btnc, 42struct buffer_head *nilfs_btnode_create_block(struct address_space *btnc,
diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c
index 6a0e2a189f60..a0babd2bff6a 100644
--- a/fs/nilfs2/mdt.c
+++ b/fs/nilfs2/mdt.c
@@ -454,9 +454,9 @@ int nilfs_mdt_setup_shadow_map(struct inode *inode,
454 struct backing_dev_info *bdi = inode->i_sb->s_bdi; 454 struct backing_dev_info *bdi = inode->i_sb->s_bdi;
455 455
456 INIT_LIST_HEAD(&shadow->frozen_buffers); 456 INIT_LIST_HEAD(&shadow->frozen_buffers);
457 nilfs_mapping_init_once(&shadow->frozen_data); 457 address_space_init_once(&shadow->frozen_data);
458 nilfs_mapping_init(&shadow->frozen_data, bdi, &shadow_map_aops); 458 nilfs_mapping_init(&shadow->frozen_data, bdi, &shadow_map_aops);
459 nilfs_mapping_init_once(&shadow->frozen_btnodes); 459 address_space_init_once(&shadow->frozen_btnodes);
460 nilfs_mapping_init(&shadow->frozen_btnodes, bdi, &shadow_map_aops); 460 nilfs_mapping_init(&shadow->frozen_btnodes, bdi, &shadow_map_aops);
461 mi->mi_shadow = shadow; 461 mi->mi_shadow = shadow;
462 return 0; 462 return 0;
diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c
index 98034271cd02..161791d26458 100644
--- a/fs/nilfs2/namei.c
+++ b/fs/nilfs2/namei.c
@@ -397,7 +397,6 @@ static int nilfs_rename(struct inode *old_dir, struct dentry *old_dentry,
397 new_de = nilfs_find_entry(new_dir, &new_dentry->d_name, &new_page); 397 new_de = nilfs_find_entry(new_dir, &new_dentry->d_name, &new_page);
398 if (!new_de) 398 if (!new_de)
399 goto out_dir; 399 goto out_dir;
400 inc_nlink(old_inode);
401 nilfs_set_link(new_dir, new_de, new_page, old_inode); 400 nilfs_set_link(new_dir, new_de, new_page, old_inode);
402 nilfs_mark_inode_dirty(new_dir); 401 nilfs_mark_inode_dirty(new_dir);
403 new_inode->i_ctime = CURRENT_TIME; 402 new_inode->i_ctime = CURRENT_TIME;
@@ -411,13 +410,9 @@ static int nilfs_rename(struct inode *old_dir, struct dentry *old_dentry,
411 if (new_dir->i_nlink >= NILFS_LINK_MAX) 410 if (new_dir->i_nlink >= NILFS_LINK_MAX)
412 goto out_dir; 411 goto out_dir;
413 } 412 }
414 inc_nlink(old_inode);
415 err = nilfs_add_link(new_dentry, old_inode); 413 err = nilfs_add_link(new_dentry, old_inode);
416 if (err) { 414 if (err)
417 drop_nlink(old_inode);
418 nilfs_mark_inode_dirty(old_inode);
419 goto out_dir; 415 goto out_dir;
420 }
421 if (dir_de) { 416 if (dir_de) {
422 inc_nlink(new_dir); 417 inc_nlink(new_dir);
423 nilfs_mark_inode_dirty(new_dir); 418 nilfs_mark_inode_dirty(new_dir);
@@ -431,7 +426,6 @@ static int nilfs_rename(struct inode *old_dir, struct dentry *old_dentry,
431 old_inode->i_ctime = CURRENT_TIME; 426 old_inode->i_ctime = CURRENT_TIME;
432 427
433 nilfs_delete_entry(old_de, old_page); 428 nilfs_delete_entry(old_de, old_page);
434 drop_nlink(old_inode);
435 429
436 if (dir_de) { 430 if (dir_de) {
437 nilfs_set_link(old_inode, dir_de, dir_page, new_dir); 431 nilfs_set_link(old_inode, dir_de, dir_page, new_dir);
diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c
index 0c432416cfef..a585b35fd6bc 100644
--- a/fs/nilfs2/page.c
+++ b/fs/nilfs2/page.c
@@ -492,19 +492,6 @@ unsigned nilfs_page_count_clean_buffers(struct page *page,
492 return nc; 492 return nc;
493} 493}
494 494
495void nilfs_mapping_init_once(struct address_space *mapping)
496{
497 memset(mapping, 0, sizeof(*mapping));
498 INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC);
499 spin_lock_init(&mapping->tree_lock);
500 INIT_LIST_HEAD(&mapping->private_list);
501 spin_lock_init(&mapping->private_lock);
502
503 spin_lock_init(&mapping->i_mmap_lock);
504 INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap);
505 INIT_LIST_HEAD(&mapping->i_mmap_nonlinear);
506}
507
508void nilfs_mapping_init(struct address_space *mapping, 495void nilfs_mapping_init(struct address_space *mapping,
509 struct backing_dev_info *bdi, 496 struct backing_dev_info *bdi,
510 const struct address_space_operations *aops) 497 const struct address_space_operations *aops)
diff --git a/fs/nilfs2/page.h b/fs/nilfs2/page.h
index 622df27cd891..2a00953ebd5f 100644
--- a/fs/nilfs2/page.h
+++ b/fs/nilfs2/page.h
@@ -61,7 +61,6 @@ void nilfs_free_private_page(struct page *);
61int nilfs_copy_dirty_pages(struct address_space *, struct address_space *); 61int nilfs_copy_dirty_pages(struct address_space *, struct address_space *);
62void nilfs_copy_back_pages(struct address_space *, struct address_space *); 62void nilfs_copy_back_pages(struct address_space *, struct address_space *);
63void nilfs_clear_dirty_pages(struct address_space *); 63void nilfs_clear_dirty_pages(struct address_space *);
64void nilfs_mapping_init_once(struct address_space *mapping);
65void nilfs_mapping_init(struct address_space *mapping, 64void nilfs_mapping_init(struct address_space *mapping,
66 struct backing_dev_info *bdi, 65 struct backing_dev_info *bdi,
67 const struct address_space_operations *aops); 66 const struct address_space_operations *aops);
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 55ebae5c7f39..2de9f636792a 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -430,7 +430,8 @@ static void nilfs_segctor_begin_finfo(struct nilfs_sc_info *sci,
430 nilfs_segctor_map_segsum_entry( 430 nilfs_segctor_map_segsum_entry(
431 sci, &sci->sc_binfo_ptr, sizeof(struct nilfs_finfo)); 431 sci, &sci->sc_binfo_ptr, sizeof(struct nilfs_finfo));
432 432
433 if (inode->i_sb && !test_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags)) 433 if (NILFS_I(inode)->i_root &&
434 !test_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags))
434 set_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags); 435 set_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags);
435 /* skip finfo */ 436 /* skip finfo */
436} 437}
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 58fd707174e1..1673b3d99842 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -1279,7 +1279,7 @@ static void nilfs_inode_init_once(void *obj)
1279#ifdef CONFIG_NILFS_XATTR 1279#ifdef CONFIG_NILFS_XATTR
1280 init_rwsem(&ii->xattr_sem); 1280 init_rwsem(&ii->xattr_sem);
1281#endif 1281#endif
1282 nilfs_btnode_cache_init_once(&ii->i_btnode_cache); 1282 address_space_init_once(&ii->i_btnode_cache);
1283 ii->i_bmap = &ii->i_bmap_data; 1283 ii->i_bmap = &ii->i_bmap_data;
1284 inode_init_once(&ii->vfs_inode); 1284 inode_init_once(&ii->vfs_inode);
1285} 1285}
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index 43e56b97f9c0..6180da1e37e6 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -405,9 +405,9 @@ static inline int ocfs2_remove_extent_credits(struct super_block *sb)
405 ocfs2_quota_trans_credits(sb); 405 ocfs2_quota_trans_credits(sb);
406} 406}
407 407
408/* data block for new dir/symlink, 2 for bitmap updates (bitmap fe + 408/* data block for new dir/symlink, allocation of directory block, dx_root
409 * bitmap block for the new bit) dx_root update for free list */ 409 * update for free list */
410#define OCFS2_DIR_LINK_ADDITIONAL_CREDITS (1 + 2 + 1) 410#define OCFS2_DIR_LINK_ADDITIONAL_CREDITS (1 + OCFS2_SUBALLOC_ALLOC + 1)
411 411
412static inline int ocfs2_add_dir_index_credits(struct super_block *sb) 412static inline int ocfs2_add_dir_index_credits(struct super_block *sb)
413{ 413{
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index b5f9160e93e9..19ebc5aad391 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -3228,7 +3228,7 @@ static int ocfs2_make_clusters_writable(struct super_block *sb,
3228 u32 num_clusters, unsigned int e_flags) 3228 u32 num_clusters, unsigned int e_flags)
3229{ 3229{
3230 int ret, delete, index, credits = 0; 3230 int ret, delete, index, credits = 0;
3231 u32 new_bit, new_len; 3231 u32 new_bit, new_len, orig_num_clusters;
3232 unsigned int set_len; 3232 unsigned int set_len;
3233 struct ocfs2_super *osb = OCFS2_SB(sb); 3233 struct ocfs2_super *osb = OCFS2_SB(sb);
3234 handle_t *handle; 3234 handle_t *handle;
@@ -3261,6 +3261,8 @@ static int ocfs2_make_clusters_writable(struct super_block *sb,
3261 goto out; 3261 goto out;
3262 } 3262 }
3263 3263
3264 orig_num_clusters = num_clusters;
3265
3264 while (num_clusters) { 3266 while (num_clusters) {
3265 ret = ocfs2_get_refcount_rec(ref_ci, context->ref_root_bh, 3267 ret = ocfs2_get_refcount_rec(ref_ci, context->ref_root_bh,
3266 p_cluster, num_clusters, 3268 p_cluster, num_clusters,
@@ -3348,7 +3350,8 @@ static int ocfs2_make_clusters_writable(struct super_block *sb,
3348 * in write-back mode. 3350 * in write-back mode.
3349 */ 3351 */
3350 if (context->get_clusters == ocfs2_di_get_clusters) { 3352 if (context->get_clusters == ocfs2_di_get_clusters) {
3351 ret = ocfs2_cow_sync_writeback(sb, context, cpos, num_clusters); 3353 ret = ocfs2_cow_sync_writeback(sb, context, cpos,
3354 orig_num_clusters);
3352 if (ret) 3355 if (ret)
3353 mlog_errno(ret); 3356 mlog_errno(ret);
3354 } 3357 }
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 38f986d2447e..36c423fb0635 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1316,7 +1316,7 @@ static int ocfs2_parse_options(struct super_block *sb,
1316 struct mount_options *mopt, 1316 struct mount_options *mopt,
1317 int is_remount) 1317 int is_remount)
1318{ 1318{
1319 int status; 1319 int status, user_stack = 0;
1320 char *p; 1320 char *p;
1321 u32 tmp; 1321 u32 tmp;
1322 1322
@@ -1459,6 +1459,15 @@ static int ocfs2_parse_options(struct super_block *sb,
1459 memcpy(mopt->cluster_stack, args[0].from, 1459 memcpy(mopt->cluster_stack, args[0].from,
1460 OCFS2_STACK_LABEL_LEN); 1460 OCFS2_STACK_LABEL_LEN);
1461 mopt->cluster_stack[OCFS2_STACK_LABEL_LEN] = '\0'; 1461 mopt->cluster_stack[OCFS2_STACK_LABEL_LEN] = '\0';
1462 /*
1463 * Open code the memcmp here as we don't have
1464 * an osb to pass to
1465 * ocfs2_userspace_stack().
1466 */
1467 if (memcmp(mopt->cluster_stack,
1468 OCFS2_CLASSIC_CLUSTER_STACK,
1469 OCFS2_STACK_LABEL_LEN))
1470 user_stack = 1;
1462 break; 1471 break;
1463 case Opt_inode64: 1472 case Opt_inode64:
1464 mopt->mount_opt |= OCFS2_MOUNT_INODE64; 1473 mopt->mount_opt |= OCFS2_MOUNT_INODE64;
@@ -1514,13 +1523,16 @@ static int ocfs2_parse_options(struct super_block *sb,
1514 } 1523 }
1515 } 1524 }
1516 1525
1517 /* Ensure only one heartbeat mode */ 1526 if (user_stack == 0) {
1518 tmp = mopt->mount_opt & (OCFS2_MOUNT_HB_LOCAL | OCFS2_MOUNT_HB_GLOBAL | 1527 /* Ensure only one heartbeat mode */
1519 OCFS2_MOUNT_HB_NONE); 1528 tmp = mopt->mount_opt & (OCFS2_MOUNT_HB_LOCAL |
1520 if (hweight32(tmp) != 1) { 1529 OCFS2_MOUNT_HB_GLOBAL |
1521 mlog(ML_ERROR, "Invalid heartbeat mount options\n"); 1530 OCFS2_MOUNT_HB_NONE);
1522 status = 0; 1531 if (hweight32(tmp) != 1) {
1523 goto bail; 1532 mlog(ML_ERROR, "Invalid heartbeat mount options\n");
1533 status = 0;
1534 goto bail;
1535 }
1524 } 1536 }
1525 1537
1526 status = 1; 1538 status = 1;
diff --git a/fs/open.c b/fs/open.c
index e52389e1f05b..5a2c6ebc22b5 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -790,6 +790,8 @@ struct file *nameidata_to_filp(struct nameidata *nd)
790 790
791 /* Pick up the filp from the open intent */ 791 /* Pick up the filp from the open intent */
792 filp = nd->intent.open.file; 792 filp = nd->intent.open.file;
793 nd->intent.open.file = NULL;
794
793 /* Has the filesystem initialised the file for us? */ 795 /* Has the filesystem initialised the file for us? */
794 if (filp->f_path.dentry == NULL) { 796 if (filp->f_path.dentry == NULL) {
795 path_get(&nd->path); 797 path_get(&nd->path);
diff --git a/fs/partitions/ldm.c b/fs/partitions/ldm.c
index 789c625c7aa5..b10e3540d5b7 100644
--- a/fs/partitions/ldm.c
+++ b/fs/partitions/ldm.c
@@ -251,6 +251,11 @@ static bool ldm_parse_vmdb (const u8 *data, struct vmdb *vm)
251 } 251 }
252 252
253 vm->vblk_size = get_unaligned_be32(data + 0x08); 253 vm->vblk_size = get_unaligned_be32(data + 0x08);
254 if (vm->vblk_size == 0) {
255 ldm_error ("Illegal VBLK size");
256 return false;
257 }
258
254 vm->vblk_offset = get_unaligned_be32(data + 0x0C); 259 vm->vblk_offset = get_unaligned_be32(data + 0x0C);
255 vm->last_vblk_seq = get_unaligned_be32(data + 0x04); 260 vm->last_vblk_seq = get_unaligned_be32(data + 0x04);
256 261
diff --git a/fs/partitions/mac.c b/fs/partitions/mac.c
index 68d6a216ee79..11f688bd76c5 100644
--- a/fs/partitions/mac.c
+++ b/fs/partitions/mac.c
@@ -29,10 +29,9 @@ static inline void mac_fix_string(char *stg, int len)
29 29
30int mac_partition(struct parsed_partitions *state) 30int mac_partition(struct parsed_partitions *state)
31{ 31{
32 int slot = 1;
33 Sector sect; 32 Sector sect;
34 unsigned char *data; 33 unsigned char *data;
35 int blk, blocks_in_map; 34 int slot, blocks_in_map;
36 unsigned secsize; 35 unsigned secsize;
37#ifdef CONFIG_PPC_PMAC 36#ifdef CONFIG_PPC_PMAC
38 int found_root = 0; 37 int found_root = 0;
@@ -59,10 +58,14 @@ int mac_partition(struct parsed_partitions *state)
59 put_dev_sector(sect); 58 put_dev_sector(sect);
60 return 0; /* not a MacOS disk */ 59 return 0; /* not a MacOS disk */
61 } 60 }
62 strlcat(state->pp_buf, " [mac]", PAGE_SIZE);
63 blocks_in_map = be32_to_cpu(part->map_count); 61 blocks_in_map = be32_to_cpu(part->map_count);
64 for (blk = 1; blk <= blocks_in_map; ++blk) { 62 if (blocks_in_map < 0 || blocks_in_map >= DISK_MAX_PARTS) {
65 int pos = blk * secsize; 63 put_dev_sector(sect);
64 return 0;
65 }
66 strlcat(state->pp_buf, " [mac]", PAGE_SIZE);
67 for (slot = 1; slot <= blocks_in_map; ++slot) {
68 int pos = slot * secsize;
66 put_dev_sector(sect); 69 put_dev_sector(sect);
67 data = read_part_sector(state, pos/512, &sect); 70 data = read_part_sector(state, pos/512, &sect);
68 if (!data) 71 if (!data)
@@ -113,13 +116,11 @@ int mac_partition(struct parsed_partitions *state)
113 } 116 }
114 117
115 if (goodness > found_root_goodness) { 118 if (goodness > found_root_goodness) {
116 found_root = blk; 119 found_root = slot;
117 found_root_goodness = goodness; 120 found_root_goodness = goodness;
118 } 121 }
119 } 122 }
120#endif /* CONFIG_PPC_PMAC */ 123#endif /* CONFIG_PPC_PMAC */
121
122 ++slot;
123 } 124 }
124#ifdef CONFIG_PPC_PMAC 125#ifdef CONFIG_PPC_PMAC
125 if (found_root_goodness) 126 if (found_root_goodness)
diff --git a/fs/proc/array.c b/fs/proc/array.c
index df2b703b9d0f..7c99c1cf7e5c 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -353,9 +353,6 @@ int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
353 task_cap(m, task); 353 task_cap(m, task);
354 task_cpus_allowed(m, task); 354 task_cpus_allowed(m, task);
355 cpuset_task_status_allowed(m, task); 355 cpuset_task_status_allowed(m, task);
356#if defined(CONFIG_S390)
357 task_show_regs(m, task);
358#endif
359 task_context_switch_counts(m, task); 356 task_context_switch_counts(m, task);
360 return 0; 357 return 0;
361} 358}
diff --git a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c
index d9396a4fc7ff..927cbd115e53 100644
--- a/fs/proc/proc_devtree.c
+++ b/fs/proc/proc_devtree.c
@@ -233,7 +233,7 @@ void __init proc_device_tree_init(void)
233 return; 233 return;
234 root = of_find_node_by_path("/"); 234 root = of_find_node_by_path("/");
235 if (root == NULL) { 235 if (root == NULL) {
236 printk(KERN_ERR "/proc/device-tree: can't find root\n"); 236 pr_debug("/proc/device-tree: can't find root\n");
237 return; 237 return;
238 } 238 }
239 proc_device_tree_add_node(root, proc_device_tree); 239 proc_device_tree_add_node(root, proc_device_tree);
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index ba5f51ec3458..68fdf45cc6c9 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -771,7 +771,7 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
771 EMPTY_DIR_SIZE_V1 : EMPTY_DIR_SIZE, 771 EMPTY_DIR_SIZE_V1 : EMPTY_DIR_SIZE,
772 dentry, inode, &security); 772 dentry, inode, &security);
773 if (retval) { 773 if (retval) {
774 dir->i_nlink--; 774 DEC_DIR_INODE_NLINK(dir)
775 goto out_failed; 775 goto out_failed;
776 } 776 }
777 777
diff --git a/fs/super.c b/fs/super.c
index 74e149efed81..7e9dd4cc2c01 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -177,6 +177,11 @@ void deactivate_locked_super(struct super_block *s)
177 struct file_system_type *fs = s->s_type; 177 struct file_system_type *fs = s->s_type;
178 if (atomic_dec_and_test(&s->s_active)) { 178 if (atomic_dec_and_test(&s->s_active)) {
179 fs->kill_sb(s); 179 fs->kill_sb(s);
180 /*
181 * We need to call rcu_barrier so all the delayed rcu free
182 * inodes are flushed before we release the fs module.
183 */
184 rcu_barrier();
180 put_filesystem(fs); 185 put_filesystem(fs);
181 put_super(s); 186 put_super(s);
182 } else { 187 } else {
diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c
index b427b1208c26..e474fbcf8bde 100644
--- a/fs/sysv/namei.c
+++ b/fs/sysv/namei.c
@@ -245,7 +245,6 @@ static int sysv_rename(struct inode * old_dir, struct dentry * old_dentry,
245 new_de = sysv_find_entry(new_dentry, &new_page); 245 new_de = sysv_find_entry(new_dentry, &new_page);
246 if (!new_de) 246 if (!new_de)
247 goto out_dir; 247 goto out_dir;
248 inode_inc_link_count(old_inode);
249 sysv_set_link(new_de, new_page, old_inode); 248 sysv_set_link(new_de, new_page, old_inode);
250 new_inode->i_ctime = CURRENT_TIME_SEC; 249 new_inode->i_ctime = CURRENT_TIME_SEC;
251 if (dir_de) 250 if (dir_de)
@@ -257,18 +256,15 @@ static int sysv_rename(struct inode * old_dir, struct dentry * old_dentry,
257 if (new_dir->i_nlink >= SYSV_SB(new_dir->i_sb)->s_link_max) 256 if (new_dir->i_nlink >= SYSV_SB(new_dir->i_sb)->s_link_max)
258 goto out_dir; 257 goto out_dir;
259 } 258 }
260 inode_inc_link_count(old_inode);
261 err = sysv_add_link(new_dentry, old_inode); 259 err = sysv_add_link(new_dentry, old_inode);
262 if (err) { 260 if (err)
263 inode_dec_link_count(old_inode);
264 goto out_dir; 261 goto out_dir;
265 }
266 if (dir_de) 262 if (dir_de)
267 inode_inc_link_count(new_dir); 263 inode_inc_link_count(new_dir);
268 } 264 }
269 265
270 sysv_delete_entry(old_de, old_page); 266 sysv_delete_entry(old_de, old_page);
271 inode_dec_link_count(old_inode); 267 mark_inode_dirty(old_inode);
272 268
273 if (dir_de) { 269 if (dir_de) {
274 sysv_set_link(dir_de, dir_page, new_dir); 270 sysv_set_link(dir_de, dir_page, new_dir);
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index 2be0f9eb86d2..b7c338d5e9df 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -32,6 +32,8 @@
32#include <linux/crc-itu-t.h> 32#include <linux/crc-itu-t.h>
33#include <linux/exportfs.h> 33#include <linux/exportfs.h>
34 34
35enum { UDF_MAX_LINKS = 0xffff };
36
35static inline int udf_match(int len1, const unsigned char *name1, int len2, 37static inline int udf_match(int len1, const unsigned char *name1, int len2,
36 const unsigned char *name2) 38 const unsigned char *name2)
37{ 39{
@@ -650,7 +652,7 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, int mode)
650 struct udf_inode_info *iinfo; 652 struct udf_inode_info *iinfo;
651 653
652 err = -EMLINK; 654 err = -EMLINK;
653 if (dir->i_nlink >= (256 << sizeof(dir->i_nlink)) - 1) 655 if (dir->i_nlink >= UDF_MAX_LINKS)
654 goto out; 656 goto out;
655 657
656 err = -EIO; 658 err = -EIO;
@@ -1034,9 +1036,8 @@ static int udf_link(struct dentry *old_dentry, struct inode *dir,
1034 struct fileIdentDesc cfi, *fi; 1036 struct fileIdentDesc cfi, *fi;
1035 int err; 1037 int err;
1036 1038
1037 if (inode->i_nlink >= (256 << sizeof(inode->i_nlink)) - 1) { 1039 if (inode->i_nlink >= UDF_MAX_LINKS)
1038 return -EMLINK; 1040 return -EMLINK;
1039 }
1040 1041
1041 fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err); 1042 fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err);
1042 if (!fi) { 1043 if (!fi) {
@@ -1131,9 +1132,7 @@ static int udf_rename(struct inode *old_dir, struct dentry *old_dentry,
1131 goto end_rename; 1132 goto end_rename;
1132 1133
1133 retval = -EMLINK; 1134 retval = -EMLINK;
1134 if (!new_inode && 1135 if (!new_inode && new_dir->i_nlink >= UDF_MAX_LINKS)
1135 new_dir->i_nlink >=
1136 (256 << sizeof(new_dir->i_nlink)) - 1)
1137 goto end_rename; 1136 goto end_rename;
1138 } 1137 }
1139 if (!nfi) { 1138 if (!nfi) {
diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c
index 12f39b9e4437..d6f681535eb8 100644
--- a/fs/ufs/namei.c
+++ b/fs/ufs/namei.c
@@ -306,7 +306,6 @@ static int ufs_rename(struct inode *old_dir, struct dentry *old_dentry,
306 new_de = ufs_find_entry(new_dir, &new_dentry->d_name, &new_page); 306 new_de = ufs_find_entry(new_dir, &new_dentry->d_name, &new_page);
307 if (!new_de) 307 if (!new_de)
308 goto out_dir; 308 goto out_dir;
309 inode_inc_link_count(old_inode);
310 ufs_set_link(new_dir, new_de, new_page, old_inode); 309 ufs_set_link(new_dir, new_de, new_page, old_inode);
311 new_inode->i_ctime = CURRENT_TIME_SEC; 310 new_inode->i_ctime = CURRENT_TIME_SEC;
312 if (dir_de) 311 if (dir_de)
@@ -318,12 +317,9 @@ static int ufs_rename(struct inode *old_dir, struct dentry *old_dentry,
318 if (new_dir->i_nlink >= UFS_LINK_MAX) 317 if (new_dir->i_nlink >= UFS_LINK_MAX)
319 goto out_dir; 318 goto out_dir;
320 } 319 }
321 inode_inc_link_count(old_inode);
322 err = ufs_add_link(new_dentry, old_inode); 320 err = ufs_add_link(new_dentry, old_inode);
323 if (err) { 321 if (err)
324 inode_dec_link_count(old_inode);
325 goto out_dir; 322 goto out_dir;
326 }
327 if (dir_de) 323 if (dir_de)
328 inode_inc_link_count(new_dir); 324 inode_inc_link_count(new_dir);
329 } 325 }
@@ -331,12 +327,11 @@ static int ufs_rename(struct inode *old_dir, struct dentry *old_dentry,
331 /* 327 /*
332 * Like most other Unix systems, set the ctime for inodes on a 328 * Like most other Unix systems, set the ctime for inodes on a
333 * rename. 329 * rename.
334 * inode_dec_link_count() will mark the inode dirty.
335 */ 330 */
336 old_inode->i_ctime = CURRENT_TIME_SEC; 331 old_inode->i_ctime = CURRENT_TIME_SEC;
337 332
338 ufs_delete_entry(old_dir, old_de, old_page); 333 ufs_delete_entry(old_dir, old_de, old_page);
339 inode_dec_link_count(old_inode); 334 mark_inode_dirty(old_inode);
340 335
341 if (dir_de) { 336 if (dir_de) {
342 ufs_set_link(old_inode, dir_de, dir_page, new_dir); 337 ufs_set_link(old_inode, dir_de, dir_page, new_dir);
diff --git a/fs/xfs/linux-2.6/xfs_discard.c b/fs/xfs/linux-2.6/xfs_discard.c
index 05201ae719e5..d61611c88012 100644
--- a/fs/xfs/linux-2.6/xfs_discard.c
+++ b/fs/xfs/linux-2.6/xfs_discard.c
@@ -152,6 +152,8 @@ xfs_ioc_trim(
152 152
153 if (!capable(CAP_SYS_ADMIN)) 153 if (!capable(CAP_SYS_ADMIN))
154 return -XFS_ERROR(EPERM); 154 return -XFS_ERROR(EPERM);
155 if (!blk_queue_discard(q))
156 return -XFS_ERROR(EOPNOTSUPP);
155 if (copy_from_user(&range, urange, sizeof(range))) 157 if (copy_from_user(&range, urange, sizeof(range)))
156 return -XFS_ERROR(EFAULT); 158 return -XFS_ERROR(EFAULT);
157 159
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index f5e2a19e0f8e..0ca0e3c024d7 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -695,14 +695,19 @@ xfs_ioc_fsgeometry_v1(
695 xfs_mount_t *mp, 695 xfs_mount_t *mp,
696 void __user *arg) 696 void __user *arg)
697{ 697{
698 xfs_fsop_geom_v1_t fsgeo; 698 xfs_fsop_geom_t fsgeo;
699 int error; 699 int error;
700 700
701 error = xfs_fs_geometry(mp, (xfs_fsop_geom_t *)&fsgeo, 3); 701 error = xfs_fs_geometry(mp, &fsgeo, 3);
702 if (error) 702 if (error)
703 return -error; 703 return -error;
704 704
705 if (copy_to_user(arg, &fsgeo, sizeof(fsgeo))) 705 /*
706 * Caller should have passed an argument of type
707 * xfs_fsop_geom_v1_t. This is a proper subset of the
708 * xfs_fsop_geom_t that xfs_fs_geometry() fills in.
709 */
710 if (copy_to_user(arg, &fsgeo, sizeof(xfs_fsop_geom_v1_t)))
706 return -XFS_ERROR(EFAULT); 711 return -XFS_ERROR(EFAULT);
707 return 0; 712 return 0;
708} 713}
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index cec89dd5d7d2..85668efb3e3e 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -53,6 +53,9 @@ xfs_fs_geometry(
53 xfs_fsop_geom_t *geo, 53 xfs_fsop_geom_t *geo,
54 int new_version) 54 int new_version)
55{ 55{
56
57 memset(geo, 0, sizeof(*geo));
58
56 geo->blocksize = mp->m_sb.sb_blocksize; 59 geo->blocksize = mp->m_sb.sb_blocksize;
57 geo->rtextsize = mp->m_sb.sb_rextsize; 60 geo->rtextsize = mp->m_sb.sb_rextsize;
58 geo->agblocks = mp->m_sb.sb_agblocks; 61 geo->agblocks = mp->m_sb.sb_agblocks;