aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/afs/write.c1
-rw-r--r--fs/aio.c52
-rw-r--r--fs/block_dev.c30
-rw-r--r--fs/btrfs/acl.c6
-rw-r--r--fs/btrfs/compression.c27
-rw-r--r--fs/btrfs/ctree.h3
-rw-r--r--fs/btrfs/disk-io.c15
-rw-r--r--fs/btrfs/export.c2
-rw-r--r--fs/btrfs/extent-tree.c109
-rw-r--r--fs/btrfs/extent_io.c192
-rw-r--r--fs/btrfs/extent_io.h2
-rw-r--r--fs/btrfs/extent_map.c4
-rw-r--r--fs/btrfs/file-item.c5
-rw-r--r--fs/btrfs/file.c21
-rw-r--r--fs/btrfs/free-space-cache.c162
-rw-r--r--fs/btrfs/inode.c164
-rw-r--r--fs/btrfs/ioctl.c36
-rw-r--r--fs/btrfs/lzo.c21
-rw-r--r--fs/btrfs/ordered-data.c2
-rw-r--r--fs/btrfs/print-tree.c1
-rw-r--r--fs/btrfs/relocation.c43
-rw-r--r--fs/btrfs/super.c16
-rw-r--r--fs/btrfs/transaction.c5
-rw-r--r--fs/btrfs/tree-log.c35
-rw-r--r--fs/btrfs/volumes.c34
-rw-r--r--fs/ceph/caps.c43
-rw-r--r--fs/ceph/dir.c25
-rw-r--r--fs/ceph/inode.c12
-rw-r--r--fs/ceph/mds_client.c10
-rw-r--r--fs/ceph/snap.c14
-rw-r--r--fs/ceph/super.c2
-rw-r--r--fs/ceph/xattr.c3
-rw-r--r--fs/cifs/Kconfig1
-rw-r--r--fs/cifs/Makefile2
-rw-r--r--fs/cifs/README5
-rw-r--r--fs/cifs/cifs_dfs_ref.c10
-rw-r--r--fs/cifs/cifsacl.c4
-rw-r--r--fs/cifs/cifsencrypt.c38
-rw-r--r--fs/cifs/cifsencrypt.h33
-rw-r--r--fs/cifs/cifsfs.c15
-rw-r--r--fs/cifs/cifsfs.h6
-rw-r--r--fs/cifs/cifsglob.h37
-rw-r--r--fs/cifs/cifsproto.h11
-rw-r--r--fs/cifs/cifssmb.c8
-rw-r--r--fs/cifs/connect.c70
-rw-r--r--fs/cifs/file.c211
-rw-r--r--fs/cifs/link.c59
-rw-r--r--fs/cifs/md4.c205
-rw-r--r--fs/cifs/md5.c366
-rw-r--r--fs/cifs/md5.h38
-rw-r--r--fs/cifs/misc.c116
-rw-r--r--fs/cifs/netmisc.c8
-rw-r--r--fs/cifs/readdir.c3
-rw-r--r--fs/cifs/sess.c8
-rw-r--r--fs/cifs/smbdes.c1
-rw-r--r--fs/cifs/smbencrypt.c92
-rw-r--r--fs/cifs/transport.c69
-rw-r--r--fs/dcache.c4
-rw-r--r--fs/dlm/lowcomms.c6
-rw-r--r--fs/ecryptfs/dentry.c22
-rw-r--r--fs/ecryptfs/ecryptfs_kernel.h3
-rw-r--r--fs/ecryptfs/file.c1
-rw-r--r--fs/ecryptfs/inode.c138
-rw-r--r--fs/eventfd.c12
-rw-r--r--fs/eventpoll.c111
-rw-r--r--fs/exec.c4
-rw-r--r--fs/exofs/inode.c2
-rw-r--r--fs/exofs/namei.c8
-rw-r--r--fs/ext2/namei.c9
-rw-r--r--fs/ext4/ext4.h10
-rw-r--r--fs/ext4/extents.c10
-rw-r--r--fs/ext4/file.c60
-rw-r--r--fs/ext4/mballoc.c100
-rw-r--r--fs/ext4/page-io.c36
-rw-r--r--fs/ext4/super.c66
-rw-r--r--fs/fcntl.c2
-rw-r--r--fs/file_table.c2
-rw-r--r--fs/fuse/dir.c7
-rw-r--r--fs/fuse/file.c52
-rw-r--r--fs/fuse/fuse_i.h6
-rw-r--r--fs/gfs2/glock.c4
-rw-r--r--fs/gfs2/main.c11
-rw-r--r--fs/hfs/dir.c50
-rw-r--r--fs/hfsplus/extents.c4
-rw-r--r--fs/hfsplus/part_tbl.c4
-rw-r--r--fs/hfsplus/super.c106
-rw-r--r--fs/hfsplus/wrapper.c4
-rw-r--r--fs/inode.c31
-rw-r--r--fs/internal.h2
-rw-r--r--fs/ioctl.c7
-rw-r--r--fs/jbd2/journal.c9
-rw-r--r--fs/jbd2/transaction.c21
-rw-r--r--fs/lockd/host.c9
-rw-r--r--fs/minix/namei.c8
-rw-r--r--fs/namei.c151
-rw-r--r--fs/namespace.c2
-rw-r--r--fs/nfs/callback.c109
-rw-r--r--fs/nfs/callback.h4
-rw-r--r--fs/nfs/callback_proc.c12
-rw-r--r--fs/nfs/callback_xdr.c5
-rw-r--r--fs/nfs/client.c15
-rw-r--r--fs/nfs/delegation.c6
-rw-r--r--fs/nfs/direct.c34
-rw-r--r--fs/nfs/inode.c26
-rw-r--r--fs/nfs/internal.h3
-rw-r--r--fs/nfs/nfs3acl.c4
-rw-r--r--fs/nfs/nfs3xdr.c5
-rw-r--r--fs/nfs/nfs4filelayoutdev.c9
-rw-r--r--fs/nfs/nfs4proc.c74
-rw-r--r--fs/nfs/nfs4state.c6
-rw-r--r--fs/nfs/nfs4xdr.c9
-rw-r--r--fs/nfs/pnfs.c2
-rw-r--r--fs/nfs/write.c2
-rw-r--r--fs/nfs_common/nfsacl.c54
-rw-r--r--fs/nfsd/nfs4callback.c6
-rw-r--r--fs/nfsd/nfs4state.c186
-rw-r--r--fs/nfsd/nfs4xdr.c8
-rw-r--r--fs/nfsd/state.h5
-rw-r--r--fs/nfsd/vfs.c21
-rw-r--r--fs/nilfs2/btnode.c5
-rw-r--r--fs/nilfs2/btnode.h1
-rw-r--r--fs/nilfs2/mdt.c4
-rw-r--r--fs/nilfs2/namei.c8
-rw-r--r--fs/nilfs2/page.c13
-rw-r--r--fs/nilfs2/page.h1
-rw-r--r--fs/nilfs2/segment.c3
-rw-r--r--fs/nilfs2/super.c5
-rw-r--r--fs/ntfs/mft.c11
-rw-r--r--fs/ocfs2/journal.h6
-rw-r--r--fs/ocfs2/refcounttree.c7
-rw-r--r--fs/ocfs2/super.c28
-rw-r--r--fs/open.c2
-rw-r--r--fs/partitions/ldm.c5
-rw-r--r--fs/partitions/mac.c17
-rw-r--r--fs/posix_acl.c17
-rw-r--r--fs/proc/array.c3
-rw-r--r--fs/proc/consoles.c4
-rw-r--r--fs/proc/proc_devtree.c2
-rw-r--r--fs/reiserfs/namei.c2
-rw-r--r--fs/squashfs/block.c8
-rw-r--r--fs/squashfs/xz_wrapper.c6
-rw-r--r--fs/squashfs/zlib_wrapper.c6
-rw-r--r--fs/super.c5
-rw-r--r--fs/sysv/namei.c8
-rw-r--r--fs/udf/namei.c11
-rw-r--r--fs/ufs/namei.c9
-rw-r--r--fs/xfs/linux-2.6/xfs_discard.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl.c31
-rw-r--r--fs/xfs/quota/xfs_qm.c46
-rw-r--r--fs/xfs/xfs_alloc.h16
-rw-r--r--fs/xfs/xfs_bmap.c61
-rw-r--r--fs/xfs/xfs_buf_item.c12
-rw-r--r--fs/xfs/xfs_extfree_item.c3
-rw-r--r--fs/xfs/xfs_fsops.c3
-rw-r--r--fs/xfs/xfs_iomap.c7
-rw-r--r--fs/xfs/xfs_log.h2
-rw-r--r--fs/xfs/xfs_log_cil.c15
-rw-r--r--fs/xfs/xfs_trans.c41
158 files changed, 2677 insertions, 1971 deletions
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 15690bb1d3b5..789b3afb3423 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -140,6 +140,7 @@ int afs_write_begin(struct file *file, struct address_space *mapping,
140 candidate->first = candidate->last = index; 140 candidate->first = candidate->last = index;
141 candidate->offset_first = from; 141 candidate->offset_first = from;
142 candidate->to_last = to; 142 candidate->to_last = to;
143 INIT_LIST_HEAD(&candidate->link);
143 candidate->usage = 1; 144 candidate->usage = 1;
144 candidate->state = AFS_WBACK_PENDING; 145 candidate->state = AFS_WBACK_PENDING;
145 init_waitqueue_head(&candidate->waitq); 146 init_waitqueue_head(&candidate->waitq);
diff --git a/fs/aio.c b/fs/aio.c
index fc557a3be0a9..26869cde3953 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -239,15 +239,23 @@ static void __put_ioctx(struct kioctx *ctx)
239 call_rcu(&ctx->rcu_head, ctx_rcu_free); 239 call_rcu(&ctx->rcu_head, ctx_rcu_free);
240} 240}
241 241
242#define get_ioctx(kioctx) do { \ 242static inline void get_ioctx(struct kioctx *kioctx)
243 BUG_ON(atomic_read(&(kioctx)->users) <= 0); \ 243{
244 atomic_inc(&(kioctx)->users); \ 244 BUG_ON(atomic_read(&kioctx->users) <= 0);
245} while (0) 245 atomic_inc(&kioctx->users);
246#define put_ioctx(kioctx) do { \ 246}
247 BUG_ON(atomic_read(&(kioctx)->users) <= 0); \ 247
248 if (unlikely(atomic_dec_and_test(&(kioctx)->users))) \ 248static inline int try_get_ioctx(struct kioctx *kioctx)
249 __put_ioctx(kioctx); \ 249{
250} while (0) 250 return atomic_inc_not_zero(&kioctx->users);
251}
252
253static inline void put_ioctx(struct kioctx *kioctx)
254{
255 BUG_ON(atomic_read(&kioctx->users) <= 0);
256 if (unlikely(atomic_dec_and_test(&kioctx->users)))
257 __put_ioctx(kioctx);
258}
251 259
252/* ioctx_alloc 260/* ioctx_alloc
253 * Allocates and initializes an ioctx. Returns an ERR_PTR if it failed. 261 * Allocates and initializes an ioctx. Returns an ERR_PTR if it failed.
@@ -601,8 +609,13 @@ static struct kioctx *lookup_ioctx(unsigned long ctx_id)
601 rcu_read_lock(); 609 rcu_read_lock();
602 610
603 hlist_for_each_entry_rcu(ctx, n, &mm->ioctx_list, list) { 611 hlist_for_each_entry_rcu(ctx, n, &mm->ioctx_list, list) {
604 if (ctx->user_id == ctx_id && !ctx->dead) { 612 /*
605 get_ioctx(ctx); 613 * RCU protects us against accessing freed memory but
614 * we have to be careful not to get a reference when the
615 * reference count already dropped to 0 (ctx->dead test
616 * is unreliable because of races).
617 */
618 if (ctx->user_id == ctx_id && !ctx->dead && try_get_ioctx(ctx)){
606 ret = ctx; 619 ret = ctx;
607 break; 620 break;
608 } 621 }
@@ -1629,6 +1642,23 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
1629 goto out_put_req; 1642 goto out_put_req;
1630 1643
1631 spin_lock_irq(&ctx->ctx_lock); 1644 spin_lock_irq(&ctx->ctx_lock);
1645 /*
1646 * We could have raced with io_destroy() and are currently holding a
1647 * reference to ctx which should be destroyed. We cannot submit IO
1648 * since ctx gets freed as soon as io_submit() puts its reference. The
1649 * check here is reliable: io_destroy() sets ctx->dead before waiting
1650 * for outstanding IO and the barrier between these two is realized by
1651 * unlock of mm->ioctx_lock and lock of ctx->ctx_lock. Analogously we
1652 * increment ctx->reqs_active before checking for ctx->dead and the
1653 * barrier is realized by unlock and lock of ctx->ctx_lock. Thus if we
1654 * don't see ctx->dead set here, io_destroy() waits for our IO to
1655 * finish.
1656 */
1657 if (ctx->dead) {
1658 spin_unlock_irq(&ctx->ctx_lock);
1659 ret = -EINVAL;
1660 goto out_put_req;
1661 }
1632 aio_run_iocb(req); 1662 aio_run_iocb(req);
1633 if (!list_empty(&ctx->run_list)) { 1663 if (!list_empty(&ctx->run_list)) {
1634 /* drain the run list */ 1664 /* drain the run list */
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 333a7bb4cb9c..889287019599 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -873,6 +873,11 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
873 ret = add_symlink(bdev->bd_part->holder_dir, &disk_to_dev(disk)->kobj); 873 ret = add_symlink(bdev->bd_part->holder_dir, &disk_to_dev(disk)->kobj);
874 if (ret) 874 if (ret)
875 goto out_del; 875 goto out_del;
876 /*
877 * bdev could be deleted beneath us which would implicitly destroy
878 * the holder directory. Hold on to it.
879 */
880 kobject_get(bdev->bd_part->holder_dir);
876 881
877 list_add(&holder->list, &bdev->bd_holder_disks); 882 list_add(&holder->list, &bdev->bd_holder_disks);
878 goto out_unlock; 883 goto out_unlock;
@@ -909,6 +914,7 @@ void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk)
909 del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj); 914 del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
910 del_symlink(bdev->bd_part->holder_dir, 915 del_symlink(bdev->bd_part->holder_dir,
911 &disk_to_dev(disk)->kobj); 916 &disk_to_dev(disk)->kobj);
917 kobject_put(bdev->bd_part->holder_dir);
912 list_del_init(&holder->list); 918 list_del_init(&holder->list);
913 kfree(holder); 919 kfree(holder);
914 } 920 }
@@ -922,14 +928,15 @@ EXPORT_SYMBOL_GPL(bd_unlink_disk_holder);
922 * flush_disk - invalidates all buffer-cache entries on a disk 928 * flush_disk - invalidates all buffer-cache entries on a disk
923 * 929 *
924 * @bdev: struct block device to be flushed 930 * @bdev: struct block device to be flushed
931 * @kill_dirty: flag to guide handling of dirty inodes
925 * 932 *
926 * Invalidates all buffer-cache entries on a disk. It should be called 933 * Invalidates all buffer-cache entries on a disk. It should be called
927 * when a disk has been changed -- either by a media change or online 934 * when a disk has been changed -- either by a media change or online
928 * resize. 935 * resize.
929 */ 936 */
930static void flush_disk(struct block_device *bdev) 937static void flush_disk(struct block_device *bdev, bool kill_dirty)
931{ 938{
932 if (__invalidate_device(bdev)) { 939 if (__invalidate_device(bdev, kill_dirty)) {
933 char name[BDEVNAME_SIZE] = ""; 940 char name[BDEVNAME_SIZE] = "";
934 941
935 if (bdev->bd_disk) 942 if (bdev->bd_disk)
@@ -966,7 +973,7 @@ void check_disk_size_change(struct gendisk *disk, struct block_device *bdev)
966 "%s: detected capacity change from %lld to %lld\n", 973 "%s: detected capacity change from %lld to %lld\n",
967 name, bdev_size, disk_size); 974 name, bdev_size, disk_size);
968 i_size_write(bdev->bd_inode, disk_size); 975 i_size_write(bdev->bd_inode, disk_size);
969 flush_disk(bdev); 976 flush_disk(bdev, false);
970 } 977 }
971} 978}
972EXPORT_SYMBOL(check_disk_size_change); 979EXPORT_SYMBOL(check_disk_size_change);
@@ -1019,7 +1026,7 @@ int check_disk_change(struct block_device *bdev)
1019 if (!(events & DISK_EVENT_MEDIA_CHANGE)) 1026 if (!(events & DISK_EVENT_MEDIA_CHANGE))
1020 return 0; 1027 return 0;
1021 1028
1022 flush_disk(bdev); 1029 flush_disk(bdev, true);
1023 if (bdops->revalidate_disk) 1030 if (bdops->revalidate_disk)
1024 bdops->revalidate_disk(bdev->bd_disk); 1031 bdops->revalidate_disk(bdev->bd_disk);
1025 return 1; 1032 return 1;
@@ -1215,12 +1222,6 @@ int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder)
1215 1222
1216 res = __blkdev_get(bdev, mode, 0); 1223 res = __blkdev_get(bdev, mode, 0);
1217 1224
1218 /* __blkdev_get() may alter read only status, check it afterwards */
1219 if (!res && (mode & FMODE_WRITE) && bdev_read_only(bdev)) {
1220 __blkdev_put(bdev, mode, 0);
1221 res = -EACCES;
1222 }
1223
1224 if (whole) { 1225 if (whole) {
1225 /* finish claiming */ 1226 /* finish claiming */
1226 mutex_lock(&bdev->bd_mutex); 1227 mutex_lock(&bdev->bd_mutex);
@@ -1298,6 +1299,11 @@ struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
1298 if (err) 1299 if (err)
1299 return ERR_PTR(err); 1300 return ERR_PTR(err);
1300 1301
1302 if ((mode & FMODE_WRITE) && bdev_read_only(bdev)) {
1303 blkdev_put(bdev, mode);
1304 return ERR_PTR(-EACCES);
1305 }
1306
1301 return bdev; 1307 return bdev;
1302} 1308}
1303EXPORT_SYMBOL(blkdev_get_by_path); 1309EXPORT_SYMBOL(blkdev_get_by_path);
@@ -1601,7 +1607,7 @@ fail:
1601} 1607}
1602EXPORT_SYMBOL(lookup_bdev); 1608EXPORT_SYMBOL(lookup_bdev);
1603 1609
1604int __invalidate_device(struct block_device *bdev) 1610int __invalidate_device(struct block_device *bdev, bool kill_dirty)
1605{ 1611{
1606 struct super_block *sb = get_super(bdev); 1612 struct super_block *sb = get_super(bdev);
1607 int res = 0; 1613 int res = 0;
@@ -1614,7 +1620,7 @@ int __invalidate_device(struct block_device *bdev)
1614 * hold). 1620 * hold).
1615 */ 1621 */
1616 shrink_dcache_sb(sb); 1622 shrink_dcache_sb(sb);
1617 res = invalidate_inodes(sb); 1623 res = invalidate_inodes(sb, kill_dirty);
1618 drop_super(sb); 1624 drop_super(sb);
1619 } 1625 }
1620 invalidate_bdev(bdev); 1626 invalidate_bdev(bdev);
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 15b5ca2a2606..9c949348510b 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -37,6 +37,9 @@ static struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
37 char *value = NULL; 37 char *value = NULL;
38 struct posix_acl *acl; 38 struct posix_acl *acl;
39 39
40 if (!IS_POSIXACL(inode))
41 return NULL;
42
40 acl = get_cached_acl(inode, type); 43 acl = get_cached_acl(inode, type);
41 if (acl != ACL_NOT_CACHED) 44 if (acl != ACL_NOT_CACHED)
42 return acl; 45 return acl;
@@ -84,6 +87,9 @@ static int btrfs_xattr_acl_get(struct dentry *dentry, const char *name,
84 struct posix_acl *acl; 87 struct posix_acl *acl;
85 int ret = 0; 88 int ret = 0;
86 89
90 if (!IS_POSIXACL(dentry->d_inode))
91 return -EOPNOTSUPP;
92
87 acl = btrfs_get_acl(dentry->d_inode, type); 93 acl = btrfs_get_acl(dentry->d_inode, type);
88 94
89 if (IS_ERR(acl)) 95 if (IS_ERR(acl))
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index f745287fbf2e..4d2110eafe29 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -562,7 +562,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
562 u64 em_len; 562 u64 em_len;
563 u64 em_start; 563 u64 em_start;
564 struct extent_map *em; 564 struct extent_map *em;
565 int ret; 565 int ret = -ENOMEM;
566 u32 *sums; 566 u32 *sums;
567 567
568 tree = &BTRFS_I(inode)->io_tree; 568 tree = &BTRFS_I(inode)->io_tree;
@@ -577,6 +577,9 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
577 577
578 compressed_len = em->block_len; 578 compressed_len = em->block_len;
579 cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS); 579 cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS);
580 if (!cb)
581 goto out;
582
580 atomic_set(&cb->pending_bios, 0); 583 atomic_set(&cb->pending_bios, 0);
581 cb->errors = 0; 584 cb->errors = 0;
582 cb->inode = inode; 585 cb->inode = inode;
@@ -597,13 +600,18 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
597 600
598 nr_pages = (compressed_len + PAGE_CACHE_SIZE - 1) / 601 nr_pages = (compressed_len + PAGE_CACHE_SIZE - 1) /
599 PAGE_CACHE_SIZE; 602 PAGE_CACHE_SIZE;
600 cb->compressed_pages = kmalloc(sizeof(struct page *) * nr_pages, 603 cb->compressed_pages = kzalloc(sizeof(struct page *) * nr_pages,
601 GFP_NOFS); 604 GFP_NOFS);
605 if (!cb->compressed_pages)
606 goto fail1;
607
602 bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; 608 bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
603 609
604 for (page_index = 0; page_index < nr_pages; page_index++) { 610 for (page_index = 0; page_index < nr_pages; page_index++) {
605 cb->compressed_pages[page_index] = alloc_page(GFP_NOFS | 611 cb->compressed_pages[page_index] = alloc_page(GFP_NOFS |
606 __GFP_HIGHMEM); 612 __GFP_HIGHMEM);
613 if (!cb->compressed_pages[page_index])
614 goto fail2;
607 } 615 }
608 cb->nr_pages = nr_pages; 616 cb->nr_pages = nr_pages;
609 617
@@ -614,6 +622,8 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
614 cb->len = uncompressed_len; 622 cb->len = uncompressed_len;
615 623
616 comp_bio = compressed_bio_alloc(bdev, cur_disk_byte, GFP_NOFS); 624 comp_bio = compressed_bio_alloc(bdev, cur_disk_byte, GFP_NOFS);
625 if (!comp_bio)
626 goto fail2;
617 comp_bio->bi_private = cb; 627 comp_bio->bi_private = cb;
618 comp_bio->bi_end_io = end_compressed_bio_read; 628 comp_bio->bi_end_io = end_compressed_bio_read;
619 atomic_inc(&cb->pending_bios); 629 atomic_inc(&cb->pending_bios);
@@ -681,6 +691,17 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
681 691
682 bio_put(comp_bio); 692 bio_put(comp_bio);
683 return 0; 693 return 0;
694
695fail2:
696 for (page_index = 0; page_index < nr_pages; page_index++)
697 free_page((unsigned long)cb->compressed_pages[page_index]);
698
699 kfree(cb->compressed_pages);
700fail1:
701 kfree(cb);
702out:
703 free_extent_map(em);
704 return ret;
684} 705}
685 706
686static struct list_head comp_idle_workspace[BTRFS_COMPRESS_TYPES]; 707static struct list_head comp_idle_workspace[BTRFS_COMPRESS_TYPES];
@@ -900,7 +921,7 @@ int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page,
900 return ret; 921 return ret;
901} 922}
902 923
903void __exit btrfs_exit_compress(void) 924void btrfs_exit_compress(void)
904{ 925{
905 free_workspaces(); 926 free_workspaces();
906} 927}
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 2c98b3af6052..6f820fa23df4 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1254,6 +1254,7 @@ struct btrfs_root {
1254#define BTRFS_MOUNT_SPACE_CACHE (1 << 12) 1254#define BTRFS_MOUNT_SPACE_CACHE (1 << 12)
1255#define BTRFS_MOUNT_CLEAR_CACHE (1 << 13) 1255#define BTRFS_MOUNT_CLEAR_CACHE (1 << 13)
1256#define BTRFS_MOUNT_USER_SUBVOL_RM_ALLOWED (1 << 14) 1256#define BTRFS_MOUNT_USER_SUBVOL_RM_ALLOWED (1 << 14)
1257#define BTRFS_MOUNT_ENOSPC_DEBUG (1 << 15)
1257 1258
1258#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) 1259#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt)
1259#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) 1260#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt)
@@ -2218,6 +2219,8 @@ int btrfs_error_unpin_extent_range(struct btrfs_root *root,
2218 u64 start, u64 end); 2219 u64 start, u64 end);
2219int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr, 2220int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr,
2220 u64 num_bytes); 2221 u64 num_bytes);
2222int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
2223 struct btrfs_root *root, u64 type);
2221 2224
2222/* ctree.c */ 2225/* ctree.c */
2223int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, 2226int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index b531c36455d8..e1aa8d607bc7 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -359,10 +359,14 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
359 359
360 tree = &BTRFS_I(page->mapping->host)->io_tree; 360 tree = &BTRFS_I(page->mapping->host)->io_tree;
361 361
362 if (page->private == EXTENT_PAGE_PRIVATE) 362 if (page->private == EXTENT_PAGE_PRIVATE) {
363 WARN_ON(1);
363 goto out; 364 goto out;
364 if (!page->private) 365 }
366 if (!page->private) {
367 WARN_ON(1);
365 goto out; 368 goto out;
369 }
366 len = page->private >> 2; 370 len = page->private >> 2;
367 WARN_ON(len == 0); 371 WARN_ON(len == 0);
368 372
@@ -1550,6 +1554,7 @@ static int transaction_kthread(void *arg)
1550 spin_unlock(&root->fs_info->new_trans_lock); 1554 spin_unlock(&root->fs_info->new_trans_lock);
1551 1555
1552 trans = btrfs_join_transaction(root, 1); 1556 trans = btrfs_join_transaction(root, 1);
1557 BUG_ON(IS_ERR(trans));
1553 if (transid == trans->transid) { 1558 if (transid == trans->transid) {
1554 ret = btrfs_commit_transaction(trans, root); 1559 ret = btrfs_commit_transaction(trans, root);
1555 BUG_ON(ret); 1560 BUG_ON(ret);
@@ -2453,10 +2458,14 @@ int btrfs_commit_super(struct btrfs_root *root)
2453 up_write(&root->fs_info->cleanup_work_sem); 2458 up_write(&root->fs_info->cleanup_work_sem);
2454 2459
2455 trans = btrfs_join_transaction(root, 1); 2460 trans = btrfs_join_transaction(root, 1);
2461 if (IS_ERR(trans))
2462 return PTR_ERR(trans);
2456 ret = btrfs_commit_transaction(trans, root); 2463 ret = btrfs_commit_transaction(trans, root);
2457 BUG_ON(ret); 2464 BUG_ON(ret);
2458 /* run commit again to drop the original snapshot */ 2465 /* run commit again to drop the original snapshot */
2459 trans = btrfs_join_transaction(root, 1); 2466 trans = btrfs_join_transaction(root, 1);
2467 if (IS_ERR(trans))
2468 return PTR_ERR(trans);
2460 btrfs_commit_transaction(trans, root); 2469 btrfs_commit_transaction(trans, root);
2461 ret = btrfs_write_and_wait_transaction(NULL, root); 2470 ret = btrfs_write_and_wait_transaction(NULL, root);
2462 BUG_ON(ret); 2471 BUG_ON(ret);
@@ -2554,6 +2563,8 @@ int close_ctree(struct btrfs_root *root)
2554 kfree(fs_info->chunk_root); 2563 kfree(fs_info->chunk_root);
2555 kfree(fs_info->dev_root); 2564 kfree(fs_info->dev_root);
2556 kfree(fs_info->csum_root); 2565 kfree(fs_info->csum_root);
2566 kfree(fs_info);
2567
2557 return 0; 2568 return 0;
2558} 2569}
2559 2570
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c
index 9786963b07e5..ff27d7a477b2 100644
--- a/fs/btrfs/export.c
+++ b/fs/btrfs/export.c
@@ -171,6 +171,8 @@ static struct dentry *btrfs_get_parent(struct dentry *child)
171 int ret; 171 int ret;
172 172
173 path = btrfs_alloc_path(); 173 path = btrfs_alloc_path();
174 if (!path)
175 return ERR_PTR(-ENOMEM);
174 176
175 if (dir->i_ino == BTRFS_FIRST_FREE_OBJECTID) { 177 if (dir->i_ino == BTRFS_FIRST_FREE_OBJECTID) {
176 key.objectid = root->root_key.objectid; 178 key.objectid = root->root_key.objectid;
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index b55269340cec..588ff9849873 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -320,11 +320,6 @@ static int caching_kthread(void *data)
320 if (!path) 320 if (!path)
321 return -ENOMEM; 321 return -ENOMEM;
322 322
323 exclude_super_stripes(extent_root, block_group);
324 spin_lock(&block_group->space_info->lock);
325 block_group->space_info->bytes_readonly += block_group->bytes_super;
326 spin_unlock(&block_group->space_info->lock);
327
328 last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET); 323 last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
329 324
330 /* 325 /*
@@ -467,8 +462,10 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
467 cache->cached = BTRFS_CACHE_NO; 462 cache->cached = BTRFS_CACHE_NO;
468 } 463 }
469 spin_unlock(&cache->lock); 464 spin_unlock(&cache->lock);
470 if (ret == 1) 465 if (ret == 1) {
466 free_excluded_extents(fs_info->extent_root, cache);
471 return 0; 467 return 0;
468 }
472 } 469 }
473 470
474 if (load_cache_only) 471 if (load_cache_only)
@@ -3344,8 +3341,10 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
3344 u64 reserved; 3341 u64 reserved;
3345 u64 max_reclaim; 3342 u64 max_reclaim;
3346 u64 reclaimed = 0; 3343 u64 reclaimed = 0;
3344 long time_left;
3347 int pause = 1; 3345 int pause = 1;
3348 int nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT; 3346 int nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT;
3347 int loops = 0;
3349 3348
3350 block_rsv = &root->fs_info->delalloc_block_rsv; 3349 block_rsv = &root->fs_info->delalloc_block_rsv;
3351 space_info = block_rsv->space_info; 3350 space_info = block_rsv->space_info;
@@ -3358,7 +3357,7 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
3358 3357
3359 max_reclaim = min(reserved, to_reclaim); 3358 max_reclaim = min(reserved, to_reclaim);
3360 3359
3361 while (1) { 3360 while (loops < 1024) {
3362 /* have the flusher threads jump in and do some IO */ 3361 /* have the flusher threads jump in and do some IO */
3363 smp_mb(); 3362 smp_mb();
3364 nr_pages = min_t(unsigned long, nr_pages, 3363 nr_pages = min_t(unsigned long, nr_pages,
@@ -3366,8 +3365,12 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
3366 writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages); 3365 writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages);
3367 3366
3368 spin_lock(&space_info->lock); 3367 spin_lock(&space_info->lock);
3369 if (reserved > space_info->bytes_reserved) 3368 if (reserved > space_info->bytes_reserved) {
3369 loops = 0;
3370 reclaimed += reserved - space_info->bytes_reserved; 3370 reclaimed += reserved - space_info->bytes_reserved;
3371 } else {
3372 loops++;
3373 }
3371 reserved = space_info->bytes_reserved; 3374 reserved = space_info->bytes_reserved;
3372 spin_unlock(&space_info->lock); 3375 spin_unlock(&space_info->lock);
3373 3376
@@ -3378,7 +3381,12 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
3378 return -EAGAIN; 3381 return -EAGAIN;
3379 3382
3380 __set_current_state(TASK_INTERRUPTIBLE); 3383 __set_current_state(TASK_INTERRUPTIBLE);
3381 schedule_timeout(pause); 3384 time_left = schedule_timeout(pause);
3385
3386 /* We were interrupted, exit */
3387 if (time_left)
3388 break;
3389
3382 pause <<= 1; 3390 pause <<= 1;
3383 if (pause > HZ / 10) 3391 if (pause > HZ / 10)
3384 pause = HZ / 10; 3392 pause = HZ / 10;
@@ -3588,8 +3596,20 @@ void block_rsv_release_bytes(struct btrfs_block_rsv *block_rsv,
3588 3596
3589 if (num_bytes > 0) { 3597 if (num_bytes > 0) {
3590 if (dest) { 3598 if (dest) {
3591 block_rsv_add_bytes(dest, num_bytes, 0); 3599 spin_lock(&dest->lock);
3592 } else { 3600 if (!dest->full) {
3601 u64 bytes_to_add;
3602
3603 bytes_to_add = dest->size - dest->reserved;
3604 bytes_to_add = min(num_bytes, bytes_to_add);
3605 dest->reserved += bytes_to_add;
3606 if (dest->reserved >= dest->size)
3607 dest->full = 1;
3608 num_bytes -= bytes_to_add;
3609 }
3610 spin_unlock(&dest->lock);
3611 }
3612 if (num_bytes) {
3593 spin_lock(&space_info->lock); 3613 spin_lock(&space_info->lock);
3594 space_info->bytes_reserved -= num_bytes; 3614 space_info->bytes_reserved -= num_bytes;
3595 spin_unlock(&space_info->lock); 3615 spin_unlock(&space_info->lock);
@@ -4012,6 +4032,7 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
4012 4032
4013 num_bytes = ALIGN(num_bytes, root->sectorsize); 4033 num_bytes = ALIGN(num_bytes, root->sectorsize);
4014 atomic_dec(&BTRFS_I(inode)->outstanding_extents); 4034 atomic_dec(&BTRFS_I(inode)->outstanding_extents);
4035 WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents) < 0);
4015 4036
4016 spin_lock(&BTRFS_I(inode)->accounting_lock); 4037 spin_lock(&BTRFS_I(inode)->accounting_lock);
4017 nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents); 4038 nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents);
@@ -5355,7 +5376,7 @@ again:
5355 num_bytes, data, 1); 5376 num_bytes, data, 1);
5356 goto again; 5377 goto again;
5357 } 5378 }
5358 if (ret == -ENOSPC) { 5379 if (ret == -ENOSPC && btrfs_test_opt(root, ENOSPC_DEBUG)) {
5359 struct btrfs_space_info *sinfo; 5380 struct btrfs_space_info *sinfo;
5360 5381
5361 sinfo = __find_space_info(root->fs_info, data); 5382 sinfo = __find_space_info(root->fs_info, data);
@@ -5633,6 +5654,7 @@ use_block_rsv(struct btrfs_trans_handle *trans,
5633 struct btrfs_root *root, u32 blocksize) 5654 struct btrfs_root *root, u32 blocksize)
5634{ 5655{
5635 struct btrfs_block_rsv *block_rsv; 5656 struct btrfs_block_rsv *block_rsv;
5657 struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
5636 int ret; 5658 int ret;
5637 5659
5638 block_rsv = get_block_rsv(trans, root); 5660 block_rsv = get_block_rsv(trans, root);
@@ -5640,14 +5662,39 @@ use_block_rsv(struct btrfs_trans_handle *trans,
5640 if (block_rsv->size == 0) { 5662 if (block_rsv->size == 0) {
5641 ret = reserve_metadata_bytes(trans, root, block_rsv, 5663 ret = reserve_metadata_bytes(trans, root, block_rsv,
5642 blocksize, 0); 5664 blocksize, 0);
5643 if (ret) 5665 /*
5666 * If we couldn't reserve metadata bytes try and use some from
5667 * the global reserve.
5668 */
5669 if (ret && block_rsv != global_rsv) {
5670 ret = block_rsv_use_bytes(global_rsv, blocksize);
5671 if (!ret)
5672 return global_rsv;
5673 return ERR_PTR(ret);
5674 } else if (ret) {
5644 return ERR_PTR(ret); 5675 return ERR_PTR(ret);
5676 }
5645 return block_rsv; 5677 return block_rsv;
5646 } 5678 }
5647 5679
5648 ret = block_rsv_use_bytes(block_rsv, blocksize); 5680 ret = block_rsv_use_bytes(block_rsv, blocksize);
5649 if (!ret) 5681 if (!ret)
5650 return block_rsv; 5682 return block_rsv;
5683 if (ret) {
5684 WARN_ON(1);
5685 ret = reserve_metadata_bytes(trans, root, block_rsv, blocksize,
5686 0);
5687 if (!ret) {
5688 spin_lock(&block_rsv->lock);
5689 block_rsv->size += blocksize;
5690 spin_unlock(&block_rsv->lock);
5691 return block_rsv;
5692 } else if (ret && block_rsv != global_rsv) {
5693 ret = block_rsv_use_bytes(global_rsv, blocksize);
5694 if (!ret)
5695 return global_rsv;
5696 }
5697 }
5651 5698
5652 return ERR_PTR(-ENOSPC); 5699 return ERR_PTR(-ENOSPC);
5653} 5700}
@@ -6221,6 +6268,8 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
6221 BUG_ON(!wc); 6268 BUG_ON(!wc);
6222 6269
6223 trans = btrfs_start_transaction(tree_root, 0); 6270 trans = btrfs_start_transaction(tree_root, 0);
6271 BUG_ON(IS_ERR(trans));
6272
6224 if (block_rsv) 6273 if (block_rsv)
6225 trans->block_rsv = block_rsv; 6274 trans->block_rsv = block_rsv;
6226 6275
@@ -6318,6 +6367,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
6318 6367
6319 btrfs_end_transaction_throttle(trans, tree_root); 6368 btrfs_end_transaction_throttle(trans, tree_root);
6320 trans = btrfs_start_transaction(tree_root, 0); 6369 trans = btrfs_start_transaction(tree_root, 0);
6370 BUG_ON(IS_ERR(trans));
6321 if (block_rsv) 6371 if (block_rsv)
6322 trans->block_rsv = block_rsv; 6372 trans->block_rsv = block_rsv;
6323 } 6373 }
@@ -6446,6 +6496,8 @@ static noinline int relocate_inode_pages(struct inode *inode, u64 start,
6446 int ret = 0; 6496 int ret = 0;
6447 6497
6448 ra = kzalloc(sizeof(*ra), GFP_NOFS); 6498 ra = kzalloc(sizeof(*ra), GFP_NOFS);
6499 if (!ra)
6500 return -ENOMEM;
6449 6501
6450 mutex_lock(&inode->i_mutex); 6502 mutex_lock(&inode->i_mutex);
6451 first_index = start >> PAGE_CACHE_SHIFT; 6503 first_index = start >> PAGE_CACHE_SHIFT;
@@ -6531,7 +6583,7 @@ static noinline int relocate_data_extent(struct inode *reloc_inode,
6531 u64 end = start + extent_key->offset - 1; 6583 u64 end = start + extent_key->offset - 1;
6532 6584
6533 em = alloc_extent_map(GFP_NOFS); 6585 em = alloc_extent_map(GFP_NOFS);
6534 BUG_ON(!em || IS_ERR(em)); 6586 BUG_ON(!em);
6535 6587
6536 em->start = start; 6588 em->start = start;
6537 em->len = extent_key->offset; 6589 em->len = extent_key->offset;
@@ -7477,7 +7529,7 @@ int btrfs_drop_dead_reloc_roots(struct btrfs_root *root)
7477 BUG_ON(reloc_root->commit_root != NULL); 7529 BUG_ON(reloc_root->commit_root != NULL);
7478 while (1) { 7530 while (1) {
7479 trans = btrfs_join_transaction(root, 1); 7531 trans = btrfs_join_transaction(root, 1);
7480 BUG_ON(!trans); 7532 BUG_ON(IS_ERR(trans));
7481 7533
7482 mutex_lock(&root->fs_info->drop_mutex); 7534 mutex_lock(&root->fs_info->drop_mutex);
7483 ret = btrfs_drop_snapshot(trans, reloc_root); 7535 ret = btrfs_drop_snapshot(trans, reloc_root);
@@ -7535,7 +7587,7 @@ int btrfs_cleanup_reloc_trees(struct btrfs_root *root)
7535 7587
7536 if (found) { 7588 if (found) {
7537 trans = btrfs_start_transaction(root, 1); 7589 trans = btrfs_start_transaction(root, 1);
7538 BUG_ON(!trans); 7590 BUG_ON(IS_ERR(trans));
7539 ret = btrfs_commit_transaction(trans, root); 7591 ret = btrfs_commit_transaction(trans, root);
7540 BUG_ON(ret); 7592 BUG_ON(ret);
7541 } 7593 }
@@ -7779,7 +7831,7 @@ static noinline int relocate_one_extent(struct btrfs_root *extent_root,
7779 7831
7780 7832
7781 trans = btrfs_start_transaction(extent_root, 1); 7833 trans = btrfs_start_transaction(extent_root, 1);
7782 BUG_ON(!trans); 7834 BUG_ON(IS_ERR(trans));
7783 7835
7784 if (extent_key->objectid == 0) { 7836 if (extent_key->objectid == 0) {
7785 ret = del_extent_zero(trans, extent_root, path, extent_key); 7837 ret = del_extent_zero(trans, extent_root, path, extent_key);
@@ -8013,6 +8065,13 @@ out:
8013 return ret; 8065 return ret;
8014} 8066}
8015 8067
8068int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
8069 struct btrfs_root *root, u64 type)
8070{
8071 u64 alloc_flags = get_alloc_profile(root, type);
8072 return do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1);
8073}
8074
8016/* 8075/*
8017 * helper to account the unused space of all the readonly block group in the 8076 * helper to account the unused space of all the readonly block group in the
8018 * list. takes mirrors into account. 8077 * list. takes mirrors into account.
@@ -8270,6 +8329,13 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
8270 if (block_group->cached == BTRFS_CACHE_STARTED) 8329 if (block_group->cached == BTRFS_CACHE_STARTED)
8271 wait_block_group_cache_done(block_group); 8330 wait_block_group_cache_done(block_group);
8272 8331
8332 /*
8333 * We haven't cached this block group, which means we could
8334 * possibly have excluded extents on this block group.
8335 */
8336 if (block_group->cached == BTRFS_CACHE_NO)
8337 free_excluded_extents(info->extent_root, block_group);
8338
8273 btrfs_remove_free_space_cache(block_group); 8339 btrfs_remove_free_space_cache(block_group);
8274 btrfs_put_block_group(block_group); 8340 btrfs_put_block_group(block_group);
8275 8341
@@ -8385,6 +8451,13 @@ int btrfs_read_block_groups(struct btrfs_root *root)
8385 cache->sectorsize = root->sectorsize; 8451 cache->sectorsize = root->sectorsize;
8386 8452
8387 /* 8453 /*
8454 * We need to exclude the super stripes now so that the space
8455 * info has super bytes accounted for, otherwise we'll think
8456 * we have more space than we actually do.
8457 */
8458 exclude_super_stripes(root, cache);
8459
8460 /*
8388 * check for two cases, either we are full, and therefore 8461 * check for two cases, either we are full, and therefore
8389 * don't need to bother with the caching work since we won't 8462 * don't need to bother with the caching work since we won't
8390 * find any space, or we are empty, and we can just add all 8463 * find any space, or we are empty, and we can just add all
@@ -8392,12 +8465,10 @@ int btrfs_read_block_groups(struct btrfs_root *root)
8392 * time, particularly in the full case. 8465 * time, particularly in the full case.
8393 */ 8466 */
8394 if (found_key.offset == btrfs_block_group_used(&cache->item)) { 8467 if (found_key.offset == btrfs_block_group_used(&cache->item)) {
8395 exclude_super_stripes(root, cache);
8396 cache->last_byte_to_unpin = (u64)-1; 8468 cache->last_byte_to_unpin = (u64)-1;
8397 cache->cached = BTRFS_CACHE_FINISHED; 8469 cache->cached = BTRFS_CACHE_FINISHED;
8398 free_excluded_extents(root, cache); 8470 free_excluded_extents(root, cache);
8399 } else if (btrfs_block_group_used(&cache->item) == 0) { 8471 } else if (btrfs_block_group_used(&cache->item) == 0) {
8400 exclude_super_stripes(root, cache);
8401 cache->last_byte_to_unpin = (u64)-1; 8472 cache->last_byte_to_unpin = (u64)-1;
8402 cache->cached = BTRFS_CACHE_FINISHED; 8473 cache->cached = BTRFS_CACHE_FINISHED;
8403 add_new_free_space(cache, root->fs_info, 8474 add_new_free_space(cache, root->fs_info,
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 2e993cf1766e..fd3f172e94e6 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1433,12 +1433,13 @@ int extent_clear_unlock_delalloc(struct inode *inode,
1433 */ 1433 */
1434u64 count_range_bits(struct extent_io_tree *tree, 1434u64 count_range_bits(struct extent_io_tree *tree,
1435 u64 *start, u64 search_end, u64 max_bytes, 1435 u64 *start, u64 search_end, u64 max_bytes,
1436 unsigned long bits) 1436 unsigned long bits, int contig)
1437{ 1437{
1438 struct rb_node *node; 1438 struct rb_node *node;
1439 struct extent_state *state; 1439 struct extent_state *state;
1440 u64 cur_start = *start; 1440 u64 cur_start = *start;
1441 u64 total_bytes = 0; 1441 u64 total_bytes = 0;
1442 u64 last = 0;
1442 int found = 0; 1443 int found = 0;
1443 1444
1444 if (search_end <= cur_start) { 1445 if (search_end <= cur_start) {
@@ -1463,7 +1464,9 @@ u64 count_range_bits(struct extent_io_tree *tree,
1463 state = rb_entry(node, struct extent_state, rb_node); 1464 state = rb_entry(node, struct extent_state, rb_node);
1464 if (state->start > search_end) 1465 if (state->start > search_end)
1465 break; 1466 break;
1466 if (state->end >= cur_start && (state->state & bits)) { 1467 if (contig && found && state->start > last + 1)
1468 break;
1469 if (state->end >= cur_start && (state->state & bits) == bits) {
1467 total_bytes += min(search_end, state->end) + 1 - 1470 total_bytes += min(search_end, state->end) + 1 -
1468 max(cur_start, state->start); 1471 max(cur_start, state->start);
1469 if (total_bytes >= max_bytes) 1472 if (total_bytes >= max_bytes)
@@ -1472,6 +1475,9 @@ u64 count_range_bits(struct extent_io_tree *tree,
1472 *start = state->start; 1475 *start = state->start;
1473 found = 1; 1476 found = 1;
1474 } 1477 }
1478 last = state->end;
1479 } else if (contig && found) {
1480 break;
1475 } 1481 }
1476 node = rb_next(node); 1482 node = rb_next(node);
1477 if (!node) 1483 if (!node)
@@ -1865,7 +1871,7 @@ static int submit_one_bio(int rw, struct bio *bio, int mirror_num,
1865 bio_get(bio); 1871 bio_get(bio);
1866 1872
1867 if (tree->ops && tree->ops->submit_bio_hook) 1873 if (tree->ops && tree->ops->submit_bio_hook)
1868 tree->ops->submit_bio_hook(page->mapping->host, rw, bio, 1874 ret = tree->ops->submit_bio_hook(page->mapping->host, rw, bio,
1869 mirror_num, bio_flags, start); 1875 mirror_num, bio_flags, start);
1870 else 1876 else
1871 submit_bio(rw, bio); 1877 submit_bio(rw, bio);
@@ -1920,6 +1926,8 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
1920 nr = bio_get_nr_vecs(bdev); 1926 nr = bio_get_nr_vecs(bdev);
1921 1927
1922 bio = btrfs_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH); 1928 bio = btrfs_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH);
1929 if (!bio)
1930 return -ENOMEM;
1923 1931
1924 bio_add_page(bio, page, page_size, offset); 1932 bio_add_page(bio, page, page_size, offset);
1925 bio->bi_end_io = end_io_func; 1933 bio->bi_end_io = end_io_func;
@@ -1944,6 +1952,7 @@ void set_page_extent_mapped(struct page *page)
1944 1952
1945static void set_page_extent_head(struct page *page, unsigned long len) 1953static void set_page_extent_head(struct page *page, unsigned long len)
1946{ 1954{
1955 WARN_ON(!PagePrivate(page));
1947 set_page_private(page, EXTENT_PAGE_PRIVATE_FIRST_PAGE | len << 2); 1956 set_page_private(page, EXTENT_PAGE_PRIVATE_FIRST_PAGE | len << 2);
1948} 1957}
1949 1958
@@ -2126,7 +2135,7 @@ int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
2126 ret = __extent_read_full_page(tree, page, get_extent, &bio, 0, 2135 ret = __extent_read_full_page(tree, page, get_extent, &bio, 0,
2127 &bio_flags); 2136 &bio_flags);
2128 if (bio) 2137 if (bio)
2129 submit_one_bio(READ, bio, 0, bio_flags); 2138 ret = submit_one_bio(READ, bio, 0, bio_flags);
2130 return ret; 2139 return ret;
2131} 2140}
2132 2141
@@ -2819,9 +2828,17 @@ int try_release_extent_state(struct extent_map_tree *map,
2819 * at this point we can safely clear everything except the 2828 * at this point we can safely clear everything except the
2820 * locked bit and the nodatasum bit 2829 * locked bit and the nodatasum bit
2821 */ 2830 */
2822 clear_extent_bit(tree, start, end, 2831 ret = clear_extent_bit(tree, start, end,
2823 ~(EXTENT_LOCKED | EXTENT_NODATASUM), 2832 ~(EXTENT_LOCKED | EXTENT_NODATASUM),
2824 0, 0, NULL, mask); 2833 0, 0, NULL, mask);
2834
2835 /* if clear_extent_bit failed for enomem reasons,
2836 * we can't allow the release to continue.
2837 */
2838 if (ret < 0)
2839 ret = 0;
2840 else
2841 ret = 1;
2825 } 2842 }
2826 return ret; 2843 return ret;
2827} 2844}
@@ -2901,6 +2918,46 @@ out:
2901 return sector; 2918 return sector;
2902} 2919}
2903 2920
2921/*
2922 * helper function for fiemap, which doesn't want to see any holes.
2923 * This maps until we find something past 'last'
2924 */
2925static struct extent_map *get_extent_skip_holes(struct inode *inode,
2926 u64 offset,
2927 u64 last,
2928 get_extent_t *get_extent)
2929{
2930 u64 sectorsize = BTRFS_I(inode)->root->sectorsize;
2931 struct extent_map *em;
2932 u64 len;
2933
2934 if (offset >= last)
2935 return NULL;
2936
2937 while(1) {
2938 len = last - offset;
2939 if (len == 0)
2940 break;
2941 len = (len + sectorsize - 1) & ~(sectorsize - 1);
2942 em = get_extent(inode, NULL, 0, offset, len, 0);
2943 if (!em || IS_ERR(em))
2944 return em;
2945
2946 /* if this isn't a hole return it */
2947 if (!test_bit(EXTENT_FLAG_VACANCY, &em->flags) &&
2948 em->block_start != EXTENT_MAP_HOLE) {
2949 return em;
2950 }
2951
2952 /* this is a hole, advance to the next extent */
2953 offset = extent_map_end(em);
2954 free_extent_map(em);
2955 if (offset >= last)
2956 break;
2957 }
2958 return NULL;
2959}
2960
2904int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 2961int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2905 __u64 start, __u64 len, get_extent_t *get_extent) 2962 __u64 start, __u64 len, get_extent_t *get_extent)
2906{ 2963{
@@ -2910,16 +2967,19 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2910 u32 flags = 0; 2967 u32 flags = 0;
2911 u32 found_type; 2968 u32 found_type;
2912 u64 last; 2969 u64 last;
2970 u64 last_for_get_extent = 0;
2913 u64 disko = 0; 2971 u64 disko = 0;
2972 u64 isize = i_size_read(inode);
2914 struct btrfs_key found_key; 2973 struct btrfs_key found_key;
2915 struct extent_map *em = NULL; 2974 struct extent_map *em = NULL;
2916 struct extent_state *cached_state = NULL; 2975 struct extent_state *cached_state = NULL;
2917 struct btrfs_path *path; 2976 struct btrfs_path *path;
2918 struct btrfs_file_extent_item *item; 2977 struct btrfs_file_extent_item *item;
2919 int end = 0; 2978 int end = 0;
2920 u64 em_start = 0, em_len = 0; 2979 u64 em_start = 0;
2980 u64 em_len = 0;
2981 u64 em_end = 0;
2921 unsigned long emflags; 2982 unsigned long emflags;
2922 int hole = 0;
2923 2983
2924 if (len == 0) 2984 if (len == 0)
2925 return -EINVAL; 2985 return -EINVAL;
@@ -2929,6 +2989,10 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2929 return -ENOMEM; 2989 return -ENOMEM;
2930 path->leave_spinning = 1; 2990 path->leave_spinning = 1;
2931 2991
2992 /*
2993 * lookup the last file extent. We're not using i_size here
2994 * because there might be preallocation past i_size
2995 */
2932 ret = btrfs_lookup_file_extent(NULL, BTRFS_I(inode)->root, 2996 ret = btrfs_lookup_file_extent(NULL, BTRFS_I(inode)->root,
2933 path, inode->i_ino, -1, 0); 2997 path, inode->i_ino, -1, 0);
2934 if (ret < 0) { 2998 if (ret < 0) {
@@ -2942,18 +3006,38 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2942 btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]); 3006 btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]);
2943 found_type = btrfs_key_type(&found_key); 3007 found_type = btrfs_key_type(&found_key);
2944 3008
2945 /* No extents, just return */ 3009 /* No extents, but there might be delalloc bits */
2946 if (found_key.objectid != inode->i_ino || 3010 if (found_key.objectid != inode->i_ino ||
2947 found_type != BTRFS_EXTENT_DATA_KEY) { 3011 found_type != BTRFS_EXTENT_DATA_KEY) {
2948 btrfs_free_path(path); 3012 /* have to trust i_size as the end */
2949 return 0; 3013 last = (u64)-1;
3014 last_for_get_extent = isize;
3015 } else {
3016 /*
3017 * remember the start of the last extent. There are a
3018 * bunch of different factors that go into the length of the
3019 * extent, so its much less complex to remember where it started
3020 */
3021 last = found_key.offset;
3022 last_for_get_extent = last + 1;
2950 } 3023 }
2951 last = found_key.offset;
2952 btrfs_free_path(path); 3024 btrfs_free_path(path);
2953 3025
3026 /*
3027 * we might have some extents allocated but more delalloc past those
3028 * extents. so, we trust isize unless the start of the last extent is
3029 * beyond isize
3030 */
3031 if (last < isize) {
3032 last = (u64)-1;
3033 last_for_get_extent = isize;
3034 }
3035
2954 lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0, 3036 lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0,
2955 &cached_state, GFP_NOFS); 3037 &cached_state, GFP_NOFS);
2956 em = get_extent(inode, NULL, 0, off, max - off, 0); 3038
3039 em = get_extent_skip_holes(inode, off, last_for_get_extent,
3040 get_extent);
2957 if (!em) 3041 if (!em)
2958 goto out; 3042 goto out;
2959 if (IS_ERR(em)) { 3043 if (IS_ERR(em)) {
@@ -2962,19 +3046,14 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2962 } 3046 }
2963 3047
2964 while (!end) { 3048 while (!end) {
2965 hole = 0; 3049 off = extent_map_end(em);
2966 off = em->start + em->len;
2967 if (off >= max) 3050 if (off >= max)
2968 end = 1; 3051 end = 1;
2969 3052
2970 if (em->block_start == EXTENT_MAP_HOLE) {
2971 hole = 1;
2972 goto next;
2973 }
2974
2975 em_start = em->start; 3053 em_start = em->start;
2976 em_len = em->len; 3054 em_len = em->len;
2977 3055 em_end = extent_map_end(em);
3056 emflags = em->flags;
2978 disko = 0; 3057 disko = 0;
2979 flags = 0; 3058 flags = 0;
2980 3059
@@ -2993,37 +3072,29 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2993 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) 3072 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
2994 flags |= FIEMAP_EXTENT_ENCODED; 3073 flags |= FIEMAP_EXTENT_ENCODED;
2995 3074
2996next:
2997 emflags = em->flags;
2998 free_extent_map(em); 3075 free_extent_map(em);
2999 em = NULL; 3076 em = NULL;
3000 if (!end) { 3077 if ((em_start >= last) || em_len == (u64)-1 ||
3001 em = get_extent(inode, NULL, 0, off, max - off, 0); 3078 (last == (u64)-1 && isize <= em_end)) {
3002 if (!em)
3003 goto out;
3004 if (IS_ERR(em)) {
3005 ret = PTR_ERR(em);
3006 goto out;
3007 }
3008 emflags = em->flags;
3009 }
3010
3011 if (test_bit(EXTENT_FLAG_VACANCY, &emflags)) {
3012 flags |= FIEMAP_EXTENT_LAST; 3079 flags |= FIEMAP_EXTENT_LAST;
3013 end = 1; 3080 end = 1;
3014 } 3081 }
3015 3082
3016 if (em_start == last) { 3083 /* now scan forward to see if this is really the last extent. */
3084 em = get_extent_skip_holes(inode, off, last_for_get_extent,
3085 get_extent);
3086 if (IS_ERR(em)) {
3087 ret = PTR_ERR(em);
3088 goto out;
3089 }
3090 if (!em) {
3017 flags |= FIEMAP_EXTENT_LAST; 3091 flags |= FIEMAP_EXTENT_LAST;
3018 end = 1; 3092 end = 1;
3019 } 3093 }
3020 3094 ret = fiemap_fill_next_extent(fieinfo, em_start, disko,
3021 if (!hole) { 3095 em_len, flags);
3022 ret = fiemap_fill_next_extent(fieinfo, em_start, disko, 3096 if (ret)
3023 em_len, flags); 3097 goto out_free;
3024 if (ret)
3025 goto out_free;
3026 }
3027 } 3098 }
3028out_free: 3099out_free:
3029 free_extent_map(em); 3100 free_extent_map(em);
@@ -3192,7 +3263,13 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
3192 } 3263 }
3193 if (!PageUptodate(p)) 3264 if (!PageUptodate(p))
3194 uptodate = 0; 3265 uptodate = 0;
3195 unlock_page(p); 3266
3267 /*
3268 * see below about how we avoid a nasty race with release page
3269 * and why we unlock later
3270 */
3271 if (i != 0)
3272 unlock_page(p);
3196 } 3273 }
3197 if (uptodate) 3274 if (uptodate)
3198 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); 3275 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
@@ -3216,9 +3293,26 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
3216 atomic_inc(&eb->refs); 3293 atomic_inc(&eb->refs);
3217 spin_unlock(&tree->buffer_lock); 3294 spin_unlock(&tree->buffer_lock);
3218 radix_tree_preload_end(); 3295 radix_tree_preload_end();
3296
3297 /*
3298 * there is a race where release page may have
3299 * tried to find this extent buffer in the radix
3300 * but failed. It will tell the VM it is safe to
3301 * reclaim the, and it will clear the page private bit.
3302 * We must make sure to set the page private bit properly
3303 * after the extent buffer is in the radix tree so
3304 * it doesn't get lost
3305 */
3306 set_page_extent_mapped(eb->first_page);
3307 set_page_extent_head(eb->first_page, eb->len);
3308 if (!page0)
3309 unlock_page(eb->first_page);
3219 return eb; 3310 return eb;
3220 3311
3221free_eb: 3312free_eb:
3313 if (eb->first_page && !page0)
3314 unlock_page(eb->first_page);
3315
3222 if (!atomic_dec_and_test(&eb->refs)) 3316 if (!atomic_dec_and_test(&eb->refs))
3223 return exists; 3317 return exists;
3224 btrfs_release_extent_buffer(eb); 3318 btrfs_release_extent_buffer(eb);
@@ -3269,10 +3363,11 @@ int clear_extent_buffer_dirty(struct extent_io_tree *tree,
3269 continue; 3363 continue;
3270 3364
3271 lock_page(page); 3365 lock_page(page);
3366 WARN_ON(!PagePrivate(page));
3367
3368 set_page_extent_mapped(page);
3272 if (i == 0) 3369 if (i == 0)
3273 set_page_extent_head(page, eb->len); 3370 set_page_extent_head(page, eb->len);
3274 else
3275 set_page_private(page, EXTENT_PAGE_PRIVATE);
3276 3371
3277 clear_page_dirty_for_io(page); 3372 clear_page_dirty_for_io(page);
3278 spin_lock_irq(&page->mapping->tree_lock); 3373 spin_lock_irq(&page->mapping->tree_lock);
@@ -3462,6 +3557,13 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
3462 3557
3463 for (i = start_i; i < num_pages; i++) { 3558 for (i = start_i; i < num_pages; i++) {
3464 page = extent_buffer_page(eb, i); 3559 page = extent_buffer_page(eb, i);
3560
3561 WARN_ON(!PagePrivate(page));
3562
3563 set_page_extent_mapped(page);
3564 if (i == 0)
3565 set_page_extent_head(page, eb->len);
3566
3465 if (inc_all_pages) 3567 if (inc_all_pages)
3466 page_cache_get(page); 3568 page_cache_get(page);
3467 if (!PageUptodate(page)) { 3569 if (!PageUptodate(page)) {
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 7083cfafd061..9318dfefd59c 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -191,7 +191,7 @@ void extent_io_exit(void);
191 191
192u64 count_range_bits(struct extent_io_tree *tree, 192u64 count_range_bits(struct extent_io_tree *tree,
193 u64 *start, u64 search_end, 193 u64 *start, u64 search_end,
194 u64 max_bytes, unsigned long bits); 194 u64 max_bytes, unsigned long bits, int contig);
195 195
196void free_extent_state(struct extent_state *state); 196void free_extent_state(struct extent_state *state);
197int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, 197int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index b0e1fce12530..2b6c12e983b3 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -51,8 +51,8 @@ struct extent_map *alloc_extent_map(gfp_t mask)
51{ 51{
52 struct extent_map *em; 52 struct extent_map *em;
53 em = kmem_cache_alloc(extent_map_cache, mask); 53 em = kmem_cache_alloc(extent_map_cache, mask);
54 if (!em || IS_ERR(em)) 54 if (!em)
55 return em; 55 return NULL;
56 em->in_tree = 0; 56 em->in_tree = 0;
57 em->flags = 0; 57 em->flags = 0;
58 em->compress_type = BTRFS_COMPRESS_NONE; 58 em->compress_type = BTRFS_COMPRESS_NONE;
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index a562a250ae77..4f19a3e1bf32 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -536,6 +536,8 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans,
536 root = root->fs_info->csum_root; 536 root = root->fs_info->csum_root;
537 537
538 path = btrfs_alloc_path(); 538 path = btrfs_alloc_path();
539 if (!path)
540 return -ENOMEM;
539 541
540 while (1) { 542 while (1) {
541 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID; 543 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
@@ -548,7 +550,10 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans,
548 if (path->slots[0] == 0) 550 if (path->slots[0] == 0)
549 goto out; 551 goto out;
550 path->slots[0]--; 552 path->slots[0]--;
553 } else if (ret < 0) {
554 goto out;
551 } 555 }
556
552 leaf = path->nodes[0]; 557 leaf = path->nodes[0];
553 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 558 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
554 559
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index c800d58f3013..7084140d5940 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -186,6 +186,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
186 split = alloc_extent_map(GFP_NOFS); 186 split = alloc_extent_map(GFP_NOFS);
187 if (!split2) 187 if (!split2)
188 split2 = alloc_extent_map(GFP_NOFS); 188 split2 = alloc_extent_map(GFP_NOFS);
189 BUG_ON(!split || !split2);
189 190
190 write_lock(&em_tree->lock); 191 write_lock(&em_tree->lock);
191 em = lookup_extent_mapping(em_tree, start, len); 192 em = lookup_extent_mapping(em_tree, start, len);
@@ -793,8 +794,12 @@ again:
793 for (i = 0; i < num_pages; i++) { 794 for (i = 0; i < num_pages; i++) {
794 pages[i] = grab_cache_page(inode->i_mapping, index + i); 795 pages[i] = grab_cache_page(inode->i_mapping, index + i);
795 if (!pages[i]) { 796 if (!pages[i]) {
796 err = -ENOMEM; 797 int c;
797 BUG_ON(1); 798 for (c = i - 1; c >= 0; c--) {
799 unlock_page(pages[c]);
800 page_cache_release(pages[c]);
801 }
802 return -ENOMEM;
798 } 803 }
799 wait_on_page_writeback(pages[i]); 804 wait_on_page_writeback(pages[i]);
800 } 805 }
@@ -946,6 +951,10 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
946 PAGE_CACHE_SIZE, PAGE_CACHE_SIZE / 951 PAGE_CACHE_SIZE, PAGE_CACHE_SIZE /
947 (sizeof(struct page *))); 952 (sizeof(struct page *)));
948 pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL); 953 pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL);
954 if (!pages) {
955 ret = -ENOMEM;
956 goto out;
957 }
949 958
950 /* generic_write_checks can change our pos */ 959 /* generic_write_checks can change our pos */
951 start_pos = pos; 960 start_pos = pos;
@@ -984,8 +993,8 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
984 size_t write_bytes = min(iov_iter_count(&i), 993 size_t write_bytes = min(iov_iter_count(&i),
985 nrptrs * (size_t)PAGE_CACHE_SIZE - 994 nrptrs * (size_t)PAGE_CACHE_SIZE -
986 offset); 995 offset);
987 size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >> 996 size_t num_pages = (write_bytes + offset +
988 PAGE_CACHE_SHIFT; 997 PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
989 998
990 WARN_ON(num_pages > nrptrs); 999 WARN_ON(num_pages > nrptrs);
991 memset(pages, 0, sizeof(struct page *) * nrptrs); 1000 memset(pages, 0, sizeof(struct page *) * nrptrs);
@@ -1015,8 +1024,8 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
1015 1024
1016 copied = btrfs_copy_from_user(pos, num_pages, 1025 copied = btrfs_copy_from_user(pos, num_pages,
1017 write_bytes, pages, &i); 1026 write_bytes, pages, &i);
1018 dirty_pages = (copied + PAGE_CACHE_SIZE - 1) >> 1027 dirty_pages = (copied + offset + PAGE_CACHE_SIZE - 1) >>
1019 PAGE_CACHE_SHIFT; 1028 PAGE_CACHE_SHIFT;
1020 1029
1021 if (num_pages > dirty_pages) { 1030 if (num_pages > dirty_pages) {
1022 if (copied > 0) 1031 if (copied > 0)
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 60d684266959..a0390657451b 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -987,11 +987,18 @@ tree_search_offset(struct btrfs_block_group_cache *block_group,
987 return entry; 987 return entry;
988} 988}
989 989
990static void unlink_free_space(struct btrfs_block_group_cache *block_group, 990static inline void
991 struct btrfs_free_space *info) 991__unlink_free_space(struct btrfs_block_group_cache *block_group,
992 struct btrfs_free_space *info)
992{ 993{
993 rb_erase(&info->offset_index, &block_group->free_space_offset); 994 rb_erase(&info->offset_index, &block_group->free_space_offset);
994 block_group->free_extents--; 995 block_group->free_extents--;
996}
997
998static void unlink_free_space(struct btrfs_block_group_cache *block_group,
999 struct btrfs_free_space *info)
1000{
1001 __unlink_free_space(block_group, info);
995 block_group->free_space -= info->bytes; 1002 block_group->free_space -= info->bytes;
996} 1003}
997 1004
@@ -1016,14 +1023,18 @@ static void recalculate_thresholds(struct btrfs_block_group_cache *block_group)
1016 u64 max_bytes; 1023 u64 max_bytes;
1017 u64 bitmap_bytes; 1024 u64 bitmap_bytes;
1018 u64 extent_bytes; 1025 u64 extent_bytes;
1026 u64 size = block_group->key.offset;
1019 1027
1020 /* 1028 /*
1021 * The goal is to keep the total amount of memory used per 1gb of space 1029 * The goal is to keep the total amount of memory used per 1gb of space
1022 * at or below 32k, so we need to adjust how much memory we allow to be 1030 * at or below 32k, so we need to adjust how much memory we allow to be
1023 * used by extent based free space tracking 1031 * used by extent based free space tracking
1024 */ 1032 */
1025 max_bytes = MAX_CACHE_BYTES_PER_GIG * 1033 if (size < 1024 * 1024 * 1024)
1026 (div64_u64(block_group->key.offset, 1024 * 1024 * 1024)); 1034 max_bytes = MAX_CACHE_BYTES_PER_GIG;
1035 else
1036 max_bytes = MAX_CACHE_BYTES_PER_GIG *
1037 div64_u64(size, 1024 * 1024 * 1024);
1027 1038
1028 /* 1039 /*
1029 * we want to account for 1 more bitmap than what we have so we can make 1040 * we want to account for 1 more bitmap than what we have so we can make
@@ -1171,6 +1182,16 @@ static void add_new_bitmap(struct btrfs_block_group_cache *block_group,
1171 recalculate_thresholds(block_group); 1182 recalculate_thresholds(block_group);
1172} 1183}
1173 1184
1185static void free_bitmap(struct btrfs_block_group_cache *block_group,
1186 struct btrfs_free_space *bitmap_info)
1187{
1188 unlink_free_space(block_group, bitmap_info);
1189 kfree(bitmap_info->bitmap);
1190 kfree(bitmap_info);
1191 block_group->total_bitmaps--;
1192 recalculate_thresholds(block_group);
1193}
1194
1174static noinline int remove_from_bitmap(struct btrfs_block_group_cache *block_group, 1195static noinline int remove_from_bitmap(struct btrfs_block_group_cache *block_group,
1175 struct btrfs_free_space *bitmap_info, 1196 struct btrfs_free_space *bitmap_info,
1176 u64 *offset, u64 *bytes) 1197 u64 *offset, u64 *bytes)
@@ -1195,6 +1216,7 @@ again:
1195 */ 1216 */
1196 search_start = *offset; 1217 search_start = *offset;
1197 search_bytes = *bytes; 1218 search_bytes = *bytes;
1219 search_bytes = min(search_bytes, end - search_start + 1);
1198 ret = search_bitmap(block_group, bitmap_info, &search_start, 1220 ret = search_bitmap(block_group, bitmap_info, &search_start,
1199 &search_bytes); 1221 &search_bytes);
1200 BUG_ON(ret < 0 || search_start != *offset); 1222 BUG_ON(ret < 0 || search_start != *offset);
@@ -1211,13 +1233,8 @@ again:
1211 1233
1212 if (*bytes) { 1234 if (*bytes) {
1213 struct rb_node *next = rb_next(&bitmap_info->offset_index); 1235 struct rb_node *next = rb_next(&bitmap_info->offset_index);
1214 if (!bitmap_info->bytes) { 1236 if (!bitmap_info->bytes)
1215 unlink_free_space(block_group, bitmap_info); 1237 free_bitmap(block_group, bitmap_info);
1216 kfree(bitmap_info->bitmap);
1217 kfree(bitmap_info);
1218 block_group->total_bitmaps--;
1219 recalculate_thresholds(block_group);
1220 }
1221 1238
1222 /* 1239 /*
1223 * no entry after this bitmap, but we still have bytes to 1240 * no entry after this bitmap, but we still have bytes to
@@ -1250,13 +1267,8 @@ again:
1250 return -EAGAIN; 1267 return -EAGAIN;
1251 1268
1252 goto again; 1269 goto again;
1253 } else if (!bitmap_info->bytes) { 1270 } else if (!bitmap_info->bytes)
1254 unlink_free_space(block_group, bitmap_info); 1271 free_bitmap(block_group, bitmap_info);
1255 kfree(bitmap_info->bitmap);
1256 kfree(bitmap_info);
1257 block_group->total_bitmaps--;
1258 recalculate_thresholds(block_group);
1259 }
1260 1272
1261 return 0; 1273 return 0;
1262} 1274}
@@ -1359,22 +1371,14 @@ out:
1359 return ret; 1371 return ret;
1360} 1372}
1361 1373
1362int btrfs_add_free_space(struct btrfs_block_group_cache *block_group, 1374bool try_merge_free_space(struct btrfs_block_group_cache *block_group,
1363 u64 offset, u64 bytes) 1375 struct btrfs_free_space *info, bool update_stat)
1364{ 1376{
1365 struct btrfs_free_space *right_info = NULL; 1377 struct btrfs_free_space *left_info;
1366 struct btrfs_free_space *left_info = NULL; 1378 struct btrfs_free_space *right_info;
1367 struct btrfs_free_space *info = NULL; 1379 bool merged = false;
1368 int ret = 0; 1380 u64 offset = info->offset;
1369 1381 u64 bytes = info->bytes;
1370 info = kzalloc(sizeof(struct btrfs_free_space), GFP_NOFS);
1371 if (!info)
1372 return -ENOMEM;
1373
1374 info->offset = offset;
1375 info->bytes = bytes;
1376
1377 spin_lock(&block_group->tree_lock);
1378 1382
1379 /* 1383 /*
1380 * first we want to see if there is free space adjacent to the range we 1384 * first we want to see if there is free space adjacent to the range we
@@ -1388,37 +1392,62 @@ int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
1388 else 1392 else
1389 left_info = tree_search_offset(block_group, offset - 1, 0, 0); 1393 left_info = tree_search_offset(block_group, offset - 1, 0, 0);
1390 1394
1391 /*
1392 * If there was no extent directly to the left or right of this new
1393 * extent then we know we're going to have to allocate a new extent, so
1394 * before we do that see if we need to drop this into a bitmap
1395 */
1396 if ((!left_info || left_info->bitmap) &&
1397 (!right_info || right_info->bitmap)) {
1398 ret = insert_into_bitmap(block_group, info);
1399
1400 if (ret < 0) {
1401 goto out;
1402 } else if (ret) {
1403 ret = 0;
1404 goto out;
1405 }
1406 }
1407
1408 if (right_info && !right_info->bitmap) { 1395 if (right_info && !right_info->bitmap) {
1409 unlink_free_space(block_group, right_info); 1396 if (update_stat)
1397 unlink_free_space(block_group, right_info);
1398 else
1399 __unlink_free_space(block_group, right_info);
1410 info->bytes += right_info->bytes; 1400 info->bytes += right_info->bytes;
1411 kfree(right_info); 1401 kfree(right_info);
1402 merged = true;
1412 } 1403 }
1413 1404
1414 if (left_info && !left_info->bitmap && 1405 if (left_info && !left_info->bitmap &&
1415 left_info->offset + left_info->bytes == offset) { 1406 left_info->offset + left_info->bytes == offset) {
1416 unlink_free_space(block_group, left_info); 1407 if (update_stat)
1408 unlink_free_space(block_group, left_info);
1409 else
1410 __unlink_free_space(block_group, left_info);
1417 info->offset = left_info->offset; 1411 info->offset = left_info->offset;
1418 info->bytes += left_info->bytes; 1412 info->bytes += left_info->bytes;
1419 kfree(left_info); 1413 kfree(left_info);
1414 merged = true;
1420 } 1415 }
1421 1416
1417 return merged;
1418}
1419
1420int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
1421 u64 offset, u64 bytes)
1422{
1423 struct btrfs_free_space *info;
1424 int ret = 0;
1425
1426 info = kzalloc(sizeof(struct btrfs_free_space), GFP_NOFS);
1427 if (!info)
1428 return -ENOMEM;
1429
1430 info->offset = offset;
1431 info->bytes = bytes;
1432
1433 spin_lock(&block_group->tree_lock);
1434
1435 if (try_merge_free_space(block_group, info, true))
1436 goto link;
1437
1438 /*
1439 * There was no extent directly to the left or right of this new
1440 * extent then we know we're going to have to allocate a new extent, so
1441 * before we do that see if we need to drop this into a bitmap
1442 */
1443 ret = insert_into_bitmap(block_group, info);
1444 if (ret < 0) {
1445 goto out;
1446 } else if (ret) {
1447 ret = 0;
1448 goto out;
1449 }
1450link:
1422 ret = link_free_space(block_group, info); 1451 ret = link_free_space(block_group, info);
1423 if (ret) 1452 if (ret)
1424 kfree(info); 1453 kfree(info);
@@ -1621,6 +1650,7 @@ __btrfs_return_cluster_to_free_space(
1621 node = rb_next(&entry->offset_index); 1650 node = rb_next(&entry->offset_index);
1622 rb_erase(&entry->offset_index, &cluster->root); 1651 rb_erase(&entry->offset_index, &cluster->root);
1623 BUG_ON(entry->bitmap); 1652 BUG_ON(entry->bitmap);
1653 try_merge_free_space(block_group, entry, false);
1624 tree_insert_offset(&block_group->free_space_offset, 1654 tree_insert_offset(&block_group->free_space_offset,
1625 entry->offset, &entry->offset_index, 0); 1655 entry->offset, &entry->offset_index, 0);
1626 } 1656 }
@@ -1685,13 +1715,8 @@ u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
1685 ret = offset; 1715 ret = offset;
1686 if (entry->bitmap) { 1716 if (entry->bitmap) {
1687 bitmap_clear_bits(block_group, entry, offset, bytes); 1717 bitmap_clear_bits(block_group, entry, offset, bytes);
1688 if (!entry->bytes) { 1718 if (!entry->bytes)
1689 unlink_free_space(block_group, entry); 1719 free_bitmap(block_group, entry);
1690 kfree(entry->bitmap);
1691 kfree(entry);
1692 block_group->total_bitmaps--;
1693 recalculate_thresholds(block_group);
1694 }
1695 } else { 1720 } else {
1696 unlink_free_space(block_group, entry); 1721 unlink_free_space(block_group, entry);
1697 entry->offset += bytes; 1722 entry->offset += bytes;
@@ -1789,6 +1814,8 @@ static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
1789 1814
1790 ret = search_start; 1815 ret = search_start;
1791 bitmap_clear_bits(block_group, entry, ret, bytes); 1816 bitmap_clear_bits(block_group, entry, ret, bytes);
1817 if (entry->bytes == 0)
1818 free_bitmap(block_group, entry);
1792out: 1819out:
1793 spin_unlock(&cluster->lock); 1820 spin_unlock(&cluster->lock);
1794 spin_unlock(&block_group->tree_lock); 1821 spin_unlock(&block_group->tree_lock);
@@ -1842,15 +1869,26 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
1842 entry->offset += bytes; 1869 entry->offset += bytes;
1843 entry->bytes -= bytes; 1870 entry->bytes -= bytes;
1844 1871
1845 if (entry->bytes == 0) { 1872 if (entry->bytes == 0)
1846 rb_erase(&entry->offset_index, &cluster->root); 1873 rb_erase(&entry->offset_index, &cluster->root);
1847 kfree(entry);
1848 }
1849 break; 1874 break;
1850 } 1875 }
1851out: 1876out:
1852 spin_unlock(&cluster->lock); 1877 spin_unlock(&cluster->lock);
1853 1878
1879 if (!ret)
1880 return 0;
1881
1882 spin_lock(&block_group->tree_lock);
1883
1884 block_group->free_space -= bytes;
1885 if (entry->bytes == 0) {
1886 block_group->free_extents--;
1887 kfree(entry);
1888 }
1889
1890 spin_unlock(&block_group->tree_lock);
1891
1854 return ret; 1892 return ret;
1855} 1893}
1856 1894
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 160b55b3e132..0efdb65953c5 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -416,7 +416,7 @@ again:
416 } 416 }
417 if (start == 0) { 417 if (start == 0) {
418 trans = btrfs_join_transaction(root, 1); 418 trans = btrfs_join_transaction(root, 1);
419 BUG_ON(!trans); 419 BUG_ON(IS_ERR(trans));
420 btrfs_set_trans_block_group(trans, inode); 420 btrfs_set_trans_block_group(trans, inode);
421 trans->block_rsv = &root->fs_info->delalloc_block_rsv; 421 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
422 422
@@ -612,6 +612,7 @@ retry:
612 GFP_NOFS); 612 GFP_NOFS);
613 613
614 trans = btrfs_join_transaction(root, 1); 614 trans = btrfs_join_transaction(root, 1);
615 BUG_ON(IS_ERR(trans));
615 ret = btrfs_reserve_extent(trans, root, 616 ret = btrfs_reserve_extent(trans, root,
616 async_extent->compressed_size, 617 async_extent->compressed_size,
617 async_extent->compressed_size, 618 async_extent->compressed_size,
@@ -643,6 +644,7 @@ retry:
643 async_extent->ram_size - 1, 0); 644 async_extent->ram_size - 1, 0);
644 645
645 em = alloc_extent_map(GFP_NOFS); 646 em = alloc_extent_map(GFP_NOFS);
647 BUG_ON(!em);
646 em->start = async_extent->start; 648 em->start = async_extent->start;
647 em->len = async_extent->ram_size; 649 em->len = async_extent->ram_size;
648 em->orig_start = em->start; 650 em->orig_start = em->start;
@@ -771,7 +773,7 @@ static noinline int cow_file_range(struct inode *inode,
771 773
772 BUG_ON(root == root->fs_info->tree_root); 774 BUG_ON(root == root->fs_info->tree_root);
773 trans = btrfs_join_transaction(root, 1); 775 trans = btrfs_join_transaction(root, 1);
774 BUG_ON(!trans); 776 BUG_ON(IS_ERR(trans));
775 btrfs_set_trans_block_group(trans, inode); 777 btrfs_set_trans_block_group(trans, inode);
776 trans->block_rsv = &root->fs_info->delalloc_block_rsv; 778 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
777 779
@@ -819,6 +821,7 @@ static noinline int cow_file_range(struct inode *inode,
819 BUG_ON(ret); 821 BUG_ON(ret);
820 822
821 em = alloc_extent_map(GFP_NOFS); 823 em = alloc_extent_map(GFP_NOFS);
824 BUG_ON(!em);
822 em->start = start; 825 em->start = start;
823 em->orig_start = em->start; 826 em->orig_start = em->start;
824 ram_size = ins.offset; 827 ram_size = ins.offset;
@@ -1049,7 +1052,7 @@ static noinline int run_delalloc_nocow(struct inode *inode,
1049 } else { 1052 } else {
1050 trans = btrfs_join_transaction(root, 1); 1053 trans = btrfs_join_transaction(root, 1);
1051 } 1054 }
1052 BUG_ON(!trans); 1055 BUG_ON(IS_ERR(trans));
1053 1056
1054 cow_start = (u64)-1; 1057 cow_start = (u64)-1;
1055 cur_offset = start; 1058 cur_offset = start;
@@ -1168,6 +1171,7 @@ out_check:
1168 struct extent_map_tree *em_tree; 1171 struct extent_map_tree *em_tree;
1169 em_tree = &BTRFS_I(inode)->extent_tree; 1172 em_tree = &BTRFS_I(inode)->extent_tree;
1170 em = alloc_extent_map(GFP_NOFS); 1173 em = alloc_extent_map(GFP_NOFS);
1174 BUG_ON(!em);
1171 em->start = cur_offset; 1175 em->start = cur_offset;
1172 em->orig_start = em->start; 1176 em->orig_start = em->start;
1173 em->len = num_bytes; 1177 em->len = num_bytes;
@@ -1557,6 +1561,7 @@ out:
1557out_page: 1561out_page:
1558 unlock_page(page); 1562 unlock_page(page);
1559 page_cache_release(page); 1563 page_cache_release(page);
1564 kfree(fixup);
1560} 1565}
1561 1566
1562/* 1567/*
@@ -1703,7 +1708,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1703 trans = btrfs_join_transaction_nolock(root, 1); 1708 trans = btrfs_join_transaction_nolock(root, 1);
1704 else 1709 else
1705 trans = btrfs_join_transaction(root, 1); 1710 trans = btrfs_join_transaction(root, 1);
1706 BUG_ON(!trans); 1711 BUG_ON(IS_ERR(trans));
1707 btrfs_set_trans_block_group(trans, inode); 1712 btrfs_set_trans_block_group(trans, inode);
1708 trans->block_rsv = &root->fs_info->delalloc_block_rsv; 1713 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
1709 ret = btrfs_update_inode(trans, root, inode); 1714 ret = btrfs_update_inode(trans, root, inode);
@@ -1720,6 +1725,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1720 trans = btrfs_join_transaction_nolock(root, 1); 1725 trans = btrfs_join_transaction_nolock(root, 1);
1721 else 1726 else
1722 trans = btrfs_join_transaction(root, 1); 1727 trans = btrfs_join_transaction(root, 1);
1728 BUG_ON(IS_ERR(trans));
1723 btrfs_set_trans_block_group(trans, inode); 1729 btrfs_set_trans_block_group(trans, inode);
1724 trans->block_rsv = &root->fs_info->delalloc_block_rsv; 1730 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
1725 1731
@@ -1907,7 +1913,7 @@ static int btrfs_clean_io_failures(struct inode *inode, u64 start)
1907 1913
1908 private = 0; 1914 private = 0;
1909 if (count_range_bits(&BTRFS_I(inode)->io_failure_tree, &private, 1915 if (count_range_bits(&BTRFS_I(inode)->io_failure_tree, &private,
1910 (u64)-1, 1, EXTENT_DIRTY)) { 1916 (u64)-1, 1, EXTENT_DIRTY, 0)) {
1911 ret = get_state_private(&BTRFS_I(inode)->io_failure_tree, 1917 ret = get_state_private(&BTRFS_I(inode)->io_failure_tree,
1912 start, &private_failure); 1918 start, &private_failure);
1913 if (ret == 0) { 1919 if (ret == 0) {
@@ -2354,6 +2360,7 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
2354 */ 2360 */
2355 if (is_bad_inode(inode)) { 2361 if (is_bad_inode(inode)) {
2356 trans = btrfs_start_transaction(root, 0); 2362 trans = btrfs_start_transaction(root, 0);
2363 BUG_ON(IS_ERR(trans));
2357 btrfs_orphan_del(trans, inode); 2364 btrfs_orphan_del(trans, inode);
2358 btrfs_end_transaction(trans, root); 2365 btrfs_end_transaction(trans, root);
2359 iput(inode); 2366 iput(inode);
@@ -2381,6 +2388,7 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
2381 2388
2382 if (root->orphan_block_rsv || root->orphan_item_inserted) { 2389 if (root->orphan_block_rsv || root->orphan_item_inserted) {
2383 trans = btrfs_join_transaction(root, 1); 2390 trans = btrfs_join_transaction(root, 1);
2391 BUG_ON(IS_ERR(trans));
2384 btrfs_end_transaction(trans, root); 2392 btrfs_end_transaction(trans, root);
2385 } 2393 }
2386 2394
@@ -2641,7 +2649,7 @@ int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
2641 path = btrfs_alloc_path(); 2649 path = btrfs_alloc_path();
2642 if (!path) { 2650 if (!path) {
2643 ret = -ENOMEM; 2651 ret = -ENOMEM;
2644 goto err; 2652 goto out;
2645 } 2653 }
2646 2654
2647 path->leave_spinning = 1; 2655 path->leave_spinning = 1;
@@ -2714,9 +2722,10 @@ static int check_path_shared(struct btrfs_root *root,
2714 struct extent_buffer *eb; 2722 struct extent_buffer *eb;
2715 int level; 2723 int level;
2716 u64 refs = 1; 2724 u64 refs = 1;
2717 int uninitialized_var(ret);
2718 2725
2719 for (level = 0; level < BTRFS_MAX_LEVEL; level++) { 2726 for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
2727 int ret;
2728
2720 if (!path->nodes[level]) 2729 if (!path->nodes[level])
2721 break; 2730 break;
2722 eb = path->nodes[level]; 2731 eb = path->nodes[level];
@@ -2727,7 +2736,7 @@ static int check_path_shared(struct btrfs_root *root,
2727 if (refs > 1) 2736 if (refs > 1)
2728 return 1; 2737 return 1;
2729 } 2738 }
2730 return ret; /* XXX callers? */ 2739 return 0;
2731} 2740}
2732 2741
2733/* 2742/*
@@ -4134,7 +4143,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
4134 } 4143 }
4135 srcu_read_unlock(&root->fs_info->subvol_srcu, index); 4144 srcu_read_unlock(&root->fs_info->subvol_srcu, index);
4136 4145
4137 if (root != sub_root) { 4146 if (!IS_ERR(inode) && root != sub_root) {
4138 down_read(&root->fs_info->cleanup_work_sem); 4147 down_read(&root->fs_info->cleanup_work_sem);
4139 if (!(inode->i_sb->s_flags & MS_RDONLY)) 4148 if (!(inode->i_sb->s_flags & MS_RDONLY))
4140 btrfs_orphan_cleanup(sub_root); 4149 btrfs_orphan_cleanup(sub_root);
@@ -4347,6 +4356,8 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc)
4347 trans = btrfs_join_transaction_nolock(root, 1); 4356 trans = btrfs_join_transaction_nolock(root, 1);
4348 else 4357 else
4349 trans = btrfs_join_transaction(root, 1); 4358 trans = btrfs_join_transaction(root, 1);
4359 if (IS_ERR(trans))
4360 return PTR_ERR(trans);
4350 btrfs_set_trans_block_group(trans, inode); 4361 btrfs_set_trans_block_group(trans, inode);
4351 if (nolock) 4362 if (nolock)
4352 ret = btrfs_end_transaction_nolock(trans, root); 4363 ret = btrfs_end_transaction_nolock(trans, root);
@@ -4372,6 +4383,7 @@ void btrfs_dirty_inode(struct inode *inode)
4372 return; 4383 return;
4373 4384
4374 trans = btrfs_join_transaction(root, 1); 4385 trans = btrfs_join_transaction(root, 1);
4386 BUG_ON(IS_ERR(trans));
4375 btrfs_set_trans_block_group(trans, inode); 4387 btrfs_set_trans_block_group(trans, inode);
4376 4388
4377 ret = btrfs_update_inode(trans, root, inode); 4389 ret = btrfs_update_inode(trans, root, inode);
@@ -5176,6 +5188,8 @@ again:
5176 em = NULL; 5188 em = NULL;
5177 btrfs_release_path(root, path); 5189 btrfs_release_path(root, path);
5178 trans = btrfs_join_transaction(root, 1); 5190 trans = btrfs_join_transaction(root, 1);
5191 if (IS_ERR(trans))
5192 return ERR_CAST(trans);
5179 goto again; 5193 goto again;
5180 } 5194 }
5181 map = kmap(page); 5195 map = kmap(page);
@@ -5266,6 +5280,128 @@ out:
5266 return em; 5280 return em;
5267} 5281}
5268 5282
5283struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *page,
5284 size_t pg_offset, u64 start, u64 len,
5285 int create)
5286{
5287 struct extent_map *em;
5288 struct extent_map *hole_em = NULL;
5289 u64 range_start = start;
5290 u64 end;
5291 u64 found;
5292 u64 found_end;
5293 int err = 0;
5294
5295 em = btrfs_get_extent(inode, page, pg_offset, start, len, create);
5296 if (IS_ERR(em))
5297 return em;
5298 if (em) {
5299 /*
5300 * if our em maps to a hole, there might
5301 * actually be delalloc bytes behind it
5302 */
5303 if (em->block_start != EXTENT_MAP_HOLE)
5304 return em;
5305 else
5306 hole_em = em;
5307 }
5308
5309 /* check to see if we've wrapped (len == -1 or similar) */
5310 end = start + len;
5311 if (end < start)
5312 end = (u64)-1;
5313 else
5314 end -= 1;
5315
5316 em = NULL;
5317
5318 /* ok, we didn't find anything, lets look for delalloc */
5319 found = count_range_bits(&BTRFS_I(inode)->io_tree, &range_start,
5320 end, len, EXTENT_DELALLOC, 1);
5321 found_end = range_start + found;
5322 if (found_end < range_start)
5323 found_end = (u64)-1;
5324
5325 /*
5326 * we didn't find anything useful, return
5327 * the original results from get_extent()
5328 */
5329 if (range_start > end || found_end <= start) {
5330 em = hole_em;
5331 hole_em = NULL;
5332 goto out;
5333 }
5334
5335 /* adjust the range_start to make sure it doesn't
5336 * go backwards from the start they passed in
5337 */
5338 range_start = max(start,range_start);
5339 found = found_end - range_start;
5340
5341 if (found > 0) {
5342 u64 hole_start = start;
5343 u64 hole_len = len;
5344
5345 em = alloc_extent_map(GFP_NOFS);
5346 if (!em) {
5347 err = -ENOMEM;
5348 goto out;
5349 }
5350 /*
5351 * when btrfs_get_extent can't find anything it
5352 * returns one huge hole
5353 *
5354 * make sure what it found really fits our range, and
5355 * adjust to make sure it is based on the start from
5356 * the caller
5357 */
5358 if (hole_em) {
5359 u64 calc_end = extent_map_end(hole_em);
5360
5361 if (calc_end <= start || (hole_em->start > end)) {
5362 free_extent_map(hole_em);
5363 hole_em = NULL;
5364 } else {
5365 hole_start = max(hole_em->start, start);
5366 hole_len = calc_end - hole_start;
5367 }
5368 }
5369 em->bdev = NULL;
5370 if (hole_em && range_start > hole_start) {
5371 /* our hole starts before our delalloc, so we
5372 * have to return just the parts of the hole
5373 * that go until the delalloc starts
5374 */
5375 em->len = min(hole_len,
5376 range_start - hole_start);
5377 em->start = hole_start;
5378 em->orig_start = hole_start;
5379 /*
5380 * don't adjust block start at all,
5381 * it is fixed at EXTENT_MAP_HOLE
5382 */
5383 em->block_start = hole_em->block_start;
5384 em->block_len = hole_len;
5385 } else {
5386 em->start = range_start;
5387 em->len = found;
5388 em->orig_start = range_start;
5389 em->block_start = EXTENT_MAP_DELALLOC;
5390 em->block_len = found;
5391 }
5392 } else if (hole_em) {
5393 return hole_em;
5394 }
5395out:
5396
5397 free_extent_map(hole_em);
5398 if (err) {
5399 free_extent_map(em);
5400 return ERR_PTR(err);
5401 }
5402 return em;
5403}
5404
5269static struct extent_map *btrfs_new_extent_direct(struct inode *inode, 5405static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
5270 u64 start, u64 len) 5406 u64 start, u64 len)
5271{ 5407{
@@ -5280,8 +5416,8 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
5280 btrfs_drop_extent_cache(inode, start, start + len - 1, 0); 5416 btrfs_drop_extent_cache(inode, start, start + len - 1, 0);
5281 5417
5282 trans = btrfs_join_transaction(root, 0); 5418 trans = btrfs_join_transaction(root, 0);
5283 if (!trans) 5419 if (IS_ERR(trans))
5284 return ERR_PTR(-ENOMEM); 5420 return ERR_CAST(trans);
5285 5421
5286 trans->block_rsv = &root->fs_info->delalloc_block_rsv; 5422 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
5287 5423
@@ -5505,7 +5641,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
5505 * while we look for nocow cross refs 5641 * while we look for nocow cross refs
5506 */ 5642 */
5507 trans = btrfs_join_transaction(root, 0); 5643 trans = btrfs_join_transaction(root, 0);
5508 if (!trans) 5644 if (IS_ERR(trans))
5509 goto must_cow; 5645 goto must_cow;
5510 5646
5511 if (can_nocow_odirect(trans, inode, start, len) == 1) { 5647 if (can_nocow_odirect(trans, inode, start, len) == 1) {
@@ -5640,7 +5776,7 @@ again:
5640 BUG_ON(!ordered); 5776 BUG_ON(!ordered);
5641 5777
5642 trans = btrfs_join_transaction(root, 1); 5778 trans = btrfs_join_transaction(root, 1);
5643 if (!trans) { 5779 if (IS_ERR(trans)) {
5644 err = -ENOMEM; 5780 err = -ENOMEM;
5645 goto out; 5781 goto out;
5646 } 5782 }
@@ -6088,7 +6224,7 @@ out:
6088static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 6224static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
6089 __u64 start, __u64 len) 6225 __u64 start, __u64 len)
6090{ 6226{
6091 return extent_fiemap(inode, fieinfo, start, len, btrfs_get_extent); 6227 return extent_fiemap(inode, fieinfo, start, len, btrfs_get_extent_fiemap);
6092} 6228}
6093 6229
6094int btrfs_readpage(struct file *file, struct page *page) 6230int btrfs_readpage(struct file *file, struct page *page)
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index a506a22b522a..5fdb2abc4fa7 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -203,7 +203,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
203 203
204 204
205 trans = btrfs_join_transaction(root, 1); 205 trans = btrfs_join_transaction(root, 1);
206 BUG_ON(!trans); 206 BUG_ON(IS_ERR(trans));
207 207
208 ret = btrfs_update_inode(trans, root, inode); 208 ret = btrfs_update_inode(trans, root, inode);
209 BUG_ON(ret); 209 BUG_ON(ret);
@@ -907,6 +907,10 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
907 907
908 if (new_size > old_size) { 908 if (new_size > old_size) {
909 trans = btrfs_start_transaction(root, 0); 909 trans = btrfs_start_transaction(root, 0);
910 if (IS_ERR(trans)) {
911 ret = PTR_ERR(trans);
912 goto out_unlock;
913 }
910 ret = btrfs_grow_device(trans, device, new_size); 914 ret = btrfs_grow_device(trans, device, new_size);
911 btrfs_commit_transaction(trans, root); 915 btrfs_commit_transaction(trans, root);
912 } else { 916 } else {
@@ -1067,12 +1071,15 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
1067 if (copy_from_user(&flags, arg, sizeof(flags))) 1071 if (copy_from_user(&flags, arg, sizeof(flags)))
1068 return -EFAULT; 1072 return -EFAULT;
1069 1073
1070 if (flags & ~BTRFS_SUBVOL_CREATE_ASYNC) 1074 if (flags & BTRFS_SUBVOL_CREATE_ASYNC)
1071 return -EINVAL; 1075 return -EINVAL;
1072 1076
1073 if (flags & ~BTRFS_SUBVOL_RDONLY) 1077 if (flags & ~BTRFS_SUBVOL_RDONLY)
1074 return -EOPNOTSUPP; 1078 return -EOPNOTSUPP;
1075 1079
1080 if (!is_owner_or_cap(inode))
1081 return -EACCES;
1082
1076 down_write(&root->fs_info->subvol_sem); 1083 down_write(&root->fs_info->subvol_sem);
1077 1084
1078 /* nothing to do */ 1085 /* nothing to do */
@@ -1093,7 +1100,7 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
1093 goto out_reset; 1100 goto out_reset;
1094 } 1101 }
1095 1102
1096 ret = btrfs_update_root(trans, root, 1103 ret = btrfs_update_root(trans, root->fs_info->tree_root,
1097 &root->root_key, &root->root_item); 1104 &root->root_key, &root->root_item);
1098 1105
1099 btrfs_commit_transaction(trans, root); 1106 btrfs_commit_transaction(trans, root);
@@ -1898,7 +1905,10 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
1898 1905
1899 memcpy(&new_key, &key, sizeof(new_key)); 1906 memcpy(&new_key, &key, sizeof(new_key));
1900 new_key.objectid = inode->i_ino; 1907 new_key.objectid = inode->i_ino;
1901 new_key.offset = key.offset + destoff - off; 1908 if (off <= key.offset)
1909 new_key.offset = key.offset + destoff - off;
1910 else
1911 new_key.offset = destoff;
1902 1912
1903 trans = btrfs_start_transaction(root, 1); 1913 trans = btrfs_start_transaction(root, 1);
1904 if (IS_ERR(trans)) { 1914 if (IS_ERR(trans)) {
@@ -2082,7 +2092,7 @@ static long btrfs_ioctl_trans_start(struct file *file)
2082 2092
2083 ret = -ENOMEM; 2093 ret = -ENOMEM;
2084 trans = btrfs_start_ioctl_transaction(root, 0); 2094 trans = btrfs_start_ioctl_transaction(root, 0);
2085 if (!trans) 2095 if (IS_ERR(trans))
2086 goto out_drop; 2096 goto out_drop;
2087 2097
2088 file->private_data = trans; 2098 file->private_data = trans;
@@ -2138,9 +2148,9 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)
2138 path->leave_spinning = 1; 2148 path->leave_spinning = 1;
2139 2149
2140 trans = btrfs_start_transaction(root, 1); 2150 trans = btrfs_start_transaction(root, 1);
2141 if (!trans) { 2151 if (IS_ERR(trans)) {
2142 btrfs_free_path(path); 2152 btrfs_free_path(path);
2143 return -ENOMEM; 2153 return PTR_ERR(trans);
2144 } 2154 }
2145 2155
2146 dir_id = btrfs_super_root_dir(&root->fs_info->super_copy); 2156 dir_id = btrfs_super_root_dir(&root->fs_info->super_copy);
@@ -2201,7 +2211,7 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
2201 int num_types = 4; 2211 int num_types = 4;
2202 int alloc_size; 2212 int alloc_size;
2203 int ret = 0; 2213 int ret = 0;
2204 int slot_count = 0; 2214 u64 slot_count = 0;
2205 int i, c; 2215 int i, c;
2206 2216
2207 if (copy_from_user(&space_args, 2217 if (copy_from_user(&space_args,
@@ -2240,7 +2250,7 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
2240 goto out; 2250 goto out;
2241 } 2251 }
2242 2252
2243 slot_count = min_t(int, space_args.space_slots, slot_count); 2253 slot_count = min_t(u64, space_args.space_slots, slot_count);
2244 2254
2245 alloc_size = sizeof(*dest) * slot_count; 2255 alloc_size = sizeof(*dest) * slot_count;
2246 2256
@@ -2260,6 +2270,9 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
2260 for (i = 0; i < num_types; i++) { 2270 for (i = 0; i < num_types; i++) {
2261 struct btrfs_space_info *tmp; 2271 struct btrfs_space_info *tmp;
2262 2272
2273 if (!slot_count)
2274 break;
2275
2263 info = NULL; 2276 info = NULL;
2264 rcu_read_lock(); 2277 rcu_read_lock();
2265 list_for_each_entry_rcu(tmp, &root->fs_info->space_info, 2278 list_for_each_entry_rcu(tmp, &root->fs_info->space_info,
@@ -2281,7 +2294,10 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
2281 memcpy(dest, &space, sizeof(space)); 2294 memcpy(dest, &space, sizeof(space));
2282 dest++; 2295 dest++;
2283 space_args.total_spaces++; 2296 space_args.total_spaces++;
2297 slot_count--;
2284 } 2298 }
2299 if (!slot_count)
2300 break;
2285 } 2301 }
2286 up_read(&info->groups_sem); 2302 up_read(&info->groups_sem);
2287 } 2303 }
@@ -2334,6 +2350,8 @@ static noinline long btrfs_ioctl_start_sync(struct file *file, void __user *argp
2334 u64 transid; 2350 u64 transid;
2335 2351
2336 trans = btrfs_start_transaction(root, 0); 2352 trans = btrfs_start_transaction(root, 0);
2353 if (IS_ERR(trans))
2354 return PTR_ERR(trans);
2337 transid = trans->transid; 2355 transid = trans->transid;
2338 btrfs_commit_transaction_async(trans, root, 0); 2356 btrfs_commit_transaction_async(trans, root, 0);
2339 2357
diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c
index cc9b450399df..a178f5ebea78 100644
--- a/fs/btrfs/lzo.c
+++ b/fs/btrfs/lzo.c
@@ -280,6 +280,7 @@ static int lzo_decompress_biovec(struct list_head *ws,
280 unsigned long tot_out; 280 unsigned long tot_out;
281 unsigned long tot_len; 281 unsigned long tot_len;
282 char *buf; 282 char *buf;
283 bool may_late_unmap, need_unmap;
283 284
284 data_in = kmap(pages_in[0]); 285 data_in = kmap(pages_in[0]);
285 tot_len = read_compress_length(data_in); 286 tot_len = read_compress_length(data_in);
@@ -300,11 +301,13 @@ static int lzo_decompress_biovec(struct list_head *ws,
300 301
301 tot_in += in_len; 302 tot_in += in_len;
302 working_bytes = in_len; 303 working_bytes = in_len;
304 may_late_unmap = need_unmap = false;
303 305
304 /* fast path: avoid using the working buffer */ 306 /* fast path: avoid using the working buffer */
305 if (in_page_bytes_left >= in_len) { 307 if (in_page_bytes_left >= in_len) {
306 buf = data_in + in_offset; 308 buf = data_in + in_offset;
307 bytes = in_len; 309 bytes = in_len;
310 may_late_unmap = true;
308 goto cont; 311 goto cont;
309 } 312 }
310 313
@@ -329,14 +332,17 @@ cont:
329 if (working_bytes == 0 && tot_in >= tot_len) 332 if (working_bytes == 0 && tot_in >= tot_len)
330 break; 333 break;
331 334
332 kunmap(pages_in[page_in_index]); 335 if (page_in_index + 1 >= total_pages_in) {
333 page_in_index++;
334 if (page_in_index >= total_pages_in) {
335 ret = -1; 336 ret = -1;
336 data_in = NULL;
337 goto done; 337 goto done;
338 } 338 }
339 data_in = kmap(pages_in[page_in_index]); 339
340 if (may_late_unmap)
341 need_unmap = true;
342 else
343 kunmap(pages_in[page_in_index]);
344
345 data_in = kmap(pages_in[++page_in_index]);
340 346
341 in_page_bytes_left = PAGE_CACHE_SIZE; 347 in_page_bytes_left = PAGE_CACHE_SIZE;
342 in_offset = 0; 348 in_offset = 0;
@@ -346,6 +352,8 @@ cont:
346 out_len = lzo1x_worst_compress(PAGE_CACHE_SIZE); 352 out_len = lzo1x_worst_compress(PAGE_CACHE_SIZE);
347 ret = lzo1x_decompress_safe(buf, in_len, workspace->buf, 353 ret = lzo1x_decompress_safe(buf, in_len, workspace->buf,
348 &out_len); 354 &out_len);
355 if (need_unmap)
356 kunmap(pages_in[page_in_index - 1]);
349 if (ret != LZO_E_OK) { 357 if (ret != LZO_E_OK) {
350 printk(KERN_WARNING "btrfs decompress failed\n"); 358 printk(KERN_WARNING "btrfs decompress failed\n");
351 ret = -1; 359 ret = -1;
@@ -363,8 +371,7 @@ cont:
363 break; 371 break;
364 } 372 }
365done: 373done:
366 if (data_in) 374 kunmap(pages_in[page_in_index]);
367 kunmap(pages_in[page_in_index]);
368 return ret; 375 return ret;
369} 376}
370 377
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 2b61e1ddcd99..083a55477375 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -141,7 +141,7 @@ static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree,
141 u64 file_offset) 141 u64 file_offset)
142{ 142{
143 struct rb_root *root = &tree->tree; 143 struct rb_root *root = &tree->tree;
144 struct rb_node *prev; 144 struct rb_node *prev = NULL;
145 struct rb_node *ret; 145 struct rb_node *ret;
146 struct btrfs_ordered_extent *entry; 146 struct btrfs_ordered_extent *entry;
147 147
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c
index 0d126be22b63..fb2605d998e9 100644
--- a/fs/btrfs/print-tree.c
+++ b/fs/btrfs/print-tree.c
@@ -260,6 +260,7 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l)
260#else 260#else
261 BUG(); 261 BUG();
262#endif 262#endif
263 break;
263 case BTRFS_BLOCK_GROUP_ITEM_KEY: 264 case BTRFS_BLOCK_GROUP_ITEM_KEY:
264 bi = btrfs_item_ptr(l, i, 265 bi = btrfs_item_ptr(l, i,
265 struct btrfs_block_group_item); 266 struct btrfs_block_group_item);
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 045c9c2b2d7e..31ade5802ae8 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -1157,6 +1157,7 @@ static int clone_backref_node(struct btrfs_trans_handle *trans,
1157 new_node->bytenr = dest->node->start; 1157 new_node->bytenr = dest->node->start;
1158 new_node->level = node->level; 1158 new_node->level = node->level;
1159 new_node->lowest = node->lowest; 1159 new_node->lowest = node->lowest;
1160 new_node->checked = 1;
1160 new_node->root = dest; 1161 new_node->root = dest;
1161 1162
1162 if (!node->lowest) { 1163 if (!node->lowest) {
@@ -2028,6 +2029,7 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
2028 2029
2029 while (1) { 2030 while (1) {
2030 trans = btrfs_start_transaction(root, 0); 2031 trans = btrfs_start_transaction(root, 0);
2032 BUG_ON(IS_ERR(trans));
2031 trans->block_rsv = rc->block_rsv; 2033 trans->block_rsv = rc->block_rsv;
2032 2034
2033 ret = btrfs_block_rsv_check(trans, root, rc->block_rsv, 2035 ret = btrfs_block_rsv_check(trans, root, rc->block_rsv,
@@ -2147,6 +2149,12 @@ again:
2147 } 2149 }
2148 2150
2149 trans = btrfs_join_transaction(rc->extent_root, 1); 2151 trans = btrfs_join_transaction(rc->extent_root, 1);
2152 if (IS_ERR(trans)) {
2153 if (!err)
2154 btrfs_block_rsv_release(rc->extent_root,
2155 rc->block_rsv, num_bytes);
2156 return PTR_ERR(trans);
2157 }
2150 2158
2151 if (!err) { 2159 if (!err) {
2152 if (num_bytes != rc->merging_rsv_size) { 2160 if (num_bytes != rc->merging_rsv_size) {
@@ -3222,6 +3230,7 @@ truncate:
3222 trans = btrfs_join_transaction(root, 0); 3230 trans = btrfs_join_transaction(root, 0);
3223 if (IS_ERR(trans)) { 3231 if (IS_ERR(trans)) {
3224 btrfs_free_path(path); 3232 btrfs_free_path(path);
3233 ret = PTR_ERR(trans);
3225 goto out; 3234 goto out;
3226 } 3235 }
3227 3236
@@ -3628,6 +3637,7 @@ int prepare_to_relocate(struct reloc_control *rc)
3628 set_reloc_control(rc); 3637 set_reloc_control(rc);
3629 3638
3630 trans = btrfs_join_transaction(rc->extent_root, 1); 3639 trans = btrfs_join_transaction(rc->extent_root, 1);
3640 BUG_ON(IS_ERR(trans));
3631 btrfs_commit_transaction(trans, rc->extent_root); 3641 btrfs_commit_transaction(trans, rc->extent_root);
3632 return 0; 3642 return 0;
3633} 3643}
@@ -3644,6 +3654,7 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
3644 u32 item_size; 3654 u32 item_size;
3645 int ret; 3655 int ret;
3646 int err = 0; 3656 int err = 0;
3657 int progress = 0;
3647 3658
3648 path = btrfs_alloc_path(); 3659 path = btrfs_alloc_path();
3649 if (!path) 3660 if (!path)
@@ -3656,8 +3667,10 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
3656 } 3667 }
3657 3668
3658 while (1) { 3669 while (1) {
3670 progress++;
3659 trans = btrfs_start_transaction(rc->extent_root, 0); 3671 trans = btrfs_start_transaction(rc->extent_root, 0);
3660 3672 BUG_ON(IS_ERR(trans));
3673restart:
3661 if (update_backref_cache(trans, &rc->backref_cache)) { 3674 if (update_backref_cache(trans, &rc->backref_cache)) {
3662 btrfs_end_transaction(trans, rc->extent_root); 3675 btrfs_end_transaction(trans, rc->extent_root);
3663 continue; 3676 continue;
@@ -3770,6 +3783,15 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
3770 } 3783 }
3771 } 3784 }
3772 } 3785 }
3786 if (trans && progress && err == -ENOSPC) {
3787 ret = btrfs_force_chunk_alloc(trans, rc->extent_root,
3788 rc->block_group->flags);
3789 if (ret == 0) {
3790 err = 0;
3791 progress = 0;
3792 goto restart;
3793 }
3794 }
3773 3795
3774 btrfs_release_path(rc->extent_root, path); 3796 btrfs_release_path(rc->extent_root, path);
3775 clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY, 3797 clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY,
@@ -3804,7 +3826,10 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
3804 3826
3805 /* get rid of pinned extents */ 3827 /* get rid of pinned extents */
3806 trans = btrfs_join_transaction(rc->extent_root, 1); 3828 trans = btrfs_join_transaction(rc->extent_root, 1);
3807 btrfs_commit_transaction(trans, rc->extent_root); 3829 if (IS_ERR(trans))
3830 err = PTR_ERR(trans);
3831 else
3832 btrfs_commit_transaction(trans, rc->extent_root);
3808out_free: 3833out_free:
3809 btrfs_free_block_rsv(rc->extent_root, rc->block_rsv); 3834 btrfs_free_block_rsv(rc->extent_root, rc->block_rsv);
3810 btrfs_free_path(path); 3835 btrfs_free_path(path);
@@ -4022,6 +4047,7 @@ static noinline_for_stack int mark_garbage_root(struct btrfs_root *root)
4022 int ret; 4047 int ret;
4023 4048
4024 trans = btrfs_start_transaction(root->fs_info->tree_root, 0); 4049 trans = btrfs_start_transaction(root->fs_info->tree_root, 0);
4050 BUG_ON(IS_ERR(trans));
4025 4051
4026 memset(&root->root_item.drop_progress, 0, 4052 memset(&root->root_item.drop_progress, 0,
4027 sizeof(root->root_item.drop_progress)); 4053 sizeof(root->root_item.drop_progress));
@@ -4125,6 +4151,11 @@ int btrfs_recover_relocation(struct btrfs_root *root)
4125 set_reloc_control(rc); 4151 set_reloc_control(rc);
4126 4152
4127 trans = btrfs_join_transaction(rc->extent_root, 1); 4153 trans = btrfs_join_transaction(rc->extent_root, 1);
4154 if (IS_ERR(trans)) {
4155 unset_reloc_control(rc);
4156 err = PTR_ERR(trans);
4157 goto out_free;
4158 }
4128 4159
4129 rc->merge_reloc_tree = 1; 4160 rc->merge_reloc_tree = 1;
4130 4161
@@ -4154,9 +4185,13 @@ int btrfs_recover_relocation(struct btrfs_root *root)
4154 unset_reloc_control(rc); 4185 unset_reloc_control(rc);
4155 4186
4156 trans = btrfs_join_transaction(rc->extent_root, 1); 4187 trans = btrfs_join_transaction(rc->extent_root, 1);
4157 btrfs_commit_transaction(trans, rc->extent_root); 4188 if (IS_ERR(trans))
4158out: 4189 err = PTR_ERR(trans);
4190 else
4191 btrfs_commit_transaction(trans, rc->extent_root);
4192out_free:
4159 kfree(rc); 4193 kfree(rc);
4194out:
4160 while (!list_empty(&reloc_roots)) { 4195 while (!list_empty(&reloc_roots)) {
4161 reloc_root = list_entry(reloc_roots.next, 4196 reloc_root = list_entry(reloc_roots.next,
4162 struct btrfs_root, root_list); 4197 struct btrfs_root, root_list);
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index b2130c46fdb5..d39a9895d932 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -155,7 +155,8 @@ enum {
155 Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress, 155 Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress,
156 Opt_compress_type, Opt_compress_force, Opt_compress_force_type, 156 Opt_compress_type, Opt_compress_force, Opt_compress_force_type,
157 Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard, 157 Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard,
158 Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed, Opt_err, 158 Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed,
159 Opt_enospc_debug, Opt_err,
159}; 160};
160 161
161static match_table_t tokens = { 162static match_table_t tokens = {
@@ -184,6 +185,7 @@ static match_table_t tokens = {
184 {Opt_space_cache, "space_cache"}, 185 {Opt_space_cache, "space_cache"},
185 {Opt_clear_cache, "clear_cache"}, 186 {Opt_clear_cache, "clear_cache"},
186 {Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"}, 187 {Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"},
188 {Opt_enospc_debug, "enospc_debug"},
187 {Opt_err, NULL}, 189 {Opt_err, NULL},
188}; 190};
189 191
@@ -358,6 +360,9 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
358 case Opt_user_subvol_rm_allowed: 360 case Opt_user_subvol_rm_allowed:
359 btrfs_set_opt(info->mount_opt, USER_SUBVOL_RM_ALLOWED); 361 btrfs_set_opt(info->mount_opt, USER_SUBVOL_RM_ALLOWED);
360 break; 362 break;
363 case Opt_enospc_debug:
364 btrfs_set_opt(info->mount_opt, ENOSPC_DEBUG);
365 break;
361 case Opt_err: 366 case Opt_err:
362 printk(KERN_INFO "btrfs: unrecognized mount option " 367 printk(KERN_INFO "btrfs: unrecognized mount option "
363 "'%s'\n", p); 368 "'%s'\n", p);
@@ -383,7 +388,7 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags,
383 struct btrfs_fs_devices **fs_devices) 388 struct btrfs_fs_devices **fs_devices)
384{ 389{
385 substring_t args[MAX_OPT_ARGS]; 390 substring_t args[MAX_OPT_ARGS];
386 char *opts, *p; 391 char *opts, *orig, *p;
387 int error = 0; 392 int error = 0;
388 int intarg; 393 int intarg;
389 394
@@ -397,6 +402,7 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags,
397 opts = kstrdup(options, GFP_KERNEL); 402 opts = kstrdup(options, GFP_KERNEL);
398 if (!opts) 403 if (!opts)
399 return -ENOMEM; 404 return -ENOMEM;
405 orig = opts;
400 406
401 while ((p = strsep(&opts, ",")) != NULL) { 407 while ((p = strsep(&opts, ",")) != NULL) {
402 int token; 408 int token;
@@ -432,7 +438,7 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags,
432 } 438 }
433 439
434 out_free_opts: 440 out_free_opts:
435 kfree(opts); 441 kfree(orig);
436 out: 442 out:
437 /* 443 /*
438 * If no subvolume name is specified we use the default one. Allocate 444 * If no subvolume name is specified we use the default one. Allocate
@@ -623,6 +629,8 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
623 btrfs_wait_ordered_extents(root, 0, 0); 629 btrfs_wait_ordered_extents(root, 0, 0);
624 630
625 trans = btrfs_start_transaction(root, 0); 631 trans = btrfs_start_transaction(root, 0);
632 if (IS_ERR(trans))
633 return PTR_ERR(trans);
626 ret = btrfs_commit_transaction(trans, root); 634 ret = btrfs_commit_transaction(trans, root);
627 return ret; 635 return ret;
628} 636}
@@ -761,6 +769,8 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
761 } 769 }
762 770
763 btrfs_close_devices(fs_devices); 771 btrfs_close_devices(fs_devices);
772 kfree(fs_info);
773 kfree(tree_root);
764 } else { 774 } else {
765 char b[BDEVNAME_SIZE]; 775 char b[BDEVNAME_SIZE];
766 776
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index bae5c7b8bbe2..3d73c8d93bbb 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -1161,6 +1161,11 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
1161 INIT_DELAYED_WORK(&ac->work, do_async_commit); 1161 INIT_DELAYED_WORK(&ac->work, do_async_commit);
1162 ac->root = root; 1162 ac->root = root;
1163 ac->newtrans = btrfs_join_transaction(root, 0); 1163 ac->newtrans = btrfs_join_transaction(root, 0);
1164 if (IS_ERR(ac->newtrans)) {
1165 int err = PTR_ERR(ac->newtrans);
1166 kfree(ac);
1167 return err;
1168 }
1164 1169
1165 /* take transaction reference */ 1170 /* take transaction reference */
1166 mutex_lock(&root->fs_info->trans_mutex); 1171 mutex_lock(&root->fs_info->trans_mutex);
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 054744ac5719..a4bbb854dfd2 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -338,6 +338,12 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans,
338 } 338 }
339 dst_copy = kmalloc(item_size, GFP_NOFS); 339 dst_copy = kmalloc(item_size, GFP_NOFS);
340 src_copy = kmalloc(item_size, GFP_NOFS); 340 src_copy = kmalloc(item_size, GFP_NOFS);
341 if (!dst_copy || !src_copy) {
342 btrfs_release_path(root, path);
343 kfree(dst_copy);
344 kfree(src_copy);
345 return -ENOMEM;
346 }
341 347
342 read_extent_buffer(eb, src_copy, src_ptr, item_size); 348 read_extent_buffer(eb, src_copy, src_ptr, item_size);
343 349
@@ -665,6 +671,9 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans,
665 btrfs_dir_item_key_to_cpu(leaf, di, &location); 671 btrfs_dir_item_key_to_cpu(leaf, di, &location);
666 name_len = btrfs_dir_name_len(leaf, di); 672 name_len = btrfs_dir_name_len(leaf, di);
667 name = kmalloc(name_len, GFP_NOFS); 673 name = kmalloc(name_len, GFP_NOFS);
674 if (!name)
675 return -ENOMEM;
676
668 read_extent_buffer(leaf, name, (unsigned long)(di + 1), name_len); 677 read_extent_buffer(leaf, name, (unsigned long)(di + 1), name_len);
669 btrfs_release_path(root, path); 678 btrfs_release_path(root, path);
670 679
@@ -744,6 +753,9 @@ static noinline int backref_in_log(struct btrfs_root *log,
744 int match = 0; 753 int match = 0;
745 754
746 path = btrfs_alloc_path(); 755 path = btrfs_alloc_path();
756 if (!path)
757 return -ENOMEM;
758
747 ret = btrfs_search_slot(NULL, log, key, path, 0, 0); 759 ret = btrfs_search_slot(NULL, log, key, path, 0, 0);
748 if (ret != 0) 760 if (ret != 0)
749 goto out; 761 goto out;
@@ -967,6 +979,8 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans,
967 key.offset = (u64)-1; 979 key.offset = (u64)-1;
968 980
969 path = btrfs_alloc_path(); 981 path = btrfs_alloc_path();
982 if (!path)
983 return -ENOMEM;
970 984
971 while (1) { 985 while (1) {
972 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 986 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
@@ -1178,6 +1192,9 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
1178 1192
1179 name_len = btrfs_dir_name_len(eb, di); 1193 name_len = btrfs_dir_name_len(eb, di);
1180 name = kmalloc(name_len, GFP_NOFS); 1194 name = kmalloc(name_len, GFP_NOFS);
1195 if (!name)
1196 return -ENOMEM;
1197
1181 log_type = btrfs_dir_type(eb, di); 1198 log_type = btrfs_dir_type(eb, di);
1182 read_extent_buffer(eb, name, (unsigned long)(di + 1), 1199 read_extent_buffer(eb, name, (unsigned long)(di + 1),
1183 name_len); 1200 name_len);
@@ -1692,6 +1709,8 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
1692 root_owner = btrfs_header_owner(parent); 1709 root_owner = btrfs_header_owner(parent);
1693 1710
1694 next = btrfs_find_create_tree_block(root, bytenr, blocksize); 1711 next = btrfs_find_create_tree_block(root, bytenr, blocksize);
1712 if (!next)
1713 return -ENOMEM;
1695 1714
1696 if (*level == 1) { 1715 if (*level == 1) {
1697 wc->process_func(root, next, wc, ptr_gen); 1716 wc->process_func(root, next, wc, ptr_gen);
@@ -2032,6 +2051,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
2032 wait_log_commit(trans, log_root_tree, 2051 wait_log_commit(trans, log_root_tree,
2033 log_root_tree->log_transid); 2052 log_root_tree->log_transid);
2034 mutex_unlock(&log_root_tree->log_mutex); 2053 mutex_unlock(&log_root_tree->log_mutex);
2054 ret = 0;
2035 goto out; 2055 goto out;
2036 } 2056 }
2037 atomic_set(&log_root_tree->log_commit[index2], 1); 2057 atomic_set(&log_root_tree->log_commit[index2], 1);
@@ -2096,7 +2116,7 @@ out:
2096 smp_mb(); 2116 smp_mb();
2097 if (waitqueue_active(&root->log_commit_wait[index1])) 2117 if (waitqueue_active(&root->log_commit_wait[index1]))
2098 wake_up(&root->log_commit_wait[index1]); 2118 wake_up(&root->log_commit_wait[index1]);
2099 return 0; 2119 return ret;
2100} 2120}
2101 2121
2102static void free_log_tree(struct btrfs_trans_handle *trans, 2122static void free_log_tree(struct btrfs_trans_handle *trans,
@@ -2194,6 +2214,9 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
2194 2214
2195 log = root->log_root; 2215 log = root->log_root;
2196 path = btrfs_alloc_path(); 2216 path = btrfs_alloc_path();
2217 if (!path)
2218 return -ENOMEM;
2219
2197 di = btrfs_lookup_dir_item(trans, log, path, dir->i_ino, 2220 di = btrfs_lookup_dir_item(trans, log, path, dir->i_ino,
2198 name, name_len, -1); 2221 name, name_len, -1);
2199 if (IS_ERR(di)) { 2222 if (IS_ERR(di)) {
@@ -2594,6 +2617,9 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
2594 2617
2595 ins_data = kmalloc(nr * sizeof(struct btrfs_key) + 2618 ins_data = kmalloc(nr * sizeof(struct btrfs_key) +
2596 nr * sizeof(u32), GFP_NOFS); 2619 nr * sizeof(u32), GFP_NOFS);
2620 if (!ins_data)
2621 return -ENOMEM;
2622
2597 ins_sizes = (u32 *)ins_data; 2623 ins_sizes = (u32 *)ins_data;
2598 ins_keys = (struct btrfs_key *)(ins_data + nr * sizeof(u32)); 2624 ins_keys = (struct btrfs_key *)(ins_data + nr * sizeof(u32));
2599 2625
@@ -2725,7 +2751,13 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
2725 log = root->log_root; 2751 log = root->log_root;
2726 2752
2727 path = btrfs_alloc_path(); 2753 path = btrfs_alloc_path();
2754 if (!path)
2755 return -ENOMEM;
2728 dst_path = btrfs_alloc_path(); 2756 dst_path = btrfs_alloc_path();
2757 if (!dst_path) {
2758 btrfs_free_path(path);
2759 return -ENOMEM;
2760 }
2729 2761
2730 min_key.objectid = inode->i_ino; 2762 min_key.objectid = inode->i_ino;
2731 min_key.type = BTRFS_INODE_ITEM_KEY; 2763 min_key.type = BTRFS_INODE_ITEM_KEY;
@@ -3080,6 +3112,7 @@ int btrfs_recover_log_trees(struct btrfs_root *log_root_tree)
3080 BUG_ON(!path); 3112 BUG_ON(!path);
3081 3113
3082 trans = btrfs_start_transaction(fs_info->tree_root, 0); 3114 trans = btrfs_start_transaction(fs_info->tree_root, 0);
3115 BUG_ON(IS_ERR(trans));
3083 3116
3084 wc.trans = trans; 3117 wc.trans = trans;
3085 wc.pin = 1; 3118 wc.pin = 1;
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index d158530233b7..dd13eb81ee40 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1213,6 +1213,10 @@ static int btrfs_rm_dev_item(struct btrfs_root *root,
1213 return -ENOMEM; 1213 return -ENOMEM;
1214 1214
1215 trans = btrfs_start_transaction(root, 0); 1215 trans = btrfs_start_transaction(root, 0);
1216 if (IS_ERR(trans)) {
1217 btrfs_free_path(path);
1218 return PTR_ERR(trans);
1219 }
1216 key.objectid = BTRFS_DEV_ITEMS_OBJECTID; 1220 key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
1217 key.type = BTRFS_DEV_ITEM_KEY; 1221 key.type = BTRFS_DEV_ITEM_KEY;
1218 key.offset = device->devid; 1222 key.offset = device->devid;
@@ -1334,11 +1338,11 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
1334 1338
1335 ret = btrfs_shrink_device(device, 0); 1339 ret = btrfs_shrink_device(device, 0);
1336 if (ret) 1340 if (ret)
1337 goto error_brelse; 1341 goto error_undo;
1338 1342
1339 ret = btrfs_rm_dev_item(root->fs_info->chunk_root, device); 1343 ret = btrfs_rm_dev_item(root->fs_info->chunk_root, device);
1340 if (ret) 1344 if (ret)
1341 goto error_brelse; 1345 goto error_undo;
1342 1346
1343 device->in_fs_metadata = 0; 1347 device->in_fs_metadata = 0;
1344 1348
@@ -1412,6 +1416,13 @@ out:
1412 mutex_unlock(&root->fs_info->volume_mutex); 1416 mutex_unlock(&root->fs_info->volume_mutex);
1413 mutex_unlock(&uuid_mutex); 1417 mutex_unlock(&uuid_mutex);
1414 return ret; 1418 return ret;
1419error_undo:
1420 if (device->writeable) {
1421 list_add(&device->dev_alloc_list,
1422 &root->fs_info->fs_devices->alloc_list);
1423 root->fs_info->fs_devices->rw_devices++;
1424 }
1425 goto error_brelse;
1415} 1426}
1416 1427
1417/* 1428/*
@@ -1601,11 +1612,19 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
1601 1612
1602 ret = find_next_devid(root, &device->devid); 1613 ret = find_next_devid(root, &device->devid);
1603 if (ret) { 1614 if (ret) {
1615 kfree(device->name);
1604 kfree(device); 1616 kfree(device);
1605 goto error; 1617 goto error;
1606 } 1618 }
1607 1619
1608 trans = btrfs_start_transaction(root, 0); 1620 trans = btrfs_start_transaction(root, 0);
1621 if (IS_ERR(trans)) {
1622 kfree(device->name);
1623 kfree(device);
1624 ret = PTR_ERR(trans);
1625 goto error;
1626 }
1627
1609 lock_chunks(root); 1628 lock_chunks(root);
1610 1629
1611 device->writeable = 1; 1630 device->writeable = 1;
@@ -1621,7 +1640,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
1621 device->dev_root = root->fs_info->dev_root; 1640 device->dev_root = root->fs_info->dev_root;
1622 device->bdev = bdev; 1641 device->bdev = bdev;
1623 device->in_fs_metadata = 1; 1642 device->in_fs_metadata = 1;
1624 device->mode = 0; 1643 device->mode = FMODE_EXCL;
1625 set_blocksize(device->bdev, 4096); 1644 set_blocksize(device->bdev, 4096);
1626 1645
1627 if (seeding_dev) { 1646 if (seeding_dev) {
@@ -1873,7 +1892,7 @@ static int btrfs_relocate_chunk(struct btrfs_root *root,
1873 return ret; 1892 return ret;
1874 1893
1875 trans = btrfs_start_transaction(root, 0); 1894 trans = btrfs_start_transaction(root, 0);
1876 BUG_ON(!trans); 1895 BUG_ON(IS_ERR(trans));
1877 1896
1878 lock_chunks(root); 1897 lock_chunks(root);
1879 1898
@@ -2047,7 +2066,7 @@ int btrfs_balance(struct btrfs_root *dev_root)
2047 BUG_ON(ret); 2066 BUG_ON(ret);
2048 2067
2049 trans = btrfs_start_transaction(dev_root, 0); 2068 trans = btrfs_start_transaction(dev_root, 0);
2050 BUG_ON(!trans); 2069 BUG_ON(IS_ERR(trans));
2051 2070
2052 ret = btrfs_grow_device(trans, device, old_size); 2071 ret = btrfs_grow_device(trans, device, old_size);
2053 BUG_ON(ret); 2072 BUG_ON(ret);
@@ -2213,6 +2232,11 @@ again:
2213 2232
2214 /* Shrinking succeeded, else we would be at "done". */ 2233 /* Shrinking succeeded, else we would be at "done". */
2215 trans = btrfs_start_transaction(root, 0); 2234 trans = btrfs_start_transaction(root, 0);
2235 if (IS_ERR(trans)) {
2236 ret = PTR_ERR(trans);
2237 goto done;
2238 }
2239
2216 lock_chunks(root); 2240 lock_chunks(root);
2217 2241
2218 device->disk_total_bytes = new_size; 2242 device->disk_total_bytes = new_size;
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 60d27bc9eb83..6b61ded701e1 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1560,9 +1560,10 @@ retry_locked:
1560 /* NOTE: no side-effects allowed, until we take s_mutex */ 1560 /* NOTE: no side-effects allowed, until we take s_mutex */
1561 1561
1562 revoking = cap->implemented & ~cap->issued; 1562 revoking = cap->implemented & ~cap->issued;
1563 if (revoking) 1563 dout(" mds%d cap %p issued %s implemented %s revoking %s\n",
1564 dout(" mds%d revoking %s\n", cap->mds, 1564 cap->mds, cap, ceph_cap_string(cap->issued),
1565 ceph_cap_string(revoking)); 1565 ceph_cap_string(cap->implemented),
1566 ceph_cap_string(revoking));
1566 1567
1567 if (cap == ci->i_auth_cap && 1568 if (cap == ci->i_auth_cap &&
1568 (cap->issued & CEPH_CAP_FILE_WR)) { 1569 (cap->issued & CEPH_CAP_FILE_WR)) {
@@ -1658,6 +1659,8 @@ ack:
1658 1659
1659 if (cap == ci->i_auth_cap && ci->i_dirty_caps) 1660 if (cap == ci->i_auth_cap && ci->i_dirty_caps)
1660 flushing = __mark_caps_flushing(inode, session); 1661 flushing = __mark_caps_flushing(inode, session);
1662 else
1663 flushing = 0;
1661 1664
1662 mds = cap->mds; /* remember mds, so we don't repeat */ 1665 mds = cap->mds; /* remember mds, so we don't repeat */
1663 sent++; 1666 sent++;
@@ -1940,6 +1943,35 @@ void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc,
1940 } 1943 }
1941} 1944}
1942 1945
1946static void kick_flushing_inode_caps(struct ceph_mds_client *mdsc,
1947 struct ceph_mds_session *session,
1948 struct inode *inode)
1949{
1950 struct ceph_inode_info *ci = ceph_inode(inode);
1951 struct ceph_cap *cap;
1952 int delayed = 0;
1953
1954 spin_lock(&inode->i_lock);
1955 cap = ci->i_auth_cap;
1956 dout("kick_flushing_inode_caps %p flushing %s flush_seq %lld\n", inode,
1957 ceph_cap_string(ci->i_flushing_caps), ci->i_cap_flush_seq);
1958 __ceph_flush_snaps(ci, &session, 1);
1959 if (ci->i_flushing_caps) {
1960 delayed = __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH,
1961 __ceph_caps_used(ci),
1962 __ceph_caps_wanted(ci),
1963 cap->issued | cap->implemented,
1964 ci->i_flushing_caps, NULL);
1965 if (delayed) {
1966 spin_lock(&inode->i_lock);
1967 __cap_delay_requeue(mdsc, ci);
1968 spin_unlock(&inode->i_lock);
1969 }
1970 } else {
1971 spin_unlock(&inode->i_lock);
1972 }
1973}
1974
1943 1975
1944/* 1976/*
1945 * Take references to capabilities we hold, so that we don't release 1977 * Take references to capabilities we hold, so that we don't release
@@ -2687,7 +2719,7 @@ static void handle_cap_import(struct ceph_mds_client *mdsc,
2687 ceph_add_cap(inode, session, cap_id, -1, 2719 ceph_add_cap(inode, session, cap_id, -1,
2688 issued, wanted, seq, mseq, realmino, CEPH_CAP_FLAG_AUTH, 2720 issued, wanted, seq, mseq, realmino, CEPH_CAP_FLAG_AUTH,
2689 NULL /* no caps context */); 2721 NULL /* no caps context */);
2690 try_flush_caps(inode, session, NULL); 2722 kick_flushing_inode_caps(mdsc, session, inode);
2691 up_read(&mdsc->snap_rwsem); 2723 up_read(&mdsc->snap_rwsem);
2692 2724
2693 /* make sure we re-request max_size, if necessary */ 2725 /* make sure we re-request max_size, if necessary */
@@ -2785,8 +2817,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
2785 case CEPH_CAP_OP_IMPORT: 2817 case CEPH_CAP_OP_IMPORT:
2786 handle_cap_import(mdsc, inode, h, session, 2818 handle_cap_import(mdsc, inode, h, session,
2787 snaptrace, snaptrace_len); 2819 snaptrace, snaptrace_len);
2788 ceph_check_caps(ceph_inode(inode), CHECK_CAPS_NODELAY, 2820 ceph_check_caps(ceph_inode(inode), 0, session);
2789 session);
2790 goto done_unlocked; 2821 goto done_unlocked;
2791 } 2822 }
2792 2823
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 0bc68de8edd7..099a58615b90 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -409,7 +409,7 @@ more:
409 spin_lock(&inode->i_lock); 409 spin_lock(&inode->i_lock);
410 if (ci->i_release_count == fi->dir_release_count) { 410 if (ci->i_release_count == fi->dir_release_count) {
411 dout(" marking %p complete\n", inode); 411 dout(" marking %p complete\n", inode);
412 ci->i_ceph_flags |= CEPH_I_COMPLETE; 412 /* ci->i_ceph_flags |= CEPH_I_COMPLETE; */
413 ci->i_max_offset = filp->f_pos; 413 ci->i_max_offset = filp->f_pos;
414 } 414 }
415 spin_unlock(&inode->i_lock); 415 spin_unlock(&inode->i_lock);
@@ -496,6 +496,7 @@ struct dentry *ceph_finish_lookup(struct ceph_mds_request *req,
496 496
497 /* .snap dir? */ 497 /* .snap dir? */
498 if (err == -ENOENT && 498 if (err == -ENOENT &&
499 ceph_snap(parent) == CEPH_NOSNAP &&
499 strcmp(dentry->d_name.name, 500 strcmp(dentry->d_name.name,
500 fsc->mount_options->snapdir_name) == 0) { 501 fsc->mount_options->snapdir_name) == 0) {
501 struct inode *inode = ceph_get_snapdir(parent); 502 struct inode *inode = ceph_get_snapdir(parent);
@@ -1029,28 +1030,8 @@ out_touch:
1029static void ceph_dentry_release(struct dentry *dentry) 1030static void ceph_dentry_release(struct dentry *dentry)
1030{ 1031{
1031 struct ceph_dentry_info *di = ceph_dentry(dentry); 1032 struct ceph_dentry_info *di = ceph_dentry(dentry);
1032 struct inode *parent_inode = NULL;
1033 u64 snapid = CEPH_NOSNAP;
1034 1033
1035 if (!IS_ROOT(dentry)) { 1034 dout("dentry_release %p\n", dentry);
1036 parent_inode = dentry->d_parent->d_inode;
1037 if (parent_inode)
1038 snapid = ceph_snap(parent_inode);
1039 }
1040 dout("dentry_release %p parent %p\n", dentry, parent_inode);
1041 if (parent_inode && snapid != CEPH_SNAPDIR) {
1042 struct ceph_inode_info *ci = ceph_inode(parent_inode);
1043
1044 spin_lock(&parent_inode->i_lock);
1045 if (ci->i_shared_gen == di->lease_shared_gen ||
1046 snapid <= CEPH_MAXSNAP) {
1047 dout(" clearing %p complete (d_release)\n",
1048 parent_inode);
1049 ci->i_ceph_flags &= ~CEPH_I_COMPLETE;
1050 ci->i_release_count++;
1051 }
1052 spin_unlock(&parent_inode->i_lock);
1053 }
1054 if (di) { 1035 if (di) {
1055 ceph_dentry_lru_del(dentry); 1036 ceph_dentry_lru_del(dentry);
1056 if (di->lease_session) 1037 if (di->lease_session)
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index e835eff551e3..193bfa5e9cbd 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -707,13 +707,9 @@ static int fill_inode(struct inode *inode,
707 (issued & CEPH_CAP_FILE_EXCL) == 0 && 707 (issued & CEPH_CAP_FILE_EXCL) == 0 &&
708 (ci->i_ceph_flags & CEPH_I_COMPLETE) == 0) { 708 (ci->i_ceph_flags & CEPH_I_COMPLETE) == 0) {
709 dout(" marking %p complete (empty)\n", inode); 709 dout(" marking %p complete (empty)\n", inode);
710 ci->i_ceph_flags |= CEPH_I_COMPLETE; 710 /* ci->i_ceph_flags |= CEPH_I_COMPLETE; */
711 ci->i_max_offset = 2; 711 ci->i_max_offset = 2;
712 } 712 }
713
714 /* it may be better to set st_size in getattr instead? */
715 if (ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb), RBYTES))
716 inode->i_size = ci->i_rbytes;
717 break; 713 break;
718 default: 714 default:
719 pr_err("fill_inode %llx.%llx BAD mode 0%o\n", 715 pr_err("fill_inode %llx.%llx BAD mode 0%o\n",
@@ -1819,7 +1815,11 @@ int ceph_getattr(struct vfsmount *mnt, struct dentry *dentry,
1819 else 1815 else
1820 stat->dev = 0; 1816 stat->dev = 0;
1821 if (S_ISDIR(inode->i_mode)) { 1817 if (S_ISDIR(inode->i_mode)) {
1822 stat->size = ci->i_rbytes; 1818 if (ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb),
1819 RBYTES))
1820 stat->size = ci->i_rbytes;
1821 else
1822 stat->size = ci->i_files + ci->i_subdirs;
1823 stat->blocks = 0; 1823 stat->blocks = 0;
1824 stat->blksize = 65536; 1824 stat->blksize = 65536;
1825 } 1825 }
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 1e30d194a8e3..a1ee8fa3a8e7 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -693,9 +693,11 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
693 dout("choose_mds %p %llx.%llx " 693 dout("choose_mds %p %llx.%llx "
694 "frag %u mds%d (%d/%d)\n", 694 "frag %u mds%d (%d/%d)\n",
695 inode, ceph_vinop(inode), 695 inode, ceph_vinop(inode),
696 frag.frag, frag.mds, 696 frag.frag, mds,
697 (int)r, frag.ndist); 697 (int)r, frag.ndist);
698 return mds; 698 if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >=
699 CEPH_MDS_STATE_ACTIVE)
700 return mds;
699 } 701 }
700 702
701 /* since this file/dir wasn't known to be 703 /* since this file/dir wasn't known to be
@@ -708,7 +710,9 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
708 dout("choose_mds %p %llx.%llx " 710 dout("choose_mds %p %llx.%llx "
709 "frag %u mds%d (auth)\n", 711 "frag %u mds%d (auth)\n",
710 inode, ceph_vinop(inode), frag.frag, mds); 712 inode, ceph_vinop(inode), frag.frag, mds);
711 return mds; 713 if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >=
714 CEPH_MDS_STATE_ACTIVE)
715 return mds;
712 } 716 }
713 } 717 }
714 } 718 }
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index 39c243acd062..f40b9139e437 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -584,10 +584,14 @@ static void queue_realm_cap_snaps(struct ceph_snap_realm *realm)
584 if (lastinode) 584 if (lastinode)
585 iput(lastinode); 585 iput(lastinode);
586 586
587 dout("queue_realm_cap_snaps %p %llx children\n", realm, realm->ino); 587 list_for_each_entry(child, &realm->children, child_item) {
588 list_for_each_entry(child, &realm->children, child_item) 588 dout("queue_realm_cap_snaps %p %llx queue child %p %llx\n",
589 queue_realm_cap_snaps(child); 589 realm, realm->ino, child, child->ino);
590 list_del_init(&child->dirty_item);
591 list_add(&child->dirty_item, &realm->dirty_item);
592 }
590 593
594 list_del_init(&realm->dirty_item);
591 dout("queue_realm_cap_snaps %p %llx done\n", realm, realm->ino); 595 dout("queue_realm_cap_snaps %p %llx done\n", realm, realm->ino);
592} 596}
593 597
@@ -683,7 +687,9 @@ more:
683 * queue cap snaps _after_ we've built the new snap contexts, 687 * queue cap snaps _after_ we've built the new snap contexts,
684 * so that i_head_snapc can be set appropriately. 688 * so that i_head_snapc can be set appropriately.
685 */ 689 */
686 list_for_each_entry(realm, &dirty_realms, dirty_item) { 690 while (!list_empty(&dirty_realms)) {
691 realm = list_first_entry(&dirty_realms, struct ceph_snap_realm,
692 dirty_item);
687 queue_realm_cap_snaps(realm); 693 queue_realm_cap_snaps(realm);
688 } 694 }
689 695
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index bf6f0f34082a..9c5085465a63 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -290,6 +290,8 @@ static int parse_mount_options(struct ceph_mount_options **pfsopt,
290 290
291 fsopt->rsize = CEPH_MOUNT_RSIZE_DEFAULT; 291 fsopt->rsize = CEPH_MOUNT_RSIZE_DEFAULT;
292 fsopt->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL); 292 fsopt->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL);
293 fsopt->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT;
294 fsopt->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT;
293 fsopt->cap_release_safety = CEPH_CAP_RELEASE_SAFETY_DEFAULT; 295 fsopt->cap_release_safety = CEPH_CAP_RELEASE_SAFETY_DEFAULT;
294 fsopt->max_readdir = CEPH_MAX_READDIR_DEFAULT; 296 fsopt->max_readdir = CEPH_MAX_READDIR_DEFAULT;
295 fsopt->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT; 297 fsopt->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT;
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index 6e12a6ba5f79..8c9eba6ef9df 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -219,6 +219,7 @@ static struct ceph_inode_xattr *__get_xattr(struct ceph_inode_info *ci,
219 struct rb_node **p; 219 struct rb_node **p;
220 struct rb_node *parent = NULL; 220 struct rb_node *parent = NULL;
221 struct ceph_inode_xattr *xattr = NULL; 221 struct ceph_inode_xattr *xattr = NULL;
222 int name_len = strlen(name);
222 int c; 223 int c;
223 224
224 p = &ci->i_xattrs.index.rb_node; 225 p = &ci->i_xattrs.index.rb_node;
@@ -226,6 +227,8 @@ static struct ceph_inode_xattr *__get_xattr(struct ceph_inode_info *ci,
226 parent = *p; 227 parent = *p;
227 xattr = rb_entry(parent, struct ceph_inode_xattr, node); 228 xattr = rb_entry(parent, struct ceph_inode_xattr, node);
228 c = strncmp(name, xattr->name, xattr->name_len); 229 c = strncmp(name, xattr->name, xattr->name_len);
230 if (c == 0 && name_len > xattr->name_len)
231 c = 1;
229 if (c < 0) 232 if (c < 0)
230 p = &(*p)->rb_left; 233 p = &(*p)->rb_left;
231 else if (c > 0) 234 else if (c > 0)
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
index ee45648b0d1a..7cb0f7f847e4 100644
--- a/fs/cifs/Kconfig
+++ b/fs/cifs/Kconfig
@@ -3,6 +3,7 @@ config CIFS
3 depends on INET 3 depends on INET
4 select NLS 4 select NLS
5 select CRYPTO 5 select CRYPTO
6 select CRYPTO_MD4
6 select CRYPTO_MD5 7 select CRYPTO_MD5
7 select CRYPTO_HMAC 8 select CRYPTO_HMAC
8 select CRYPTO_ARC4 9 select CRYPTO_ARC4
diff --git a/fs/cifs/Makefile b/fs/cifs/Makefile
index 43b19dd39191..d87558448e3d 100644
--- a/fs/cifs/Makefile
+++ b/fs/cifs/Makefile
@@ -5,7 +5,7 @@ obj-$(CONFIG_CIFS) += cifs.o
5 5
6cifs-y := cifsfs.o cifssmb.o cifs_debug.o connect.o dir.o file.o inode.o \ 6cifs-y := cifsfs.o cifssmb.o cifs_debug.o connect.o dir.o file.o inode.o \
7 link.o misc.o netmisc.o smbdes.o smbencrypt.o transport.o asn1.o \ 7 link.o misc.o netmisc.o smbdes.o smbencrypt.o transport.o asn1.o \
8 md4.o md5.o cifs_unicode.o nterr.o xattr.o cifsencrypt.o \ 8 cifs_unicode.o nterr.o xattr.o cifsencrypt.o \
9 readdir.o ioctl.o sess.o export.o 9 readdir.o ioctl.o sess.o export.o
10 10
11cifs-$(CONFIG_CIFS_ACL) += cifsacl.o 11cifs-$(CONFIG_CIFS_ACL) += cifsacl.o
diff --git a/fs/cifs/README b/fs/cifs/README
index 46af99ab3614..fe1683590828 100644
--- a/fs/cifs/README
+++ b/fs/cifs/README
@@ -452,6 +452,11 @@ A partial list of the supported mount options follows:
452 if oplock (caching token) is granted and held. Note that 452 if oplock (caching token) is granted and held. Note that
453 direct allows write operations larger than page size 453 direct allows write operations larger than page size
454 to be sent to the server. 454 to be sent to the server.
455 strictcache Use for switching on strict cache mode. In this mode the
456 client read from the cache all the time it has Oplock Level II,
457 otherwise - read from the server. All written data are stored
458 in the cache, but if the client doesn't have Exclusive Oplock,
459 it writes the data to the server.
455 acl Allow setfacl and getfacl to manage posix ACLs if server 460 acl Allow setfacl and getfacl to manage posix ACLs if server
456 supports them. (default) 461 supports them. (default)
457 noacl Do not allow setfacl and getfacl calls on this mount 462 noacl Do not allow setfacl and getfacl calls on this mount
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index 7ed36536e754..0a265ad9e426 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -282,8 +282,6 @@ static struct vfsmount *cifs_dfs_do_automount(struct dentry *mntpt)
282 cFYI(1, "in %s", __func__); 282 cFYI(1, "in %s", __func__);
283 BUG_ON(IS_ROOT(mntpt)); 283 BUG_ON(IS_ROOT(mntpt));
284 284
285 xid = GetXid();
286
287 /* 285 /*
288 * The MSDFS spec states that paths in DFS referral requests and 286 * The MSDFS spec states that paths in DFS referral requests and
289 * responses must be prefixed by a single '\' character instead of 287 * responses must be prefixed by a single '\' character instead of
@@ -293,20 +291,21 @@ static struct vfsmount *cifs_dfs_do_automount(struct dentry *mntpt)
293 mnt = ERR_PTR(-ENOMEM); 291 mnt = ERR_PTR(-ENOMEM);
294 full_path = build_path_from_dentry(mntpt); 292 full_path = build_path_from_dentry(mntpt);
295 if (full_path == NULL) 293 if (full_path == NULL)
296 goto free_xid; 294 goto cdda_exit;
297 295
298 cifs_sb = CIFS_SB(mntpt->d_inode->i_sb); 296 cifs_sb = CIFS_SB(mntpt->d_inode->i_sb);
299 tlink = cifs_sb_tlink(cifs_sb); 297 tlink = cifs_sb_tlink(cifs_sb);
300 mnt = ERR_PTR(-EINVAL);
301 if (IS_ERR(tlink)) { 298 if (IS_ERR(tlink)) {
302 mnt = ERR_CAST(tlink); 299 mnt = ERR_CAST(tlink);
303 goto free_full_path; 300 goto free_full_path;
304 } 301 }
305 ses = tlink_tcon(tlink)->ses; 302 ses = tlink_tcon(tlink)->ses;
306 303
304 xid = GetXid();
307 rc = get_dfs_path(xid, ses, full_path + 1, cifs_sb->local_nls, 305 rc = get_dfs_path(xid, ses, full_path + 1, cifs_sb->local_nls,
308 &num_referrals, &referrals, 306 &num_referrals, &referrals,
309 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); 307 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
308 FreeXid(xid);
310 309
311 cifs_put_tlink(tlink); 310 cifs_put_tlink(tlink);
312 311
@@ -339,8 +338,7 @@ success:
339 free_dfs_info_array(referrals, num_referrals); 338 free_dfs_info_array(referrals, num_referrals);
340free_full_path: 339free_full_path:
341 kfree(full_path); 340 kfree(full_path);
342free_xid: 341cdda_exit:
343 FreeXid(xid);
344 cFYI(1, "leaving %s" , __func__); 342 cFYI(1, "leaving %s" , __func__);
345 return mnt; 343 return mnt;
346} 344}
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index 1e7636b145a8..beeebf194234 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -372,6 +372,10 @@ static void parse_dacl(struct cifs_acl *pdacl, char *end_of_acl,
372 372
373 ppace = kmalloc(num_aces * sizeof(struct cifs_ace *), 373 ppace = kmalloc(num_aces * sizeof(struct cifs_ace *),
374 GFP_KERNEL); 374 GFP_KERNEL);
375 if (!ppace) {
376 cERROR(1, "DACL memory allocation error");
377 return;
378 }
375 379
376 for (i = 0; i < num_aces; ++i) { 380 for (i = 0; i < num_aces; ++i) {
377 ppace[i] = (struct cifs_ace *) (acl_base + acl_size); 381 ppace[i] = (struct cifs_ace *) (acl_base + acl_size);
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index 66f3d50d0676..a51585f9852b 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -24,7 +24,6 @@
24#include "cifspdu.h" 24#include "cifspdu.h"
25#include "cifsglob.h" 25#include "cifsglob.h"
26#include "cifs_debug.h" 26#include "cifs_debug.h"
27#include "md5.h"
28#include "cifs_unicode.h" 27#include "cifs_unicode.h"
29#include "cifsproto.h" 28#include "cifsproto.h"
30#include "ntlmssp.h" 29#include "ntlmssp.h"
@@ -37,11 +36,6 @@
37/* Note that the smb header signature field on input contains the 36/* Note that the smb header signature field on input contains the
38 sequence number before this function is called */ 37 sequence number before this function is called */
39 38
40extern void mdfour(unsigned char *out, unsigned char *in, int n);
41extern void E_md4hash(const unsigned char *passwd, unsigned char *p16);
42extern void SMBencrypt(unsigned char *passwd, const unsigned char *c8,
43 unsigned char *p24);
44
45static int cifs_calculate_signature(const struct smb_hdr *cifs_pdu, 39static int cifs_calculate_signature(const struct smb_hdr *cifs_pdu,
46 struct TCP_Server_Info *server, char *signature) 40 struct TCP_Server_Info *server, char *signature)
47{ 41{
@@ -234,6 +228,7 @@ int cifs_verify_signature(struct smb_hdr *cifs_pdu,
234/* first calculate 24 bytes ntlm response and then 16 byte session key */ 228/* first calculate 24 bytes ntlm response and then 16 byte session key */
235int setup_ntlm_response(struct cifsSesInfo *ses) 229int setup_ntlm_response(struct cifsSesInfo *ses)
236{ 230{
231 int rc = 0;
237 unsigned int temp_len = CIFS_SESS_KEY_SIZE + CIFS_AUTH_RESP_SIZE; 232 unsigned int temp_len = CIFS_SESS_KEY_SIZE + CIFS_AUTH_RESP_SIZE;
238 char temp_key[CIFS_SESS_KEY_SIZE]; 233 char temp_key[CIFS_SESS_KEY_SIZE];
239 234
@@ -247,13 +242,26 @@ int setup_ntlm_response(struct cifsSesInfo *ses)
247 } 242 }
248 ses->auth_key.len = temp_len; 243 ses->auth_key.len = temp_len;
249 244
250 SMBNTencrypt(ses->password, ses->server->cryptkey, 245 rc = SMBNTencrypt(ses->password, ses->server->cryptkey,
251 ses->auth_key.response + CIFS_SESS_KEY_SIZE); 246 ses->auth_key.response + CIFS_SESS_KEY_SIZE);
247 if (rc) {
248 cFYI(1, "%s Can't generate NTLM response, error: %d",
249 __func__, rc);
250 return rc;
251 }
252 252
253 E_md4hash(ses->password, temp_key); 253 rc = E_md4hash(ses->password, temp_key);
254 mdfour(ses->auth_key.response, temp_key, CIFS_SESS_KEY_SIZE); 254 if (rc) {
255 cFYI(1, "%s Can't generate NT hash, error: %d", __func__, rc);
256 return rc;
257 }
255 258
256 return 0; 259 rc = mdfour(ses->auth_key.response, temp_key, CIFS_SESS_KEY_SIZE);
260 if (rc)
261 cFYI(1, "%s Can't generate NTLM session key, error: %d",
262 __func__, rc);
263
264 return rc;
257} 265}
258 266
259#ifdef CONFIG_CIFS_WEAK_PW_HASH 267#ifdef CONFIG_CIFS_WEAK_PW_HASH
@@ -649,9 +657,10 @@ calc_seckey(struct cifsSesInfo *ses)
649 get_random_bytes(sec_key, CIFS_SESS_KEY_SIZE); 657 get_random_bytes(sec_key, CIFS_SESS_KEY_SIZE);
650 658
651 tfm_arc4 = crypto_alloc_blkcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC); 659 tfm_arc4 = crypto_alloc_blkcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC);
652 if (!tfm_arc4 || IS_ERR(tfm_arc4)) { 660 if (IS_ERR(tfm_arc4)) {
661 rc = PTR_ERR(tfm_arc4);
653 cERROR(1, "could not allocate crypto API arc4\n"); 662 cERROR(1, "could not allocate crypto API arc4\n");
654 return PTR_ERR(tfm_arc4); 663 return rc;
655 } 664 }
656 665
657 desc.tfm = tfm_arc4; 666 desc.tfm = tfm_arc4;
@@ -700,14 +709,13 @@ cifs_crypto_shash_allocate(struct TCP_Server_Info *server)
700 unsigned int size; 709 unsigned int size;
701 710
702 server->secmech.hmacmd5 = crypto_alloc_shash("hmac(md5)", 0, 0); 711 server->secmech.hmacmd5 = crypto_alloc_shash("hmac(md5)", 0, 0);
703 if (!server->secmech.hmacmd5 || 712 if (IS_ERR(server->secmech.hmacmd5)) {
704 IS_ERR(server->secmech.hmacmd5)) {
705 cERROR(1, "could not allocate crypto hmacmd5\n"); 713 cERROR(1, "could not allocate crypto hmacmd5\n");
706 return PTR_ERR(server->secmech.hmacmd5); 714 return PTR_ERR(server->secmech.hmacmd5);
707 } 715 }
708 716
709 server->secmech.md5 = crypto_alloc_shash("md5", 0, 0); 717 server->secmech.md5 = crypto_alloc_shash("md5", 0, 0);
710 if (!server->secmech.md5 || IS_ERR(server->secmech.md5)) { 718 if (IS_ERR(server->secmech.md5)) {
711 cERROR(1, "could not allocate crypto md5\n"); 719 cERROR(1, "could not allocate crypto md5\n");
712 rc = PTR_ERR(server->secmech.md5); 720 rc = PTR_ERR(server->secmech.md5);
713 goto crypto_allocate_md5_fail; 721 goto crypto_allocate_md5_fail;
diff --git a/fs/cifs/cifsencrypt.h b/fs/cifs/cifsencrypt.h
deleted file mode 100644
index 15d2ec006474..000000000000
--- a/fs/cifs/cifsencrypt.h
+++ /dev/null
@@ -1,33 +0,0 @@
1/*
2 * fs/cifs/cifsencrypt.h
3 *
4 * Copyright (c) International Business Machines Corp., 2005
5 * Author(s): Steve French (sfrench@us.ibm.com)
6 *
7 * Externs for misc. small encryption routines
8 * so we do not have to put them in cifsproto.h
9 *
10 * This library is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU Lesser General Public License as published
12 * by the Free Software Foundation; either version 2.1 of the License, or
13 * (at your option) any later version.
14 *
15 * This library is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
18 * the GNU Lesser General Public License for more details.
19 *
20 * You should have received a copy of the GNU Lesser General Public License
21 * along with this library; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 */
24
25/* md4.c */
26extern void mdfour(unsigned char *out, unsigned char *in, int n);
27/* smbdes.c */
28extern void E_P16(unsigned char *p14, unsigned char *p16);
29extern void E_P24(unsigned char *p21, const unsigned char *c8,
30 unsigned char *p24);
31
32
33
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index a8323f1dc1c4..f2970136d17d 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -600,10 +600,17 @@ static ssize_t cifs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
600{ 600{
601 struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode; 601 struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
602 ssize_t written; 602 ssize_t written;
603 int rc;
603 604
604 written = generic_file_aio_write(iocb, iov, nr_segs, pos); 605 written = generic_file_aio_write(iocb, iov, nr_segs, pos);
605 if (!CIFS_I(inode)->clientCanCacheAll) 606
606 filemap_fdatawrite(inode->i_mapping); 607 if (CIFS_I(inode)->clientCanCacheAll)
608 return written;
609
610 rc = filemap_fdatawrite(inode->i_mapping);
611 if (rc)
612 cFYI(1, "cifs_file_aio_write: %d rc on %p inode", rc, inode);
613
607 return written; 614 return written;
608} 615}
609 616
@@ -737,7 +744,7 @@ const struct file_operations cifs_file_strict_ops = {
737 .read = do_sync_read, 744 .read = do_sync_read,
738 .write = do_sync_write, 745 .write = do_sync_write,
739 .aio_read = cifs_strict_readv, 746 .aio_read = cifs_strict_readv,
740 .aio_write = cifs_file_aio_write, 747 .aio_write = cifs_strict_writev,
741 .open = cifs_open, 748 .open = cifs_open,
742 .release = cifs_close, 749 .release = cifs_close,
743 .lock = cifs_lock, 750 .lock = cifs_lock,
@@ -793,7 +800,7 @@ const struct file_operations cifs_file_strict_nobrl_ops = {
793 .read = do_sync_read, 800 .read = do_sync_read,
794 .write = do_sync_write, 801 .write = do_sync_write,
795 .aio_read = cifs_strict_readv, 802 .aio_read = cifs_strict_readv,
796 .aio_write = cifs_file_aio_write, 803 .aio_write = cifs_strict_writev,
797 .open = cifs_open, 804 .open = cifs_open,
798 .release = cifs_close, 805 .release = cifs_close,
799 .fsync = cifs_strict_fsync, 806 .fsync = cifs_strict_fsync,
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index f23206d46531..a9371b6578c0 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -85,7 +85,9 @@ extern ssize_t cifs_user_read(struct file *file, char __user *read_data,
85extern ssize_t cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov, 85extern ssize_t cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov,
86 unsigned long nr_segs, loff_t pos); 86 unsigned long nr_segs, loff_t pos);
87extern ssize_t cifs_user_write(struct file *file, const char __user *write_data, 87extern ssize_t cifs_user_write(struct file *file, const char __user *write_data,
88 size_t write_size, loff_t *poffset); 88 size_t write_size, loff_t *poffset);
89extern ssize_t cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov,
90 unsigned long nr_segs, loff_t pos);
89extern int cifs_lock(struct file *, int, struct file_lock *); 91extern int cifs_lock(struct file *, int, struct file_lock *);
90extern int cifs_fsync(struct file *, int); 92extern int cifs_fsync(struct file *, int);
91extern int cifs_strict_fsync(struct file *, int); 93extern int cifs_strict_fsync(struct file *, int);
@@ -125,5 +127,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
125extern const struct export_operations cifs_export_ops; 127extern const struct export_operations cifs_export_ops;
126#endif /* EXPERIMENTAL */ 128#endif /* EXPERIMENTAL */
127 129
128#define CIFS_VERSION "1.69" 130#define CIFS_VERSION "1.71"
129#endif /* _CIFSFS_H */ 131#endif /* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 5bfb75346cb0..17afb0fbcaed 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -166,6 +166,9 @@ struct TCP_Server_Info {
166 struct socket *ssocket; 166 struct socket *ssocket;
167 struct sockaddr_storage dstaddr; 167 struct sockaddr_storage dstaddr;
168 struct sockaddr_storage srcaddr; /* locally bind to this IP */ 168 struct sockaddr_storage srcaddr; /* locally bind to this IP */
169#ifdef CONFIG_NET_NS
170 struct net *net;
171#endif
169 wait_queue_head_t response_q; 172 wait_queue_head_t response_q;
170 wait_queue_head_t request_q; /* if more than maxmpx to srvr must block*/ 173 wait_queue_head_t request_q; /* if more than maxmpx to srvr must block*/
171 struct list_head pending_mid_q; 174 struct list_head pending_mid_q;
@@ -185,6 +188,8 @@ struct TCP_Server_Info {
185 /* multiplexed reads or writes */ 188 /* multiplexed reads or writes */
186 unsigned int maxBuf; /* maxBuf specifies the maximum */ 189 unsigned int maxBuf; /* maxBuf specifies the maximum */
187 /* message size the server can send or receive for non-raw SMBs */ 190 /* message size the server can send or receive for non-raw SMBs */
191 /* maxBuf is returned by SMB NegotiateProtocol so maxBuf is only 0 */
192 /* when socket is setup (and during reconnect) before NegProt sent */
188 unsigned int max_rw; /* maxRw specifies the maximum */ 193 unsigned int max_rw; /* maxRw specifies the maximum */
189 /* message size the server can send or receive for */ 194 /* message size the server can send or receive for */
190 /* SMB_COM_WRITE_RAW or SMB_COM_READ_RAW. */ 195 /* SMB_COM_WRITE_RAW or SMB_COM_READ_RAW. */
@@ -217,6 +222,36 @@ struct TCP_Server_Info {
217}; 222};
218 223
219/* 224/*
225 * Macros to allow the TCP_Server_Info->net field and related code to drop out
226 * when CONFIG_NET_NS isn't set.
227 */
228
229#ifdef CONFIG_NET_NS
230
231static inline struct net *cifs_net_ns(struct TCP_Server_Info *srv)
232{
233 return srv->net;
234}
235
236static inline void cifs_set_net_ns(struct TCP_Server_Info *srv, struct net *net)
237{
238 srv->net = net;
239}
240
241#else
242
243static inline struct net *cifs_net_ns(struct TCP_Server_Info *srv)
244{
245 return &init_net;
246}
247
248static inline void cifs_set_net_ns(struct TCP_Server_Info *srv, struct net *net)
249{
250}
251
252#endif
253
254/*
220 * Session structure. One of these for each uid session with a particular host 255 * Session structure. One of these for each uid session with a particular host
221 */ 256 */
222struct cifsSesInfo { 257struct cifsSesInfo {
@@ -619,7 +654,7 @@ static inline void free_dfs_info_array(struct dfs_info3_param *param,
619#define MID_REQUEST_SUBMITTED 2 654#define MID_REQUEST_SUBMITTED 2
620#define MID_RESPONSE_RECEIVED 4 655#define MID_RESPONSE_RECEIVED 4
621#define MID_RETRY_NEEDED 8 /* session closed while this request out */ 656#define MID_RETRY_NEEDED 8 /* session closed while this request out */
622#define MID_NO_RESP_NEEDED 0x10 657#define MID_RESPONSE_MALFORMED 0x10
623 658
624/* Types of response buffer returned from SendReceive2 */ 659/* Types of response buffer returned from SendReceive2 */
625#define CIFS_NO_BUFFER 0 /* Response buffer not returned */ 660#define CIFS_NO_BUFFER 0 /* Response buffer not returned */
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 982895fa7615..8096f27ad9a8 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -85,6 +85,8 @@ extern int checkSMB(struct smb_hdr *smb, __u16 mid, unsigned int length);
85extern bool is_valid_oplock_break(struct smb_hdr *smb, 85extern bool is_valid_oplock_break(struct smb_hdr *smb,
86 struct TCP_Server_Info *); 86 struct TCP_Server_Info *);
87extern bool is_size_safe_to_change(struct cifsInodeInfo *, __u64 eof); 87extern bool is_size_safe_to_change(struct cifsInodeInfo *, __u64 eof);
88extern void cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
89 unsigned int bytes_written);
88extern struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *, bool); 90extern struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *, bool);
89extern struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *, bool); 91extern struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *, bool);
90extern unsigned int smbCalcSize(struct smb_hdr *ptr); 92extern unsigned int smbCalcSize(struct smb_hdr *ptr);
@@ -373,7 +375,7 @@ extern int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *,
373extern int cifs_verify_signature(struct smb_hdr *, 375extern int cifs_verify_signature(struct smb_hdr *,
374 struct TCP_Server_Info *server, 376 struct TCP_Server_Info *server,
375 __u32 expected_sequence_number); 377 __u32 expected_sequence_number);
376extern void SMBNTencrypt(unsigned char *, unsigned char *, unsigned char *); 378extern int SMBNTencrypt(unsigned char *, unsigned char *, unsigned char *);
377extern int setup_ntlm_response(struct cifsSesInfo *); 379extern int setup_ntlm_response(struct cifsSesInfo *);
378extern int setup_ntlmv2_rsp(struct cifsSesInfo *, const struct nls_table *); 380extern int setup_ntlmv2_rsp(struct cifsSesInfo *, const struct nls_table *);
379extern int cifs_crypto_shash_allocate(struct TCP_Server_Info *); 381extern int cifs_crypto_shash_allocate(struct TCP_Server_Info *);
@@ -423,4 +425,11 @@ extern bool CIFSCouldBeMFSymlink(const struct cifs_fattr *fattr);
423extern int CIFSCheckMFSymlink(struct cifs_fattr *fattr, 425extern int CIFSCheckMFSymlink(struct cifs_fattr *fattr,
424 const unsigned char *path, 426 const unsigned char *path,
425 struct cifs_sb_info *cifs_sb, int xid); 427 struct cifs_sb_info *cifs_sb, int xid);
428extern int mdfour(unsigned char *, unsigned char *, int);
429extern int E_md4hash(const unsigned char *passwd, unsigned char *p16);
430extern void SMBencrypt(unsigned char *passwd, const unsigned char *c8,
431 unsigned char *p24);
432extern void E_P16(unsigned char *p14, unsigned char *p16);
433extern void E_P24(unsigned char *p21, const unsigned char *c8,
434 unsigned char *p24);
426#endif /* _CIFSPROTO_H */ 435#endif /* _CIFSPROTO_H */
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 3106f5e5c633..904aa47e3515 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -136,9 +136,6 @@ cifs_reconnect_tcon(struct cifsTconInfo *tcon, int smb_command)
136 } 136 }
137 } 137 }
138 138
139 if (ses->status == CifsExiting)
140 return -EIO;
141
142 /* 139 /*
143 * Give demultiplex thread up to 10 seconds to reconnect, should be 140 * Give demultiplex thread up to 10 seconds to reconnect, should be
144 * greater than cifs socket timeout which is 7 seconds 141 * greater than cifs socket timeout which is 7 seconds
@@ -156,7 +153,7 @@ cifs_reconnect_tcon(struct cifsTconInfo *tcon, int smb_command)
156 * retrying until process is killed or server comes 153 * retrying until process is killed or server comes
157 * back on-line 154 * back on-line
158 */ 155 */
159 if (!tcon->retry || ses->status == CifsExiting) { 156 if (!tcon->retry) {
160 cFYI(1, "gave up waiting on reconnect in smb_init"); 157 cFYI(1, "gave up waiting on reconnect in smb_init");
161 return -EHOSTDOWN; 158 return -EHOSTDOWN;
162 } 159 }
@@ -4914,7 +4911,6 @@ CIFSSMBSetFileSize(const int xid, struct cifsTconInfo *tcon, __u64 size,
4914 __u16 fid, __u32 pid_of_opener, bool SetAllocation) 4911 __u16 fid, __u32 pid_of_opener, bool SetAllocation)
4915{ 4912{
4916 struct smb_com_transaction2_sfi_req *pSMB = NULL; 4913 struct smb_com_transaction2_sfi_req *pSMB = NULL;
4917 char *data_offset;
4918 struct file_end_of_file_info *parm_data; 4914 struct file_end_of_file_info *parm_data;
4919 int rc = 0; 4915 int rc = 0;
4920 __u16 params, param_offset, offset, byte_count, count; 4916 __u16 params, param_offset, offset, byte_count, count;
@@ -4938,8 +4934,6 @@ CIFSSMBSetFileSize(const int xid, struct cifsTconInfo *tcon, __u64 size,
4938 param_offset = offsetof(struct smb_com_transaction2_sfi_req, Fid) - 4; 4934 param_offset = offsetof(struct smb_com_transaction2_sfi_req, Fid) - 4;
4939 offset = param_offset + params; 4935 offset = param_offset + params;
4940 4936
4941 data_offset = (char *) (&pSMB->hdr.Protocol) + offset;
4942
4943 count = sizeof(struct file_end_of_file_info); 4937 count = sizeof(struct file_end_of_file_info);
4944 pSMB->MaxParameterCount = cpu_to_le16(2); 4938 pSMB->MaxParameterCount = cpu_to_le16(2);
4945 /* BB find exact max SMB PDU from sess structure BB */ 4939 /* BB find exact max SMB PDU from sess structure BB */
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 18d3c7724d6e..8d6c17ab593d 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -55,9 +55,6 @@
55/* SMB echo "timeout" -- FIXME: tunable? */ 55/* SMB echo "timeout" -- FIXME: tunable? */
56#define SMB_ECHO_INTERVAL (60 * HZ) 56#define SMB_ECHO_INTERVAL (60 * HZ)
57 57
58extern void SMBNTencrypt(unsigned char *passwd, unsigned char *c8,
59 unsigned char *p24);
60
61extern mempool_t *cifs_req_poolp; 58extern mempool_t *cifs_req_poolp;
62 59
63struct smb_vol { 60struct smb_vol {
@@ -87,6 +84,7 @@ struct smb_vol {
87 bool no_xattr:1; /* set if xattr (EA) support should be disabled*/ 84 bool no_xattr:1; /* set if xattr (EA) support should be disabled*/
88 bool server_ino:1; /* use inode numbers from server ie UniqueId */ 85 bool server_ino:1; /* use inode numbers from server ie UniqueId */
89 bool direct_io:1; 86 bool direct_io:1;
87 bool strict_io:1; /* strict cache behavior */
90 bool remap:1; /* set to remap seven reserved chars in filenames */ 88 bool remap:1; /* set to remap seven reserved chars in filenames */
91 bool posix_paths:1; /* unset to not ask for posix pathnames. */ 89 bool posix_paths:1; /* unset to not ask for posix pathnames. */
92 bool no_linux_ext:1; 90 bool no_linux_ext:1;
@@ -339,8 +337,13 @@ cifs_echo_request(struct work_struct *work)
339 struct TCP_Server_Info *server = container_of(work, 337 struct TCP_Server_Info *server = container_of(work,
340 struct TCP_Server_Info, echo.work); 338 struct TCP_Server_Info, echo.work);
341 339
342 /* no need to ping if we got a response recently */ 340 /*
343 if (time_before(jiffies, server->lstrp + SMB_ECHO_INTERVAL - HZ)) 341 * We cannot send an echo until the NEGOTIATE_PROTOCOL request is
342 * done, which is indicated by maxBuf != 0. Also, no need to ping if
343 * we got a response recently
344 */
345 if (server->maxBuf == 0 ||
346 time_before(jiffies, server->lstrp + SMB_ECHO_INTERVAL - HZ))
344 goto requeue_echo; 347 goto requeue_echo;
345 348
346 rc = CIFSSMBEcho(server); 349 rc = CIFSSMBEcho(server);
@@ -580,14 +583,23 @@ incomplete_rcv:
580 else if (reconnect == 1) 583 else if (reconnect == 1)
581 continue; 584 continue;
582 585
583 length += 4; /* account for rfc1002 hdr */ 586 total_read += 4; /* account for rfc1002 hdr */
584 587
588 dump_smb(smb_buffer, total_read);
585 589
586 dump_smb(smb_buffer, length); 590 /*
587 if (checkSMB(smb_buffer, smb_buffer->Mid, total_read+4)) { 591 * We know that we received enough to get to the MID as we
588 cifs_dump_mem("Bad SMB: ", smb_buffer, 48); 592 * checked the pdu_length earlier. Now check to see
589 continue; 593 * if the rest of the header is OK. We borrow the length
590 } 594 * var for the rest of the loop to avoid a new stack var.
595 *
596 * 48 bytes is enough to display the header and a little bit
597 * into the payload for debugging purposes.
598 */
599 length = checkSMB(smb_buffer, smb_buffer->Mid, total_read);
600 if (length != 0)
601 cifs_dump_mem("Bad SMB: ", smb_buffer,
602 min_t(unsigned int, total_read, 48));
591 603
592 mid_entry = NULL; 604 mid_entry = NULL;
593 server->lstrp = jiffies; 605 server->lstrp = jiffies;
@@ -599,7 +611,8 @@ incomplete_rcv:
599 if ((mid_entry->mid == smb_buffer->Mid) && 611 if ((mid_entry->mid == smb_buffer->Mid) &&
600 (mid_entry->midState == MID_REQUEST_SUBMITTED) && 612 (mid_entry->midState == MID_REQUEST_SUBMITTED) &&
601 (mid_entry->command == smb_buffer->Command)) { 613 (mid_entry->command == smb_buffer->Command)) {
602 if (check2ndT2(smb_buffer,server->maxBuf) > 0) { 614 if (length == 0 &&
615 check2ndT2(smb_buffer, server->maxBuf) > 0) {
603 /* We have a multipart transact2 resp */ 616 /* We have a multipart transact2 resp */
604 isMultiRsp = true; 617 isMultiRsp = true;
605 if (mid_entry->resp_buf) { 618 if (mid_entry->resp_buf) {
@@ -634,12 +647,17 @@ incomplete_rcv:
634 mid_entry->resp_buf = smb_buffer; 647 mid_entry->resp_buf = smb_buffer;
635 mid_entry->largeBuf = isLargeBuf; 648 mid_entry->largeBuf = isLargeBuf;
636multi_t2_fnd: 649multi_t2_fnd:
637 mid_entry->midState = MID_RESPONSE_RECEIVED; 650 if (length == 0)
638 list_del_init(&mid_entry->qhead); 651 mid_entry->midState =
639 mid_entry->callback(mid_entry); 652 MID_RESPONSE_RECEIVED;
653 else
654 mid_entry->midState =
655 MID_RESPONSE_MALFORMED;
640#ifdef CONFIG_CIFS_STATS2 656#ifdef CONFIG_CIFS_STATS2
641 mid_entry->when_received = jiffies; 657 mid_entry->when_received = jiffies;
642#endif 658#endif
659 list_del_init(&mid_entry->qhead);
660 mid_entry->callback(mid_entry);
643 break; 661 break;
644 } 662 }
645 mid_entry = NULL; 663 mid_entry = NULL;
@@ -655,6 +673,9 @@ multi_t2_fnd:
655 else 673 else
656 smallbuf = NULL; 674 smallbuf = NULL;
657 } 675 }
676 } else if (length != 0) {
677 /* response sanity checks failed */
678 continue;
658 } else if (!is_valid_oplock_break(smb_buffer, server) && 679 } else if (!is_valid_oplock_break(smb_buffer, server) &&
659 !isMultiRsp) { 680 !isMultiRsp) {
660 cERROR(1, "No task to wake, unknown frame received! " 681 cERROR(1, "No task to wake, unknown frame received! "
@@ -1344,6 +1365,8 @@ cifs_parse_mount_options(char *options, const char *devname,
1344 vol->direct_io = 1; 1365 vol->direct_io = 1;
1345 } else if (strnicmp(data, "forcedirectio", 13) == 0) { 1366 } else if (strnicmp(data, "forcedirectio", 13) == 0) {
1346 vol->direct_io = 1; 1367 vol->direct_io = 1;
1368 } else if (strnicmp(data, "strictcache", 11) == 0) {
1369 vol->strict_io = 1;
1347 } else if (strnicmp(data, "noac", 4) == 0) { 1370 } else if (strnicmp(data, "noac", 4) == 0) {
1348 printk(KERN_WARNING "CIFS: Mount option noac not " 1371 printk(KERN_WARNING "CIFS: Mount option noac not "
1349 "supported. Instead set " 1372 "supported. Instead set "
@@ -1568,6 +1591,9 @@ cifs_find_tcp_session(struct sockaddr *addr, struct smb_vol *vol)
1568 1591
1569 spin_lock(&cifs_tcp_ses_lock); 1592 spin_lock(&cifs_tcp_ses_lock);
1570 list_for_each_entry(server, &cifs_tcp_ses_list, tcp_ses_list) { 1593 list_for_each_entry(server, &cifs_tcp_ses_list, tcp_ses_list) {
1594 if (!net_eq(cifs_net_ns(server), current->nsproxy->net_ns))
1595 continue;
1596
1571 if (!match_address(server, addr, 1597 if (!match_address(server, addr,
1572 (struct sockaddr *)&vol->srcaddr)) 1598 (struct sockaddr *)&vol->srcaddr))
1573 continue; 1599 continue;
@@ -1598,6 +1624,8 @@ cifs_put_tcp_session(struct TCP_Server_Info *server)
1598 return; 1624 return;
1599 } 1625 }
1600 1626
1627 put_net(cifs_net_ns(server));
1628
1601 list_del_init(&server->tcp_ses_list); 1629 list_del_init(&server->tcp_ses_list);
1602 spin_unlock(&cifs_tcp_ses_lock); 1630 spin_unlock(&cifs_tcp_ses_lock);
1603 1631
@@ -1672,6 +1700,7 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
1672 goto out_err; 1700 goto out_err;
1673 } 1701 }
1674 1702
1703 cifs_set_net_ns(tcp_ses, get_net(current->nsproxy->net_ns));
1675 tcp_ses->hostname = extract_hostname(volume_info->UNC); 1704 tcp_ses->hostname = extract_hostname(volume_info->UNC);
1676 if (IS_ERR(tcp_ses->hostname)) { 1705 if (IS_ERR(tcp_ses->hostname)) {
1677 rc = PTR_ERR(tcp_ses->hostname); 1706 rc = PTR_ERR(tcp_ses->hostname);
@@ -1752,6 +1781,8 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
1752out_err_crypto_release: 1781out_err_crypto_release:
1753 cifs_crypto_shash_release(tcp_ses); 1782 cifs_crypto_shash_release(tcp_ses);
1754 1783
1784 put_net(cifs_net_ns(tcp_ses));
1785
1755out_err: 1786out_err:
1756 if (tcp_ses) { 1787 if (tcp_ses) {
1757 if (!IS_ERR(tcp_ses->hostname)) 1788 if (!IS_ERR(tcp_ses->hostname))
@@ -2263,8 +2294,8 @@ generic_ip_connect(struct TCP_Server_Info *server)
2263 } 2294 }
2264 2295
2265 if (socket == NULL) { 2296 if (socket == NULL) {
2266 rc = sock_create_kern(sfamily, SOCK_STREAM, 2297 rc = __sock_create(cifs_net_ns(server), sfamily, SOCK_STREAM,
2267 IPPROTO_TCP, &socket); 2298 IPPROTO_TCP, &socket, 1);
2268 if (rc < 0) { 2299 if (rc < 0) {
2269 cERROR(1, "Error %d creating socket", rc); 2300 cERROR(1, "Error %d creating socket", rc);
2270 server->ssocket = NULL; 2301 server->ssocket = NULL;
@@ -2576,6 +2607,8 @@ static void setup_cifs_sb(struct smb_vol *pvolume_info,
2576 if (pvolume_info->multiuser) 2607 if (pvolume_info->multiuser)
2577 cifs_sb->mnt_cifs_flags |= (CIFS_MOUNT_MULTIUSER | 2608 cifs_sb->mnt_cifs_flags |= (CIFS_MOUNT_MULTIUSER |
2578 CIFS_MOUNT_NO_PERM); 2609 CIFS_MOUNT_NO_PERM);
2610 if (pvolume_info->strict_io)
2611 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_STRICT_IO;
2579 if (pvolume_info->direct_io) { 2612 if (pvolume_info->direct_io) {
2580 cFYI(1, "mounting share using direct i/o"); 2613 cFYI(1, "mounting share using direct i/o");
2581 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_DIRECT_IO; 2614 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_DIRECT_IO;
@@ -2977,7 +3010,8 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses,
2977 bcc_ptr); 3010 bcc_ptr);
2978 else 3011 else
2979#endif /* CIFS_WEAK_PW_HASH */ 3012#endif /* CIFS_WEAK_PW_HASH */
2980 SMBNTencrypt(tcon->password, ses->server->cryptkey, bcc_ptr); 3013 rc = SMBNTencrypt(tcon->password, ses->server->cryptkey,
3014 bcc_ptr);
2981 3015
2982 bcc_ptr += CIFS_AUTH_RESP_SIZE; 3016 bcc_ptr += CIFS_AUTH_RESP_SIZE;
2983 if (ses->capabilities & CAP_UNICODE) { 3017 if (ses->capabilities & CAP_UNICODE) {
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index d7d65a70678e..e964b1cd5dd0 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -346,7 +346,6 @@ int cifs_open(struct inode *inode, struct file *file)
346 struct cifsTconInfo *tcon; 346 struct cifsTconInfo *tcon;
347 struct tcon_link *tlink; 347 struct tcon_link *tlink;
348 struct cifsFileInfo *pCifsFile = NULL; 348 struct cifsFileInfo *pCifsFile = NULL;
349 struct cifsInodeInfo *pCifsInode;
350 char *full_path = NULL; 349 char *full_path = NULL;
351 bool posix_open_ok = false; 350 bool posix_open_ok = false;
352 __u16 netfid; 351 __u16 netfid;
@@ -361,8 +360,6 @@ int cifs_open(struct inode *inode, struct file *file)
361 } 360 }
362 tcon = tlink_tcon(tlink); 361 tcon = tlink_tcon(tlink);
363 362
364 pCifsInode = CIFS_I(file->f_path.dentry->d_inode);
365
366 full_path = build_path_from_dentry(file->f_path.dentry); 363 full_path = build_path_from_dentry(file->f_path.dentry);
367 if (full_path == NULL) { 364 if (full_path == NULL) {
368 rc = -ENOMEM; 365 rc = -ENOMEM;
@@ -848,7 +845,7 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
848} 845}
849 846
850/* update the file size (if needed) after a write */ 847/* update the file size (if needed) after a write */
851static void 848void
852cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset, 849cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
853 unsigned int bytes_written) 850 unsigned int bytes_written)
854{ 851{
@@ -1146,7 +1143,6 @@ static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
1146 char *write_data; 1143 char *write_data;
1147 int rc = -EFAULT; 1144 int rc = -EFAULT;
1148 int bytes_written = 0; 1145 int bytes_written = 0;
1149 struct cifs_sb_info *cifs_sb;
1150 struct inode *inode; 1146 struct inode *inode;
1151 struct cifsFileInfo *open_file; 1147 struct cifsFileInfo *open_file;
1152 1148
@@ -1154,7 +1150,6 @@ static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
1154 return -EFAULT; 1150 return -EFAULT;
1155 1151
1156 inode = page->mapping->host; 1152 inode = page->mapping->host;
1157 cifs_sb = CIFS_SB(inode->i_sb);
1158 1153
1159 offset += (loff_t)from; 1154 offset += (loff_t)from;
1160 write_data = kmap(page); 1155 write_data = kmap(page);
@@ -1619,13 +1614,215 @@ int cifs_flush(struct file *file, fl_owner_t id)
1619 return rc; 1614 return rc;
1620} 1615}
1621 1616
1617static int
1618cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
1619{
1620 int rc = 0;
1621 unsigned long i;
1622
1623 for (i = 0; i < num_pages; i++) {
1624 pages[i] = alloc_page(__GFP_HIGHMEM);
1625 if (!pages[i]) {
1626 /*
1627 * save number of pages we have already allocated and
1628 * return with ENOMEM error
1629 */
1630 num_pages = i;
1631 rc = -ENOMEM;
1632 goto error;
1633 }
1634 }
1635
1636 return rc;
1637
1638error:
1639 for (i = 0; i < num_pages; i++)
1640 put_page(pages[i]);
1641 return rc;
1642}
1643
1644static inline
1645size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
1646{
1647 size_t num_pages;
1648 size_t clen;
1649
1650 clen = min_t(const size_t, len, wsize);
1651 num_pages = clen / PAGE_CACHE_SIZE;
1652 if (clen % PAGE_CACHE_SIZE)
1653 num_pages++;
1654
1655 if (cur_len)
1656 *cur_len = clen;
1657
1658 return num_pages;
1659}
1660
1661static ssize_t
1662cifs_iovec_write(struct file *file, const struct iovec *iov,
1663 unsigned long nr_segs, loff_t *poffset)
1664{
1665 unsigned int written;
1666 unsigned long num_pages, npages, i;
1667 size_t copied, len, cur_len;
1668 ssize_t total_written = 0;
1669 struct kvec *to_send;
1670 struct page **pages;
1671 struct iov_iter it;
1672 struct inode *inode;
1673 struct cifsFileInfo *open_file;
1674 struct cifsTconInfo *pTcon;
1675 struct cifs_sb_info *cifs_sb;
1676 int xid, rc;
1677
1678 len = iov_length(iov, nr_segs);
1679 if (!len)
1680 return 0;
1681
1682 rc = generic_write_checks(file, poffset, &len, 0);
1683 if (rc)
1684 return rc;
1685
1686 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1687 num_pages = get_numpages(cifs_sb->wsize, len, &cur_len);
1688
1689 pages = kmalloc(sizeof(struct pages *)*num_pages, GFP_KERNEL);
1690 if (!pages)
1691 return -ENOMEM;
1692
1693 to_send = kmalloc(sizeof(struct kvec)*(num_pages + 1), GFP_KERNEL);
1694 if (!to_send) {
1695 kfree(pages);
1696 return -ENOMEM;
1697 }
1698
1699 rc = cifs_write_allocate_pages(pages, num_pages);
1700 if (rc) {
1701 kfree(pages);
1702 kfree(to_send);
1703 return rc;
1704 }
1705
1706 xid = GetXid();
1707 open_file = file->private_data;
1708 pTcon = tlink_tcon(open_file->tlink);
1709 inode = file->f_path.dentry->d_inode;
1710
1711 iov_iter_init(&it, iov, nr_segs, len, 0);
1712 npages = num_pages;
1713
1714 do {
1715 size_t save_len = cur_len;
1716 for (i = 0; i < npages; i++) {
1717 copied = min_t(const size_t, cur_len, PAGE_CACHE_SIZE);
1718 copied = iov_iter_copy_from_user(pages[i], &it, 0,
1719 copied);
1720 cur_len -= copied;
1721 iov_iter_advance(&it, copied);
1722 to_send[i+1].iov_base = kmap(pages[i]);
1723 to_send[i+1].iov_len = copied;
1724 }
1725
1726 cur_len = save_len - cur_len;
1727
1728 do {
1729 if (open_file->invalidHandle) {
1730 rc = cifs_reopen_file(open_file, false);
1731 if (rc != 0)
1732 break;
1733 }
1734 rc = CIFSSMBWrite2(xid, pTcon, open_file->netfid,
1735 cur_len, *poffset, &written,
1736 to_send, npages, 0);
1737 } while (rc == -EAGAIN);
1738
1739 for (i = 0; i < npages; i++)
1740 kunmap(pages[i]);
1741
1742 if (written) {
1743 len -= written;
1744 total_written += written;
1745 cifs_update_eof(CIFS_I(inode), *poffset, written);
1746 *poffset += written;
1747 } else if (rc < 0) {
1748 if (!total_written)
1749 total_written = rc;
1750 break;
1751 }
1752
1753 /* get length and number of kvecs of the next write */
1754 npages = get_numpages(cifs_sb->wsize, len, &cur_len);
1755 } while (len > 0);
1756
1757 if (total_written > 0) {
1758 spin_lock(&inode->i_lock);
1759 if (*poffset > inode->i_size)
1760 i_size_write(inode, *poffset);
1761 spin_unlock(&inode->i_lock);
1762 }
1763
1764 cifs_stats_bytes_written(pTcon, total_written);
1765 mark_inode_dirty_sync(inode);
1766
1767 for (i = 0; i < num_pages; i++)
1768 put_page(pages[i]);
1769 kfree(to_send);
1770 kfree(pages);
1771 FreeXid(xid);
1772 return total_written;
1773}
1774
1775static ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov,
1776 unsigned long nr_segs, loff_t pos)
1777{
1778 ssize_t written;
1779 struct inode *inode;
1780
1781 inode = iocb->ki_filp->f_path.dentry->d_inode;
1782
1783 /*
1784 * BB - optimize the way when signing is disabled. We can drop this
1785 * extra memory-to-memory copying and use iovec buffers for constructing
1786 * write request.
1787 */
1788
1789 written = cifs_iovec_write(iocb->ki_filp, iov, nr_segs, &pos);
1790 if (written > 0) {
1791 CIFS_I(inode)->invalid_mapping = true;
1792 iocb->ki_pos = pos;
1793 }
1794
1795 return written;
1796}
1797
1798ssize_t cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov,
1799 unsigned long nr_segs, loff_t pos)
1800{
1801 struct inode *inode;
1802
1803 inode = iocb->ki_filp->f_path.dentry->d_inode;
1804
1805 if (CIFS_I(inode)->clientCanCacheAll)
1806 return generic_file_aio_write(iocb, iov, nr_segs, pos);
1807
1808 /*
1809 * In strict cache mode we need to write the data to the server exactly
1810 * from the pos to pos+len-1 rather than flush all affected pages
1811 * because it may cause a error with mandatory locks on these pages but
1812 * not on the region from pos to ppos+len-1.
1813 */
1814
1815 return cifs_user_writev(iocb, iov, nr_segs, pos);
1816}
1817
1622static ssize_t 1818static ssize_t
1623cifs_iovec_read(struct file *file, const struct iovec *iov, 1819cifs_iovec_read(struct file *file, const struct iovec *iov,
1624 unsigned long nr_segs, loff_t *poffset) 1820 unsigned long nr_segs, loff_t *poffset)
1625{ 1821{
1626 int rc; 1822 int rc;
1627 int xid; 1823 int xid;
1628 unsigned int total_read, bytes_read = 0; 1824 ssize_t total_read;
1825 unsigned int bytes_read = 0;
1629 size_t len, cur_len; 1826 size_t len, cur_len;
1630 int iov_offset = 0; 1827 int iov_offset = 0;
1631 struct cifs_sb_info *cifs_sb; 1828 struct cifs_sb_info *cifs_sb;
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index 306769de2fb5..e8804d373404 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -28,7 +28,6 @@
28#include "cifsproto.h" 28#include "cifsproto.h"
29#include "cifs_debug.h" 29#include "cifs_debug.h"
30#include "cifs_fs_sb.h" 30#include "cifs_fs_sb.h"
31#include "md5.h"
32 31
33#define CIFS_MF_SYMLINK_LEN_OFFSET (4+1) 32#define CIFS_MF_SYMLINK_LEN_OFFSET (4+1)
34#define CIFS_MF_SYMLINK_MD5_OFFSET (CIFS_MF_SYMLINK_LEN_OFFSET+(4+1)) 33#define CIFS_MF_SYMLINK_MD5_OFFSET (CIFS_MF_SYMLINK_LEN_OFFSET+(4+1))
@@ -47,6 +46,45 @@
47 md5_hash[12], md5_hash[13], md5_hash[14], md5_hash[15] 46 md5_hash[12], md5_hash[13], md5_hash[14], md5_hash[15]
48 47
49static int 48static int
49symlink_hash(unsigned int link_len, const char *link_str, u8 *md5_hash)
50{
51 int rc;
52 unsigned int size;
53 struct crypto_shash *md5;
54 struct sdesc *sdescmd5;
55
56 md5 = crypto_alloc_shash("md5", 0, 0);
57 if (IS_ERR(md5)) {
58 rc = PTR_ERR(md5);
59 cERROR(1, "%s: Crypto md5 allocation error %d\n", __func__, rc);
60 return rc;
61 }
62 size = sizeof(struct shash_desc) + crypto_shash_descsize(md5);
63 sdescmd5 = kmalloc(size, GFP_KERNEL);
64 if (!sdescmd5) {
65 rc = -ENOMEM;
66 cERROR(1, "%s: Memory allocation failure\n", __func__);
67 goto symlink_hash_err;
68 }
69 sdescmd5->shash.tfm = md5;
70 sdescmd5->shash.flags = 0x0;
71
72 rc = crypto_shash_init(&sdescmd5->shash);
73 if (rc) {
74 cERROR(1, "%s: Could not init md5 shash\n", __func__);
75 goto symlink_hash_err;
76 }
77 crypto_shash_update(&sdescmd5->shash, link_str, link_len);
78 rc = crypto_shash_final(&sdescmd5->shash, md5_hash);
79
80symlink_hash_err:
81 crypto_free_shash(md5);
82 kfree(sdescmd5);
83
84 return rc;
85}
86
87static int
50CIFSParseMFSymlink(const u8 *buf, 88CIFSParseMFSymlink(const u8 *buf,
51 unsigned int buf_len, 89 unsigned int buf_len,
52 unsigned int *_link_len, 90 unsigned int *_link_len,
@@ -56,7 +94,6 @@ CIFSParseMFSymlink(const u8 *buf,
56 unsigned int link_len; 94 unsigned int link_len;
57 const char *md5_str1; 95 const char *md5_str1;
58 const char *link_str; 96 const char *link_str;
59 struct MD5Context md5_ctx;
60 u8 md5_hash[16]; 97 u8 md5_hash[16];
61 char md5_str2[34]; 98 char md5_str2[34];
62 99
@@ -70,9 +107,11 @@ CIFSParseMFSymlink(const u8 *buf,
70 if (rc != 1) 107 if (rc != 1)
71 return -EINVAL; 108 return -EINVAL;
72 109
73 cifs_MD5_init(&md5_ctx); 110 rc = symlink_hash(link_len, link_str, md5_hash);
74 cifs_MD5_update(&md5_ctx, (const u8 *)link_str, link_len); 111 if (rc) {
75 cifs_MD5_final(md5_hash, &md5_ctx); 112 cFYI(1, "%s: MD5 hash failure: %d\n", __func__, rc);
113 return rc;
114 }
76 115
77 snprintf(md5_str2, sizeof(md5_str2), 116 snprintf(md5_str2, sizeof(md5_str2),
78 CIFS_MF_SYMLINK_MD5_FORMAT, 117 CIFS_MF_SYMLINK_MD5_FORMAT,
@@ -94,9 +133,9 @@ CIFSParseMFSymlink(const u8 *buf,
94static int 133static int
95CIFSFormatMFSymlink(u8 *buf, unsigned int buf_len, const char *link_str) 134CIFSFormatMFSymlink(u8 *buf, unsigned int buf_len, const char *link_str)
96{ 135{
136 int rc;
97 unsigned int link_len; 137 unsigned int link_len;
98 unsigned int ofs; 138 unsigned int ofs;
99 struct MD5Context md5_ctx;
100 u8 md5_hash[16]; 139 u8 md5_hash[16];
101 140
102 if (buf_len != CIFS_MF_SYMLINK_FILE_SIZE) 141 if (buf_len != CIFS_MF_SYMLINK_FILE_SIZE)
@@ -107,9 +146,11 @@ CIFSFormatMFSymlink(u8 *buf, unsigned int buf_len, const char *link_str)
107 if (link_len > CIFS_MF_SYMLINK_LINK_MAXLEN) 146 if (link_len > CIFS_MF_SYMLINK_LINK_MAXLEN)
108 return -ENAMETOOLONG; 147 return -ENAMETOOLONG;
109 148
110 cifs_MD5_init(&md5_ctx); 149 rc = symlink_hash(link_len, link_str, md5_hash);
111 cifs_MD5_update(&md5_ctx, (const u8 *)link_str, link_len); 150 if (rc) {
112 cifs_MD5_final(md5_hash, &md5_ctx); 151 cFYI(1, "%s: MD5 hash failure: %d\n", __func__, rc);
152 return rc;
153 }
113 154
114 snprintf(buf, buf_len, 155 snprintf(buf, buf_len,
115 CIFS_MF_SYMLINK_LEN_FORMAT CIFS_MF_SYMLINK_MD5_FORMAT, 156 CIFS_MF_SYMLINK_LEN_FORMAT CIFS_MF_SYMLINK_MD5_FORMAT,
diff --git a/fs/cifs/md4.c b/fs/cifs/md4.c
deleted file mode 100644
index a725c2609d67..000000000000
--- a/fs/cifs/md4.c
+++ /dev/null
@@ -1,205 +0,0 @@
1/*
2 Unix SMB/Netbios implementation.
3 Version 1.9.
4 a implementation of MD4 designed for use in the SMB authentication protocol
5 Copyright (C) Andrew Tridgell 1997-1998.
6 Modified by Steve French (sfrench@us.ibm.com) 2002-2003
7
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2 of the License, or
11 (at your option) any later version.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21*/
22#include <linux/module.h>
23#include <linux/fs.h>
24#include "cifsencrypt.h"
25
26/* NOTE: This code makes no attempt to be fast! */
27
28static __u32
29F(__u32 X, __u32 Y, __u32 Z)
30{
31 return (X & Y) | ((~X) & Z);
32}
33
34static __u32
35G(__u32 X, __u32 Y, __u32 Z)
36{
37 return (X & Y) | (X & Z) | (Y & Z);
38}
39
40static __u32
41H(__u32 X, __u32 Y, __u32 Z)
42{
43 return X ^ Y ^ Z;
44}
45
46static __u32
47lshift(__u32 x, int s)
48{
49 x &= 0xFFFFFFFF;
50 return ((x << s) & 0xFFFFFFFF) | (x >> (32 - s));
51}
52
53#define ROUND1(a,b,c,d,k,s) (*a) = lshift((*a) + F(*b,*c,*d) + X[k], s)
54#define ROUND2(a,b,c,d,k,s) (*a) = lshift((*a) + G(*b,*c,*d) + X[k] + (__u32)0x5A827999,s)
55#define ROUND3(a,b,c,d,k,s) (*a) = lshift((*a) + H(*b,*c,*d) + X[k] + (__u32)0x6ED9EBA1,s)
56
57/* this applies md4 to 64 byte chunks */
/*
 * Mix one block of sixteen 32-bit words (M) into the running MD4 state
 * held in *A, *B, *C and *D.  The state is updated in place.
 */
58static void
59mdfour64(__u32 *M, __u32 *A, __u32 *B, __u32 *C, __u32 *D)
60{
61 int j;
62 __u32 AA, BB, CC, DD;
63 __u32 X[16];
64
65
 /* work on a private copy of the message block */
66 for (j = 0; j < 16; j++)
67 X[j] = M[j];
68
 /* remember the incoming state; it is added back after the rounds */
69 AA = *A;
70 BB = *B;
71 CC = *C;
72 DD = *D;
73
 /* round 1: F(), words in natural order, no additive constant */
74 ROUND1(A, B, C, D, 0, 3);
75 ROUND1(D, A, B, C, 1, 7);
76 ROUND1(C, D, A, B, 2, 11);
77 ROUND1(B, C, D, A, 3, 19);
78 ROUND1(A, B, C, D, 4, 3);
79 ROUND1(D, A, B, C, 5, 7);
80 ROUND1(C, D, A, B, 6, 11);
81 ROUND1(B, C, D, A, 7, 19);
82 ROUND1(A, B, C, D, 8, 3);
83 ROUND1(D, A, B, C, 9, 7);
84 ROUND1(C, D, A, B, 10, 11);
85 ROUND1(B, C, D, A, 11, 19);
86 ROUND1(A, B, C, D, 12, 3);
87 ROUND1(D, A, B, C, 13, 7);
88 ROUND1(C, D, A, B, 14, 11);
89 ROUND1(B, C, D, A, 15, 19);
90
 /* round 2: G(), constant 0x5A827999 (added inside ROUND2) */
91 ROUND2(A, B, C, D, 0, 3);
92 ROUND2(D, A, B, C, 4, 5);
93 ROUND2(C, D, A, B, 8, 9);
94 ROUND2(B, C, D, A, 12, 13);
95 ROUND2(A, B, C, D, 1, 3);
96 ROUND2(D, A, B, C, 5, 5);
97 ROUND2(C, D, A, B, 9, 9);
98 ROUND2(B, C, D, A, 13, 13);
99 ROUND2(A, B, C, D, 2, 3);
100 ROUND2(D, A, B, C, 6, 5);
101 ROUND2(C, D, A, B, 10, 9);
102 ROUND2(B, C, D, A, 14, 13);
103 ROUND2(A, B, C, D, 3, 3);
104 ROUND2(D, A, B, C, 7, 5);
105 ROUND2(C, D, A, B, 11, 9);
106 ROUND2(B, C, D, A, 15, 13);
107
 /* round 3: H(), constant 0x6ED9EBA1 (added inside ROUND3) */
108 ROUND3(A, B, C, D, 0, 3);
109 ROUND3(D, A, B, C, 8, 9);
110 ROUND3(C, D, A, B, 4, 11);
111 ROUND3(B, C, D, A, 12, 15);
112 ROUND3(A, B, C, D, 2, 3);
113 ROUND3(D, A, B, C, 10, 9);
114 ROUND3(C, D, A, B, 6, 11);
115 ROUND3(B, C, D, A, 14, 15);
116 ROUND3(A, B, C, D, 1, 3);
117 ROUND3(D, A, B, C, 9, 9);
118 ROUND3(C, D, A, B, 5, 11);
119 ROUND3(B, C, D, A, 13, 15);
120 ROUND3(A, B, C, D, 3, 3);
121 ROUND3(D, A, B, C, 11, 9);
122 ROUND3(C, D, A, B, 7, 11);
123 ROUND3(B, C, D, A, 15, 15);
124
 /* feed-forward: add the state saved before the rounds */
125 *A += AA;
126 *B += BB;
127 *C += CC;
128 *D += DD;
129
 /* keep only the low 32 bits of each state word */
130 *A &= 0xFFFFFFFF;
131 *B &= 0xFFFFFFFF;
132 *C &= 0xFFFFFFFF;
133 *D &= 0xFFFFFFFF;
134
 /* wipe the local copy of the message block */
135 for (j = 0; j < 16; j++)
136 X[j] = 0;
137}
138
139static void
140copy64(__u32 *M, unsigned char *in)
141{
142 int i;
143
144 for (i = 0; i < 16; i++)
145 M[i] = (in[i * 4 + 3] << 24) | (in[i * 4 + 2] << 16) |
146 (in[i * 4 + 1] << 8) | (in[i * 4 + 0] << 0);
147}
148
149static void
150copy4(unsigned char *out, __u32 x)
151{
152 out[0] = x & 0xFF;
153 out[1] = (x >> 8) & 0xFF;
154 out[2] = (x >> 16) & 0xFF;
155 out[3] = (x >> 24) & 0xFF;
156}
157
158/* produce a md4 message digest from data of length n bytes */
159void
160mdfour(unsigned char *out, unsigned char *in, int n)
161{
162 unsigned char buf[128];
163 __u32 M[16];
164 __u32 b = n * 8;
165 int i;
166 __u32 A = 0x67452301;
167 __u32 B = 0xefcdab89;
168 __u32 C = 0x98badcfe;
169 __u32 D = 0x10325476;
170
171 while (n > 64) {
172 copy64(M, in);
173 mdfour64(M, &A, &B, &C, &D);
174 in += 64;
175 n -= 64;
176 }
177
178 for (i = 0; i < 128; i++)
179 buf[i] = 0;
180 memcpy(buf, in, n);
181 buf[n] = 0x80;
182
183 if (n <= 55) {
184 copy4(buf + 56, b);
185 copy64(M, buf);
186 mdfour64(M, &A, &B, &C, &D);
187 } else {
188 copy4(buf + 120, b);
189 copy64(M, buf);
190 mdfour64(M, &A, &B, &C, &D);
191 copy64(M, buf + 64);
192 mdfour64(M, &A, &B, &C, &D);
193 }
194
195 for (i = 0; i < 128; i++)
196 buf[i] = 0;
197 copy64(M, buf);
198
199 copy4(out, A);
200 copy4(out + 4, B);
201 copy4(out + 8, C);
202 copy4(out + 12, D);
203
204 A = B = C = D = 0;
205}
diff --git a/fs/cifs/md5.c b/fs/cifs/md5.c
deleted file mode 100644
index 98b66a54c319..000000000000
--- a/fs/cifs/md5.c
+++ /dev/null
@@ -1,366 +0,0 @@
1/*
2 * This code implements the MD5 message-digest algorithm.
3 * The algorithm is due to Ron Rivest. This code was
4 * written by Colin Plumb in 1993, no copyright is claimed.
5 * This code is in the public domain; do with it what you wish.
6 *
7 * Equivalent code is available from RSA Data Security, Inc.
8 * This code has been tested against that, and is equivalent,
9 * except that you don't need to include two pages of legalese
10 * with every copy.
11 *
12 * To compute the message digest of a chunk of bytes, declare an
13 * MD5Context structure, pass it to cifs_MD5_init, call cifs_MD5_update as
14 * needed on buffers full of bytes, and then call cifs_MD5_final, which
15 * will fill a supplied 16-byte array with the digest.
16 */
17
18/* This code slightly modified to fit into Samba by
19 abartlet@samba.org Jun 2001
20 and to fit the cifs vfs by
21 Steve French sfrench@us.ibm.com */
22
23#include <linux/string.h>
24#include "md5.h"
25
26static void MD5Transform(__u32 buf[4], __u32 const in[16]);
27
28/*
29 * Note: this code is harmless on little-endian machines.
30 */
31static void
32byteReverse(unsigned char *buf, unsigned longs)
33{
34 __u32 t;
35 do {
36 t = (__u32) ((unsigned) buf[3] << 8 | buf[2]) << 16 |
37 ((unsigned) buf[1] << 8 | buf[0]);
38 *(__u32 *) buf = t;
39 buf += 4;
40 } while (--longs);
41}
42
43/*
44 * Start MD5 accumulation. Set bit count to 0 and buffer to mysterious
45 * initialization constants.
46 */
47void
48cifs_MD5_init(struct MD5Context *ctx)
49{
50 ctx->buf[0] = 0x67452301;
51 ctx->buf[1] = 0xefcdab89;
52 ctx->buf[2] = 0x98badcfe;
53 ctx->buf[3] = 0x10325476;
54
55 ctx->bits[0] = 0;
56 ctx->bits[1] = 0;
57}
58
59/*
60 * Update context to reflect the concatenation of another buffer full
61 * of bytes.
62 */
63void
64cifs_MD5_update(struct MD5Context *ctx, unsigned char const *buf, unsigned len)
65{
66 register __u32 t;
67
68 /* Update bitcount */
69
70 t = ctx->bits[0];
71 if ((ctx->bits[0] = t + ((__u32) len << 3)) < t)
72 ctx->bits[1]++; /* Carry from low to high */
73 ctx->bits[1] += len >> 29;
74
75 t = (t >> 3) & 0x3f; /* Bytes already in shsInfo->data */
76
77 /* Handle any leading odd-sized chunks */
78
79 if (t) {
80 unsigned char *p = (unsigned char *) ctx->in + t;
81
82 t = 64 - t;
83 if (len < t) {
84 memmove(p, buf, len);
85 return;
86 }
87 memmove(p, buf, t);
88 byteReverse(ctx->in, 16);
89 MD5Transform(ctx->buf, (__u32 *) ctx->in);
90 buf += t;
91 len -= t;
92 }
93 /* Process data in 64-byte chunks */
94
95 while (len >= 64) {
96 memmove(ctx->in, buf, 64);
97 byteReverse(ctx->in, 16);
98 MD5Transform(ctx->buf, (__u32 *) ctx->in);
99 buf += 64;
100 len -= 64;
101 }
102
103 /* Handle any remaining bytes of data. */
104
105 memmove(ctx->in, buf, len);
106}
107
108/*
109 * Final wrapup - pad to 64-byte boundary with the bit pattern
110 * 1 0* (64-bit count of bits processed, MSB-first)
111 */
112void
113cifs_MD5_final(unsigned char digest[16], struct MD5Context *ctx)
114{
115 unsigned int count;
116 unsigned char *p;
117
118 /* Compute number of bytes mod 64 */
119 count = (ctx->bits[0] >> 3) & 0x3F;
120
121 /* Set the first char of padding to 0x80. This is safe since there is
122 always at least one byte free */
123 p = ctx->in + count;
124 *p++ = 0x80;
125
126 /* Bytes of padding needed to make 64 bytes */
127 count = 64 - 1 - count;
128
129 /* Pad out to 56 mod 64 */
130 if (count < 8) {
131 /* Two lots of padding: Pad the first block to 64 bytes */
132 memset(p, 0, count);
133 byteReverse(ctx->in, 16);
134 MD5Transform(ctx->buf, (__u32 *) ctx->in);
135
136 /* Now fill the next block with 56 bytes */
137 memset(ctx->in, 0, 56);
138 } else {
139 /* Pad block to 56 bytes */
140 memset(p, 0, count - 8);
141 }
142 byteReverse(ctx->in, 14);
143
144 /* Append length in bits and transform */
145 ((__u32 *) ctx->in)[14] = ctx->bits[0];
146 ((__u32 *) ctx->in)[15] = ctx->bits[1];
147
148 MD5Transform(ctx->buf, (__u32 *) ctx->in);
149 byteReverse((unsigned char *) ctx->buf, 4);
150 memmove(digest, ctx->buf, 16);
151 memset(ctx, 0, sizeof(*ctx)); /* In case it's sensitive */
152}
153
154/* The four core functions - F1 is optimized somewhat */
155
156/* #define F1(x, y, z) (x & y | ~x & z) */
157#define F1(x, y, z) (z ^ (x & (y ^ z)))
158#define F2(x, y, z) F1(z, x, y)
159#define F3(x, y, z) (x ^ y ^ z)
160#define F4(x, y, z) (y ^ (x | ~z))
161
162/* This is the central step in the MD5 algorithm. */
163#define MD5STEP(f, w, x, y, z, data, s) \
164 (w += f(x, y, z) + data, w = w<<s | w>>(32-s), w += x)
165
166/*
167 * The core of the MD5 algorithm, this alters an existing MD5 hash to
168 * reflect the addition of 16 longwords of new data. cifs_MD5_update blocks
169 * the data and converts bytes into longwords for this routine.
170 */
/*
 * buf: the four running state words, updated in place.
 * in:  one block of sixteen 32-bit message words.
 *
 * Runs 64 MD5STEP operations (16 each with F1, F2, F3 and F4) on a local
 * copy of the state and then adds the result back into buf.
 */
171static void
172MD5Transform(__u32 buf[4], __u32 const in[16])
173{
174 register __u32 a, b, c, d;
175
 /* work on local copies of the state */
176 a = buf[0];
177 b = buf[1];
178 c = buf[2];
179 d = buf[3];
180
 /* round 1: F1 */
181 MD5STEP(F1, a, b, c, d, in[0] + 0xd76aa478, 7);
182 MD5STEP(F1, d, a, b, c, in[1] + 0xe8c7b756, 12);
183 MD5STEP(F1, c, d, a, b, in[2] + 0x242070db, 17);
184 MD5STEP(F1, b, c, d, a, in[3] + 0xc1bdceee, 22);
185 MD5STEP(F1, a, b, c, d, in[4] + 0xf57c0faf, 7);
186 MD5STEP(F1, d, a, b, c, in[5] + 0x4787c62a, 12);
187 MD5STEP(F1, c, d, a, b, in[6] + 0xa8304613, 17);
188 MD5STEP(F1, b, c, d, a, in[7] + 0xfd469501, 22);
189 MD5STEP(F1, a, b, c, d, in[8] + 0x698098d8, 7);
190 MD5STEP(F1, d, a, b, c, in[9] + 0x8b44f7af, 12);
191 MD5STEP(F1, c, d, a, b, in[10] + 0xffff5bb1, 17);
192 MD5STEP(F1, b, c, d, a, in[11] + 0x895cd7be, 22);
193 MD5STEP(F1, a, b, c, d, in[12] + 0x6b901122, 7);
194 MD5STEP(F1, d, a, b, c, in[13] + 0xfd987193, 12);
195 MD5STEP(F1, c, d, a, b, in[14] + 0xa679438e, 17);
196 MD5STEP(F1, b, c, d, a, in[15] + 0x49b40821, 22);
197
 /* round 2: F2 */
198 MD5STEP(F2, a, b, c, d, in[1] + 0xf61e2562, 5);
199 MD5STEP(F2, d, a, b, c, in[6] + 0xc040b340, 9);
200 MD5STEP(F2, c, d, a, b, in[11] + 0x265e5a51, 14);
201 MD5STEP(F2, b, c, d, a, in[0] + 0xe9b6c7aa, 20);
202 MD5STEP(F2, a, b, c, d, in[5] + 0xd62f105d, 5);
203 MD5STEP(F2, d, a, b, c, in[10] + 0x02441453, 9);
204 MD5STEP(F2, c, d, a, b, in[15] + 0xd8a1e681, 14);
205 MD5STEP(F2, b, c, d, a, in[4] + 0xe7d3fbc8, 20);
206 MD5STEP(F2, a, b, c, d, in[9] + 0x21e1cde6, 5);
207 MD5STEP(F2, d, a, b, c, in[14] + 0xc33707d6, 9);
208 MD5STEP(F2, c, d, a, b, in[3] + 0xf4d50d87, 14);
209 MD5STEP(F2, b, c, d, a, in[8] + 0x455a14ed, 20);
210 MD5STEP(F2, a, b, c, d, in[13] + 0xa9e3e905, 5);
211 MD5STEP(F2, d, a, b, c, in[2] + 0xfcefa3f8, 9);
212 MD5STEP(F2, c, d, a, b, in[7] + 0x676f02d9, 14);
213 MD5STEP(F2, b, c, d, a, in[12] + 0x8d2a4c8a, 20);
214
 /* round 3: F3 */
215 MD5STEP(F3, a, b, c, d, in[5] + 0xfffa3942, 4);
216 MD5STEP(F3, d, a, b, c, in[8] + 0x8771f681, 11);
217 MD5STEP(F3, c, d, a, b, in[11] + 0x6d9d6122, 16);
218 MD5STEP(F3, b, c, d, a, in[14] + 0xfde5380c, 23);
219 MD5STEP(F3, a, b, c, d, in[1] + 0xa4beea44, 4);
220 MD5STEP(F3, d, a, b, c, in[4] + 0x4bdecfa9, 11);
221 MD5STEP(F3, c, d, a, b, in[7] + 0xf6bb4b60, 16);
222 MD5STEP(F3, b, c, d, a, in[10] + 0xbebfbc70, 23);
223 MD5STEP(F3, a, b, c, d, in[13] + 0x289b7ec6, 4);
224 MD5STEP(F3, d, a, b, c, in[0] + 0xeaa127fa, 11);
225 MD5STEP(F3, c, d, a, b, in[3] + 0xd4ef3085, 16);
226 MD5STEP(F3, b, c, d, a, in[6] + 0x04881d05, 23);
227 MD5STEP(F3, a, b, c, d, in[9] + 0xd9d4d039, 4);
228 MD5STEP(F3, d, a, b, c, in[12] + 0xe6db99e5, 11);
229 MD5STEP(F3, c, d, a, b, in[15] + 0x1fa27cf8, 16);
230 MD5STEP(F3, b, c, d, a, in[2] + 0xc4ac5665, 23);
231
 /* round 4: F4 */
232 MD5STEP(F4, a, b, c, d, in[0] + 0xf4292244, 6);
233 MD5STEP(F4, d, a, b, c, in[7] + 0x432aff97, 10);
234 MD5STEP(F4, c, d, a, b, in[14] + 0xab9423a7, 15);
235 MD5STEP(F4, b, c, d, a, in[5] + 0xfc93a039, 21);
236 MD5STEP(F4, a, b, c, d, in[12] + 0x655b59c3, 6);
237 MD5STEP(F4, d, a, b, c, in[3] + 0x8f0ccc92, 10);
238 MD5STEP(F4, c, d, a, b, in[10] + 0xffeff47d, 15);
239 MD5STEP(F4, b, c, d, a, in[1] + 0x85845dd1, 21);
240 MD5STEP(F4, a, b, c, d, in[8] + 0x6fa87e4f, 6);
241 MD5STEP(F4, d, a, b, c, in[15] + 0xfe2ce6e0, 10);
242 MD5STEP(F4, c, d, a, b, in[6] + 0xa3014314, 15);
243 MD5STEP(F4, b, c, d, a, in[13] + 0x4e0811a1, 21);
244 MD5STEP(F4, a, b, c, d, in[4] + 0xf7537e82, 6);
245 MD5STEP(F4, d, a, b, c, in[11] + 0xbd3af235, 10);
246 MD5STEP(F4, c, d, a, b, in[2] + 0x2ad7d2bb, 15);
247 MD5STEP(F4, b, c, d, a, in[9] + 0xeb86d391, 21);
248
 /* fold the result of this block into the running state */
249 buf[0] += a;
250 buf[1] += b;
251 buf[2] += c;
252 buf[3] += d;
253}
254
255#if 0 /* currently unused */
256/***********************************************************************
257 the rfc 2104 version of hmac_md5 initialisation.
258***********************************************************************/
259static void
260hmac_md5_init_rfc2104(unsigned char *key, int key_len,
261 struct HMACMD5Context *ctx)
262{
263 int i;
264
265 /* if key is longer than 64 bytes reset it to key=MD5(key) */
266 if (key_len > 64) {
267 unsigned char tk[16];
268 struct MD5Context tctx;
269
270 cifs_MD5_init(&tctx);
271 cifs_MD5_update(&tctx, key, key_len);
272 cifs_MD5_final(tk, &tctx);
273
274 key = tk;
275 key_len = 16;
276 }
277
278 /* start out by storing key in pads */
279 memset(ctx->k_ipad, 0, sizeof(ctx->k_ipad));
280 memset(ctx->k_opad, 0, sizeof(ctx->k_opad));
281 memcpy(ctx->k_ipad, key, key_len);
282 memcpy(ctx->k_opad, key, key_len);
283
284 /* XOR key with ipad and opad values */
285 for (i = 0; i < 64; i++) {
286 ctx->k_ipad[i] ^= 0x36;
287 ctx->k_opad[i] ^= 0x5c;
288 }
289
290 cifs_MD5_init(&ctx->ctx);
291 cifs_MD5_update(&ctx->ctx, ctx->k_ipad, 64);
292}
293#endif
294
295/***********************************************************************
296 the microsoft version of hmac_md5 initialisation.
297***********************************************************************/
298void
299hmac_md5_init_limK_to_64(const unsigned char *key, int key_len,
300 struct HMACMD5Context *ctx)
301{
302 int i;
303
304 /* if key is longer than 64 bytes truncate it */
305 if (key_len > 64)
306 key_len = 64;
307
308 /* start out by storing key in pads */
309 memset(ctx->k_ipad, 0, sizeof(ctx->k_ipad));
310 memset(ctx->k_opad, 0, sizeof(ctx->k_opad));
311 memcpy(ctx->k_ipad, key, key_len);
312 memcpy(ctx->k_opad, key, key_len);
313
314 /* XOR key with ipad and opad values */
315 for (i = 0; i < 64; i++) {
316 ctx->k_ipad[i] ^= 0x36;
317 ctx->k_opad[i] ^= 0x5c;
318 }
319
320 cifs_MD5_init(&ctx->ctx);
321 cifs_MD5_update(&ctx->ctx, ctx->k_ipad, 64);
322}
323
324/***********************************************************************
325 update hmac_md5 "inner" buffer
326***********************************************************************/
/*
 * Add text_len bytes at text to the inner MD5 hash kept in ctx->ctx.
 * This is a direct pass-through to cifs_MD5_update().
 */
327void
328hmac_md5_update(const unsigned char *text, int text_len,
329 struct HMACMD5Context *ctx)
330{
331 cifs_MD5_update(&ctx->ctx, text, text_len); /* then text of datagram */
332}
333
334/***********************************************************************
335 finish off hmac_md5 "inner" buffer and generate outer one.
336***********************************************************************/
337void
338hmac_md5_final(unsigned char *digest, struct HMACMD5Context *ctx)
339{
340 struct MD5Context ctx_o;
341
342 cifs_MD5_final(digest, &ctx->ctx);
343
344 cifs_MD5_init(&ctx_o);
345 cifs_MD5_update(&ctx_o, ctx->k_opad, 64);
346 cifs_MD5_update(&ctx_o, digest, 16);
347 cifs_MD5_final(digest, &ctx_o);
348}
349
350/***********************************************************
351 single function to calculate an HMAC MD5 digest from data.
352 use the microsoft hmacmd5 init method because the key is 16 bytes.
353************************************************************/
354#if 0 /* currently unused */
355static void
356hmac_md5(unsigned char key[16], unsigned char *data, int data_len,
357 unsigned char *digest)
358{
359 struct HMACMD5Context ctx;
360 hmac_md5_init_limK_to_64(key, 16, &ctx);
361 if (data_len != 0)
362 hmac_md5_update(data, data_len, &ctx);
363
364 hmac_md5_final(digest, &ctx);
365}
366#endif
diff --git a/fs/cifs/md5.h b/fs/cifs/md5.h
deleted file mode 100644
index 6fba8cb402fd..000000000000
--- a/fs/cifs/md5.h
+++ /dev/null
@@ -1,38 +0,0 @@
1#ifndef MD5_H
2#define MD5_H
3#ifndef HEADER_MD5_H
4/* Try to avoid clashes with OpenSSL */
5#define HEADER_MD5_H
6#endif
7
/* Working state of one MD5 computation. */
8struct MD5Context {
9 __u32 buf[4]; /* running state words; copied out as the digest at the end */
10 __u32 bits[2]; /* count of message bits so far: [0] low word, [1] high word */
11 unsigned char in[64]; /* current, not yet complete, 64-byte input block */
12};
13#endif /* !MD5_H */
14
15#ifndef _HMAC_MD5_H
/* Working state of one HMAC-MD5 computation. */
16struct HMACMD5Context {
17 struct MD5Context ctx; /* inner hash in progress */
18 unsigned char k_ipad[65]; /* key XOR 0x36; only the first 64 bytes are hashed */
19 unsigned char k_opad[65]; /* key XOR 0x5c; only the first 64 bytes are hashed */
20};
21#endif /* _HMAC_MD5_H */
22
23void cifs_MD5_init(struct MD5Context *context);
24void cifs_MD5_update(struct MD5Context *context, unsigned char const *buf,
25 unsigned len);
26void cifs_MD5_final(unsigned char digest[16], struct MD5Context *context);
27
28/* The following definitions come from lib/hmacmd5.c */
29
30/* void hmac_md5_init_rfc2104(unsigned char *key, int key_len,
31 struct HMACMD5Context *ctx);*/
32void hmac_md5_init_limK_to_64(const unsigned char *key, int key_len,
33 struct HMACMD5Context *ctx);
34void hmac_md5_update(const unsigned char *text, int text_len,
35 struct HMACMD5Context *ctx);
36void hmac_md5_final(unsigned char *digest, struct HMACMD5Context *ctx);
37/* void hmac_md5(unsigned char key[16], unsigned char *data, int data_len,
38 unsigned char *digest);*/
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index a09e077ba925..2a930a752a78 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -236,10 +236,7 @@ __u16 GetNextMid(struct TCP_Server_Info *server)
236{ 236{
237 __u16 mid = 0; 237 __u16 mid = 0;
238 __u16 last_mid; 238 __u16 last_mid;
239 int collision; 239 bool collision;
240
241 if (server == NULL)
242 return mid;
243 240
244 spin_lock(&GlobalMid_Lock); 241 spin_lock(&GlobalMid_Lock);
245 last_mid = server->CurrentMid; /* we do not want to loop forever */ 242 last_mid = server->CurrentMid; /* we do not want to loop forever */
@@ -252,24 +249,38 @@ __u16 GetNextMid(struct TCP_Server_Info *server)
252 (and it would also have to have been a request that 249 (and it would also have to have been a request that
253 did not time out) */ 250 did not time out) */
254 while (server->CurrentMid != last_mid) { 251 while (server->CurrentMid != last_mid) {
255 struct list_head *tmp;
256 struct mid_q_entry *mid_entry; 252 struct mid_q_entry *mid_entry;
253 unsigned int num_mids;
257 254
258 collision = 0; 255 collision = false;
259 if (server->CurrentMid == 0) 256 if (server->CurrentMid == 0)
260 server->CurrentMid++; 257 server->CurrentMid++;
261 258
262 list_for_each(tmp, &server->pending_mid_q) { 259 num_mids = 0;
263 mid_entry = list_entry(tmp, struct mid_q_entry, qhead); 260 list_for_each_entry(mid_entry, &server->pending_mid_q, qhead) {
264 261 ++num_mids;
265 if ((mid_entry->mid == server->CurrentMid) && 262 if (mid_entry->mid == server->CurrentMid &&
266 (mid_entry->midState == MID_REQUEST_SUBMITTED)) { 263 mid_entry->midState == MID_REQUEST_SUBMITTED) {
267 /* This mid is in use, try a different one */ 264 /* This mid is in use, try a different one */
268 collision = 1; 265 collision = true;
269 break; 266 break;
270 } 267 }
271 } 268 }
272 if (collision == 0) { 269
270 /*
271 * if we have more than 32k mids in the list, then something
272 * is very wrong. Possibly a local user is trying to DoS the
273 * box by issuing long-running calls and SIGKILL'ing them. If
274 * we get to 2^16 mids then we're in big trouble as this
275 * function could loop forever.
276 *
277 * Go ahead and assign out the mid in this situation, but force
278 * an eventual reconnect to clean out the pending_mid_q.
279 */
280 if (num_mids > 32768)
281 server->tcpStatus = CifsNeedReconnect;
282
283 if (!collision) {
273 mid = server->CurrentMid; 284 mid = server->CurrentMid;
274 break; 285 break;
275 } 286 }
@@ -381,29 +392,31 @@ header_assemble(struct smb_hdr *buffer, char smb_command /* command */ ,
381} 392}
382 393
383static int 394static int
384checkSMBhdr(struct smb_hdr *smb, __u16 mid) 395check_smb_hdr(struct smb_hdr *smb, __u16 mid)
385{ 396{
386 /* Make sure that this really is an SMB, that it is a response, 397 /* does it have the right SMB "signature" ? */
387 and that the message ids match */ 398 if (*(__le32 *) smb->Protocol != cpu_to_le32(0x424d53ff)) {
388 if ((*(__le32 *) smb->Protocol == cpu_to_le32(0x424d53ff)) && 399 cERROR(1, "Bad protocol string signature header 0x%x",
389 (mid == smb->Mid)) { 400 *(unsigned int *)smb->Protocol);
390 if (smb->Flags & SMBFLG_RESPONSE) 401 return 1;
391 return 0; 402 }
392 else { 403
393 /* only one valid case where server sends us request */ 404 /* Make sure that message ids match */
394 if (smb->Command == SMB_COM_LOCKING_ANDX) 405 if (mid != smb->Mid) {
395 return 0; 406 cERROR(1, "Mids do not match. received=%u expected=%u",
396 else 407 smb->Mid, mid);
397 cERROR(1, "Received Request not response"); 408 return 1;
398 }
399 } else { /* bad signature or mid */
400 if (*(__le32 *) smb->Protocol != cpu_to_le32(0x424d53ff))
401 cERROR(1, "Bad protocol string signature header %x",
402 *(unsigned int *) smb->Protocol);
403 if (mid != smb->Mid)
404 cERROR(1, "Mids do not match");
405 } 409 }
406 cERROR(1, "bad smb detected. The Mid=%d", smb->Mid); 410
411 /* if it's a response then accept */
412 if (smb->Flags & SMBFLG_RESPONSE)
413 return 0;
414
415 /* only one valid case where server sends us request */
416 if (smb->Command == SMB_COM_LOCKING_ANDX)
417 return 0;
418
419 cERROR(1, "Server sent request, not response. mid=%u", smb->Mid);
407 return 1; 420 return 1;
408} 421}
409 422
@@ -448,7 +461,7 @@ checkSMB(struct smb_hdr *smb, __u16 mid, unsigned int length)
448 return 1; 461 return 1;
449 } 462 }
450 463
451 if (checkSMBhdr(smb, mid)) 464 if (check_smb_hdr(smb, mid))
452 return 1; 465 return 1;
453 clc_len = smbCalcSize_LE(smb); 466 clc_len = smbCalcSize_LE(smb);
454 467
@@ -465,25 +478,26 @@ checkSMB(struct smb_hdr *smb, __u16 mid, unsigned int length)
465 if (((4 + len) & 0xFFFF) == (clc_len & 0xFFFF)) 478 if (((4 + len) & 0xFFFF) == (clc_len & 0xFFFF))
466 return 0; /* bcc wrapped */ 479 return 0; /* bcc wrapped */
467 } 480 }
468 cFYI(1, "Calculated size %d vs length %d mismatch for mid %d", 481 cFYI(1, "Calculated size %u vs length %u mismatch for mid=%u",
469 clc_len, 4 + len, smb->Mid); 482 clc_len, 4 + len, smb->Mid);
470 /* Windows XP can return a few bytes too much, presumably 483
471 an illegal pad, at the end of byte range lock responses 484 if (4 + len < clc_len) {
472 so we allow for that three byte pad, as long as actual 485 cERROR(1, "RFC1001 size %u smaller than SMB for mid=%u",
473 received length is as long or longer than calculated length */
474 /* We have now had to extend this more, since there is a
475 case in which it needs to be bigger still to handle a
476 malformed response to transact2 findfirst from WinXP when
477 access denied is returned and thus bcc and wct are zero
478 but server says length is 0x21 bytes too long as if the server
479 forget to reset the smb rfc1001 length when it reset the
480 wct and bcc to minimum size and drop the t2 parms and data */
481 if ((4+len > clc_len) && (len <= clc_len + 512))
482 return 0;
483 else {
484 cERROR(1, "RFC1001 size %d bigger than SMB for Mid=%d",
485 len, smb->Mid); 486 len, smb->Mid);
486 return 1; 487 return 1;
488 } else if (len > clc_len + 512) {
489 /*
490 * Some servers (Windows XP in particular) send more
491 * data than the lengths in the SMB packet would
492 * indicate on certain calls (byte range locks and
493 * trans2 find first calls in particular). While the
494 * client can handle such a frame by ignoring the
495 * trailing data, we choose limit the amount of extra
496 * data to 512 bytes.
497 */
498 cERROR(1, "RFC1001 size %u more than 512 bytes larger "
499 "than SMB for mid=%u", len, smb->Mid);
500 return 1;
487 } 501 }
488 } 502 }
489 return 0; 503 return 0;
diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c
index 8d9189f64477..79f641eeda30 100644
--- a/fs/cifs/netmisc.c
+++ b/fs/cifs/netmisc.c
@@ -170,7 +170,7 @@ cifs_convert_address(struct sockaddr *dst, const char *src, int len)
170{ 170{
171 int rc, alen, slen; 171 int rc, alen, slen;
172 const char *pct; 172 const char *pct;
173 char *endp, scope_id[13]; 173 char scope_id[13];
174 struct sockaddr_in *s4 = (struct sockaddr_in *) dst; 174 struct sockaddr_in *s4 = (struct sockaddr_in *) dst;
175 struct sockaddr_in6 *s6 = (struct sockaddr_in6 *) dst; 175 struct sockaddr_in6 *s6 = (struct sockaddr_in6 *) dst;
176 176
@@ -197,9 +197,9 @@ cifs_convert_address(struct sockaddr *dst, const char *src, int len)
197 memcpy(scope_id, pct + 1, slen); 197 memcpy(scope_id, pct + 1, slen);
198 scope_id[slen] = '\0'; 198 scope_id[slen] = '\0';
199 199
200 s6->sin6_scope_id = (u32) simple_strtoul(pct, &endp, 0); 200 rc = strict_strtoul(scope_id, 0,
201 if (endp != scope_id + slen) 201 (unsigned long *)&s6->sin6_scope_id);
202 return 0; 202 rc = (rc == 0) ? 1 : 0;
203 } 203 }
204 204
205 return rc; 205 return rc;
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 7f25cc3d2256..f8e4cd2a7912 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -764,7 +764,6 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
764{ 764{
765 int rc = 0; 765 int rc = 0;
766 int xid, i; 766 int xid, i;
767 struct cifs_sb_info *cifs_sb;
768 struct cifsTconInfo *pTcon; 767 struct cifsTconInfo *pTcon;
769 struct cifsFileInfo *cifsFile = NULL; 768 struct cifsFileInfo *cifsFile = NULL;
770 char *current_entry; 769 char *current_entry;
@@ -775,8 +774,6 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
775 774
776 xid = GetXid(); 775 xid = GetXid();
777 776
778 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
779
780 /* 777 /*
781 * Ensure FindFirst doesn't fail before doing filldir() for '.' and 778 * Ensure FindFirst doesn't fail before doing filldir() for '.' and
782 * '..'. Otherwise we won't be able to notify VFS in case of failure. 779 * '..'. Otherwise we won't be able to notify VFS in case of failure.
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 1adc9625a344..16765703131b 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -656,13 +656,13 @@ ssetup_ntlmssp_authenticate:
656 656
657 if (type == LANMAN) { 657 if (type == LANMAN) {
658#ifdef CONFIG_CIFS_WEAK_PW_HASH 658#ifdef CONFIG_CIFS_WEAK_PW_HASH
659 char lnm_session_key[CIFS_SESS_KEY_SIZE]; 659 char lnm_session_key[CIFS_AUTH_RESP_SIZE];
660 660
661 pSMB->req.hdr.Flags2 &= ~SMBFLG2_UNICODE; 661 pSMB->req.hdr.Flags2 &= ~SMBFLG2_UNICODE;
662 662
663 /* no capabilities flags in old lanman negotiation */ 663 /* no capabilities flags in old lanman negotiation */
664 664
665 pSMB->old_req.PasswordLength = cpu_to_le16(CIFS_SESS_KEY_SIZE); 665 pSMB->old_req.PasswordLength = cpu_to_le16(CIFS_AUTH_RESP_SIZE);
666 666
667 /* Calculate hash with password and copy into bcc_ptr. 667 /* Calculate hash with password and copy into bcc_ptr.
668 * Encryption Key (stored as in cryptkey) gets used if the 668 * Encryption Key (stored as in cryptkey) gets used if the
@@ -675,8 +675,8 @@ ssetup_ntlmssp_authenticate:
675 true : false, lnm_session_key); 675 true : false, lnm_session_key);
676 676
677 ses->flags |= CIFS_SES_LANMAN; 677 ses->flags |= CIFS_SES_LANMAN;
678 memcpy(bcc_ptr, (char *)lnm_session_key, CIFS_SESS_KEY_SIZE); 678 memcpy(bcc_ptr, (char *)lnm_session_key, CIFS_AUTH_RESP_SIZE);
679 bcc_ptr += CIFS_SESS_KEY_SIZE; 679 bcc_ptr += CIFS_AUTH_RESP_SIZE;
680 680
681 /* can not sign if LANMAN negotiated so no need 681 /* can not sign if LANMAN negotiated so no need
682 to calculate signing key? but what if server 682 to calculate signing key? but what if server
diff --git a/fs/cifs/smbdes.c b/fs/cifs/smbdes.c
index b6b6dcb500bf..04721485925d 100644
--- a/fs/cifs/smbdes.c
+++ b/fs/cifs/smbdes.c
@@ -45,7 +45,6 @@
45 up with a different answer to the one above) 45 up with a different answer to the one above)
46*/ 46*/
47#include <linux/slab.h> 47#include <linux/slab.h>
48#include "cifsencrypt.h"
49#define uchar unsigned char 48#define uchar unsigned char
50 49
51static uchar perm1[56] = { 57, 49, 41, 33, 25, 17, 9, 50static uchar perm1[56] = { 57, 49, 41, 33, 25, 17, 9,
diff --git a/fs/cifs/smbencrypt.c b/fs/cifs/smbencrypt.c
index 192ea51af20f..b5041c849981 100644
--- a/fs/cifs/smbencrypt.c
+++ b/fs/cifs/smbencrypt.c
@@ -32,9 +32,8 @@
32#include "cifs_unicode.h" 32#include "cifs_unicode.h"
33#include "cifspdu.h" 33#include "cifspdu.h"
34#include "cifsglob.h" 34#include "cifsglob.h"
35#include "md5.h"
36#include "cifs_debug.h" 35#include "cifs_debug.h"
37#include "cifsencrypt.h" 36#include "cifsproto.h"
38 37
39#ifndef false 38#ifndef false
40#define false 0 39#define false 0
@@ -48,14 +47,58 @@
48#define SSVALX(buf,pos,val) (CVAL(buf,pos)=(val)&0xFF,CVAL(buf,pos+1)=(val)>>8) 47#define SSVALX(buf,pos,val) (CVAL(buf,pos)=(val)&0xFF,CVAL(buf,pos+1)=(val)>>8)
49#define SSVAL(buf,pos,val) SSVALX((buf),(pos),((__u16)(val))) 48#define SSVAL(buf,pos,val) SSVALX((buf),(pos),((__u16)(val)))
50 49
51/*The following definitions come from libsmb/smbencrypt.c */ 50/* produce a md4 message digest from data of length n bytes */
51int
52mdfour(unsigned char *md4_hash, unsigned char *link_str, int link_len)
53{
54 int rc;
55 unsigned int size;
56 struct crypto_shash *md4;
57 struct sdesc *sdescmd4;
58
59 md4 = crypto_alloc_shash("md4", 0, 0);
60 if (IS_ERR(md4)) {
61 rc = PTR_ERR(md4);
62 cERROR(1, "%s: Crypto md4 allocation error %d\n", __func__, rc);
63 return rc;
64 }
65 size = sizeof(struct shash_desc) + crypto_shash_descsize(md4);
66 sdescmd4 = kmalloc(size, GFP_KERNEL);
67 if (!sdescmd4) {
68 rc = -ENOMEM;
69 cERROR(1, "%s: Memory allocation failure\n", __func__);
70 goto mdfour_err;
71 }
72 sdescmd4->shash.tfm = md4;
73 sdescmd4->shash.flags = 0x0;
74
75 rc = crypto_shash_init(&sdescmd4->shash);
76 if (rc) {
77 cERROR(1, "%s: Could not init md4 shash\n", __func__);
78 goto mdfour_err;
79 }
80 crypto_shash_update(&sdescmd4->shash, link_str, link_len);
81 rc = crypto_shash_final(&sdescmd4->shash, md4_hash);
52 82
53void SMBencrypt(unsigned char *passwd, const unsigned char *c8, 83mdfour_err:
54 unsigned char *p24); 84 crypto_free_shash(md4);
55void E_md4hash(const unsigned char *passwd, unsigned char *p16); 85 kfree(sdescmd4);
56static void SMBOWFencrypt(unsigned char passwd[16], const unsigned char *c8, 86
57 unsigned char p24[24]); 87 return rc;
58void SMBNTencrypt(unsigned char *passwd, unsigned char *c8, unsigned char *p24); 88}
89
90/* Does the des encryption from the NT or LM MD4 hash. */
91static void
92SMBOWFencrypt(unsigned char passwd[16], const unsigned char *c8,
93 unsigned char p24[24])
94{
95 unsigned char p21[21];
96
97 memset(p21, '\0', 21);
98
99 memcpy(p21, passwd, 16);
100 E_P24(p21, c8, p24);
101}
59 102
60/* 103/*
61 This implements the X/Open SMB password encryption 104 This implements the X/Open SMB password encryption
@@ -118,9 +161,10 @@ _my_mbstowcs(__u16 *dst, const unsigned char *src, int len)
118 * Creates the MD4 Hash of the users password in NT UNICODE. 161 * Creates the MD4 Hash of the users password in NT UNICODE.
119 */ 162 */
120 163
121void 164int
122E_md4hash(const unsigned char *passwd, unsigned char *p16) 165E_md4hash(const unsigned char *passwd, unsigned char *p16)
123{ 166{
167 int rc;
124 int len; 168 int len;
125 __u16 wpwd[129]; 169 __u16 wpwd[129];
126 170
@@ -139,8 +183,10 @@ E_md4hash(const unsigned char *passwd, unsigned char *p16)
139 /* Calculate length in bytes */ 183 /* Calculate length in bytes */
140 len = _my_wcslen(wpwd) * sizeof(__u16); 184 len = _my_wcslen(wpwd) * sizeof(__u16);
141 185
142 mdfour(p16, (unsigned char *) wpwd, len); 186 rc = mdfour(p16, (unsigned char *) wpwd, len);
143 memset(wpwd, 0, 129 * 2); 187 memset(wpwd, 0, 129 * 2);
188
189 return rc;
144} 190}
145 191
146#if 0 /* currently unused */ 192#if 0 /* currently unused */
@@ -212,19 +258,6 @@ ntv2_owf_gen(const unsigned char owf[16], const char *user_n,
212} 258}
213#endif 259#endif
214 260
215/* Does the des encryption from the NT or LM MD4 hash. */
216static void
217SMBOWFencrypt(unsigned char passwd[16], const unsigned char *c8,
218 unsigned char p24[24])
219{
220 unsigned char p21[21];
221
222 memset(p21, '\0', 21);
223
224 memcpy(p21, passwd, 16);
225 E_P24(p21, c8, p24);
226}
227
228/* Does the des encryption from the FIRST 8 BYTES of the NT or LM MD4 hash. */ 261/* Does the des encryption from the FIRST 8 BYTES of the NT or LM MD4 hash. */
229#if 0 /* currently unused */ 262#if 0 /* currently unused */
230static void 263static void
@@ -242,16 +275,21 @@ NTLMSSPOWFencrypt(unsigned char passwd[8],
242#endif 275#endif
243 276
244/* Does the NT MD4 hash then des encryption. */ 277/* Does the NT MD4 hash then des encryption. */
245 278int
246void
247SMBNTencrypt(unsigned char *passwd, unsigned char *c8, unsigned char *p24) 279SMBNTencrypt(unsigned char *passwd, unsigned char *c8, unsigned char *p24)
248{ 280{
281 int rc;
249 unsigned char p21[21]; 282 unsigned char p21[21];
250 283
251 memset(p21, '\0', 21); 284 memset(p21, '\0', 21);
252 285
253 E_md4hash(passwd, p21); 286 rc = E_md4hash(passwd, p21);
287 if (rc) {
288 cFYI(1, "%s Can't generate NT hash, error: %d", __func__, rc);
289 return rc;
290 }
254 SMBOWFencrypt(p21, c8, p24); 291 SMBOWFencrypt(p21, c8, p24);
292 return rc;
255} 293}
256 294
257 295
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index c1ccca1a933f..46d8756f2b24 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -236,9 +236,9 @@ smb_sendv(struct TCP_Server_Info *server, struct kvec *iov, int n_vec)
236 server->tcpStatus = CifsNeedReconnect; 236 server->tcpStatus = CifsNeedReconnect;
237 } 237 }
238 238
239 if (rc < 0) { 239 if (rc < 0 && rc != -EINTR)
240 cERROR(1, "Error %d sending data on socket to server", rc); 240 cERROR(1, "Error %d sending data on socket to server", rc);
241 } else 241 else
242 rc = 0; 242 rc = 0;
243 243
244 /* Don't want to modify the buffer as a 244 /* Don't want to modify the buffer as a
@@ -359,6 +359,10 @@ cifs_call_async(struct TCP_Server_Info *server, struct smb_hdr *in_buf,
359 if (rc) 359 if (rc)
360 return rc; 360 return rc;
361 361
362 /* enable signing if server requires it */
363 if (server->secMode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
364 in_buf->Flags2 |= SMBFLG2_SECURITY_SIGNATURE;
365
362 mutex_lock(&server->srv_mutex); 366 mutex_lock(&server->srv_mutex);
363 mid = AllocMidQEntry(in_buf, server); 367 mid = AllocMidQEntry(in_buf, server);
364 if (mid == NULL) { 368 if (mid == NULL) {
@@ -453,6 +457,9 @@ sync_mid_result(struct mid_q_entry *mid, struct TCP_Server_Info *server)
453 case MID_RETRY_NEEDED: 457 case MID_RETRY_NEEDED:
454 rc = -EAGAIN; 458 rc = -EAGAIN;
455 break; 459 break;
460 case MID_RESPONSE_MALFORMED:
461 rc = -EIO;
462 break;
456 default: 463 default:
457 cERROR(1, "%s: invalid mid state mid=%d state=%d", __func__, 464 cERROR(1, "%s: invalid mid state mid=%d state=%d", __func__,
458 mid->mid, mid->midState); 465 mid->mid, mid->midState);
@@ -570,17 +577,33 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses,
570#endif 577#endif
571 578
572 mutex_unlock(&ses->server->srv_mutex); 579 mutex_unlock(&ses->server->srv_mutex);
573 cifs_small_buf_release(in_buf);
574 580
575 if (rc < 0) 581 if (rc < 0) {
582 cifs_small_buf_release(in_buf);
576 goto out; 583 goto out;
584 }
577 585
578 if (long_op == CIFS_ASYNC_OP) 586 if (long_op == CIFS_ASYNC_OP) {
587 cifs_small_buf_release(in_buf);
579 goto out; 588 goto out;
589 }
580 590
581 rc = wait_for_response(ses->server, midQ); 591 rc = wait_for_response(ses->server, midQ);
582 if (rc != 0) 592 if (rc != 0) {
583 goto out; 593 send_nt_cancel(ses->server, in_buf, midQ);
594 spin_lock(&GlobalMid_Lock);
595 if (midQ->midState == MID_REQUEST_SUBMITTED) {
596 midQ->callback = DeleteMidQEntry;
597 spin_unlock(&GlobalMid_Lock);
598 cifs_small_buf_release(in_buf);
599 atomic_dec(&ses->server->inFlight);
600 wake_up(&ses->server->request_q);
601 return rc;
602 }
603 spin_unlock(&GlobalMid_Lock);
604 }
605
606 cifs_small_buf_release(in_buf);
584 607
585 rc = sync_mid_result(midQ, ses->server); 608 rc = sync_mid_result(midQ, ses->server);
586 if (rc != 0) { 609 if (rc != 0) {
@@ -724,8 +747,19 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
724 goto out; 747 goto out;
725 748
726 rc = wait_for_response(ses->server, midQ); 749 rc = wait_for_response(ses->server, midQ);
727 if (rc != 0) 750 if (rc != 0) {
728 goto out; 751 send_nt_cancel(ses->server, in_buf, midQ);
752 spin_lock(&GlobalMid_Lock);
753 if (midQ->midState == MID_REQUEST_SUBMITTED) {
754 /* no longer considered to be "in-flight" */
755 midQ->callback = DeleteMidQEntry;
756 spin_unlock(&GlobalMid_Lock);
757 atomic_dec(&ses->server->inFlight);
758 wake_up(&ses->server->request_q);
759 return rc;
760 }
761 spin_unlock(&GlobalMid_Lock);
762 }
729 763
730 rc = sync_mid_result(midQ, ses->server); 764 rc = sync_mid_result(midQ, ses->server);
731 if (rc != 0) { 765 if (rc != 0) {
@@ -922,10 +956,21 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon,
922 } 956 }
923 } 957 }
924 958
925 if (wait_for_response(ses->server, midQ) == 0) { 959 rc = wait_for_response(ses->server, midQ);
926 /* We got the response - restart system call. */ 960 if (rc) {
927 rstart = 1; 961 send_nt_cancel(ses->server, in_buf, midQ);
962 spin_lock(&GlobalMid_Lock);
963 if (midQ->midState == MID_REQUEST_SUBMITTED) {
964 /* no longer considered to be "in-flight" */
965 midQ->callback = DeleteMidQEntry;
966 spin_unlock(&GlobalMid_Lock);
967 return rc;
968 }
969 spin_unlock(&GlobalMid_Lock);
928 } 970 }
971
972 /* We got the response - restart system call. */
973 rstart = 1;
929 } 974 }
930 975
931 rc = sync_mid_result(midQ, ses->server); 976 rc = sync_mid_result(midQ, ses->server);
diff --git a/fs/dcache.c b/fs/dcache.c
index 9f493ee4dcba..2a6bd9a4ae97 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -176,6 +176,7 @@ static void d_free(struct dentry *dentry)
176 176
177/** 177/**
178 * dentry_rcuwalk_barrier - invalidate in-progress rcu-walk lookups 178 * dentry_rcuwalk_barrier - invalidate in-progress rcu-walk lookups
179 * @dentry: the target dentry
179 * After this call, in-progress rcu-walk path lookup will fail. This 180 * After this call, in-progress rcu-walk path lookup will fail. This
180 * should be called after unhashing, and after changing d_inode (if 181 * should be called after unhashing, and after changing d_inode (if
181 * the dentry has not already been unhashed). 182 * the dentry has not already been unhashed).
@@ -281,6 +282,7 @@ static void dentry_lru_move_tail(struct dentry *dentry)
281/** 282/**
282 * d_kill - kill dentry and return parent 283 * d_kill - kill dentry and return parent
283 * @dentry: dentry to kill 284 * @dentry: dentry to kill
285 * @parent: parent dentry
284 * 286 *
285 * The dentry must already be unhashed and removed from the LRU. 287 * The dentry must already be unhashed and removed from the LRU.
286 * 288 *
@@ -1973,7 +1975,7 @@ out:
1973/** 1975/**
1974 * d_validate - verify dentry provided from insecure source (deprecated) 1976 * d_validate - verify dentry provided from insecure source (deprecated)
1975 * @dentry: The dentry alleged to be valid child of @dparent 1977 * @dentry: The dentry alleged to be valid child of @dparent
1976 * @parent: The parent dentry (known to be valid) 1978 * @dparent: The parent dentry (known to be valid)
1977 * 1979 *
1978 * An insecure source has sent us a dentry, here we verify it and dget() it. 1980 * An insecure source has sent us a dentry, here we verify it and dget() it.
1979 * This is used by ncpfs in its readdir implementation. 1981 * This is used by ncpfs in its readdir implementation.
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 9c64ae9e4c1a..2d8c87b951c2 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -1468,15 +1468,13 @@ static void work_stop(void)
1468 1468
1469static int work_start(void) 1469static int work_start(void)
1470{ 1470{
1471 recv_workqueue = alloc_workqueue("dlm_recv", WQ_MEM_RECLAIM | 1471 recv_workqueue = create_singlethread_workqueue("dlm_recv");
1472 WQ_HIGHPRI | WQ_FREEZEABLE, 0);
1473 if (!recv_workqueue) { 1472 if (!recv_workqueue) {
1474 log_print("can't start dlm_recv"); 1473 log_print("can't start dlm_recv");
1475 return -ENOMEM; 1474 return -ENOMEM;
1476 } 1475 }
1477 1476
1478 send_workqueue = alloc_workqueue("dlm_send", WQ_MEM_RECLAIM | 1477 send_workqueue = create_singlethread_workqueue("dlm_send");
1479 WQ_HIGHPRI | WQ_FREEZEABLE, 0);
1480 if (!send_workqueue) { 1478 if (!send_workqueue) {
1481 log_print("can't start dlm_send"); 1479 log_print("can't start dlm_send");
1482 destroy_workqueue(recv_workqueue); 1480 destroy_workqueue(recv_workqueue);
diff --git a/fs/ecryptfs/dentry.c b/fs/ecryptfs/dentry.c
index 6fc4f319b550..534c1d46e69e 100644
--- a/fs/ecryptfs/dentry.c
+++ b/fs/ecryptfs/dentry.c
@@ -46,24 +46,28 @@ static int ecryptfs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
46{ 46{
47 struct dentry *lower_dentry; 47 struct dentry *lower_dentry;
48 struct vfsmount *lower_mnt; 48 struct vfsmount *lower_mnt;
49 struct dentry *dentry_save; 49 struct dentry *dentry_save = NULL;
50 struct vfsmount *vfsmount_save; 50 struct vfsmount *vfsmount_save = NULL;
51 int rc = 1; 51 int rc = 1;
52 52
53 if (nd->flags & LOOKUP_RCU) 53 if (nd && nd->flags & LOOKUP_RCU)
54 return -ECHILD; 54 return -ECHILD;
55 55
56 lower_dentry = ecryptfs_dentry_to_lower(dentry); 56 lower_dentry = ecryptfs_dentry_to_lower(dentry);
57 lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry); 57 lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry);
58 if (!lower_dentry->d_op || !lower_dentry->d_op->d_revalidate) 58 if (!lower_dentry->d_op || !lower_dentry->d_op->d_revalidate)
59 goto out; 59 goto out;
60 dentry_save = nd->path.dentry; 60 if (nd) {
61 vfsmount_save = nd->path.mnt; 61 dentry_save = nd->path.dentry;
62 nd->path.dentry = lower_dentry; 62 vfsmount_save = nd->path.mnt;
63 nd->path.mnt = lower_mnt; 63 nd->path.dentry = lower_dentry;
64 nd->path.mnt = lower_mnt;
65 }
64 rc = lower_dentry->d_op->d_revalidate(lower_dentry, nd); 66 rc = lower_dentry->d_op->d_revalidate(lower_dentry, nd);
65 nd->path.dentry = dentry_save; 67 if (nd) {
66 nd->path.mnt = vfsmount_save; 68 nd->path.dentry = dentry_save;
69 nd->path.mnt = vfsmount_save;
70 }
67 if (dentry->d_inode) { 71 if (dentry->d_inode) {
68 struct inode *lower_inode = 72 struct inode *lower_inode =
69 ecryptfs_inode_to_lower(dentry->d_inode); 73 ecryptfs_inode_to_lower(dentry->d_inode);
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index dbc84ed96336..e00753496e3e 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -632,8 +632,7 @@ int ecryptfs_interpose(struct dentry *hidden_dentry,
632 u32 flags); 632 u32 flags);
633int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry, 633int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry,
634 struct dentry *lower_dentry, 634 struct dentry *lower_dentry,
635 struct inode *ecryptfs_dir_inode, 635 struct inode *ecryptfs_dir_inode);
636 struct nameidata *ecryptfs_nd);
637int ecryptfs_decode_and_decrypt_filename(char **decrypted_name, 636int ecryptfs_decode_and_decrypt_filename(char **decrypted_name,
638 size_t *decrypted_name_size, 637 size_t *decrypted_name_size,
639 struct dentry *ecryptfs_dentry, 638 struct dentry *ecryptfs_dentry,
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index 81e10e6a9443..7d1050e254f9 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -317,6 +317,7 @@ ecryptfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
317 317
318const struct file_operations ecryptfs_dir_fops = { 318const struct file_operations ecryptfs_dir_fops = {
319 .readdir = ecryptfs_readdir, 319 .readdir = ecryptfs_readdir,
320 .read = generic_read_dir,
320 .unlocked_ioctl = ecryptfs_unlocked_ioctl, 321 .unlocked_ioctl = ecryptfs_unlocked_ioctl,
321#ifdef CONFIG_COMPAT 322#ifdef CONFIG_COMPAT
322 .compat_ioctl = ecryptfs_compat_ioctl, 323 .compat_ioctl = ecryptfs_compat_ioctl,
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index bd33f87a1907..b592938a84bc 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -74,16 +74,20 @@ ecryptfs_create_underlying_file(struct inode *lower_dir_inode,
74 unsigned int flags_save; 74 unsigned int flags_save;
75 int rc; 75 int rc;
76 76
77 dentry_save = nd->path.dentry; 77 if (nd) {
78 vfsmount_save = nd->path.mnt; 78 dentry_save = nd->path.dentry;
79 flags_save = nd->flags; 79 vfsmount_save = nd->path.mnt;
80 nd->path.dentry = lower_dentry; 80 flags_save = nd->flags;
81 nd->path.mnt = lower_mnt; 81 nd->path.dentry = lower_dentry;
82 nd->flags &= ~LOOKUP_OPEN; 82 nd->path.mnt = lower_mnt;
83 nd->flags &= ~LOOKUP_OPEN;
84 }
83 rc = vfs_create(lower_dir_inode, lower_dentry, mode, nd); 85 rc = vfs_create(lower_dir_inode, lower_dentry, mode, nd);
84 nd->path.dentry = dentry_save; 86 if (nd) {
85 nd->path.mnt = vfsmount_save; 87 nd->path.dentry = dentry_save;
86 nd->flags = flags_save; 88 nd->path.mnt = vfsmount_save;
89 nd->flags = flags_save;
90 }
87 return rc; 91 return rc;
88} 92}
89 93
@@ -241,8 +245,7 @@ out:
241 */ 245 */
242int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry, 246int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry,
243 struct dentry *lower_dentry, 247 struct dentry *lower_dentry,
244 struct inode *ecryptfs_dir_inode, 248 struct inode *ecryptfs_dir_inode)
245 struct nameidata *ecryptfs_nd)
246{ 249{
247 struct dentry *lower_dir_dentry; 250 struct dentry *lower_dir_dentry;
248 struct vfsmount *lower_mnt; 251 struct vfsmount *lower_mnt;
@@ -290,8 +293,6 @@ int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry,
290 goto out; 293 goto out;
291 if (special_file(lower_inode->i_mode)) 294 if (special_file(lower_inode->i_mode))
292 goto out; 295 goto out;
293 if (!ecryptfs_nd)
294 goto out;
295 /* Released in this function */ 296 /* Released in this function */
296 page_virt = kmem_cache_zalloc(ecryptfs_header_cache_2, GFP_USER); 297 page_virt = kmem_cache_zalloc(ecryptfs_header_cache_2, GFP_USER);
297 if (!page_virt) { 298 if (!page_virt) {
@@ -349,75 +350,6 @@ out:
349} 350}
350 351
351/** 352/**
352 * ecryptfs_new_lower_dentry
353 * @name: The name of the new dentry.
354 * @lower_dir_dentry: Parent directory of the new dentry.
355 * @nd: nameidata from last lookup.
356 *
357 * Create a new dentry or get it from lower parent dir.
358 */
359static struct dentry *
360ecryptfs_new_lower_dentry(struct qstr *name, struct dentry *lower_dir_dentry,
361 struct nameidata *nd)
362{
363 struct dentry *new_dentry;
364 struct dentry *tmp;
365 struct inode *lower_dir_inode;
366
367 lower_dir_inode = lower_dir_dentry->d_inode;
368
369 tmp = d_alloc(lower_dir_dentry, name);
370 if (!tmp)
371 return ERR_PTR(-ENOMEM);
372
373 mutex_lock(&lower_dir_inode->i_mutex);
374 new_dentry = lower_dir_inode->i_op->lookup(lower_dir_inode, tmp, nd);
375 mutex_unlock(&lower_dir_inode->i_mutex);
376
377 if (!new_dentry)
378 new_dentry = tmp;
379 else
380 dput(tmp);
381
382 return new_dentry;
383}
384
385
386/**
387 * ecryptfs_lookup_one_lower
388 * @ecryptfs_dentry: The eCryptfs dentry that we are looking up
389 * @lower_dir_dentry: lower parent directory
390 * @name: lower file name
391 *
392 * Get the lower dentry from vfs. If lower dentry does not exist yet,
393 * create it.
394 */
395static struct dentry *
396ecryptfs_lookup_one_lower(struct dentry *ecryptfs_dentry,
397 struct dentry *lower_dir_dentry, struct qstr *name)
398{
399 struct nameidata nd;
400 struct vfsmount *lower_mnt;
401 int err;
402
403 lower_mnt = mntget(ecryptfs_dentry_to_lower_mnt(
404 ecryptfs_dentry->d_parent));
405 err = vfs_path_lookup(lower_dir_dentry, lower_mnt, name->name , 0, &nd);
406 mntput(lower_mnt);
407
408 if (!err) {
409 /* we dont need the mount */
410 mntput(nd.path.mnt);
411 return nd.path.dentry;
412 }
413 if (err != -ENOENT)
414 return ERR_PTR(err);
415
416 /* create a new lower dentry */
417 return ecryptfs_new_lower_dentry(name, lower_dir_dentry, &nd);
418}
419
420/**
421 * ecryptfs_lookup 353 * ecryptfs_lookup
422 * @ecryptfs_dir_inode: The eCryptfs directory inode 354 * @ecryptfs_dir_inode: The eCryptfs directory inode
423 * @ecryptfs_dentry: The eCryptfs dentry that we are looking up 355 * @ecryptfs_dentry: The eCryptfs dentry that we are looking up
@@ -434,7 +366,6 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
434 size_t encrypted_and_encoded_name_size; 366 size_t encrypted_and_encoded_name_size;
435 struct ecryptfs_mount_crypt_stat *mount_crypt_stat = NULL; 367 struct ecryptfs_mount_crypt_stat *mount_crypt_stat = NULL;
436 struct dentry *lower_dir_dentry, *lower_dentry; 368 struct dentry *lower_dir_dentry, *lower_dentry;
437 struct qstr lower_name;
438 int rc = 0; 369 int rc = 0;
439 370
440 if ((ecryptfs_dentry->d_name.len == 1 371 if ((ecryptfs_dentry->d_name.len == 1
@@ -444,20 +375,14 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
444 goto out_d_drop; 375 goto out_d_drop;
445 } 376 }
446 lower_dir_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry->d_parent); 377 lower_dir_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry->d_parent);
447 lower_name.name = ecryptfs_dentry->d_name.name; 378 mutex_lock(&lower_dir_dentry->d_inode->i_mutex);
448 lower_name.len = ecryptfs_dentry->d_name.len; 379 lower_dentry = lookup_one_len(ecryptfs_dentry->d_name.name,
449 lower_name.hash = ecryptfs_dentry->d_name.hash; 380 lower_dir_dentry,
450 if (lower_dir_dentry->d_op && lower_dir_dentry->d_op->d_hash) { 381 ecryptfs_dentry->d_name.len);
451 rc = lower_dir_dentry->d_op->d_hash(lower_dir_dentry, 382 mutex_unlock(&lower_dir_dentry->d_inode->i_mutex);
452 lower_dir_dentry->d_inode, &lower_name);
453 if (rc < 0)
454 goto out_d_drop;
455 }
456 lower_dentry = ecryptfs_lookup_one_lower(ecryptfs_dentry,
457 lower_dir_dentry, &lower_name);
458 if (IS_ERR(lower_dentry)) { 383 if (IS_ERR(lower_dentry)) {
459 rc = PTR_ERR(lower_dentry); 384 rc = PTR_ERR(lower_dentry);
460 ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_lower() returned " 385 ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_len() returned "
461 "[%d] on lower_dentry = [%s]\n", __func__, rc, 386 "[%d] on lower_dentry = [%s]\n", __func__, rc,
462 encrypted_and_encoded_name); 387 encrypted_and_encoded_name);
463 goto out_d_drop; 388 goto out_d_drop;
@@ -479,28 +404,21 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
479 "filename; rc = [%d]\n", __func__, rc); 404 "filename; rc = [%d]\n", __func__, rc);
480 goto out_d_drop; 405 goto out_d_drop;
481 } 406 }
482 lower_name.name = encrypted_and_encoded_name; 407 mutex_lock(&lower_dir_dentry->d_inode->i_mutex);
483 lower_name.len = encrypted_and_encoded_name_size; 408 lower_dentry = lookup_one_len(encrypted_and_encoded_name,
484 lower_name.hash = full_name_hash(lower_name.name, lower_name.len); 409 lower_dir_dentry,
485 if (lower_dir_dentry->d_op && lower_dir_dentry->d_op->d_hash) { 410 encrypted_and_encoded_name_size);
486 rc = lower_dir_dentry->d_op->d_hash(lower_dir_dentry, 411 mutex_unlock(&lower_dir_dentry->d_inode->i_mutex);
487 lower_dir_dentry->d_inode, &lower_name);
488 if (rc < 0)
489 goto out_d_drop;
490 }
491 lower_dentry = ecryptfs_lookup_one_lower(ecryptfs_dentry,
492 lower_dir_dentry, &lower_name);
493 if (IS_ERR(lower_dentry)) { 412 if (IS_ERR(lower_dentry)) {
494 rc = PTR_ERR(lower_dentry); 413 rc = PTR_ERR(lower_dentry);
495 ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_lower() returned " 414 ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_len() returned "
496 "[%d] on lower_dentry = [%s]\n", __func__, rc, 415 "[%d] on lower_dentry = [%s]\n", __func__, rc,
497 encrypted_and_encoded_name); 416 encrypted_and_encoded_name);
498 goto out_d_drop; 417 goto out_d_drop;
499 } 418 }
500lookup_and_interpose: 419lookup_and_interpose:
501 rc = ecryptfs_lookup_and_interpose_lower(ecryptfs_dentry, lower_dentry, 420 rc = ecryptfs_lookup_and_interpose_lower(ecryptfs_dentry, lower_dentry,
502 ecryptfs_dir_inode, 421 ecryptfs_dir_inode);
503 ecryptfs_nd);
504 goto out; 422 goto out;
505out_d_drop: 423out_d_drop:
506 d_drop(ecryptfs_dentry); 424 d_drop(ecryptfs_dentry);
@@ -1092,6 +1010,8 @@ int ecryptfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
1092 rc = vfs_getattr(ecryptfs_dentry_to_lower_mnt(dentry), 1010 rc = vfs_getattr(ecryptfs_dentry_to_lower_mnt(dentry),
1093 ecryptfs_dentry_to_lower(dentry), &lower_stat); 1011 ecryptfs_dentry_to_lower(dentry), &lower_stat);
1094 if (!rc) { 1012 if (!rc) {
1013 fsstack_copy_attr_all(dentry->d_inode,
1014 ecryptfs_inode_to_lower(dentry->d_inode));
1095 generic_fillattr(dentry->d_inode, stat); 1015 generic_fillattr(dentry->d_inode, stat);
1096 stat->blocks = lower_stat.blocks; 1016 stat->blocks = lower_stat.blocks;
1097 } 1017 }
diff --git a/fs/eventfd.c b/fs/eventfd.c
index e0194b3e14d6..d9a591773919 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -99,7 +99,7 @@ EXPORT_SYMBOL_GPL(eventfd_ctx_get);
99 * @ctx: [in] Pointer to eventfd context. 99 * @ctx: [in] Pointer to eventfd context.
100 * 100 *
101 * The eventfd context reference must have been previously acquired either 101 * The eventfd context reference must have been previously acquired either
102 * with eventfd_ctx_get() or eventfd_ctx_fdget()). 102 * with eventfd_ctx_get() or eventfd_ctx_fdget().
103 */ 103 */
104void eventfd_ctx_put(struct eventfd_ctx *ctx) 104void eventfd_ctx_put(struct eventfd_ctx *ctx)
105{ 105{
@@ -146,9 +146,9 @@ static void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt)
146 * eventfd_ctx_remove_wait_queue - Read the current counter and removes wait queue. 146 * eventfd_ctx_remove_wait_queue - Read the current counter and removes wait queue.
147 * @ctx: [in] Pointer to eventfd context. 147 * @ctx: [in] Pointer to eventfd context.
148 * @wait: [in] Wait queue to be removed. 148 * @wait: [in] Wait queue to be removed.
149 * @cnt: [out] Pointer to the 64bit conter value. 149 * @cnt: [out] Pointer to the 64-bit counter value.
150 * 150 *
151 * Returns zero if successful, or the following error codes: 151 * Returns %0 if successful, or the following error codes:
152 * 152 *
153 * -EAGAIN : The operation would have blocked. 153 * -EAGAIN : The operation would have blocked.
154 * 154 *
@@ -175,11 +175,11 @@ EXPORT_SYMBOL_GPL(eventfd_ctx_remove_wait_queue);
175 * eventfd_ctx_read - Reads the eventfd counter or wait if it is zero. 175 * eventfd_ctx_read - Reads the eventfd counter or wait if it is zero.
176 * @ctx: [in] Pointer to eventfd context. 176 * @ctx: [in] Pointer to eventfd context.
177 * @no_wait: [in] Different from zero if the operation should not block. 177 * @no_wait: [in] Different from zero if the operation should not block.
178 * @cnt: [out] Pointer to the 64bit conter value. 178 * @cnt: [out] Pointer to the 64-bit counter value.
179 * 179 *
180 * Returns zero if successful, or the following error codes: 180 * Returns %0 if successful, or the following error codes:
181 * 181 *
182 * -EAGAIN : The operation would have blocked but @no_wait was nonzero. 182 * -EAGAIN : The operation would have blocked but @no_wait was non-zero.
183 * -ERESTARTSYS : A signal interrupted the wait operation. 183 * -ERESTARTSYS : A signal interrupted the wait operation.
184 * 184 *
185 * If @no_wait is zero, the function might sleep until the eventfd internal 185 * If @no_wait is zero, the function might sleep until the eventfd internal
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index cc8a9b7d6064..4a09af9e9a63 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -63,6 +63,13 @@
63 * cleanup path and it is also acquired by eventpoll_release_file() 63 * cleanup path and it is also acquired by eventpoll_release_file()
64 * if a file has been pushed inside an epoll set and it is then 64 * if a file has been pushed inside an epoll set and it is then
65 * close()d without a previous call to epoll_ctl(EPOLL_CTL_DEL). 65 * close()d without a previous call to epoll_ctl(EPOLL_CTL_DEL).
66 * It is also acquired when inserting an epoll fd onto another epoll
67 * fd. We do this so that we walk the epoll tree and ensure that this
68 * insertion does not create a cycle of epoll file descriptors, which
69 * could lead to deadlock. We need a global mutex to prevent two
70 * simultaneous inserts (A into B and B into A) from racing and
71 * constructing a cycle without either insert observing that it is
72 * going to.
66 * It is possible to drop the "ep->mtx" and to use the global 73 * It is possible to drop the "ep->mtx" and to use the global
67 * mutex "epmutex" (together with "ep->lock") to have it working, 74 * mutex "epmutex" (together with "ep->lock") to have it working,
68 * but having "ep->mtx" will make the interface more scalable. 75 * but having "ep->mtx" will make the interface more scalable.
@@ -224,6 +231,9 @@ static long max_user_watches __read_mostly;
224 */ 231 */
225static DEFINE_MUTEX(epmutex); 232static DEFINE_MUTEX(epmutex);
226 233
234/* Used to check for epoll file descriptor inclusion loops */
235static struct nested_calls poll_loop_ncalls;
236
227/* Used for safe wake up implementation */ 237/* Used for safe wake up implementation */
228static struct nested_calls poll_safewake_ncalls; 238static struct nested_calls poll_safewake_ncalls;
229 239
@@ -1114,6 +1124,17 @@ static int ep_send_events(struct eventpoll *ep,
1114 return ep_scan_ready_list(ep, ep_send_events_proc, &esed); 1124 return ep_scan_ready_list(ep, ep_send_events_proc, &esed);
1115} 1125}
1116 1126
1127static inline struct timespec ep_set_mstimeout(long ms)
1128{
1129 struct timespec now, ts = {
1130 .tv_sec = ms / MSEC_PER_SEC,
1131 .tv_nsec = NSEC_PER_MSEC * (ms % MSEC_PER_SEC),
1132 };
1133
1134 ktime_get_ts(&now);
1135 return timespec_add_safe(now, ts);
1136}
1137
1117static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, 1138static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
1118 int maxevents, long timeout) 1139 int maxevents, long timeout)
1119{ 1140{
@@ -1121,12 +1142,11 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
1121 unsigned long flags; 1142 unsigned long flags;
1122 long slack; 1143 long slack;
1123 wait_queue_t wait; 1144 wait_queue_t wait;
1124 struct timespec end_time;
1125 ktime_t expires, *to = NULL; 1145 ktime_t expires, *to = NULL;
1126 1146
1127 if (timeout > 0) { 1147 if (timeout > 0) {
1128 ktime_get_ts(&end_time); 1148 struct timespec end_time = ep_set_mstimeout(timeout);
1129 timespec_add_ns(&end_time, (u64)timeout * NSEC_PER_MSEC); 1149
1130 slack = select_estimate_accuracy(&end_time); 1150 slack = select_estimate_accuracy(&end_time);
1131 to = &expires; 1151 to = &expires;
1132 *to = timespec_to_ktime(end_time); 1152 *to = timespec_to_ktime(end_time);
@@ -1188,6 +1208,62 @@ retry:
1188 return res; 1208 return res;
1189} 1209}
1190 1210
1211/**
1212 * ep_loop_check_proc - Callback function to be passed to the @ep_call_nested()
1213 * API, to verify that adding an epoll file inside another
1214 * epoll structure, does not violate the constraints, in
1215 * terms of closed loops, or too deep chains (which can
1216 * result in excessive stack usage).
1217 *
1218 * @priv: Pointer to the epoll file to be currently checked.
1219 * @cookie: Original cookie for this call. This is the top-of-the-chain epoll
1220 * data structure pointer.
1221 * @call_nests: Current depth of the @ep_call_nested() call stack.
1222 *
1223 * Returns: Returns zero if adding the epoll @file inside current epoll
1224 * structure @ep does not violate the constraints, or -1 otherwise.
1225 */
1226static int ep_loop_check_proc(void *priv, void *cookie, int call_nests)
1227{
1228 int error = 0;
1229 struct file *file = priv;
1230 struct eventpoll *ep = file->private_data;
1231 struct rb_node *rbp;
1232 struct epitem *epi;
1233
1234 mutex_lock(&ep->mtx);
1235 for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) {
1236 epi = rb_entry(rbp, struct epitem, rbn);
1237 if (unlikely(is_file_epoll(epi->ffd.file))) {
1238 error = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
1239 ep_loop_check_proc, epi->ffd.file,
1240 epi->ffd.file->private_data, current);
1241 if (error != 0)
1242 break;
1243 }
1244 }
1245 mutex_unlock(&ep->mtx);
1246
1247 return error;
1248}
1249
1250/**
1251 * ep_loop_check - Performs a check to verify that adding an epoll file (@file)
1252 * inside another epoll file (represented by @ep) does not create
1253 * closed loops or too deep chains.
1254 *
1255 * @ep: Pointer to the epoll private data structure.
1256 * @file: Pointer to the epoll file to be checked.
1257 *
1258 * Returns: Returns zero if adding the epoll @file inside current epoll
1259 * structure @ep does not violate the constraints, or -1 otherwise.
1260 */
1261static int ep_loop_check(struct eventpoll *ep, struct file *file)
1262{
1263 return ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
1264 ep_loop_check_proc, file, ep, current);
1265}
1266
1191/* 1267/*
1192 * Open an eventpoll file descriptor. 1268 * Open an eventpoll file descriptor.
1193 */ 1269 */
@@ -1236,6 +1312,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
1236 struct epoll_event __user *, event) 1312 struct epoll_event __user *, event)
1237{ 1313{
1238 int error; 1314 int error;
1315 int did_lock_epmutex = 0;
1239 struct file *file, *tfile; 1316 struct file *file, *tfile;
1240 struct eventpoll *ep; 1317 struct eventpoll *ep;
1241 struct epitem *epi; 1318 struct epitem *epi;
@@ -1277,6 +1354,25 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
1277 */ 1354 */
1278 ep = file->private_data; 1355 ep = file->private_data;
1279 1356
1357 /*
1358 * When we insert an epoll file descriptor, inside another epoll file
1359 * descriptor, there is the chance of creating closed loops, which are
1360 * better handled here than in more critical paths.
1361 *
1362 * We hold epmutex across the loop check and the insert in this case, in
1363 * order to prevent two separate inserts from racing and each doing the
1364 * insert "at the same time" such that ep_loop_check passes on both
1365 * before either one does the insert, thereby creating a cycle.
1366 */
1367 if (unlikely(is_file_epoll(tfile) && op == EPOLL_CTL_ADD)) {
1368 mutex_lock(&epmutex);
1369 did_lock_epmutex = 1;
1370 error = -ELOOP;
1371 if (ep_loop_check(ep, tfile) != 0)
1372 goto error_tgt_fput;
1373 }
1374
1375
1280 mutex_lock(&ep->mtx); 1376 mutex_lock(&ep->mtx);
1281 1377
1282 /* 1378 /*
@@ -1312,6 +1408,9 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
1312 mutex_unlock(&ep->mtx); 1408 mutex_unlock(&ep->mtx);
1313 1409
1314error_tgt_fput: 1410error_tgt_fput:
1411 if (unlikely(did_lock_epmutex))
1412 mutex_unlock(&epmutex);
1413
1315 fput(tfile); 1414 fput(tfile);
1316error_fput: 1415error_fput:
1317 fput(file); 1416 fput(file);
@@ -1431,6 +1530,12 @@ static int __init eventpoll_init(void)
1431 EP_ITEM_COST; 1530 EP_ITEM_COST;
1432 BUG_ON(max_user_watches < 0); 1531 BUG_ON(max_user_watches < 0);
1433 1532
1533 /*
1534 * Initialize the structure used to perform epoll file descriptor
1535 * inclusion loops checks.
1536 */
1537 ep_nested_calls_init(&poll_loop_ncalls);
1538
1434 /* Initialize the structure used to perform safe poll wait head wake ups */ 1539 /* Initialize the structure used to perform safe poll wait head wake ups */
1435 ep_nested_calls_init(&poll_safewake_ncalls); 1540 ep_nested_calls_init(&poll_safewake_ncalls);
1436 1541
diff --git a/fs/exec.c b/fs/exec.c
index c62efcb959c7..52a447d9b6ab 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -120,7 +120,7 @@ SYSCALL_DEFINE1(uselib, const char __user *, library)
120 goto out; 120 goto out;
121 121
122 file = do_filp_open(AT_FDCWD, tmp, 122 file = do_filp_open(AT_FDCWD, tmp,
123 O_LARGEFILE | O_RDONLY | FMODE_EXEC, 0, 123 O_LARGEFILE | O_RDONLY | __FMODE_EXEC, 0,
124 MAY_READ | MAY_EXEC | MAY_OPEN); 124 MAY_READ | MAY_EXEC | MAY_OPEN);
125 putname(tmp); 125 putname(tmp);
126 error = PTR_ERR(file); 126 error = PTR_ERR(file);
@@ -723,7 +723,7 @@ struct file *open_exec(const char *name)
723 int err; 723 int err;
724 724
725 file = do_filp_open(AT_FDCWD, name, 725 file = do_filp_open(AT_FDCWD, name,
726 O_LARGEFILE | O_RDONLY | FMODE_EXEC, 0, 726 O_LARGEFILE | O_RDONLY | __FMODE_EXEC, 0,
727 MAY_EXEC | MAY_OPEN); 727 MAY_EXEC | MAY_OPEN);
728 if (IS_ERR(file)) 728 if (IS_ERR(file))
729 goto out; 729 goto out;
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index 42685424817b..a7555238c41a 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -1030,7 +1030,6 @@ struct inode *exofs_iget(struct super_block *sb, unsigned long ino)
1030 memcpy(oi->i_data, fcb.i_data, sizeof(fcb.i_data)); 1030 memcpy(oi->i_data, fcb.i_data, sizeof(fcb.i_data));
1031 } 1031 }
1032 1032
1033 inode->i_mapping->backing_dev_info = sb->s_bdi;
1034 if (S_ISREG(inode->i_mode)) { 1033 if (S_ISREG(inode->i_mode)) {
1035 inode->i_op = &exofs_file_inode_operations; 1034 inode->i_op = &exofs_file_inode_operations;
1036 inode->i_fop = &exofs_file_operations; 1035 inode->i_fop = &exofs_file_operations;
@@ -1131,7 +1130,6 @@ struct inode *exofs_new_inode(struct inode *dir, int mode)
1131 1130
1132 sbi = sb->s_fs_info; 1131 sbi = sb->s_fs_info;
1133 1132
1134 inode->i_mapping->backing_dev_info = sb->s_bdi;
1135 sb->s_dirt = 1; 1133 sb->s_dirt = 1;
1136 inode_init_owner(inode, dir, mode); 1134 inode_init_owner(inode, dir, mode);
1137 inode->i_ino = sbi->s_nextid++; 1135 inode->i_ino = sbi->s_nextid++;
diff --git a/fs/exofs/namei.c b/fs/exofs/namei.c
index 264e95d02830..4d70db110cfc 100644
--- a/fs/exofs/namei.c
+++ b/fs/exofs/namei.c
@@ -272,7 +272,6 @@ static int exofs_rename(struct inode *old_dir, struct dentry *old_dentry,
272 new_de = exofs_find_entry(new_dir, new_dentry, &new_page); 272 new_de = exofs_find_entry(new_dir, new_dentry, &new_page);
273 if (!new_de) 273 if (!new_de)
274 goto out_dir; 274 goto out_dir;
275 inode_inc_link_count(old_inode);
276 err = exofs_set_link(new_dir, new_de, new_page, old_inode); 275 err = exofs_set_link(new_dir, new_de, new_page, old_inode);
277 new_inode->i_ctime = CURRENT_TIME; 276 new_inode->i_ctime = CURRENT_TIME;
278 if (dir_de) 277 if (dir_de)
@@ -286,12 +285,9 @@ static int exofs_rename(struct inode *old_dir, struct dentry *old_dentry,
286 if (new_dir->i_nlink >= EXOFS_LINK_MAX) 285 if (new_dir->i_nlink >= EXOFS_LINK_MAX)
287 goto out_dir; 286 goto out_dir;
288 } 287 }
289 inode_inc_link_count(old_inode);
290 err = exofs_add_link(new_dentry, old_inode); 288 err = exofs_add_link(new_dentry, old_inode);
291 if (err) { 289 if (err)
292 inode_dec_link_count(old_inode);
293 goto out_dir; 290 goto out_dir;
294 }
295 if (dir_de) 291 if (dir_de)
296 inode_inc_link_count(new_dir); 292 inode_inc_link_count(new_dir);
297 } 293 }
@@ -299,7 +295,7 @@ static int exofs_rename(struct inode *old_dir, struct dentry *old_dentry,
299 old_inode->i_ctime = CURRENT_TIME; 295 old_inode->i_ctime = CURRENT_TIME;
300 296
301 exofs_delete_entry(old_de, old_page); 297 exofs_delete_entry(old_de, old_page);
302 inode_dec_link_count(old_inode); 298 mark_inode_dirty(old_inode);
303 299
304 if (dir_de) { 300 if (dir_de) {
305 err = exofs_set_link(old_inode, dir_de, dir_page, new_dir); 301 err = exofs_set_link(old_inode, dir_de, dir_page, new_dir);
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index 2e1d8341d827..adb91855ccd0 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -344,7 +344,6 @@ static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry,
344 new_de = ext2_find_entry (new_dir, &new_dentry->d_name, &new_page); 344 new_de = ext2_find_entry (new_dir, &new_dentry->d_name, &new_page);
345 if (!new_de) 345 if (!new_de)
346 goto out_dir; 346 goto out_dir;
347 inode_inc_link_count(old_inode);
348 ext2_set_link(new_dir, new_de, new_page, old_inode, 1); 347 ext2_set_link(new_dir, new_de, new_page, old_inode, 1);
349 new_inode->i_ctime = CURRENT_TIME_SEC; 348 new_inode->i_ctime = CURRENT_TIME_SEC;
350 if (dir_de) 349 if (dir_de)
@@ -356,12 +355,9 @@ static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry,
356 if (new_dir->i_nlink >= EXT2_LINK_MAX) 355 if (new_dir->i_nlink >= EXT2_LINK_MAX)
357 goto out_dir; 356 goto out_dir;
358 } 357 }
359 inode_inc_link_count(old_inode);
360 err = ext2_add_link(new_dentry, old_inode); 358 err = ext2_add_link(new_dentry, old_inode);
361 if (err) { 359 if (err)
362 inode_dec_link_count(old_inode);
363 goto out_dir; 360 goto out_dir;
364 }
365 if (dir_de) 361 if (dir_de)
366 inode_inc_link_count(new_dir); 362 inode_inc_link_count(new_dir);
367 } 363 }
@@ -369,12 +365,11 @@ static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry,
369 /* 365 /*
370 * Like most other Unix systems, set the ctime for inodes on a 366 * Like most other Unix systems, set the ctime for inodes on a
371 * rename. 367 * rename.
372 * inode_dec_link_count() will mark the inode dirty.
373 */ 368 */
374 old_inode->i_ctime = CURRENT_TIME_SEC; 369 old_inode->i_ctime = CURRENT_TIME_SEC;
370 mark_inode_dirty(old_inode);
375 371
376 ext2_delete_entry (old_de, old_page); 372 ext2_delete_entry (old_de, old_page);
377 inode_dec_link_count(old_inode);
378 373
379 if (dir_de) { 374 if (dir_de) {
380 if (old_dir != new_dir) 375 if (old_dir != new_dir)
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 0c8d97b56f34..3aa0b72b3b94 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -848,6 +848,7 @@ struct ext4_inode_info {
848 atomic_t i_ioend_count; /* Number of outstanding io_end structs */ 848 atomic_t i_ioend_count; /* Number of outstanding io_end structs */
849 /* current io_end structure for async DIO write*/ 849 /* current io_end structure for async DIO write*/
850 ext4_io_end_t *cur_aio_dio; 850 ext4_io_end_t *cur_aio_dio;
851 atomic_t i_aiodio_unwritten; /* Nr. of inflight conversions pending */
851 852
852 spinlock_t i_block_reservation_lock; 853 spinlock_t i_block_reservation_lock;
853 854
@@ -2119,6 +2120,15 @@ static inline void set_bitmap_uptodate(struct buffer_head *bh)
2119 2120
2120#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) 2121#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1)
2121 2122
2123/* For ioend & aio unwritten conversion wait queues */
2124#define EXT4_WQ_HASH_SZ 37
2125#define ext4_ioend_wq(v) (&ext4__ioend_wq[((unsigned long)(v)) %\
2126 EXT4_WQ_HASH_SZ])
2127#define ext4_aio_mutex(v) (&ext4__aio_mutex[((unsigned long)(v)) %\
2128 EXT4_WQ_HASH_SZ])
2129extern wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ];
2130extern struct mutex ext4__aio_mutex[EXT4_WQ_HASH_SZ];
2131
2122#endif /* __KERNEL__ */ 2132#endif /* __KERNEL__ */
2123 2133
2124#endif /* _EXT4_H */ 2134#endif /* _EXT4_H */
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 63a75810b7c3..ccce8a7e94ed 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -3174,9 +3174,10 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
3174 * that this IO needs conversion to written when IO is 3174 * that this IO needs conversion to written when IO is
3175 * completed 3175 * completed
3176 */ 3176 */
3177 if (io) 3177 if (io && !(io->flag & EXT4_IO_END_UNWRITTEN)) {
3178 io->flag = EXT4_IO_END_UNWRITTEN; 3178 io->flag = EXT4_IO_END_UNWRITTEN;
3179 else 3179 atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten);
3180 } else
3180 ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN); 3181 ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN);
3181 if (ext4_should_dioread_nolock(inode)) 3182 if (ext4_should_dioread_nolock(inode))
3182 map->m_flags |= EXT4_MAP_UNINIT; 3183 map->m_flags |= EXT4_MAP_UNINIT;
@@ -3463,9 +3464,10 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3463 * that we need to perform conversion when IO is done. 3464 * that we need to perform conversion when IO is done.
3464 */ 3465 */
3465 if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { 3466 if ((flags & EXT4_GET_BLOCKS_PRE_IO)) {
3466 if (io) 3467 if (io && !(io->flag & EXT4_IO_END_UNWRITTEN)) {
3467 io->flag = EXT4_IO_END_UNWRITTEN; 3468 io->flag = EXT4_IO_END_UNWRITTEN;
3468 else 3469 atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten);
3470 } else
3469 ext4_set_inode_state(inode, 3471 ext4_set_inode_state(inode,
3470 EXT4_STATE_DIO_UNWRITTEN); 3472 EXT4_STATE_DIO_UNWRITTEN);
3471 } 3473 }
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 2e8322c8aa88..7b80d543b89e 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -55,11 +55,47 @@ static int ext4_release_file(struct inode *inode, struct file *filp)
55 return 0; 55 return 0;
56} 56}
57 57
58static void ext4_aiodio_wait(struct inode *inode)
59{
60 wait_queue_head_t *wq = ext4_ioend_wq(inode);
61
62 wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_aiodio_unwritten) == 0));
63}
64
65/*
66 * This tests whether the IO in question is block-aligned or not.
67 * Ext4 utilizes unwritten extents when hole-filling during direct IO, and they
68 * are converted to written only after the IO is complete. Until they are
69 * mapped, these blocks appear as holes, so dio_zero_block() will assume that
70 * it needs to zero out portions of the start and/or end block. If 2 AIO
71 * threads are at work on the same unwritten block, they must be synchronized
72 * or one thread will zero the other's data, causing corruption.
73 */
74static int
75ext4_unaligned_aio(struct inode *inode, const struct iovec *iov,
76 unsigned long nr_segs, loff_t pos)
77{
78 struct super_block *sb = inode->i_sb;
79 int blockmask = sb->s_blocksize - 1;
80 size_t count = iov_length(iov, nr_segs);
81 loff_t final_size = pos + count;
82
83 if (pos >= inode->i_size)
84 return 0;
85
86 if ((pos & blockmask) || (final_size & blockmask))
87 return 1;
88
89 return 0;
90}
91
58static ssize_t 92static ssize_t
59ext4_file_write(struct kiocb *iocb, const struct iovec *iov, 93ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
60 unsigned long nr_segs, loff_t pos) 94 unsigned long nr_segs, loff_t pos)
61{ 95{
62 struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode; 96 struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
97 int unaligned_aio = 0;
98 int ret;
63 99
64 /* 100 /*
65 * If we have encountered a bitmap-format file, the size limit 101 * If we have encountered a bitmap-format file, the size limit
@@ -78,9 +114,31 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
78 nr_segs = iov_shorten((struct iovec *)iov, nr_segs, 114 nr_segs = iov_shorten((struct iovec *)iov, nr_segs,
79 sbi->s_bitmap_maxbytes - pos); 115 sbi->s_bitmap_maxbytes - pos);
80 } 116 }
117 } else if (unlikely((iocb->ki_filp->f_flags & O_DIRECT) &&
118 !is_sync_kiocb(iocb))) {
119 unaligned_aio = ext4_unaligned_aio(inode, iov, nr_segs, pos);
81 } 120 }
82 121
83 return generic_file_aio_write(iocb, iov, nr_segs, pos); 122 /* Unaligned direct AIO must be serialized; see comment above */
123 if (unaligned_aio) {
124 static unsigned long unaligned_warn_time;
125
126 /* Warn about this once per day */
127 if (printk_timed_ratelimit(&unaligned_warn_time, 60*60*24*HZ))
128 ext4_msg(inode->i_sb, KERN_WARNING,
129 "Unaligned AIO/DIO on inode %ld by %s; "
130 "performance will be poor.",
131 inode->i_ino, current->comm);
132 mutex_lock(ext4_aio_mutex(inode));
133 ext4_aiodio_wait(inode);
134 }
135
136 ret = generic_file_aio_write(iocb, iov, nr_segs, pos);
137
138 if (unaligned_aio)
139 mutex_unlock(ext4_aio_mutex(inode));
140
141 return ret;
84} 142}
85 143
86static const struct vm_operations_struct ext4_file_vm_ops = { 144static const struct vm_operations_struct ext4_file_vm_ops = {
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 851f49b2f9d2..d1fe09aea73d 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -342,10 +342,15 @@ static struct kmem_cache *ext4_free_ext_cachep;
342/* We create slab caches for groupinfo data structures based on the 342/* We create slab caches for groupinfo data structures based on the
343 * superblock block size. There will be one per mounted filesystem for 343 * superblock block size. There will be one per mounted filesystem for
344 * each unique s_blocksize_bits */ 344 * each unique s_blocksize_bits */
345#define NR_GRPINFO_CACHES \ 345#define NR_GRPINFO_CACHES 8
346 (EXT4_MAX_BLOCK_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE + 1)
347static struct kmem_cache *ext4_groupinfo_caches[NR_GRPINFO_CACHES]; 346static struct kmem_cache *ext4_groupinfo_caches[NR_GRPINFO_CACHES];
348 347
348static const char *ext4_groupinfo_slab_names[NR_GRPINFO_CACHES] = {
349 "ext4_groupinfo_1k", "ext4_groupinfo_2k", "ext4_groupinfo_4k",
350 "ext4_groupinfo_8k", "ext4_groupinfo_16k", "ext4_groupinfo_32k",
351 "ext4_groupinfo_64k", "ext4_groupinfo_128k"
352};
353
349static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, 354static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
350 ext4_group_t group); 355 ext4_group_t group);
351static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, 356static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
@@ -2414,6 +2419,55 @@ err_freesgi:
2414 return -ENOMEM; 2419 return -ENOMEM;
2415} 2420}
2416 2421
2422static void ext4_groupinfo_destroy_slabs(void)
2423{
2424 int i;
2425
2426 for (i = 0; i < NR_GRPINFO_CACHES; i++) {
2427 if (ext4_groupinfo_caches[i])
2428 kmem_cache_destroy(ext4_groupinfo_caches[i]);
2429 ext4_groupinfo_caches[i] = NULL;
2430 }
2431}
2432
2433static int ext4_groupinfo_create_slab(size_t size)
2434{
2435 static DEFINE_MUTEX(ext4_grpinfo_slab_create_mutex);
2436 int slab_size;
2437 int blocksize_bits = order_base_2(size);
2438 int cache_index = blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE;
2439 struct kmem_cache *cachep;
2440
2441 if (cache_index >= NR_GRPINFO_CACHES)
2442 return -EINVAL;
2443
2444 if (unlikely(cache_index < 0))
2445 cache_index = 0;
2446
2447 mutex_lock(&ext4_grpinfo_slab_create_mutex);
2448 if (ext4_groupinfo_caches[cache_index]) {
2449 mutex_unlock(&ext4_grpinfo_slab_create_mutex);
2450 return 0; /* Already created */
2451 }
2452
2453 slab_size = offsetof(struct ext4_group_info,
2454 bb_counters[blocksize_bits + 2]);
2455
2456 cachep = kmem_cache_create(ext4_groupinfo_slab_names[cache_index],
2457 slab_size, 0, SLAB_RECLAIM_ACCOUNT,
2458 NULL);
2459
2460 mutex_unlock(&ext4_grpinfo_slab_create_mutex);
2461 if (!cachep) {
2462 printk(KERN_EMERG "EXT4: no memory for groupinfo slab cache\n");
2463 return -ENOMEM;
2464 }
2465
2466 ext4_groupinfo_caches[cache_index] = cachep;
2467
2468 return 0;
2469}
2470
2417int ext4_mb_init(struct super_block *sb, int needs_recovery) 2471int ext4_mb_init(struct super_block *sb, int needs_recovery)
2418{ 2472{
2419 struct ext4_sb_info *sbi = EXT4_SB(sb); 2473 struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -2421,9 +2475,6 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
2421 unsigned offset; 2475 unsigned offset;
2422 unsigned max; 2476 unsigned max;
2423 int ret; 2477 int ret;
2424 int cache_index;
2425 struct kmem_cache *cachep;
2426 char *namep = NULL;
2427 2478
2428 i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_offsets); 2479 i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_offsets);
2429 2480
@@ -2440,30 +2491,9 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
2440 goto out; 2491 goto out;
2441 } 2492 }
2442 2493
2443 cache_index = sb->s_blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE; 2494 ret = ext4_groupinfo_create_slab(sb->s_blocksize);
2444 cachep = ext4_groupinfo_caches[cache_index]; 2495 if (ret < 0)
2445 if (!cachep) { 2496 goto out;
2446 char name[32];
2447 int len = offsetof(struct ext4_group_info,
2448 bb_counters[sb->s_blocksize_bits + 2]);
2449
2450 sprintf(name, "ext4_groupinfo_%d", sb->s_blocksize_bits);
2451 namep = kstrdup(name, GFP_KERNEL);
2452 if (!namep) {
2453 ret = -ENOMEM;
2454 goto out;
2455 }
2456
2457 /* Need to free the kmem_cache_name() when we
2458 * destroy the slab */
2459 cachep = kmem_cache_create(namep, len, 0,
2460 SLAB_RECLAIM_ACCOUNT, NULL);
2461 if (!cachep) {
2462 ret = -ENOMEM;
2463 goto out;
2464 }
2465 ext4_groupinfo_caches[cache_index] = cachep;
2466 }
2467 2497
2468 /* order 0 is regular bitmap */ 2498 /* order 0 is regular bitmap */
2469 sbi->s_mb_maxs[0] = sb->s_blocksize << 3; 2499 sbi->s_mb_maxs[0] = sb->s_blocksize << 3;
@@ -2520,7 +2550,6 @@ out:
2520 if (ret) { 2550 if (ret) {
2521 kfree(sbi->s_mb_offsets); 2551 kfree(sbi->s_mb_offsets);
2522 kfree(sbi->s_mb_maxs); 2552 kfree(sbi->s_mb_maxs);
2523 kfree(namep);
2524 } 2553 }
2525 return ret; 2554 return ret;
2526} 2555}
@@ -2734,7 +2763,6 @@ int __init ext4_init_mballoc(void)
2734 2763
2735void ext4_exit_mballoc(void) 2764void ext4_exit_mballoc(void)
2736{ 2765{
2737 int i;
2738 /* 2766 /*
2739 * Wait for completion of call_rcu()'s on ext4_pspace_cachep 2767 * Wait for completion of call_rcu()'s on ext4_pspace_cachep
2740 * before destroying the slab cache. 2768 * before destroying the slab cache.
@@ -2743,15 +2771,7 @@ void ext4_exit_mballoc(void)
2743 kmem_cache_destroy(ext4_pspace_cachep); 2771 kmem_cache_destroy(ext4_pspace_cachep);
2744 kmem_cache_destroy(ext4_ac_cachep); 2772 kmem_cache_destroy(ext4_ac_cachep);
2745 kmem_cache_destroy(ext4_free_ext_cachep); 2773 kmem_cache_destroy(ext4_free_ext_cachep);
2746 2774 ext4_groupinfo_destroy_slabs();
2747 for (i = 0; i < NR_GRPINFO_CACHES; i++) {
2748 struct kmem_cache *cachep = ext4_groupinfo_caches[i];
2749 if (cachep) {
2750 char *name = (char *)kmem_cache_name(cachep);
2751 kmem_cache_destroy(cachep);
2752 kfree(name);
2753 }
2754 }
2755 ext4_remove_debugfs_entry(); 2775 ext4_remove_debugfs_entry();
2756} 2776}
2757 2777
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 7270dcfca92a..955cc309142f 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -32,14 +32,8 @@
32 32
33static struct kmem_cache *io_page_cachep, *io_end_cachep; 33static struct kmem_cache *io_page_cachep, *io_end_cachep;
34 34
35#define WQ_HASH_SZ 37
36#define to_ioend_wq(v) (&ioend_wq[((unsigned long)v) % WQ_HASH_SZ])
37static wait_queue_head_t ioend_wq[WQ_HASH_SZ];
38
39int __init ext4_init_pageio(void) 35int __init ext4_init_pageio(void)
40{ 36{
41 int i;
42
43 io_page_cachep = KMEM_CACHE(ext4_io_page, SLAB_RECLAIM_ACCOUNT); 37 io_page_cachep = KMEM_CACHE(ext4_io_page, SLAB_RECLAIM_ACCOUNT);
44 if (io_page_cachep == NULL) 38 if (io_page_cachep == NULL)
45 return -ENOMEM; 39 return -ENOMEM;
@@ -48,9 +42,6 @@ int __init ext4_init_pageio(void)
48 kmem_cache_destroy(io_page_cachep); 42 kmem_cache_destroy(io_page_cachep);
49 return -ENOMEM; 43 return -ENOMEM;
50 } 44 }
51 for (i = 0; i < WQ_HASH_SZ; i++)
52 init_waitqueue_head(&ioend_wq[i]);
53
54 return 0; 45 return 0;
55} 46}
56 47
@@ -62,7 +53,7 @@ void ext4_exit_pageio(void)
62 53
63void ext4_ioend_wait(struct inode *inode) 54void ext4_ioend_wait(struct inode *inode)
64{ 55{
65 wait_queue_head_t *wq = to_ioend_wq(inode); 56 wait_queue_head_t *wq = ext4_ioend_wq(inode);
66 57
67 wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_ioend_count) == 0)); 58 wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_ioend_count) == 0));
68} 59}
@@ -87,7 +78,7 @@ void ext4_free_io_end(ext4_io_end_t *io)
87 for (i = 0; i < io->num_io_pages; i++) 78 for (i = 0; i < io->num_io_pages; i++)
88 put_io_page(io->pages[i]); 79 put_io_page(io->pages[i]);
89 io->num_io_pages = 0; 80 io->num_io_pages = 0;
90 wq = to_ioend_wq(io->inode); 81 wq = ext4_ioend_wq(io->inode);
91 if (atomic_dec_and_test(&EXT4_I(io->inode)->i_ioend_count) && 82 if (atomic_dec_and_test(&EXT4_I(io->inode)->i_ioend_count) &&
92 waitqueue_active(wq)) 83 waitqueue_active(wq))
93 wake_up_all(wq); 84 wake_up_all(wq);
@@ -102,6 +93,7 @@ int ext4_end_io_nolock(ext4_io_end_t *io)
102 struct inode *inode = io->inode; 93 struct inode *inode = io->inode;
103 loff_t offset = io->offset; 94 loff_t offset = io->offset;
104 ssize_t size = io->size; 95 ssize_t size = io->size;
96 wait_queue_head_t *wq;
105 int ret = 0; 97 int ret = 0;
106 98
107 ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p," 99 ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p,"
@@ -126,7 +118,16 @@ int ext4_end_io_nolock(ext4_io_end_t *io)
126 if (io->iocb) 118 if (io->iocb)
127 aio_complete(io->iocb, io->result, 0); 119 aio_complete(io->iocb, io->result, 0);
128 /* clear the DIO AIO unwritten flag */ 120 /* clear the DIO AIO unwritten flag */
129 io->flag &= ~EXT4_IO_END_UNWRITTEN; 121 if (io->flag & EXT4_IO_END_UNWRITTEN) {
122 io->flag &= ~EXT4_IO_END_UNWRITTEN;
123 /* Wake up anyone waiting on unwritten extent conversion */
124 wq = ext4_ioend_wq(io->inode);
125 if (atomic_dec_and_test(&EXT4_I(inode)->i_aiodio_unwritten) &&
126 waitqueue_active(wq)) {
127 wake_up_all(wq);
128 }
129 }
130
130 return ret; 131 return ret;
131} 132}
132 133
@@ -190,6 +191,7 @@ static void ext4_end_bio(struct bio *bio, int error)
190 struct inode *inode; 191 struct inode *inode;
191 unsigned long flags; 192 unsigned long flags;
192 int i; 193 int i;
194 sector_t bi_sector = bio->bi_sector;
193 195
194 BUG_ON(!io_end); 196 BUG_ON(!io_end);
195 bio->bi_private = NULL; 197 bio->bi_private = NULL;
@@ -207,9 +209,7 @@ static void ext4_end_bio(struct bio *bio, int error)
207 if (error) 209 if (error)
208 SetPageError(page); 210 SetPageError(page);
209 BUG_ON(!head); 211 BUG_ON(!head);
210 if (head->b_size == PAGE_CACHE_SIZE) 212 if (head->b_size != PAGE_CACHE_SIZE) {
211 clear_buffer_dirty(head);
212 else {
213 loff_t offset; 213 loff_t offset;
214 loff_t io_end_offset = io_end->offset + io_end->size; 214 loff_t io_end_offset = io_end->offset + io_end->size;
215 215
@@ -221,7 +221,6 @@ static void ext4_end_bio(struct bio *bio, int error)
221 if (error) 221 if (error)
222 buffer_io_error(bh); 222 buffer_io_error(bh);
223 223
224 clear_buffer_dirty(bh);
225 } 224 }
226 if (buffer_delay(bh)) 225 if (buffer_delay(bh))
227 partial_write = 1; 226 partial_write = 1;
@@ -257,7 +256,7 @@ static void ext4_end_bio(struct bio *bio, int error)
257 (unsigned long long) io_end->offset, 256 (unsigned long long) io_end->offset,
258 (long) io_end->size, 257 (long) io_end->size,
259 (unsigned long long) 258 (unsigned long long)
260 bio->bi_sector >> (inode->i_blkbits - 9)); 259 bi_sector >> (inode->i_blkbits - 9));
261 } 260 }
262 261
263 /* Add the io_end to per-inode completed io list*/ 262 /* Add the io_end to per-inode completed io list*/
@@ -380,6 +379,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
380 379
381 blocksize = 1 << inode->i_blkbits; 380 blocksize = 1 << inode->i_blkbits;
382 381
382 BUG_ON(!PageLocked(page));
383 BUG_ON(PageWriteback(page)); 383 BUG_ON(PageWriteback(page));
384 set_page_writeback(page); 384 set_page_writeback(page);
385 ClearPageError(page); 385 ClearPageError(page);
@@ -397,12 +397,14 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
397 for (bh = head = page_buffers(page), block_start = 0; 397 for (bh = head = page_buffers(page), block_start = 0;
398 bh != head || !block_start; 398 bh != head || !block_start;
399 block_start = block_end, bh = bh->b_this_page) { 399 block_start = block_end, bh = bh->b_this_page) {
400
400 block_end = block_start + blocksize; 401 block_end = block_start + blocksize;
401 if (block_start >= len) { 402 if (block_start >= len) {
402 clear_buffer_dirty(bh); 403 clear_buffer_dirty(bh);
403 set_buffer_uptodate(bh); 404 set_buffer_uptodate(bh);
404 continue; 405 continue;
405 } 406 }
407 clear_buffer_dirty(bh);
406 ret = io_submit_add_bh(io, io_page, inode, wbc, bh); 408 ret = io_submit_add_bh(io, io_page, inode, wbc, bh);
407 if (ret) { 409 if (ret) {
408 /* 410 /*
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 48ce561fafac..f6a318f836b2 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -77,6 +77,7 @@ static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags,
77 const char *dev_name, void *data); 77 const char *dev_name, void *data);
78static void ext4_destroy_lazyinit_thread(void); 78static void ext4_destroy_lazyinit_thread(void);
79static void ext4_unregister_li_request(struct super_block *sb); 79static void ext4_unregister_li_request(struct super_block *sb);
80static void ext4_clear_request_list(void);
80 81
81#if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) 82#if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
82static struct file_system_type ext3_fs_type = { 83static struct file_system_type ext3_fs_type = {
@@ -832,6 +833,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
832 ei->i_sync_tid = 0; 833 ei->i_sync_tid = 0;
833 ei->i_datasync_tid = 0; 834 ei->i_datasync_tid = 0;
834 atomic_set(&ei->i_ioend_count, 0); 835 atomic_set(&ei->i_ioend_count, 0);
836 atomic_set(&ei->i_aiodio_unwritten, 0);
835 837
836 return &ei->vfs_inode; 838 return &ei->vfs_inode;
837} 839}
@@ -2716,6 +2718,8 @@ static void ext4_unregister_li_request(struct super_block *sb)
2716 mutex_unlock(&ext4_li_info->li_list_mtx); 2718 mutex_unlock(&ext4_li_info->li_list_mtx);
2717} 2719}
2718 2720
2721static struct task_struct *ext4_lazyinit_task;
2722
2719/* 2723/*
2720 * This is the function where ext4lazyinit thread lives. It walks 2724 * This is the function where ext4lazyinit thread lives. It walks
2721 * through the request list searching for next scheduled filesystem. 2725 * through the request list searching for next scheduled filesystem.
@@ -2784,6 +2788,10 @@ cont_thread:
2784 if (time_before(jiffies, next_wakeup)) 2788 if (time_before(jiffies, next_wakeup))
2785 schedule(); 2789 schedule();
2786 finish_wait(&eli->li_wait_daemon, &wait); 2790 finish_wait(&eli->li_wait_daemon, &wait);
2791 if (kthread_should_stop()) {
2792 ext4_clear_request_list();
2793 goto exit_thread;
2794 }
2787 } 2795 }
2788 2796
2789exit_thread: 2797exit_thread:
@@ -2808,6 +2816,7 @@ exit_thread:
2808 wake_up(&eli->li_wait_task); 2816 wake_up(&eli->li_wait_task);
2809 2817
2810 kfree(ext4_li_info); 2818 kfree(ext4_li_info);
2819 ext4_lazyinit_task = NULL;
2811 ext4_li_info = NULL; 2820 ext4_li_info = NULL;
2812 mutex_unlock(&ext4_li_mtx); 2821 mutex_unlock(&ext4_li_mtx);
2813 2822
@@ -2830,11 +2839,10 @@ static void ext4_clear_request_list(void)
2830 2839
2831static int ext4_run_lazyinit_thread(void) 2840static int ext4_run_lazyinit_thread(void)
2832{ 2841{
2833 struct task_struct *t; 2842 ext4_lazyinit_task = kthread_run(ext4_lazyinit_thread,
2834 2843 ext4_li_info, "ext4lazyinit");
2835 t = kthread_run(ext4_lazyinit_thread, ext4_li_info, "ext4lazyinit"); 2844 if (IS_ERR(ext4_lazyinit_task)) {
2836 if (IS_ERR(t)) { 2845 int err = PTR_ERR(ext4_lazyinit_task);
2837 int err = PTR_ERR(t);
2838 ext4_clear_request_list(); 2846 ext4_clear_request_list();
2839 del_timer_sync(&ext4_li_info->li_timer); 2847 del_timer_sync(&ext4_li_info->li_timer);
2840 kfree(ext4_li_info); 2848 kfree(ext4_li_info);
@@ -2985,16 +2993,10 @@ static void ext4_destroy_lazyinit_thread(void)
2985 * If thread exited earlier 2993 * If thread exited earlier
2986 * there's nothing to be done. 2994 * there's nothing to be done.
2987 */ 2995 */
2988 if (!ext4_li_info) 2996 if (!ext4_li_info || !ext4_lazyinit_task)
2989 return; 2997 return;
2990 2998
2991 ext4_clear_request_list(); 2999 kthread_stop(ext4_lazyinit_task);
2992
2993 while (ext4_li_info->li_task) {
2994 wake_up(&ext4_li_info->li_wait_daemon);
2995 wait_event(ext4_li_info->li_wait_task,
2996 ext4_li_info->li_task == NULL);
2997 }
2998} 3000}
2999 3001
3000static int ext4_fill_super(struct super_block *sb, void *data, int silent) 3002static int ext4_fill_super(struct super_block *sb, void *data, int silent)
@@ -4768,7 +4770,7 @@ static struct file_system_type ext4_fs_type = {
4768 .fs_flags = FS_REQUIRES_DEV, 4770 .fs_flags = FS_REQUIRES_DEV,
4769}; 4771};
4770 4772
4771int __init ext4_init_feat_adverts(void) 4773static int __init ext4_init_feat_adverts(void)
4772{ 4774{
4773 struct ext4_features *ef; 4775 struct ext4_features *ef;
4774 int ret = -ENOMEM; 4776 int ret = -ENOMEM;
@@ -4792,23 +4794,44 @@ out:
4792 return ret; 4794 return ret;
4793} 4795}
4794 4796
4797static void ext4_exit_feat_adverts(void)
4798{
4799 kobject_put(&ext4_feat->f_kobj);
4800 wait_for_completion(&ext4_feat->f_kobj_unregister);
4801 kfree(ext4_feat);
4802}
4803
4804/* Shared across all ext4 file systems */
4805wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ];
4806struct mutex ext4__aio_mutex[EXT4_WQ_HASH_SZ];
4807
4795static int __init ext4_init_fs(void) 4808static int __init ext4_init_fs(void)
4796{ 4809{
4797 int err; 4810 int i, err;
4798 4811
4799 ext4_check_flag_values(); 4812 ext4_check_flag_values();
4813
4814 for (i = 0; i < EXT4_WQ_HASH_SZ; i++) {
4815 mutex_init(&ext4__aio_mutex[i]);
4816 init_waitqueue_head(&ext4__ioend_wq[i]);
4817 }
4818
4800 err = ext4_init_pageio(); 4819 err = ext4_init_pageio();
4801 if (err) 4820 if (err)
4802 return err; 4821 return err;
4803 err = ext4_init_system_zone(); 4822 err = ext4_init_system_zone();
4804 if (err) 4823 if (err)
4805 goto out5; 4824 goto out7;
4806 ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj); 4825 ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj);
4807 if (!ext4_kset) 4826 if (!ext4_kset)
4808 goto out4; 4827 goto out6;
4809 ext4_proc_root = proc_mkdir("fs/ext4", NULL); 4828 ext4_proc_root = proc_mkdir("fs/ext4", NULL);
4829 if (!ext4_proc_root)
4830 goto out5;
4810 4831
4811 err = ext4_init_feat_adverts(); 4832 err = ext4_init_feat_adverts();
4833 if (err)
4834 goto out4;
4812 4835
4813 err = ext4_init_mballoc(); 4836 err = ext4_init_mballoc();
4814 if (err) 4837 if (err)
@@ -4838,12 +4861,14 @@ out1:
4838out2: 4861out2:
4839 ext4_exit_mballoc(); 4862 ext4_exit_mballoc();
4840out3: 4863out3:
4841 kfree(ext4_feat); 4864 ext4_exit_feat_adverts();
4865out4:
4842 remove_proc_entry("fs/ext4", NULL); 4866 remove_proc_entry("fs/ext4", NULL);
4867out5:
4843 kset_unregister(ext4_kset); 4868 kset_unregister(ext4_kset);
4844out4: 4869out6:
4845 ext4_exit_system_zone(); 4870 ext4_exit_system_zone();
4846out5: 4871out7:
4847 ext4_exit_pageio(); 4872 ext4_exit_pageio();
4848 return err; 4873 return err;
4849} 4874}
@@ -4857,6 +4882,7 @@ static void __exit ext4_exit_fs(void)
4857 destroy_inodecache(); 4882 destroy_inodecache();
4858 ext4_exit_xattr(); 4883 ext4_exit_xattr();
4859 ext4_exit_mballoc(); 4884 ext4_exit_mballoc();
4885 ext4_exit_feat_adverts();
4860 remove_proc_entry("fs/ext4", NULL); 4886 remove_proc_entry("fs/ext4", NULL);
4861 kset_unregister(ext4_kset); 4887 kset_unregister(ext4_kset);
4862 ext4_exit_system_zone(); 4888 ext4_exit_system_zone();
diff --git a/fs/fcntl.c b/fs/fcntl.c
index ecc8b3954ed6..cb1026181bdc 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -815,7 +815,7 @@ static int __init fcntl_init(void)
815 __O_SYNC | O_DSYNC | FASYNC | 815 __O_SYNC | O_DSYNC | FASYNC |
816 O_DIRECT | O_LARGEFILE | O_DIRECTORY | 816 O_DIRECT | O_LARGEFILE | O_DIRECTORY |
817 O_NOFOLLOW | O_NOATIME | O_CLOEXEC | 817 O_NOFOLLOW | O_NOATIME | O_CLOEXEC |
818 FMODE_EXEC 818 __FMODE_EXEC
819 )); 819 ));
820 820
821 fasync_cache = kmem_cache_create("fasync_cache", 821 fasync_cache = kmem_cache_create("fasync_cache",
diff --git a/fs/file_table.c b/fs/file_table.c
index c3e89adf53c0..eb36b6b17e26 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -125,13 +125,13 @@ struct file *get_empty_filp(void)
125 goto fail; 125 goto fail;
126 126
127 percpu_counter_inc(&nr_files); 127 percpu_counter_inc(&nr_files);
128 f->f_cred = get_cred(cred);
128 if (security_file_alloc(f)) 129 if (security_file_alloc(f))
129 goto fail_sec; 130 goto fail_sec;
130 131
131 INIT_LIST_HEAD(&f->f_u.fu_list); 132 INIT_LIST_HEAD(&f->f_u.fu_list);
132 atomic_long_set(&f->f_count, 1); 133 atomic_long_set(&f->f_count, 1);
133 rwlock_init(&f->f_owner.lock); 134 rwlock_init(&f->f_owner.lock);
134 f->f_cred = get_cred(cred);
135 spin_lock_init(&f->f_lock); 135 spin_lock_init(&f->f_lock);
136 eventpoll_init_file(f); 136 eventpoll_init_file(f);
137 /* f->f_version: 0 */ 137 /* f->f_version: 0 */
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index bfed8447ed80..83543b5ff941 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -1283,8 +1283,11 @@ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr,
1283 if (err) 1283 if (err)
1284 return err; 1284 return err;
1285 1285
1286 if ((attr->ia_valid & ATTR_OPEN) && fc->atomic_o_trunc) 1286 if (attr->ia_valid & ATTR_OPEN) {
1287 return 0; 1287 if (fc->atomic_o_trunc)
1288 return 0;
1289 file = NULL;
1290 }
1288 1291
1289 if (attr->ia_valid & ATTR_SIZE) 1292 if (attr->ia_valid & ATTR_SIZE)
1290 is_truncate = true; 1293 is_truncate = true;
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 95da1bc1c826..9e0832dbb1e3 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -86,18 +86,52 @@ struct fuse_file *fuse_file_get(struct fuse_file *ff)
86 return ff; 86 return ff;
87} 87}
88 88
89static void fuse_release_async(struct work_struct *work)
90{
91 struct fuse_req *req;
92 struct fuse_conn *fc;
93 struct path path;
94
95 req = container_of(work, struct fuse_req, misc.release.work);
96 path = req->misc.release.path;
97 fc = get_fuse_conn(path.dentry->d_inode);
98
99 fuse_put_request(fc, req);
100 path_put(&path);
101}
102
89static void fuse_release_end(struct fuse_conn *fc, struct fuse_req *req) 103static void fuse_release_end(struct fuse_conn *fc, struct fuse_req *req)
90{ 104{
91 path_put(&req->misc.release.path); 105 if (fc->destroy_req) {
106 /*
107 * If this is a fuseblk mount, then it's possible that
108 * releasing the path will result in releasing the
109 * super block and sending the DESTROY request. If
110 * the server is single threaded, this would hang.
111 * For this reason do the path_put() in a separate
112 * thread.
113 */
114 atomic_inc(&req->count);
115 INIT_WORK(&req->misc.release.work, fuse_release_async);
116 schedule_work(&req->misc.release.work);
117 } else {
118 path_put(&req->misc.release.path);
119 }
92} 120}
93 121
94static void fuse_file_put(struct fuse_file *ff) 122static void fuse_file_put(struct fuse_file *ff, bool sync)
95{ 123{
96 if (atomic_dec_and_test(&ff->count)) { 124 if (atomic_dec_and_test(&ff->count)) {
97 struct fuse_req *req = ff->reserved_req; 125 struct fuse_req *req = ff->reserved_req;
98 126
99 req->end = fuse_release_end; 127 if (sync) {
100 fuse_request_send_background(ff->fc, req); 128 fuse_request_send(ff->fc, req);
129 path_put(&req->misc.release.path);
130 fuse_put_request(ff->fc, req);
131 } else {
132 req->end = fuse_release_end;
133 fuse_request_send_background(ff->fc, req);
134 }
101 kfree(ff); 135 kfree(ff);
102 } 136 }
103} 137}
@@ -219,8 +253,12 @@ void fuse_release_common(struct file *file, int opcode)
219 * Normally this will send the RELEASE request, however if 253 * Normally this will send the RELEASE request, however if
220 * some asynchronous READ or WRITE requests are outstanding, 254 * some asynchronous READ or WRITE requests are outstanding,
221 * the sending will be delayed. 255 * the sending will be delayed.
256 *
257 * Make the release synchronous if this is a fuseblk mount,
258 * synchronous RELEASE is allowed (and desirable) in this case
259 * because the server can be trusted not to screw up.
222 */ 260 */
223 fuse_file_put(ff); 261 fuse_file_put(ff, ff->fc->destroy_req != NULL);
224} 262}
225 263
226static int fuse_open(struct inode *inode, struct file *file) 264static int fuse_open(struct inode *inode, struct file *file)
@@ -558,7 +596,7 @@ static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_req *req)
558 page_cache_release(page); 596 page_cache_release(page);
559 } 597 }
560 if (req->ff) 598 if (req->ff)
561 fuse_file_put(req->ff); 599 fuse_file_put(req->ff, false);
562} 600}
563 601
564static void fuse_send_readpages(struct fuse_req *req, struct file *file) 602static void fuse_send_readpages(struct fuse_req *req, struct file *file)
@@ -1137,7 +1175,7 @@ static ssize_t fuse_direct_write(struct file *file, const char __user *buf,
1137static void fuse_writepage_free(struct fuse_conn *fc, struct fuse_req *req) 1175static void fuse_writepage_free(struct fuse_conn *fc, struct fuse_req *req)
1138{ 1176{
1139 __free_page(req->pages[0]); 1177 __free_page(req->pages[0]);
1140 fuse_file_put(req->ff); 1178 fuse_file_put(req->ff, false);
1141} 1179}
1142 1180
1143static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req) 1181static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req)
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index ae5744a2f9e9..d4286947bc2c 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -21,6 +21,7 @@
21#include <linux/rwsem.h> 21#include <linux/rwsem.h>
22#include <linux/rbtree.h> 22#include <linux/rbtree.h>
23#include <linux/poll.h> 23#include <linux/poll.h>
24#include <linux/workqueue.h>
24 25
25/** Max number of pages that can be used in a single read request */ 26/** Max number of pages that can be used in a single read request */
26#define FUSE_MAX_PAGES_PER_REQ 32 27#define FUSE_MAX_PAGES_PER_REQ 32
@@ -262,7 +263,10 @@ struct fuse_req {
262 /** Data for asynchronous requests */ 263 /** Data for asynchronous requests */
263 union { 264 union {
264 struct { 265 struct {
265 struct fuse_release_in in; 266 union {
267 struct fuse_release_in in;
268 struct work_struct work;
269 };
266 struct path path; 270 struct path path;
267 } release; 271 } release;
268 struct fuse_init_in init_in; 272 struct fuse_init_in init_in;
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 08a8beb152e6..7cd9a5a68d59 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1779,11 +1779,11 @@ int __init gfs2_glock_init(void)
1779#endif 1779#endif
1780 1780
1781 glock_workqueue = alloc_workqueue("glock_workqueue", WQ_MEM_RECLAIM | 1781 glock_workqueue = alloc_workqueue("glock_workqueue", WQ_MEM_RECLAIM |
1782 WQ_HIGHPRI | WQ_FREEZEABLE, 0); 1782 WQ_HIGHPRI | WQ_FREEZABLE, 0);
1783 if (IS_ERR(glock_workqueue)) 1783 if (IS_ERR(glock_workqueue))
1784 return PTR_ERR(glock_workqueue); 1784 return PTR_ERR(glock_workqueue);
1785 gfs2_delete_workqueue = alloc_workqueue("delete_workqueue", 1785 gfs2_delete_workqueue = alloc_workqueue("delete_workqueue",
1786 WQ_MEM_RECLAIM | WQ_FREEZEABLE, 1786 WQ_MEM_RECLAIM | WQ_FREEZABLE,
1787 0); 1787 0);
1788 if (IS_ERR(gfs2_delete_workqueue)) { 1788 if (IS_ERR(gfs2_delete_workqueue)) {
1789 destroy_workqueue(glock_workqueue); 1789 destroy_workqueue(glock_workqueue);
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index ebef7ab6e17e..72c31a315d96 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -59,14 +59,7 @@ static void gfs2_init_gl_aspace_once(void *foo)
59 struct address_space *mapping = (struct address_space *)(gl + 1); 59 struct address_space *mapping = (struct address_space *)(gl + 1);
60 60
61 gfs2_init_glock_once(gl); 61 gfs2_init_glock_once(gl);
62 memset(mapping, 0, sizeof(*mapping)); 62 address_space_init_once(mapping);
63 INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC);
64 spin_lock_init(&mapping->tree_lock);
65 spin_lock_init(&mapping->i_mmap_lock);
66 INIT_LIST_HEAD(&mapping->private_list);
67 spin_lock_init(&mapping->private_lock);
68 INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap);
69 INIT_LIST_HEAD(&mapping->i_mmap_nonlinear);
70} 63}
71 64
72/** 65/**
@@ -144,7 +137,7 @@ static int __init init_gfs2_fs(void)
144 137
145 error = -ENOMEM; 138 error = -ENOMEM;
146 gfs_recovery_wq = alloc_workqueue("gfs_recovery", 139 gfs_recovery_wq = alloc_workqueue("gfs_recovery",
147 WQ_MEM_RECLAIM | WQ_FREEZEABLE, 0); 140 WQ_MEM_RECLAIM | WQ_FREEZABLE, 0);
148 if (!gfs_recovery_wq) 141 if (!gfs_recovery_wq)
149 goto fail_wq; 142 goto fail_wq;
150 143
diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c
index afa66aaa2237..b4d70b13be92 100644
--- a/fs/hfs/dir.c
+++ b/fs/hfs/dir.c
@@ -238,46 +238,22 @@ static int hfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
238} 238}
239 239
240/* 240/*
241 * hfs_unlink() 241 * hfs_remove()
242 * 242 *
243 * This is the unlink() entry in the inode_operations structure for 243 * This serves as both unlink() and rmdir() in the inode_operations
244 * regular HFS directories. The purpose is to delete an existing 244 * structure for regular HFS directories. The purpose is to delete
245 * file, given the inode for the parent directory and the name 245 * an existing child, given the inode for the parent directory and
246 * (and its length) of the existing file. 246 * the name (and its length) of the existing directory.
247 */
248static int hfs_unlink(struct inode *dir, struct dentry *dentry)
249{
250 struct inode *inode;
251 int res;
252
253 inode = dentry->d_inode;
254 res = hfs_cat_delete(inode->i_ino, dir, &dentry->d_name);
255 if (res)
256 return res;
257
258 drop_nlink(inode);
259 hfs_delete_inode(inode);
260 inode->i_ctime = CURRENT_TIME_SEC;
261 mark_inode_dirty(inode);
262
263 return res;
264}
265
266/*
267 * hfs_rmdir()
268 * 247 *
269 * This is the rmdir() entry in the inode_operations structure for 248 * HFS does not have hardlinks, so both rmdir and unlink set the
270 * regular HFS directories. The purpose is to delete an existing 249 * link count to 0. The only difference is the emptiness check.
271 * directory, given the inode for the parent directory and the name
272 * (and its length) of the existing directory.
273 */ 250 */
274static int hfs_rmdir(struct inode *dir, struct dentry *dentry) 251static int hfs_remove(struct inode *dir, struct dentry *dentry)
275{ 252{
276 struct inode *inode; 253 struct inode *inode = dentry->d_inode;
277 int res; 254 int res;
278 255
279 inode = dentry->d_inode; 256 if (S_ISDIR(inode->i_mode) && inode->i_size != 2)
280 if (inode->i_size != 2)
281 return -ENOTEMPTY; 257 return -ENOTEMPTY;
282 res = hfs_cat_delete(inode->i_ino, dir, &dentry->d_name); 258 res = hfs_cat_delete(inode->i_ino, dir, &dentry->d_name);
283 if (res) 259 if (res)
@@ -307,7 +283,7 @@ static int hfs_rename(struct inode *old_dir, struct dentry *old_dentry,
307 283
308 /* Unlink destination if it already exists */ 284 /* Unlink destination if it already exists */
309 if (new_dentry->d_inode) { 285 if (new_dentry->d_inode) {
310 res = hfs_unlink(new_dir, new_dentry); 286 res = hfs_remove(new_dir, new_dentry);
311 if (res) 287 if (res)
312 return res; 288 return res;
313 } 289 }
@@ -332,9 +308,9 @@ const struct file_operations hfs_dir_operations = {
332const struct inode_operations hfs_dir_inode_operations = { 308const struct inode_operations hfs_dir_inode_operations = {
333 .create = hfs_create, 309 .create = hfs_create,
334 .lookup = hfs_lookup, 310 .lookup = hfs_lookup,
335 .unlink = hfs_unlink, 311 .unlink = hfs_remove,
336 .mkdir = hfs_mkdir, 312 .mkdir = hfs_mkdir,
337 .rmdir = hfs_rmdir, 313 .rmdir = hfs_remove,
338 .rename = hfs_rename, 314 .rename = hfs_rename,
339 .setattr = hfs_inode_setattr, 315 .setattr = hfs_inode_setattr,
340}; 316};
diff --git a/fs/hfsplus/extents.c b/fs/hfsplus/extents.c
index 52a0bcaa7b6d..b1991a2a08e0 100644
--- a/fs/hfsplus/extents.c
+++ b/fs/hfsplus/extents.c
@@ -397,8 +397,8 @@ int hfsplus_file_extend(struct inode *inode)
397 u32 start, len, goal; 397 u32 start, len, goal;
398 int res; 398 int res;
399 399
400 if (sbi->total_blocks - sbi->free_blocks + 8 > 400 if (sbi->alloc_file->i_size * 8 <
401 sbi->alloc_file->i_size * 8) { 401 sbi->total_blocks - sbi->free_blocks + 8) {
402 /* extend alloc file */ 402 /* extend alloc file */
403 printk(KERN_ERR "hfs: extend alloc file! " 403 printk(KERN_ERR "hfs: extend alloc file! "
404 "(%llu,%u,%u)\n", 404 "(%llu,%u,%u)\n",
diff --git a/fs/hfsplus/part_tbl.c b/fs/hfsplus/part_tbl.c
index d66ad113b1cc..40ad88c12c64 100644
--- a/fs/hfsplus/part_tbl.c
+++ b/fs/hfsplus/part_tbl.c
@@ -134,7 +134,7 @@ int hfs_part_find(struct super_block *sb,
134 res = hfsplus_submit_bio(sb->s_bdev, *part_start + HFS_PMAP_BLK, 134 res = hfsplus_submit_bio(sb->s_bdev, *part_start + HFS_PMAP_BLK,
135 data, READ); 135 data, READ);
136 if (res) 136 if (res)
137 return res; 137 goto out;
138 138
139 switch (be16_to_cpu(*((__be16 *)data))) { 139 switch (be16_to_cpu(*((__be16 *)data))) {
140 case HFS_OLD_PMAP_MAGIC: 140 case HFS_OLD_PMAP_MAGIC:
@@ -147,7 +147,7 @@ int hfs_part_find(struct super_block *sb,
147 res = -ENOENT; 147 res = -ENOENT;
148 break; 148 break;
149 } 149 }
150 150out:
151 kfree(data); 151 kfree(data);
152 return res; 152 return res;
153} 153}
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 9a3b4795f43c..b49b55584c84 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -338,20 +338,22 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
338 struct inode *root, *inode; 338 struct inode *root, *inode;
339 struct qstr str; 339 struct qstr str;
340 struct nls_table *nls = NULL; 340 struct nls_table *nls = NULL;
341 int err = -EINVAL; 341 int err;
342 342
343 err = -EINVAL;
343 sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); 344 sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
344 if (!sbi) 345 if (!sbi)
345 return -ENOMEM; 346 goto out;
346 347
347 sb->s_fs_info = sbi; 348 sb->s_fs_info = sbi;
348 mutex_init(&sbi->alloc_mutex); 349 mutex_init(&sbi->alloc_mutex);
349 mutex_init(&sbi->vh_mutex); 350 mutex_init(&sbi->vh_mutex);
350 hfsplus_fill_defaults(sbi); 351 hfsplus_fill_defaults(sbi);
352
353 err = -EINVAL;
351 if (!hfsplus_parse_options(data, sbi)) { 354 if (!hfsplus_parse_options(data, sbi)) {
352 printk(KERN_ERR "hfs: unable to parse mount options\n"); 355 printk(KERN_ERR "hfs: unable to parse mount options\n");
353 err = -EINVAL; 356 goto out_unload_nls;
354 goto cleanup;
355 } 357 }
356 358
357 /* temporarily use utf8 to correctly find the hidden dir below */ 359 /* temporarily use utf8 to correctly find the hidden dir below */
@@ -359,16 +361,14 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
359 sbi->nls = load_nls("utf8"); 361 sbi->nls = load_nls("utf8");
360 if (!sbi->nls) { 362 if (!sbi->nls) {
361 printk(KERN_ERR "hfs: unable to load nls for utf8\n"); 363 printk(KERN_ERR "hfs: unable to load nls for utf8\n");
362 err = -EINVAL; 364 goto out_unload_nls;
363 goto cleanup;
364 } 365 }
365 366
366 /* Grab the volume header */ 367 /* Grab the volume header */
367 if (hfsplus_read_wrapper(sb)) { 368 if (hfsplus_read_wrapper(sb)) {
368 if (!silent) 369 if (!silent)
369 printk(KERN_WARNING "hfs: unable to find HFS+ superblock\n"); 370 printk(KERN_WARNING "hfs: unable to find HFS+ superblock\n");
370 err = -EINVAL; 371 goto out_unload_nls;
371 goto cleanup;
372 } 372 }
373 vhdr = sbi->s_vhdr; 373 vhdr = sbi->s_vhdr;
374 374
@@ -377,7 +377,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
377 if (be16_to_cpu(vhdr->version) < HFSPLUS_MIN_VERSION || 377 if (be16_to_cpu(vhdr->version) < HFSPLUS_MIN_VERSION ||
378 be16_to_cpu(vhdr->version) > HFSPLUS_CURRENT_VERSION) { 378 be16_to_cpu(vhdr->version) > HFSPLUS_CURRENT_VERSION) {
379 printk(KERN_ERR "hfs: wrong filesystem version\n"); 379 printk(KERN_ERR "hfs: wrong filesystem version\n");
380 goto cleanup; 380 goto out_free_vhdr;
381 } 381 }
382 sbi->total_blocks = be32_to_cpu(vhdr->total_blocks); 382 sbi->total_blocks = be32_to_cpu(vhdr->total_blocks);
383 sbi->free_blocks = be32_to_cpu(vhdr->free_blocks); 383 sbi->free_blocks = be32_to_cpu(vhdr->free_blocks);
@@ -421,19 +421,19 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
421 sbi->ext_tree = hfs_btree_open(sb, HFSPLUS_EXT_CNID); 421 sbi->ext_tree = hfs_btree_open(sb, HFSPLUS_EXT_CNID);
422 if (!sbi->ext_tree) { 422 if (!sbi->ext_tree) {
423 printk(KERN_ERR "hfs: failed to load extents file\n"); 423 printk(KERN_ERR "hfs: failed to load extents file\n");
424 goto cleanup; 424 goto out_free_vhdr;
425 } 425 }
426 sbi->cat_tree = hfs_btree_open(sb, HFSPLUS_CAT_CNID); 426 sbi->cat_tree = hfs_btree_open(sb, HFSPLUS_CAT_CNID);
427 if (!sbi->cat_tree) { 427 if (!sbi->cat_tree) {
428 printk(KERN_ERR "hfs: failed to load catalog file\n"); 428 printk(KERN_ERR "hfs: failed to load catalog file\n");
429 goto cleanup; 429 goto out_close_ext_tree;
430 } 430 }
431 431
432 inode = hfsplus_iget(sb, HFSPLUS_ALLOC_CNID); 432 inode = hfsplus_iget(sb, HFSPLUS_ALLOC_CNID);
433 if (IS_ERR(inode)) { 433 if (IS_ERR(inode)) {
434 printk(KERN_ERR "hfs: failed to load allocation file\n"); 434 printk(KERN_ERR "hfs: failed to load allocation file\n");
435 err = PTR_ERR(inode); 435 err = PTR_ERR(inode);
436 goto cleanup; 436 goto out_close_cat_tree;
437 } 437 }
438 sbi->alloc_file = inode; 438 sbi->alloc_file = inode;
439 439
@@ -442,14 +442,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
442 if (IS_ERR(root)) { 442 if (IS_ERR(root)) {
443 printk(KERN_ERR "hfs: failed to load root directory\n"); 443 printk(KERN_ERR "hfs: failed to load root directory\n");
444 err = PTR_ERR(root); 444 err = PTR_ERR(root);
445 goto cleanup; 445 goto out_put_alloc_file;
446 }
447 sb->s_d_op = &hfsplus_dentry_operations;
448 sb->s_root = d_alloc_root(root);
449 if (!sb->s_root) {
450 iput(root);
451 err = -ENOMEM;
452 goto cleanup;
453 } 446 }
454 447
455 str.len = sizeof(HFSP_HIDDENDIR_NAME) - 1; 448 str.len = sizeof(HFSP_HIDDENDIR_NAME) - 1;
@@ -459,46 +452,69 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
459 if (!hfs_brec_read(&fd, &entry, sizeof(entry))) { 452 if (!hfs_brec_read(&fd, &entry, sizeof(entry))) {
460 hfs_find_exit(&fd); 453 hfs_find_exit(&fd);
461 if (entry.type != cpu_to_be16(HFSPLUS_FOLDER)) 454 if (entry.type != cpu_to_be16(HFSPLUS_FOLDER))
462 goto cleanup; 455 goto out_put_root;
463 inode = hfsplus_iget(sb, be32_to_cpu(entry.folder.id)); 456 inode = hfsplus_iget(sb, be32_to_cpu(entry.folder.id));
464 if (IS_ERR(inode)) { 457 if (IS_ERR(inode)) {
465 err = PTR_ERR(inode); 458 err = PTR_ERR(inode);
466 goto cleanup; 459 goto out_put_root;
467 } 460 }
468 sbi->hidden_dir = inode; 461 sbi->hidden_dir = inode;
469 } else 462 } else
470 hfs_find_exit(&fd); 463 hfs_find_exit(&fd);
471 464
472 if (sb->s_flags & MS_RDONLY) 465 if (!(sb->s_flags & MS_RDONLY)) {
473 goto out; 466 /*
467 * H+LX == hfsplusutils, H+Lx == this driver, H+lx is unused
468 * all three are registered with Apple for our use
469 */
470 vhdr->last_mount_vers = cpu_to_be32(HFSP_MOUNT_VERSION);
471 vhdr->modify_date = hfsp_now2mt();
472 be32_add_cpu(&vhdr->write_count, 1);
473 vhdr->attributes &= cpu_to_be32(~HFSPLUS_VOL_UNMNT);
474 vhdr->attributes |= cpu_to_be32(HFSPLUS_VOL_INCNSTNT);
475 hfsplus_sync_fs(sb, 1);
474 476
475 /* H+LX == hfsplusutils, H+Lx == this driver, H+lx is unused 477 if (!sbi->hidden_dir) {
476 * all three are registered with Apple for our use 478 mutex_lock(&sbi->vh_mutex);
477 */ 479 sbi->hidden_dir = hfsplus_new_inode(sb, S_IFDIR);
478 vhdr->last_mount_vers = cpu_to_be32(HFSP_MOUNT_VERSION); 480 hfsplus_create_cat(sbi->hidden_dir->i_ino, root, &str,
479 vhdr->modify_date = hfsp_now2mt(); 481 sbi->hidden_dir);
480 be32_add_cpu(&vhdr->write_count, 1); 482 mutex_unlock(&sbi->vh_mutex);
481 vhdr->attributes &= cpu_to_be32(~HFSPLUS_VOL_UNMNT); 483
482 vhdr->attributes |= cpu_to_be32(HFSPLUS_VOL_INCNSTNT); 484 hfsplus_mark_inode_dirty(sbi->hidden_dir,
483 hfsplus_sync_fs(sb, 1); 485 HFSPLUS_I_CAT_DIRTY);
484 486 }
485 if (!sbi->hidden_dir) {
486 mutex_lock(&sbi->vh_mutex);
487 sbi->hidden_dir = hfsplus_new_inode(sb, S_IFDIR);
488 hfsplus_create_cat(sbi->hidden_dir->i_ino, sb->s_root->d_inode,
489 &str, sbi->hidden_dir);
490 mutex_unlock(&sbi->vh_mutex);
491
492 hfsplus_mark_inode_dirty(sbi->hidden_dir, HFSPLUS_I_CAT_DIRTY);
493 } 487 }
494out: 488
489 sb->s_d_op = &hfsplus_dentry_operations;
490 sb->s_root = d_alloc_root(root);
491 if (!sb->s_root) {
492 err = -ENOMEM;
493 goto out_put_hidden_dir;
494 }
495
495 unload_nls(sbi->nls); 496 unload_nls(sbi->nls);
496 sbi->nls = nls; 497 sbi->nls = nls;
497 return 0; 498 return 0;
498 499
499cleanup: 500out_put_hidden_dir:
500 hfsplus_put_super(sb); 501 iput(sbi->hidden_dir);
502out_put_root:
503 iput(sbi->alloc_file);
504out_put_alloc_file:
505 iput(sbi->alloc_file);
506out_close_cat_tree:
507 hfs_btree_close(sbi->cat_tree);
508out_close_ext_tree:
509 hfs_btree_close(sbi->ext_tree);
510out_free_vhdr:
511 kfree(sbi->s_vhdr);
512 kfree(sbi->s_backup_vhdr);
513out_unload_nls:
514 unload_nls(sbi->nls);
501 unload_nls(nls); 515 unload_nls(nls);
516 kfree(sbi);
517out:
502 return err; 518 return err;
503} 519}
504 520
diff --git a/fs/hfsplus/wrapper.c b/fs/hfsplus/wrapper.c
index 196231794f64..3031d81f5f0f 100644
--- a/fs/hfsplus/wrapper.c
+++ b/fs/hfsplus/wrapper.c
@@ -167,7 +167,7 @@ reread:
167 break; 167 break;
168 case cpu_to_be16(HFSP_WRAP_MAGIC): 168 case cpu_to_be16(HFSP_WRAP_MAGIC):
169 if (!hfsplus_read_mdb(sbi->s_vhdr, &wd)) 169 if (!hfsplus_read_mdb(sbi->s_vhdr, &wd))
170 goto out; 170 goto out_free_backup_vhdr;
171 wd.ablk_size >>= HFSPLUS_SECTOR_SHIFT; 171 wd.ablk_size >>= HFSPLUS_SECTOR_SHIFT;
172 part_start += wd.ablk_start + wd.embed_start * wd.ablk_size; 172 part_start += wd.ablk_start + wd.embed_start * wd.ablk_size;
173 part_size = wd.embed_count * wd.ablk_size; 173 part_size = wd.embed_count * wd.ablk_size;
@@ -179,7 +179,7 @@ reread:
179 * (should do this only for cdrom/loop though) 179 * (should do this only for cdrom/loop though)
180 */ 180 */
181 if (hfs_part_find(sb, &part_start, &part_size)) 181 if (hfs_part_find(sb, &part_start, &part_size))
182 goto out; 182 goto out_free_backup_vhdr;
183 goto reread; 183 goto reread;
184 } 184 }
185 185
diff --git a/fs/inode.c b/fs/inode.c
index da85e56378f3..0647d80accf6 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -295,6 +295,20 @@ static void destroy_inode(struct inode *inode)
295 call_rcu(&inode->i_rcu, i_callback); 295 call_rcu(&inode->i_rcu, i_callback);
296} 296}
297 297
298void address_space_init_once(struct address_space *mapping)
299{
300 memset(mapping, 0, sizeof(*mapping));
301 INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC);
302 spin_lock_init(&mapping->tree_lock);
303 spin_lock_init(&mapping->i_mmap_lock);
304 INIT_LIST_HEAD(&mapping->private_list);
305 spin_lock_init(&mapping->private_lock);
306 INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap);
307 INIT_LIST_HEAD(&mapping->i_mmap_nonlinear);
308 mutex_init(&mapping->unmap_mutex);
309}
310EXPORT_SYMBOL(address_space_init_once);
311
298/* 312/*
299 * These are initializations that only need to be done 313 * These are initializations that only need to be done
300 * once, because the fields are idempotent across use 314 * once, because the fields are idempotent across use
@@ -308,13 +322,7 @@ void inode_init_once(struct inode *inode)
308 INIT_LIST_HEAD(&inode->i_devices); 322 INIT_LIST_HEAD(&inode->i_devices);
309 INIT_LIST_HEAD(&inode->i_wb_list); 323 INIT_LIST_HEAD(&inode->i_wb_list);
310 INIT_LIST_HEAD(&inode->i_lru); 324 INIT_LIST_HEAD(&inode->i_lru);
311 INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC); 325 address_space_init_once(&inode->i_data);
312 spin_lock_init(&inode->i_data.tree_lock);
313 spin_lock_init(&inode->i_data.i_mmap_lock);
314 INIT_LIST_HEAD(&inode->i_data.private_list);
315 spin_lock_init(&inode->i_data.private_lock);
316 INIT_RAW_PRIO_TREE_ROOT(&inode->i_data.i_mmap);
317 INIT_LIST_HEAD(&inode->i_data.i_mmap_nonlinear);
318 i_size_ordered_init(inode); 326 i_size_ordered_init(inode);
319#ifdef CONFIG_FSNOTIFY 327#ifdef CONFIG_FSNOTIFY
320 INIT_HLIST_HEAD(&inode->i_fsnotify_marks); 328 INIT_HLIST_HEAD(&inode->i_fsnotify_marks);
@@ -540,11 +548,14 @@ void evict_inodes(struct super_block *sb)
540/** 548/**
541 * invalidate_inodes - attempt to free all inodes on a superblock 549 * invalidate_inodes - attempt to free all inodes on a superblock
542 * @sb: superblock to operate on 550 * @sb: superblock to operate on
551 * @kill_dirty: flag to guide handling of dirty inodes
543 * 552 *
544 * Attempts to free all inodes for a given superblock. If there were any 553 * Attempts to free all inodes for a given superblock. If there were any
545 * busy inodes return a non-zero value, else zero. 554 * busy inodes return a non-zero value, else zero.
555 * If @kill_dirty is set, discard dirty inodes too, otherwise treat
556 * them as busy.
546 */ 557 */
547int invalidate_inodes(struct super_block *sb) 558int invalidate_inodes(struct super_block *sb, bool kill_dirty)
548{ 559{
549 int busy = 0; 560 int busy = 0;
550 struct inode *inode, *next; 561 struct inode *inode, *next;
@@ -556,6 +567,10 @@ int invalidate_inodes(struct super_block *sb)
556 list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) { 567 list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
557 if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) 568 if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE))
558 continue; 569 continue;
570 if (inode->i_state & I_DIRTY && !kill_dirty) {
571 busy = 1;
572 continue;
573 }
559 if (atomic_read(&inode->i_count)) { 574 if (atomic_read(&inode->i_count)) {
560 busy = 1; 575 busy = 1;
561 continue; 576 continue;
diff --git a/fs/internal.h b/fs/internal.h
index 0663568b1247..9b976b57d7fe 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -112,4 +112,4 @@ extern void release_open_intent(struct nameidata *);
112 */ 112 */
113extern int get_nr_dirty_inodes(void); 113extern int get_nr_dirty_inodes(void);
114extern void evict_inodes(struct super_block *); 114extern void evict_inodes(struct super_block *);
115extern int invalidate_inodes(struct super_block *); 115extern int invalidate_inodes(struct super_block *, bool);
diff --git a/fs/ioctl.c b/fs/ioctl.c
index a59635e295fa..1eebeb72b202 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -273,6 +273,13 @@ int __generic_block_fiemap(struct inode *inode,
273 len = isize; 273 len = isize;
274 } 274 }
275 275
276 /*
277 * Some filesystems can't deal with being asked to map less than
278 * blocksize, so make sure our len is at least block length.
279 */
280 if (logical_to_blk(inode, len) == 0)
281 len = blk_to_logical(inode, 1);
282
276 start_blk = logical_to_blk(inode, start); 283 start_blk = logical_to_blk(inode, start);
277 last_blk = logical_to_blk(inode, start + len - 1); 284 last_blk = logical_to_blk(inode, start + len - 1);
278 285
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 9e4686900f18..97e73469b2c4 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -473,7 +473,8 @@ int __jbd2_log_space_left(journal_t *journal)
473} 473}
474 474
475/* 475/*
476 * Called under j_state_lock. Returns true if a transaction commit was started. 476 * Called with j_state_lock locked for writing.
477 * Returns true if a transaction commit was started.
477 */ 478 */
478int __jbd2_log_start_commit(journal_t *journal, tid_t target) 479int __jbd2_log_start_commit(journal_t *journal, tid_t target)
479{ 480{
@@ -520,11 +521,13 @@ int jbd2_journal_force_commit_nested(journal_t *journal)
520{ 521{
521 transaction_t *transaction = NULL; 522 transaction_t *transaction = NULL;
522 tid_t tid; 523 tid_t tid;
524 int need_to_start = 0;
523 525
524 read_lock(&journal->j_state_lock); 526 read_lock(&journal->j_state_lock);
525 if (journal->j_running_transaction && !current->journal_info) { 527 if (journal->j_running_transaction && !current->journal_info) {
526 transaction = journal->j_running_transaction; 528 transaction = journal->j_running_transaction;
527 __jbd2_log_start_commit(journal, transaction->t_tid); 529 if (!tid_geq(journal->j_commit_request, transaction->t_tid))
530 need_to_start = 1;
528 } else if (journal->j_committing_transaction) 531 } else if (journal->j_committing_transaction)
529 transaction = journal->j_committing_transaction; 532 transaction = journal->j_committing_transaction;
530 533
@@ -535,6 +538,8 @@ int jbd2_journal_force_commit_nested(journal_t *journal)
535 538
536 tid = transaction->t_tid; 539 tid = transaction->t_tid;
537 read_unlock(&journal->j_state_lock); 540 read_unlock(&journal->j_state_lock);
541 if (need_to_start)
542 jbd2_log_start_commit(journal, tid);
538 jbd2_log_wait_commit(journal, tid); 543 jbd2_log_wait_commit(journal, tid);
539 return 1; 544 return 1;
540} 545}
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index faad2bd787c7..1d1191050f99 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -117,10 +117,10 @@ static inline void update_t_max_wait(transaction_t *transaction)
117static int start_this_handle(journal_t *journal, handle_t *handle, 117static int start_this_handle(journal_t *journal, handle_t *handle,
118 int gfp_mask) 118 int gfp_mask)
119{ 119{
120 transaction_t *transaction; 120 transaction_t *transaction, *new_transaction = NULL;
121 int needed; 121 tid_t tid;
122 int nblocks = handle->h_buffer_credits; 122 int needed, need_to_start;
123 transaction_t *new_transaction = NULL; 123 int nblocks = handle->h_buffer_credits;
124 124
125 if (nblocks > journal->j_max_transaction_buffers) { 125 if (nblocks > journal->j_max_transaction_buffers) {
126 printk(KERN_ERR "JBD: %s wants too many credits (%d > %d)\n", 126 printk(KERN_ERR "JBD: %s wants too many credits (%d > %d)\n",
@@ -222,8 +222,11 @@ repeat:
222 atomic_sub(nblocks, &transaction->t_outstanding_credits); 222 atomic_sub(nblocks, &transaction->t_outstanding_credits);
223 prepare_to_wait(&journal->j_wait_transaction_locked, &wait, 223 prepare_to_wait(&journal->j_wait_transaction_locked, &wait,
224 TASK_UNINTERRUPTIBLE); 224 TASK_UNINTERRUPTIBLE);
225 __jbd2_log_start_commit(journal, transaction->t_tid); 225 tid = transaction->t_tid;
226 need_to_start = !tid_geq(journal->j_commit_request, tid);
226 read_unlock(&journal->j_state_lock); 227 read_unlock(&journal->j_state_lock);
228 if (need_to_start)
229 jbd2_log_start_commit(journal, tid);
227 schedule(); 230 schedule();
228 finish_wait(&journal->j_wait_transaction_locked, &wait); 231 finish_wait(&journal->j_wait_transaction_locked, &wait);
229 goto repeat; 232 goto repeat;
@@ -442,7 +445,8 @@ int jbd2__journal_restart(handle_t *handle, int nblocks, int gfp_mask)
442{ 445{
443 transaction_t *transaction = handle->h_transaction; 446 transaction_t *transaction = handle->h_transaction;
444 journal_t *journal = transaction->t_journal; 447 journal_t *journal = transaction->t_journal;
445 int ret; 448 tid_t tid;
449 int need_to_start, ret;
446 450
447 /* If we've had an abort of any type, don't even think about 451 /* If we've had an abort of any type, don't even think about
448 * actually doing the restart! */ 452 * actually doing the restart! */
@@ -465,8 +469,11 @@ int jbd2__journal_restart(handle_t *handle, int nblocks, int gfp_mask)
465 spin_unlock(&transaction->t_handle_lock); 469 spin_unlock(&transaction->t_handle_lock);
466 470
467 jbd_debug(2, "restarting handle %p\n", handle); 471 jbd_debug(2, "restarting handle %p\n", handle);
468 __jbd2_log_start_commit(journal, transaction->t_tid); 472 tid = transaction->t_tid;
473 need_to_start = !tid_geq(journal->j_commit_request, tid);
469 read_unlock(&journal->j_state_lock); 474 read_unlock(&journal->j_state_lock);
475 if (need_to_start)
476 jbd2_log_start_commit(journal, tid);
470 477
471 lock_map_release(&handle->h_lockdep_map); 478 lock_map_release(&handle->h_lockdep_map);
472 handle->h_buffer_credits = nblocks; 479 handle->h_buffer_credits = nblocks;
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index 5f1bcb2f06f3..b7c99bfb3da6 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -520,7 +520,7 @@ static struct nlm_host *next_host_state(struct hlist_head *cache,
520 struct nsm_handle *nsm, 520 struct nsm_handle *nsm,
521 const struct nlm_reboot *info) 521 const struct nlm_reboot *info)
522{ 522{
523 struct nlm_host *host = NULL; 523 struct nlm_host *host;
524 struct hlist_head *chain; 524 struct hlist_head *chain;
525 struct hlist_node *pos; 525 struct hlist_node *pos;
526 526
@@ -532,12 +532,13 @@ static struct nlm_host *next_host_state(struct hlist_head *cache,
532 host->h_state++; 532 host->h_state++;
533 533
534 nlm_get_host(host); 534 nlm_get_host(host);
535 goto out; 535 mutex_unlock(&nlm_host_mutex);
536 return host;
536 } 537 }
537 } 538 }
538out: 539
539 mutex_unlock(&nlm_host_mutex); 540 mutex_unlock(&nlm_host_mutex);
540 return host; 541 return NULL;
541} 542}
542 543
543/** 544/**
diff --git a/fs/minix/namei.c b/fs/minix/namei.c
index ce7337ddfdbf..6e6777f1b4b2 100644
--- a/fs/minix/namei.c
+++ b/fs/minix/namei.c
@@ -213,7 +213,6 @@ static int minix_rename(struct inode * old_dir, struct dentry *old_dentry,
213 new_de = minix_find_entry(new_dentry, &new_page); 213 new_de = minix_find_entry(new_dentry, &new_page);
214 if (!new_de) 214 if (!new_de)
215 goto out_dir; 215 goto out_dir;
216 inode_inc_link_count(old_inode);
217 minix_set_link(new_de, new_page, old_inode); 216 minix_set_link(new_de, new_page, old_inode);
218 new_inode->i_ctime = CURRENT_TIME_SEC; 217 new_inode->i_ctime = CURRENT_TIME_SEC;
219 if (dir_de) 218 if (dir_de)
@@ -225,18 +224,15 @@ static int minix_rename(struct inode * old_dir, struct dentry *old_dentry,
225 if (new_dir->i_nlink >= info->s_link_max) 224 if (new_dir->i_nlink >= info->s_link_max)
226 goto out_dir; 225 goto out_dir;
227 } 226 }
228 inode_inc_link_count(old_inode);
229 err = minix_add_link(new_dentry, old_inode); 227 err = minix_add_link(new_dentry, old_inode);
230 if (err) { 228 if (err)
231 inode_dec_link_count(old_inode);
232 goto out_dir; 229 goto out_dir;
233 }
234 if (dir_de) 230 if (dir_de)
235 inode_inc_link_count(new_dir); 231 inode_inc_link_count(new_dir);
236 } 232 }
237 233
238 minix_delete_entry(old_de, old_page); 234 minix_delete_entry(old_de, old_page);
239 inode_dec_link_count(old_inode); 235 mark_inode_dirty(old_inode);
240 236
241 if (dir_de) { 237 if (dir_de) {
242 minix_set_link(dir_de, dir_page, new_dir); 238 minix_set_link(dir_de, dir_page, new_dir);
diff --git a/fs/namei.c b/fs/namei.c
index 7d77f24d32a9..0087cf9c2c6b 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -455,14 +455,6 @@ static int nameidata_dentry_drop_rcu(struct nameidata *nd, struct dentry *dentry
455 struct fs_struct *fs = current->fs; 455 struct fs_struct *fs = current->fs;
456 struct dentry *parent = nd->path.dentry; 456 struct dentry *parent = nd->path.dentry;
457 457
458 /*
459 * It can be possible to revalidate the dentry that we started
460 * the path walk with. force_reval_path may also revalidate the
461 * dentry already committed to the nameidata.
462 */
463 if (unlikely(parent == dentry))
464 return nameidata_drop_rcu(nd);
465
466 BUG_ON(!(nd->flags & LOOKUP_RCU)); 458 BUG_ON(!(nd->flags & LOOKUP_RCU));
467 if (nd->root.mnt) { 459 if (nd->root.mnt) {
468 spin_lock(&fs->lock); 460 spin_lock(&fs->lock);
@@ -561,39 +553,25 @@ static inline int nameidata_drop_rcu_last_maybe(struct nameidata *nd)
561 */ 553 */
562void release_open_intent(struct nameidata *nd) 554void release_open_intent(struct nameidata *nd)
563{ 555{
564 if (nd->intent.open.file->f_path.dentry == NULL) 556 struct file *file = nd->intent.open.file;
565 put_filp(nd->intent.open.file);
566 else
567 fput(nd->intent.open.file);
568}
569
570/*
571 * Call d_revalidate and handle filesystems that request rcu-walk
572 * to be dropped. This may be called and return in rcu-walk mode,
573 * regardless of success or error. If -ECHILD is returned, the caller
574 * must return -ECHILD back up the path walk stack so path walk may
575 * be restarted in ref-walk mode.
576 */
577static int d_revalidate(struct dentry *dentry, struct nameidata *nd)
578{
579 int status;
580 557
581 status = dentry->d_op->d_revalidate(dentry, nd); 558 if (file && !IS_ERR(file)) {
582 if (status == -ECHILD) { 559 if (file->f_path.dentry == NULL)
583 if (nameidata_dentry_drop_rcu(nd, dentry)) 560 put_filp(file);
584 return status; 561 else
585 status = dentry->d_op->d_revalidate(dentry, nd); 562 fput(file);
586 } 563 }
564}
587 565
588 return status; 566static inline int d_revalidate(struct dentry *dentry, struct nameidata *nd)
567{
568 return dentry->d_op->d_revalidate(dentry, nd);
589} 569}
590 570
591static inline struct dentry * 571static struct dentry *
592do_revalidate(struct dentry *dentry, struct nameidata *nd) 572do_revalidate(struct dentry *dentry, struct nameidata *nd)
593{ 573{
594 int status; 574 int status = d_revalidate(dentry, nd);
595
596 status = d_revalidate(dentry, nd);
597 if (unlikely(status <= 0)) { 575 if (unlikely(status <= 0)) {
598 /* 576 /*
599 * The dentry failed validation. 577 * The dentry failed validation.
@@ -602,24 +580,39 @@ do_revalidate(struct dentry *dentry, struct nameidata *nd)
602 * to return a fail status. 580 * to return a fail status.
603 */ 581 */
604 if (status < 0) { 582 if (status < 0) {
605 /* If we're in rcu-walk, we don't have a ref */ 583 dput(dentry);
606 if (!(nd->flags & LOOKUP_RCU))
607 dput(dentry);
608 dentry = ERR_PTR(status); 584 dentry = ERR_PTR(status);
609 585 } else if (!d_invalidate(dentry)) {
610 } else { 586 dput(dentry);
611 /* Don't d_invalidate in rcu-walk mode */ 587 dentry = NULL;
612 if (nameidata_dentry_drop_rcu_maybe(nd, dentry))
613 return ERR_PTR(-ECHILD);
614 if (!d_invalidate(dentry)) {
615 dput(dentry);
616 dentry = NULL;
617 }
618 } 588 }
619 } 589 }
620 return dentry; 590 return dentry;
621} 591}
622 592
593static inline struct dentry *
594do_revalidate_rcu(struct dentry *dentry, struct nameidata *nd)
595{
596 int status = d_revalidate(dentry, nd);
597 if (likely(status > 0))
598 return dentry;
599 if (status == -ECHILD) {
600 if (nameidata_dentry_drop_rcu(nd, dentry))
601 return ERR_PTR(-ECHILD);
602 return do_revalidate(dentry, nd);
603 }
604 if (status < 0)
605 return ERR_PTR(status);
606 /* Don't d_invalidate in rcu-walk mode */
607 if (nameidata_dentry_drop_rcu(nd, dentry))
608 return ERR_PTR(-ECHILD);
609 if (!d_invalidate(dentry)) {
610 dput(dentry);
611 dentry = NULL;
612 }
613 return dentry;
614}
615
623static inline int need_reval_dot(struct dentry *dentry) 616static inline int need_reval_dot(struct dentry *dentry)
624{ 617{
625 if (likely(!(dentry->d_flags & DCACHE_OP_REVALIDATE))) 618 if (likely(!(dentry->d_flags & DCACHE_OP_REVALIDATE)))
@@ -664,9 +657,6 @@ force_reval_path(struct path *path, struct nameidata *nd)
664 return 0; 657 return 0;
665 658
666 if (!status) { 659 if (!status) {
667 /* Don't d_invalidate in rcu-walk mode */
668 if (nameidata_drop_rcu(nd))
669 return -ECHILD;
670 d_invalidate(dentry); 660 d_invalidate(dentry);
671 status = -ESTALE; 661 status = -ESTALE;
672 } 662 }
@@ -773,6 +763,8 @@ __do_follow_link(const struct path *link, struct nameidata *nd, void **p)
773 int error; 763 int error;
774 struct dentry *dentry = link->dentry; 764 struct dentry *dentry = link->dentry;
775 765
766 BUG_ON(nd->flags & LOOKUP_RCU);
767
776 touch_atime(link->mnt, dentry); 768 touch_atime(link->mnt, dentry);
777 nd_set_link(nd, NULL); 769 nd_set_link(nd, NULL);
778 770
@@ -803,10 +795,16 @@ __do_follow_link(const struct path *link, struct nameidata *nd, void **p)
803 * Without that kind of total limit, nasty chains of consecutive 795 * Without that kind of total limit, nasty chains of consecutive
804 * symlinks can cause almost arbitrarily long lookups. 796 * symlinks can cause almost arbitrarily long lookups.
805 */ 797 */
806static inline int do_follow_link(struct path *path, struct nameidata *nd) 798static inline int do_follow_link(struct inode *inode, struct path *path, struct nameidata *nd)
807{ 799{
808 void *cookie; 800 void *cookie;
809 int err = -ELOOP; 801 int err = -ELOOP;
802
803 /* We drop rcu-walk here */
804 if (nameidata_dentry_drop_rcu_maybe(nd, path->dentry))
805 return -ECHILD;
806 BUG_ON(inode != path->dentry->d_inode);
807
810 if (current->link_count >= MAX_NESTED_LINKS) 808 if (current->link_count >= MAX_NESTED_LINKS)
811 goto loop; 809 goto loop;
812 if (current->total_link_count >= 40) 810 if (current->total_link_count >= 40)
@@ -1251,9 +1249,15 @@ static int do_lookup(struct nameidata *nd, struct qstr *name,
1251 return -ECHILD; 1249 return -ECHILD;
1252 1250
1253 nd->seq = seq; 1251 nd->seq = seq;
1254 if (dentry->d_flags & DCACHE_OP_REVALIDATE) 1252 if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) {
1255 goto need_revalidate; 1253 dentry = do_revalidate_rcu(dentry, nd);
1256done2: 1254 if (!dentry)
1255 goto need_lookup;
1256 if (IS_ERR(dentry))
1257 goto fail;
1258 if (!(nd->flags & LOOKUP_RCU))
1259 goto done;
1260 }
1257 path->mnt = mnt; 1261 path->mnt = mnt;
1258 path->dentry = dentry; 1262 path->dentry = dentry;
1259 if (likely(__follow_mount_rcu(nd, path, inode, false))) 1263 if (likely(__follow_mount_rcu(nd, path, inode, false)))
@@ -1266,8 +1270,13 @@ done2:
1266 if (!dentry) 1270 if (!dentry)
1267 goto need_lookup; 1271 goto need_lookup;
1268found: 1272found:
1269 if (dentry->d_flags & DCACHE_OP_REVALIDATE) 1273 if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) {
1270 goto need_revalidate; 1274 dentry = do_revalidate(dentry, nd);
1275 if (!dentry)
1276 goto need_lookup;
1277 if (IS_ERR(dentry))
1278 goto fail;
1279 }
1271done: 1280done:
1272 path->mnt = mnt; 1281 path->mnt = mnt;
1273 path->dentry = dentry; 1282 path->dentry = dentry;
@@ -1309,16 +1318,6 @@ need_lookup:
1309 mutex_unlock(&dir->i_mutex); 1318 mutex_unlock(&dir->i_mutex);
1310 goto found; 1319 goto found;
1311 1320
1312need_revalidate:
1313 dentry = do_revalidate(dentry, nd);
1314 if (!dentry)
1315 goto need_lookup;
1316 if (IS_ERR(dentry))
1317 goto fail;
1318 if (nd->flags & LOOKUP_RCU)
1319 goto done2;
1320 goto done;
1321
1322fail: 1321fail:
1323 return PTR_ERR(dentry); 1322 return PTR_ERR(dentry);
1324} 1323}
@@ -1415,11 +1414,7 @@ exec_again:
1415 goto out_dput; 1414 goto out_dput;
1416 1415
1417 if (inode->i_op->follow_link) { 1416 if (inode->i_op->follow_link) {
1418 /* We commonly drop rcu-walk here */ 1417 err = do_follow_link(inode, &next, nd);
1419 if (nameidata_dentry_drop_rcu_maybe(nd, next.dentry))
1420 return -ECHILD;
1421 BUG_ON(inode != next.dentry->d_inode);
1422 err = do_follow_link(&next, nd);
1423 if (err) 1418 if (err)
1424 goto return_err; 1419 goto return_err;
1425 nd->inode = nd->path.dentry->d_inode; 1420 nd->inode = nd->path.dentry->d_inode;
@@ -1463,10 +1458,7 @@ last_component:
1463 break; 1458 break;
1464 if (inode && unlikely(inode->i_op->follow_link) && 1459 if (inode && unlikely(inode->i_op->follow_link) &&
1465 (lookup_flags & LOOKUP_FOLLOW)) { 1460 (lookup_flags & LOOKUP_FOLLOW)) {
1466 if (nameidata_dentry_drop_rcu_maybe(nd, next.dentry)) 1461 err = do_follow_link(inode, &next, nd);
1467 return -ECHILD;
1468 BUG_ON(inode != next.dentry->d_inode);
1469 err = do_follow_link(&next, nd);
1470 if (err) 1462 if (err)
1471 goto return_err; 1463 goto return_err;
1472 nd->inode = nd->path.dentry->d_inode; 1464 nd->inode = nd->path.dentry->d_inode;
@@ -1500,12 +1492,15 @@ return_reval:
1500 * We may need to check the cached dentry for staleness. 1492 * We may need to check the cached dentry for staleness.
1501 */ 1493 */
1502 if (need_reval_dot(nd->path.dentry)) { 1494 if (need_reval_dot(nd->path.dentry)) {
1495 if (nameidata_drop_rcu_last_maybe(nd))
1496 return -ECHILD;
1503 /* Note: we do not d_invalidate() */ 1497 /* Note: we do not d_invalidate() */
1504 err = d_revalidate(nd->path.dentry, nd); 1498 err = d_revalidate(nd->path.dentry, nd);
1505 if (!err) 1499 if (!err)
1506 err = -ESTALE; 1500 err = -ESTALE;
1507 if (err < 0) 1501 if (err < 0)
1508 break; 1502 break;
1503 return 0;
1509 } 1504 }
1510return_base: 1505return_base:
1511 if (nameidata_drop_rcu_last_maybe(nd)) 1506 if (nameidata_drop_rcu_last_maybe(nd))
@@ -2265,8 +2260,6 @@ static struct file *finish_open(struct nameidata *nd,
2265 return filp; 2260 return filp;
2266 2261
2267exit: 2262exit:
2268 if (!IS_ERR(nd->intent.open.file))
2269 release_open_intent(nd);
2270 path_put(&nd->path); 2263 path_put(&nd->path);
2271 return ERR_PTR(error); 2264 return ERR_PTR(error);
2272} 2265}
@@ -2389,8 +2382,6 @@ exit_mutex_unlock:
2389exit_dput: 2382exit_dput:
2390 path_put_conditional(path, nd); 2383 path_put_conditional(path, nd);
2391exit: 2384exit:
2392 if (!IS_ERR(nd->intent.open.file))
2393 release_open_intent(nd);
2394 path_put(&nd->path); 2385 path_put(&nd->path);
2395 return ERR_PTR(error); 2386 return ERR_PTR(error);
2396} 2387}
@@ -2477,6 +2468,7 @@ struct file *do_filp_open(int dfd, const char *pathname,
2477 } 2468 }
2478 audit_inode(pathname, nd.path.dentry); 2469 audit_inode(pathname, nd.path.dentry);
2479 filp = finish_open(&nd, open_flag, acc_mode); 2470 filp = finish_open(&nd, open_flag, acc_mode);
2471 release_open_intent(&nd);
2480 return filp; 2472 return filp;
2481 2473
2482creat: 2474creat:
@@ -2553,6 +2545,7 @@ out:
2553 path_put(&nd.root); 2545 path_put(&nd.root);
2554 if (filp == ERR_PTR(-ESTALE) && !(flags & LOOKUP_REVAL)) 2546 if (filp == ERR_PTR(-ESTALE) && !(flags & LOOKUP_REVAL))
2555 goto reval; 2547 goto reval;
2548 release_open_intent(&nd);
2556 return filp; 2549 return filp;
2557 2550
2558exit_dput: 2551exit_dput:
@@ -2560,8 +2553,6 @@ exit_dput:
2560out_path: 2553out_path:
2561 path_put(&nd.path); 2554 path_put(&nd.path);
2562out_filp: 2555out_filp:
2563 if (!IS_ERR(nd.intent.open.file))
2564 release_open_intent(&nd);
2565 filp = ERR_PTR(error); 2556 filp = ERR_PTR(error);
2566 goto out; 2557 goto out;
2567} 2558}
diff --git a/fs/namespace.c b/fs/namespace.c
index 7b0b95371696..d1edf26025dc 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1244,7 +1244,7 @@ static int do_umount(struct vfsmount *mnt, int flags)
1244 */ 1244 */
1245 br_write_lock(vfsmount_lock); 1245 br_write_lock(vfsmount_lock);
1246 if (mnt_get_count(mnt) != 2) { 1246 if (mnt_get_count(mnt) != 2) {
1247 br_write_lock(vfsmount_lock); 1247 br_write_unlock(vfsmount_lock);
1248 return -EBUSY; 1248 return -EBUSY;
1249 } 1249 }
1250 br_write_unlock(vfsmount_lock); 1250 br_write_unlock(vfsmount_lock);
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 199016528fcb..e3d294269058 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -135,33 +135,6 @@ out_err:
135 135
136#if defined(CONFIG_NFS_V4_1) 136#if defined(CONFIG_NFS_V4_1)
137/* 137/*
138 * * CB_SEQUENCE operations will fail until the callback sessionid is set.
139 * */
140int nfs4_set_callback_sessionid(struct nfs_client *clp)
141{
142 struct svc_serv *serv = clp->cl_rpcclient->cl_xprt->bc_serv;
143 struct nfs4_sessionid *bc_sid;
144
145 if (!serv->sv_bc_xprt)
146 return -EINVAL;
147
148 /* on success freed in xprt_free */
149 bc_sid = kmalloc(sizeof(struct nfs4_sessionid), GFP_KERNEL);
150 if (!bc_sid)
151 return -ENOMEM;
152 memcpy(bc_sid->data, &clp->cl_session->sess_id.data,
153 NFS4_MAX_SESSIONID_LEN);
154 spin_lock_bh(&serv->sv_cb_lock);
155 serv->sv_bc_xprt->xpt_bc_sid = bc_sid;
156 spin_unlock_bh(&serv->sv_cb_lock);
157 dprintk("%s set xpt_bc_sid=%u:%u:%u:%u for sv_bc_xprt %p\n", __func__,
158 ((u32 *)bc_sid->data)[0], ((u32 *)bc_sid->data)[1],
159 ((u32 *)bc_sid->data)[2], ((u32 *)bc_sid->data)[3],
160 serv->sv_bc_xprt);
161 return 0;
162}
163
164/*
165 * The callback service for NFSv4.1 callbacks 138 * The callback service for NFSv4.1 callbacks
166 */ 139 */
167static int 140static int
@@ -266,10 +239,6 @@ static inline void nfs_callback_bc_serv(u32 minorversion, struct rpc_xprt *xprt,
266 struct nfs_callback_data *cb_info) 239 struct nfs_callback_data *cb_info)
267{ 240{
268} 241}
269int nfs4_set_callback_sessionid(struct nfs_client *clp)
270{
271 return 0;
272}
273#endif /* CONFIG_NFS_V4_1 */ 242#endif /* CONFIG_NFS_V4_1 */
274 243
275/* 244/*
@@ -359,78 +328,58 @@ void nfs_callback_down(int minorversion)
359 mutex_unlock(&nfs_callback_mutex); 328 mutex_unlock(&nfs_callback_mutex);
360} 329}
361 330
362static int check_gss_callback_principal(struct nfs_client *clp, 331/* Boolean check of RPC_AUTH_GSS principal */
363 struct svc_rqst *rqstp) 332int
333check_gss_callback_principal(struct nfs_client *clp, struct svc_rqst *rqstp)
364{ 334{
365 struct rpc_clnt *r = clp->cl_rpcclient; 335 struct rpc_clnt *r = clp->cl_rpcclient;
366 char *p = svc_gss_principal(rqstp); 336 char *p = svc_gss_principal(rqstp);
367 337
338 if (rqstp->rq_authop->flavour != RPC_AUTH_GSS)
339 return 1;
340
368 /* No RPC_AUTH_GSS on NFSv4.1 back channel yet */ 341 /* No RPC_AUTH_GSS on NFSv4.1 back channel yet */
369 if (clp->cl_minorversion != 0) 342 if (clp->cl_minorversion != 0)
370 return SVC_DROP; 343 return 0;
371 /* 344 /*
372 * It might just be a normal user principal, in which case 345 * It might just be a normal user principal, in which case
373 * userspace won't bother to tell us the name at all. 346 * userspace won't bother to tell us the name at all.
374 */ 347 */
375 if (p == NULL) 348 if (p == NULL)
376 return SVC_DENIED; 349 return 0;
377 350
378 /* Expect a GSS_C_NT_HOSTBASED_NAME like "nfs@serverhostname" */ 351 /* Expect a GSS_C_NT_HOSTBASED_NAME like "nfs@serverhostname" */
379 352
380 if (memcmp(p, "nfs@", 4) != 0) 353 if (memcmp(p, "nfs@", 4) != 0)
381 return SVC_DENIED; 354 return 0;
382 p += 4; 355 p += 4;
383 if (strcmp(p, r->cl_server) != 0) 356 if (strcmp(p, r->cl_server) != 0)
384 return SVC_DENIED; 357 return 0;
385 return SVC_OK; 358 return 1;
386} 359}
387 360
388/* pg_authenticate method helper */ 361/*
389static struct nfs_client *nfs_cb_find_client(struct svc_rqst *rqstp) 362 * pg_authenticate method for nfsv4 callback threads.
390{ 363 *
391 struct nfs4_sessionid *sessionid = bc_xprt_sid(rqstp); 364 * The authflavor has been negotiated, so an incorrect flavor is a server
392 int is_cb_compound = rqstp->rq_proc == CB_COMPOUND ? 1 : 0; 365 * bug. Drop packets with incorrect authflavor.
393 366 *
394 dprintk("--> %s rq_proc %d\n", __func__, rqstp->rq_proc); 367 * All other checking done after NFS decoding where the nfs_client can be
395 if (svc_is_backchannel(rqstp)) 368 * found in nfs4_callback_compound
396 /* Sessionid (usually) set after CB_NULL ping */ 369 */
397 return nfs4_find_client_sessionid(svc_addr(rqstp), sessionid,
398 is_cb_compound);
399 else
400 /* No callback identifier in pg_authenticate */
401 return nfs4_find_client_no_ident(svc_addr(rqstp));
402}
403
404/* pg_authenticate method for nfsv4 callback threads. */
405static int nfs_callback_authenticate(struct svc_rqst *rqstp) 370static int nfs_callback_authenticate(struct svc_rqst *rqstp)
406{ 371{
407 struct nfs_client *clp;
408 RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]);
409 int ret = SVC_OK;
410
411 /* Don't talk to strangers */
412 clp = nfs_cb_find_client(rqstp);
413 if (clp == NULL)
414 return SVC_DROP;
415
416 dprintk("%s: %s NFSv4 callback!\n", __func__,
417 svc_print_addr(rqstp, buf, sizeof(buf)));
418
419 switch (rqstp->rq_authop->flavour) { 372 switch (rqstp->rq_authop->flavour) {
420 case RPC_AUTH_NULL: 373 case RPC_AUTH_NULL:
421 if (rqstp->rq_proc != CB_NULL) 374 if (rqstp->rq_proc != CB_NULL)
422 ret = SVC_DENIED; 375 return SVC_DROP;
423 break; 376 break;
424 case RPC_AUTH_UNIX: 377 case RPC_AUTH_GSS:
425 break; 378 /* No RPC_AUTH_GSS support yet in NFSv4.1 */
426 case RPC_AUTH_GSS: 379 if (svc_is_backchannel(rqstp))
427 ret = check_gss_callback_principal(clp, rqstp); 380 return SVC_DROP;
428 break;
429 default:
430 ret = SVC_DENIED;
431 } 381 }
432 nfs_put_client(clp); 382 return SVC_OK;
433 return ret;
434} 383}
435 384
436/* 385/*
diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h
index d3b44f9bd747..46d93ce7311b 100644
--- a/fs/nfs/callback.h
+++ b/fs/nfs/callback.h
@@ -7,6 +7,7 @@
7 */ 7 */
8#ifndef __LINUX_FS_NFS_CALLBACK_H 8#ifndef __LINUX_FS_NFS_CALLBACK_H
9#define __LINUX_FS_NFS_CALLBACK_H 9#define __LINUX_FS_NFS_CALLBACK_H
10#include <linux/sunrpc/svc.h>
10 11
11#define NFS4_CALLBACK 0x40000000 12#define NFS4_CALLBACK 0x40000000
12#define NFS4_CALLBACK_XDRSIZE 2048 13#define NFS4_CALLBACK_XDRSIZE 2048
@@ -37,7 +38,6 @@ enum nfs4_callback_opnum {
37struct cb_process_state { 38struct cb_process_state {
38 __be32 drc_status; 39 __be32 drc_status;
39 struct nfs_client *clp; 40 struct nfs_client *clp;
40 struct nfs4_sessionid *svc_sid; /* v4.1 callback service sessionid */
41}; 41};
42 42
43struct cb_compound_hdr_arg { 43struct cb_compound_hdr_arg {
@@ -168,7 +168,7 @@ extern unsigned nfs4_callback_layoutrecall(
168extern void nfs4_check_drain_bc_complete(struct nfs4_session *ses); 168extern void nfs4_check_drain_bc_complete(struct nfs4_session *ses);
169extern void nfs4_cb_take_slot(struct nfs_client *clp); 169extern void nfs4_cb_take_slot(struct nfs_client *clp);
170#endif /* CONFIG_NFS_V4_1 */ 170#endif /* CONFIG_NFS_V4_1 */
171 171extern int check_gss_callback_principal(struct nfs_client *, struct svc_rqst *);
172extern __be32 nfs4_callback_getattr(struct cb_getattrargs *args, 172extern __be32 nfs4_callback_getattr(struct cb_getattrargs *args,
173 struct cb_getattrres *res, 173 struct cb_getattrres *res,
174 struct cb_process_state *cps); 174 struct cb_process_state *cps);
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 4bb91cb2620d..89587573fe50 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -373,17 +373,11 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
373{ 373{
374 struct nfs_client *clp; 374 struct nfs_client *clp;
375 int i; 375 int i;
376 __be32 status; 376 __be32 status = htonl(NFS4ERR_BADSESSION);
377 377
378 cps->clp = NULL; 378 cps->clp = NULL;
379 379
380 status = htonl(NFS4ERR_BADSESSION); 380 clp = nfs4_find_client_sessionid(args->csa_addr, &args->csa_sessionid);
381 /* Incoming session must match the callback session */
382 if (memcmp(&args->csa_sessionid, cps->svc_sid, NFS4_MAX_SESSIONID_LEN))
383 goto out;
384
385 clp = nfs4_find_client_sessionid(args->csa_addr,
386 &args->csa_sessionid, 1);
387 if (clp == NULL) 381 if (clp == NULL)
388 goto out; 382 goto out;
389 383
@@ -414,9 +408,9 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
414 res->csr_highestslotid = NFS41_BC_MAX_CALLBACKS - 1; 408 res->csr_highestslotid = NFS41_BC_MAX_CALLBACKS - 1;
415 res->csr_target_highestslotid = NFS41_BC_MAX_CALLBACKS - 1; 409 res->csr_target_highestslotid = NFS41_BC_MAX_CALLBACKS - 1;
416 nfs4_cb_take_slot(clp); 410 nfs4_cb_take_slot(clp);
417 cps->clp = clp; /* put in nfs4_callback_compound */
418 411
419out: 412out:
413 cps->clp = clp; /* put in nfs4_callback_compound */
420 for (i = 0; i < args->csa_nrclists; i++) 414 for (i = 0; i < args->csa_nrclists; i++)
421 kfree(args->csa_rclists[i].rcl_refcalls); 415 kfree(args->csa_rclists[i].rcl_refcalls);
422 kfree(args->csa_rclists); 416 kfree(args->csa_rclists);
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index 23112c263f81..14e0f9371d14 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -794,10 +794,9 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
794 794
795 if (hdr_arg.minorversion == 0) { 795 if (hdr_arg.minorversion == 0) {
796 cps.clp = nfs4_find_client_ident(hdr_arg.cb_ident); 796 cps.clp = nfs4_find_client_ident(hdr_arg.cb_ident);
797 if (!cps.clp) 797 if (!cps.clp || !check_gss_callback_principal(cps.clp, rqstp))
798 return rpc_drop_reply; 798 return rpc_drop_reply;
799 } else 799 }
800 cps.svc_sid = bc_xprt_sid(rqstp);
801 800
802 hdr_res.taglen = hdr_arg.taglen; 801 hdr_res.taglen = hdr_arg.taglen;
803 hdr_res.tag = hdr_arg.tag; 802 hdr_res.tag = hdr_arg.tag;
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 192f2f860265..bd3ca32879e7 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -1206,16 +1206,11 @@ nfs4_find_client_ident(int cb_ident)
1206 * For CB_COMPOUND calls, find a client by IP address, protocol version, 1206 * For CB_COMPOUND calls, find a client by IP address, protocol version,
1207 * minorversion, and sessionID 1207 * minorversion, and sessionID
1208 * 1208 *
1209 * CREATE_SESSION triggers a CB_NULL ping from servers. The callback service
1210 * sessionid can only be set after the CREATE_SESSION return, so a CB_NULL
1211 * can arrive before the callback sessionid is set. For CB_NULL calls,
1212 * find a client by IP address protocol version, and minorversion.
1213 *
1214 * Returns NULL if no such client 1209 * Returns NULL if no such client
1215 */ 1210 */
1216struct nfs_client * 1211struct nfs_client *
1217nfs4_find_client_sessionid(const struct sockaddr *addr, 1212nfs4_find_client_sessionid(const struct sockaddr *addr,
1218 struct nfs4_sessionid *sid, int is_cb_compound) 1213 struct nfs4_sessionid *sid)
1219{ 1214{
1220 struct nfs_client *clp; 1215 struct nfs_client *clp;
1221 1216
@@ -1227,9 +1222,9 @@ nfs4_find_client_sessionid(const struct sockaddr *addr,
1227 if (!nfs4_has_session(clp)) 1222 if (!nfs4_has_session(clp))
1228 continue; 1223 continue;
1229 1224
1230 /* Match sessionid unless cb_null call*/ 1225 /* Match sessionid*/
1231 if (is_cb_compound && (memcmp(clp->cl_session->sess_id.data, 1226 if (memcmp(clp->cl_session->sess_id.data,
1232 sid->data, NFS4_MAX_SESSIONID_LEN) != 0)) 1227 sid->data, NFS4_MAX_SESSIONID_LEN) != 0)
1233 continue; 1228 continue;
1234 1229
1235 atomic_inc(&clp->cl_count); 1230 atomic_inc(&clp->cl_count);
@@ -1244,7 +1239,7 @@ nfs4_find_client_sessionid(const struct sockaddr *addr,
1244 1239
1245struct nfs_client * 1240struct nfs_client *
1246nfs4_find_client_sessionid(const struct sockaddr *addr, 1241nfs4_find_client_sessionid(const struct sockaddr *addr,
1247 struct nfs4_sessionid *sid, int is_cb_compound) 1242 struct nfs4_sessionid *sid)
1248{ 1243{
1249 return NULL; 1244 return NULL;
1250} 1245}
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 364e4328f392..bbbc6bf5cb2e 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -23,8 +23,6 @@
23 23
24static void nfs_do_free_delegation(struct nfs_delegation *delegation) 24static void nfs_do_free_delegation(struct nfs_delegation *delegation)
25{ 25{
26 if (delegation->cred)
27 put_rpccred(delegation->cred);
28 kfree(delegation); 26 kfree(delegation);
29} 27}
30 28
@@ -37,6 +35,10 @@ static void nfs_free_delegation_callback(struct rcu_head *head)
37 35
38static void nfs_free_delegation(struct nfs_delegation *delegation) 36static void nfs_free_delegation(struct nfs_delegation *delegation)
39{ 37{
38 if (delegation->cred) {
39 put_rpccred(delegation->cred);
40 delegation->cred = NULL;
41 }
40 call_rcu(&delegation->rcu, nfs_free_delegation_callback); 42 call_rcu(&delegation->rcu, nfs_free_delegation_callback);
41} 43}
42 44
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index e6ace0d93c71..9943a75bb6d1 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -407,15 +407,18 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
407 pos += vec->iov_len; 407 pos += vec->iov_len;
408 } 408 }
409 409
410 /*
411 * If no bytes were started, return the error, and let the
412 * generic layer handle the completion.
413 */
414 if (requested_bytes == 0) {
415 nfs_direct_req_release(dreq);
416 return result < 0 ? result : -EIO;
417 }
418
410 if (put_dreq(dreq)) 419 if (put_dreq(dreq))
411 nfs_direct_complete(dreq); 420 nfs_direct_complete(dreq);
412 421 return 0;
413 if (requested_bytes != 0)
414 return 0;
415
416 if (result < 0)
417 return result;
418 return -EIO;
419} 422}
420 423
421static ssize_t nfs_direct_read(struct kiocb *iocb, const struct iovec *iov, 424static ssize_t nfs_direct_read(struct kiocb *iocb, const struct iovec *iov,
@@ -841,15 +844,18 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
841 pos += vec->iov_len; 844 pos += vec->iov_len;
842 } 845 }
843 846
847 /*
848 * If no bytes were started, return the error, and let the
849 * generic layer handle the completion.
850 */
851 if (requested_bytes == 0) {
852 nfs_direct_req_release(dreq);
853 return result < 0 ? result : -EIO;
854 }
855
844 if (put_dreq(dreq)) 856 if (put_dreq(dreq))
845 nfs_direct_write_complete(dreq, dreq->inode); 857 nfs_direct_write_complete(dreq, dreq->inode);
846 858 return 0;
847 if (requested_bytes != 0)
848 return 0;
849
850 if (result < 0)
851 return result;
852 return -EIO;
853} 859}
854 860
855static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov, 861static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov,
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index d8512423ba72..1cc600e77bb4 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -881,9 +881,10 @@ out:
881 return ret; 881 return ret;
882} 882}
883 883
884static void nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr) 884static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr)
885{ 885{
886 struct nfs_inode *nfsi = NFS_I(inode); 886 struct nfs_inode *nfsi = NFS_I(inode);
887 unsigned long ret = 0;
887 888
888 if ((fattr->valid & NFS_ATTR_FATTR_PRECHANGE) 889 if ((fattr->valid & NFS_ATTR_FATTR_PRECHANGE)
889 && (fattr->valid & NFS_ATTR_FATTR_CHANGE) 890 && (fattr->valid & NFS_ATTR_FATTR_CHANGE)
@@ -891,25 +892,32 @@ static void nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr)
891 nfsi->change_attr = fattr->change_attr; 892 nfsi->change_attr = fattr->change_attr;
892 if (S_ISDIR(inode->i_mode)) 893 if (S_ISDIR(inode->i_mode))
893 nfsi->cache_validity |= NFS_INO_INVALID_DATA; 894 nfsi->cache_validity |= NFS_INO_INVALID_DATA;
895 ret |= NFS_INO_INVALID_ATTR;
894 } 896 }
895 /* If we have atomic WCC data, we may update some attributes */ 897 /* If we have atomic WCC data, we may update some attributes */
896 if ((fattr->valid & NFS_ATTR_FATTR_PRECTIME) 898 if ((fattr->valid & NFS_ATTR_FATTR_PRECTIME)
897 && (fattr->valid & NFS_ATTR_FATTR_CTIME) 899 && (fattr->valid & NFS_ATTR_FATTR_CTIME)
898 && timespec_equal(&inode->i_ctime, &fattr->pre_ctime)) 900 && timespec_equal(&inode->i_ctime, &fattr->pre_ctime)) {
899 memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); 901 memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime));
902 ret |= NFS_INO_INVALID_ATTR;
903 }
900 904
901 if ((fattr->valid & NFS_ATTR_FATTR_PREMTIME) 905 if ((fattr->valid & NFS_ATTR_FATTR_PREMTIME)
902 && (fattr->valid & NFS_ATTR_FATTR_MTIME) 906 && (fattr->valid & NFS_ATTR_FATTR_MTIME)
903 && timespec_equal(&inode->i_mtime, &fattr->pre_mtime)) { 907 && timespec_equal(&inode->i_mtime, &fattr->pre_mtime)) {
904 memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); 908 memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime));
905 if (S_ISDIR(inode->i_mode)) 909 if (S_ISDIR(inode->i_mode))
906 nfsi->cache_validity |= NFS_INO_INVALID_DATA; 910 nfsi->cache_validity |= NFS_INO_INVALID_DATA;
911 ret |= NFS_INO_INVALID_ATTR;
907 } 912 }
908 if ((fattr->valid & NFS_ATTR_FATTR_PRESIZE) 913 if ((fattr->valid & NFS_ATTR_FATTR_PRESIZE)
909 && (fattr->valid & NFS_ATTR_FATTR_SIZE) 914 && (fattr->valid & NFS_ATTR_FATTR_SIZE)
910 && i_size_read(inode) == nfs_size_to_loff_t(fattr->pre_size) 915 && i_size_read(inode) == nfs_size_to_loff_t(fattr->pre_size)
911 && nfsi->npages == 0) 916 && nfsi->npages == 0) {
912 i_size_write(inode, nfs_size_to_loff_t(fattr->size)); 917 i_size_write(inode, nfs_size_to_loff_t(fattr->size));
918 ret |= NFS_INO_INVALID_ATTR;
919 }
920 return ret;
913} 921}
914 922
915/** 923/**
@@ -1223,7 +1231,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1223 | NFS_INO_REVAL_PAGECACHE); 1231 | NFS_INO_REVAL_PAGECACHE);
1224 1232
1225 /* Do atomic weak cache consistency updates */ 1233 /* Do atomic weak cache consistency updates */
1226 nfs_wcc_update_inode(inode, fattr); 1234 invalid |= nfs_wcc_update_inode(inode, fattr);
1227 1235
1228 /* More cache consistency checks */ 1236 /* More cache consistency checks */
1229 if (fattr->valid & NFS_ATTR_FATTR_CHANGE) { 1237 if (fattr->valid & NFS_ATTR_FATTR_CHANGE) {
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 4644f04b4b46..cf9fdbdabc67 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -133,8 +133,7 @@ extern void nfs_put_client(struct nfs_client *);
133extern struct nfs_client *nfs4_find_client_no_ident(const struct sockaddr *); 133extern struct nfs_client *nfs4_find_client_no_ident(const struct sockaddr *);
134extern struct nfs_client *nfs4_find_client_ident(int); 134extern struct nfs_client *nfs4_find_client_ident(int);
135extern struct nfs_client * 135extern struct nfs_client *
136nfs4_find_client_sessionid(const struct sockaddr *, struct nfs4_sessionid *, 136nfs4_find_client_sessionid(const struct sockaddr *, struct nfs4_sessionid *);
137 int);
138extern struct nfs_server *nfs_create_server( 137extern struct nfs_server *nfs_create_server(
139 const struct nfs_parsed_mount_data *, 138 const struct nfs_parsed_mount_data *,
140 struct nfs_fh *); 139 struct nfs_fh *);
diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c
index 9f88c5f4c7e2..274342771655 100644
--- a/fs/nfs/nfs3acl.c
+++ b/fs/nfs/nfs3acl.c
@@ -311,8 +311,8 @@ static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
311 if (!nfs_server_capable(inode, NFS_CAP_ACLS)) 311 if (!nfs_server_capable(inode, NFS_CAP_ACLS))
312 goto out; 312 goto out;
313 313
314 /* We are doing this here, because XDR marshalling can only 314 /* We are doing this here because XDR marshalling does not
315 return -ENOMEM. */ 315 * return any results, it BUGs. */
316 status = -ENOSPC; 316 status = -ENOSPC;
317 if (acl != NULL && acl->a_count > NFS_ACL_MAX_ENTRIES) 317 if (acl != NULL && acl->a_count > NFS_ACL_MAX_ENTRIES)
318 goto out; 318 goto out;
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 01c5e8b1941d..183c6b123d0f 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -1328,10 +1328,13 @@ static void nfs3_xdr_enc_setacl3args(struct rpc_rqst *req,
1328 1328
1329 encode_nfs_fh3(xdr, NFS_FH(args->inode)); 1329 encode_nfs_fh3(xdr, NFS_FH(args->inode));
1330 encode_uint32(xdr, args->mask); 1330 encode_uint32(xdr, args->mask);
1331
1332 base = req->rq_slen;
1331 if (args->npages != 0) 1333 if (args->npages != 0)
1332 xdr_write_pages(xdr, args->pages, 0, args->len); 1334 xdr_write_pages(xdr, args->pages, 0, args->len);
1335 else
1336 xdr_reserve_space(xdr, NFS_ACL_INLINE_BUFSIZE);
1333 1337
1334 base = req->rq_slen;
1335 error = nfsacl_encode(xdr->buf, base, args->inode, 1338 error = nfsacl_encode(xdr->buf, base, args->inode,
1336 (args->mask & NFS_ACL) ? 1339 (args->mask & NFS_ACL) ?
1337 args->acl_access : NULL, 1, 0); 1340 args->acl_access : NULL, 1, 0);
diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c
index 51fe64ace55a..f5c9b125e8cc 100644
--- a/fs/nfs/nfs4filelayoutdev.c
+++ b/fs/nfs/nfs4filelayoutdev.c
@@ -214,7 +214,7 @@ decode_and_add_ds(__be32 **pp, struct inode *inode)
214 214
215 /* ipv6 length plus port is legal */ 215 /* ipv6 length plus port is legal */
216 if (rlen > INET6_ADDRSTRLEN + 8) { 216 if (rlen > INET6_ADDRSTRLEN + 8) {
217 dprintk("%s Invalid address, length %d\n", __func__, 217 dprintk("%s: Invalid address, length %d\n", __func__,
218 rlen); 218 rlen);
219 goto out_err; 219 goto out_err;
220 } 220 }
@@ -225,6 +225,11 @@ decode_and_add_ds(__be32 **pp, struct inode *inode)
225 /* replace the port dots with dashes for the in4_pton() delimiter*/ 225 /* replace the port dots with dashes for the in4_pton() delimiter*/
226 for (i = 0; i < 2; i++) { 226 for (i = 0; i < 2; i++) {
227 char *res = strrchr(buf, '.'); 227 char *res = strrchr(buf, '.');
228 if (!res) {
229 dprintk("%s: Failed finding expected dots in port\n",
230 __func__);
231 goto out_free;
232 }
228 *res = '-'; 233 *res = '-';
229 } 234 }
230 235
@@ -240,7 +245,7 @@ decode_and_add_ds(__be32 **pp, struct inode *inode)
240 port = htons((tmp[0] << 8) | (tmp[1])); 245 port = htons((tmp[0] << 8) | (tmp[1]));
241 246
242 ds = nfs4_pnfs_ds_add(inode, ip_addr, port); 247 ds = nfs4_pnfs_ds_add(inode, ip_addr, port);
243 dprintk("%s Decoded address and port %s\n", __func__, buf); 248 dprintk("%s: Decoded address and port %s\n", __func__, buf);
244out_free: 249out_free:
245 kfree(buf); 250 kfree(buf);
246out_err: 251out_err:
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 9d992b0346e3..1ff76acc7e98 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -50,6 +50,8 @@
50#include <linux/module.h> 50#include <linux/module.h>
51#include <linux/sunrpc/bc_xprt.h> 51#include <linux/sunrpc/bc_xprt.h>
52#include <linux/xattr.h> 52#include <linux/xattr.h>
53#include <linux/utsname.h>
54#include <linux/mm.h>
53 55
54#include "nfs4_fs.h" 56#include "nfs4_fs.h"
55#include "delegation.h" 57#include "delegation.h"
@@ -3251,6 +3253,35 @@ static void buf_to_pages(const void *buf, size_t buflen,
3251 } 3253 }
3252} 3254}
3253 3255
3256static int buf_to_pages_noslab(const void *buf, size_t buflen,
3257 struct page **pages, unsigned int *pgbase)
3258{
3259 struct page *newpage, **spages;
3260 int rc = 0;
3261 size_t len;
3262 spages = pages;
3263
3264 do {
3265 len = min(PAGE_CACHE_SIZE, buflen);
3266 newpage = alloc_page(GFP_KERNEL);
3267
3268 if (newpage == NULL)
3269 goto unwind;
3270 memcpy(page_address(newpage), buf, len);
3271 buf += len;
3272 buflen -= len;
3273 *pages++ = newpage;
3274 rc++;
3275 } while (buflen != 0);
3276
3277 return rc;
3278
3279unwind:
3280 for(; rc > 0; rc--)
3281 __free_page(spages[rc-1]);
3282 return -ENOMEM;
3283}
3284
3254struct nfs4_cached_acl { 3285struct nfs4_cached_acl {
3255 int cached; 3286 int cached;
3256 size_t len; 3287 size_t len;
@@ -3419,13 +3450,23 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl
3419 .rpc_argp = &arg, 3450 .rpc_argp = &arg,
3420 .rpc_resp = &res, 3451 .rpc_resp = &res,
3421 }; 3452 };
3422 int ret; 3453 int ret, i;
3423 3454
3424 if (!nfs4_server_supports_acls(server)) 3455 if (!nfs4_server_supports_acls(server))
3425 return -EOPNOTSUPP; 3456 return -EOPNOTSUPP;
3457 i = buf_to_pages_noslab(buf, buflen, arg.acl_pages, &arg.acl_pgbase);
3458 if (i < 0)
3459 return i;
3426 nfs_inode_return_delegation(inode); 3460 nfs_inode_return_delegation(inode);
3427 buf_to_pages(buf, buflen, arg.acl_pages, &arg.acl_pgbase);
3428 ret = nfs4_call_sync(server, &msg, &arg, &res, 1); 3461 ret = nfs4_call_sync(server, &msg, &arg, &res, 1);
3462
3463 /*
3464 * Free each page after tx, so the only ref left is
3465 * held by the network stack
3466 */
3467 for (; i > 0; i--)
3468 put_page(pages[i-1]);
3469
3429 /* 3470 /*
3430 * Acl update can result in inode attribute update. 3471 * Acl update can result in inode attribute update.
3431 * so mark the attribute cache invalid. 3472 * so mark the attribute cache invalid.
@@ -4572,27 +4613,16 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
4572 *p = htonl((u32)clp->cl_boot_time.tv_nsec); 4613 *p = htonl((u32)clp->cl_boot_time.tv_nsec);
4573 args.verifier = &verifier; 4614 args.verifier = &verifier;
4574 4615
4575 while (1) { 4616 args.id_len = scnprintf(args.id, sizeof(args.id),
4576 args.id_len = scnprintf(args.id, sizeof(args.id), 4617 "%s/%s.%s/%u",
4577 "%s/%s %u", 4618 clp->cl_ipaddr,
4578 clp->cl_ipaddr, 4619 init_utsname()->nodename,
4579 rpc_peeraddr2str(clp->cl_rpcclient, 4620 init_utsname()->domainname,
4580 RPC_DISPLAY_ADDR), 4621 clp->cl_rpcclient->cl_auth->au_flavor);
4581 clp->cl_id_uniquifier);
4582
4583 status = rpc_call_sync(clp->cl_rpcclient, &msg, 0);
4584
4585 if (status != -NFS4ERR_CLID_INUSE)
4586 break;
4587
4588 if (signalled())
4589 break;
4590
4591 if (++clp->cl_id_uniquifier == 0)
4592 break;
4593 }
4594 4622
4595 status = nfs4_check_cl_exchange_flags(clp->cl_exchange_flags); 4623 status = rpc_call_sync(clp->cl_rpcclient, &msg, 0);
4624 if (!status)
4625 status = nfs4_check_cl_exchange_flags(clp->cl_exchange_flags);
4596 dprintk("<-- %s status= %d\n", __func__, status); 4626 dprintk("<-- %s status= %d\n", __func__, status);
4597 return status; 4627 return status;
4598} 4628}
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 2336d532cf66..e6742b57a04c 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -232,12 +232,6 @@ int nfs41_init_clientid(struct nfs_client *clp, struct rpc_cred *cred)
232 status = nfs4_proc_create_session(clp); 232 status = nfs4_proc_create_session(clp);
233 if (status != 0) 233 if (status != 0)
234 goto out; 234 goto out;
235 status = nfs4_set_callback_sessionid(clp);
236 if (status != 0) {
237 printk(KERN_WARNING "Sessionid not set. No callback service\n");
238 nfs_callback_down(1);
239 status = 0;
240 }
241 nfs41_setup_state_renewal(clp); 235 nfs41_setup_state_renewal(clp);
242 nfs_mark_client_ready(clp, NFS_CS_READY); 236 nfs_mark_client_ready(clp, NFS_CS_READY);
243out: 237out:
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 2ab8e5cb8f59..4e2c168b6ee9 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -6086,11 +6086,11 @@ int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
6086 __be32 *p = xdr_inline_decode(xdr, 4); 6086 __be32 *p = xdr_inline_decode(xdr, 4);
6087 if (unlikely(!p)) 6087 if (unlikely(!p))
6088 goto out_overflow; 6088 goto out_overflow;
6089 if (!ntohl(*p++)) { 6089 if (*p == xdr_zero) {
6090 p = xdr_inline_decode(xdr, 4); 6090 p = xdr_inline_decode(xdr, 4);
6091 if (unlikely(!p)) 6091 if (unlikely(!p))
6092 goto out_overflow; 6092 goto out_overflow;
6093 if (!ntohl(*p++)) 6093 if (*p == xdr_zero)
6094 return -EAGAIN; 6094 return -EAGAIN;
6095 entry->eof = 1; 6095 entry->eof = 1;
6096 return -EBADCOOKIE; 6096 return -EBADCOOKIE;
@@ -6101,7 +6101,7 @@ int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
6101 goto out_overflow; 6101 goto out_overflow;
6102 entry->prev_cookie = entry->cookie; 6102 entry->prev_cookie = entry->cookie;
6103 p = xdr_decode_hyper(p, &entry->cookie); 6103 p = xdr_decode_hyper(p, &entry->cookie);
6104 entry->len = ntohl(*p++); 6104 entry->len = be32_to_cpup(p);
6105 6105
6106 p = xdr_inline_decode(xdr, entry->len); 6106 p = xdr_inline_decode(xdr, entry->len);
6107 if (unlikely(!p)) 6107 if (unlikely(!p))
@@ -6132,9 +6132,6 @@ int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
6132 if (entry->fattr->valid & NFS_ATTR_FATTR_TYPE) 6132 if (entry->fattr->valid & NFS_ATTR_FATTR_TYPE)
6133 entry->d_type = nfs_umode_to_dtype(entry->fattr->mode); 6133 entry->d_type = nfs_umode_to_dtype(entry->fattr->mode);
6134 6134
6135 if (verify_attr_len(xdr, p, len) < 0)
6136 goto out_overflow;
6137
6138 return 0; 6135 return 0;
6139 6136
6140out_overflow: 6137out_overflow:
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index bc4089769735..1b1bc1a0fb0a 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -951,7 +951,7 @@ pnfs_put_deviceid_cache(struct nfs_client *clp)
951{ 951{
952 struct pnfs_deviceid_cache *local = clp->cl_devid_cache; 952 struct pnfs_deviceid_cache *local = clp->cl_devid_cache;
953 953
954 dprintk("--> %s cl_devid_cache %p\n", __func__, clp->cl_devid_cache); 954 dprintk("--> %s ({%d})\n", __func__, atomic_read(&local->dc_ref));
955 if (atomic_dec_and_lock(&local->dc_ref, &clp->cl_lock)) { 955 if (atomic_dec_and_lock(&local->dc_ref, &clp->cl_lock)) {
956 int i; 956 int i;
957 /* Verify cache is empty */ 957 /* Verify cache is empty */
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 10d648ea128b..c8278f4046cb 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -932,7 +932,7 @@ out_bad:
932 while (!list_empty(&list)) { 932 while (!list_empty(&list)) {
933 data = list_entry(list.next, struct nfs_write_data, pages); 933 data = list_entry(list.next, struct nfs_write_data, pages);
934 list_del(&data->pages); 934 list_del(&data->pages);
935 nfs_writedata_release(data); 935 nfs_writedata_free(data);
936 } 936 }
937 nfs_redirty_request(req); 937 nfs_redirty_request(req);
938 return -ENOMEM; 938 return -ENOMEM;
diff --git a/fs/nfs_common/nfsacl.c b/fs/nfs_common/nfsacl.c
index fc1c52571c03..84c27d69d421 100644
--- a/fs/nfs_common/nfsacl.c
+++ b/fs/nfs_common/nfsacl.c
@@ -42,6 +42,11 @@ struct nfsacl_encode_desc {
42 gid_t gid; 42 gid_t gid;
43}; 43};
44 44
45struct nfsacl_simple_acl {
46 struct posix_acl acl;
47 struct posix_acl_entry ace[4];
48};
49
45static int 50static int
46xdr_nfsace_encode(struct xdr_array2_desc *desc, void *elem) 51xdr_nfsace_encode(struct xdr_array2_desc *desc, void *elem)
47{ 52{
@@ -72,9 +77,20 @@ xdr_nfsace_encode(struct xdr_array2_desc *desc, void *elem)
72 return 0; 77 return 0;
73} 78}
74 79
75unsigned int 80/**
76nfsacl_encode(struct xdr_buf *buf, unsigned int base, struct inode *inode, 81 * nfsacl_encode - Encode an NFSv3 ACL
77 struct posix_acl *acl, int encode_entries, int typeflag) 82 *
83 * @buf: destination xdr_buf to contain XDR encoded ACL
84 * @base: byte offset in xdr_buf where XDR'd ACL begins
85 * @inode: inode of file whose ACL this is
86 * @acl: posix_acl to encode
87 * @encode_entries: whether to encode ACEs as well
88 * @typeflag: ACL type: NFS_ACL_DEFAULT or zero
89 *
90 * Returns size of encoded ACL in bytes or a negative errno value.
91 */
92int nfsacl_encode(struct xdr_buf *buf, unsigned int base, struct inode *inode,
93 struct posix_acl *acl, int encode_entries, int typeflag)
78{ 94{
79 int entries = (acl && acl->a_count) ? max_t(int, acl->a_count, 4) : 0; 95 int entries = (acl && acl->a_count) ? max_t(int, acl->a_count, 4) : 0;
80 struct nfsacl_encode_desc nfsacl_desc = { 96 struct nfsacl_encode_desc nfsacl_desc = {
@@ -88,17 +104,22 @@ nfsacl_encode(struct xdr_buf *buf, unsigned int base, struct inode *inode,
88 .uid = inode->i_uid, 104 .uid = inode->i_uid,
89 .gid = inode->i_gid, 105 .gid = inode->i_gid,
90 }; 106 };
107 struct nfsacl_simple_acl aclbuf;
91 int err; 108 int err;
92 struct posix_acl *acl2 = NULL;
93 109
94 if (entries > NFS_ACL_MAX_ENTRIES || 110 if (entries > NFS_ACL_MAX_ENTRIES ||
95 xdr_encode_word(buf, base, entries)) 111 xdr_encode_word(buf, base, entries))
96 return -EINVAL; 112 return -EINVAL;
97 if (encode_entries && acl && acl->a_count == 3) { 113 if (encode_entries && acl && acl->a_count == 3) {
98 /* Fake up an ACL_MASK entry. */ 114 struct posix_acl *acl2 = &aclbuf.acl;
99 acl2 = posix_acl_alloc(4, GFP_KERNEL); 115
100 if (!acl2) 116 /* Avoid the use of posix_acl_alloc(). nfsacl_encode() is
101 return -ENOMEM; 117 * invoked in contexts where a memory allocation failure is
118 * fatal. Fortunately this fake ACL is small enough to
119 * construct on the stack. */
120 memset(acl2, 0, sizeof(acl2));
121 posix_acl_init(acl2, 4);
122
102 /* Insert entries in canonical order: other orders seem 123 /* Insert entries in canonical order: other orders seem
103 to confuse Solaris VxFS. */ 124 to confuse Solaris VxFS. */
104 acl2->a_entries[0] = acl->a_entries[0]; /* ACL_USER_OBJ */ 125 acl2->a_entries[0] = acl->a_entries[0]; /* ACL_USER_OBJ */
@@ -109,8 +130,6 @@ nfsacl_encode(struct xdr_buf *buf, unsigned int base, struct inode *inode,
109 nfsacl_desc.acl = acl2; 130 nfsacl_desc.acl = acl2;
110 } 131 }
111 err = xdr_encode_array2(buf, base + 4, &nfsacl_desc.desc); 132 err = xdr_encode_array2(buf, base + 4, &nfsacl_desc.desc);
112 if (acl2)
113 posix_acl_release(acl2);
114 if (!err) 133 if (!err)
115 err = 8 + nfsacl_desc.desc.elem_size * 134 err = 8 + nfsacl_desc.desc.elem_size *
116 nfsacl_desc.desc.array_len; 135 nfsacl_desc.desc.array_len;
@@ -224,9 +243,18 @@ posix_acl_from_nfsacl(struct posix_acl *acl)
224 return 0; 243 return 0;
225} 244}
226 245
227unsigned int 246/**
228nfsacl_decode(struct xdr_buf *buf, unsigned int base, unsigned int *aclcnt, 247 * nfsacl_decode - Decode an NFSv3 ACL
229 struct posix_acl **pacl) 248 *
249 * @buf: xdr_buf containing XDR'd ACL data to decode
250 * @base: byte offset in xdr_buf where XDR'd ACL begins
251 * @aclcnt: count of ACEs in decoded posix_acl
252 * @pacl: buffer in which to place decoded posix_acl
253 *
254 * Returns the length of the decoded ACL in bytes, or a negative errno value.
255 */
256int nfsacl_decode(struct xdr_buf *buf, unsigned int base, unsigned int *aclcnt,
257 struct posix_acl **pacl)
230{ 258{
231 struct nfsacl_decode_desc nfsacl_desc = { 259 struct nfsacl_decode_desc nfsacl_desc = {
232 .desc = { 260 .desc = {
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 3be975e18919..cde36cb0f348 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -484,7 +484,7 @@ static int decode_cb_sequence4res(struct xdr_stream *xdr,
484out: 484out:
485 return status; 485 return status;
486out_default: 486out_default:
487 return nfs_cb_stat_to_errno(status); 487 return nfs_cb_stat_to_errno(nfserr);
488} 488}
489 489
490/* 490/*
@@ -564,11 +564,9 @@ static int nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp,
564 if (unlikely(status)) 564 if (unlikely(status))
565 goto out; 565 goto out;
566 if (unlikely(nfserr != NFS4_OK)) 566 if (unlikely(nfserr != NFS4_OK))
567 goto out_default; 567 status = nfs_cb_stat_to_errno(nfserr);
568out: 568out:
569 return status; 569 return status;
570out_default:
571 return nfs_cb_stat_to_errno(status);
572} 570}
573 571
574/* 572/*
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index d98d0213285d..54b60bfceb8d 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -230,9 +230,6 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f
230 dp->dl_client = clp; 230 dp->dl_client = clp;
231 get_nfs4_file(fp); 231 get_nfs4_file(fp);
232 dp->dl_file = fp; 232 dp->dl_file = fp;
233 dp->dl_vfs_file = find_readable_file(fp);
234 get_file(dp->dl_vfs_file);
235 dp->dl_flock = NULL;
236 dp->dl_type = type; 233 dp->dl_type = type;
237 dp->dl_stateid.si_boot = boot_time; 234 dp->dl_stateid.si_boot = boot_time;
238 dp->dl_stateid.si_stateownerid = current_delegid++; 235 dp->dl_stateid.si_stateownerid = current_delegid++;
@@ -241,8 +238,6 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f
241 fh_copy_shallow(&dp->dl_fh, &current_fh->fh_handle); 238 fh_copy_shallow(&dp->dl_fh, &current_fh->fh_handle);
242 dp->dl_time = 0; 239 dp->dl_time = 0;
243 atomic_set(&dp->dl_count, 1); 240 atomic_set(&dp->dl_count, 1);
244 list_add(&dp->dl_perfile, &fp->fi_delegations);
245 list_add(&dp->dl_perclnt, &clp->cl_delegations);
246 INIT_WORK(&dp->dl_recall.cb_work, nfsd4_do_callback_rpc); 241 INIT_WORK(&dp->dl_recall.cb_work, nfsd4_do_callback_rpc);
247 return dp; 242 return dp;
248} 243}
@@ -253,36 +248,30 @@ nfs4_put_delegation(struct nfs4_delegation *dp)
253 if (atomic_dec_and_test(&dp->dl_count)) { 248 if (atomic_dec_and_test(&dp->dl_count)) {
254 dprintk("NFSD: freeing dp %p\n",dp); 249 dprintk("NFSD: freeing dp %p\n",dp);
255 put_nfs4_file(dp->dl_file); 250 put_nfs4_file(dp->dl_file);
256 fput(dp->dl_vfs_file);
257 kmem_cache_free(deleg_slab, dp); 251 kmem_cache_free(deleg_slab, dp);
258 num_delegations--; 252 num_delegations--;
259 } 253 }
260} 254}
261 255
262/* Remove the associated file_lock first, then remove the delegation. 256static void nfs4_put_deleg_lease(struct nfs4_file *fp)
263 * lease_modify() is called to remove the FS_LEASE file_lock from
264 * the i_flock list, eventually calling nfsd's lock_manager
265 * fl_release_callback.
266 */
267static void
268nfs4_close_delegation(struct nfs4_delegation *dp)
269{ 257{
270 dprintk("NFSD: close_delegation dp %p\n",dp); 258 if (atomic_dec_and_test(&fp->fi_delegees)) {
271 /* XXX: do we even need this check?: */ 259 vfs_setlease(fp->fi_deleg_file, F_UNLCK, &fp->fi_lease);
272 if (dp->dl_flock) 260 fp->fi_lease = NULL;
273 vfs_setlease(dp->dl_vfs_file, F_UNLCK, &dp->dl_flock); 261 fp->fi_deleg_file = NULL;
262 }
274} 263}
275 264
276/* Called under the state lock. */ 265/* Called under the state lock. */
277static void 266static void
278unhash_delegation(struct nfs4_delegation *dp) 267unhash_delegation(struct nfs4_delegation *dp)
279{ 268{
280 list_del_init(&dp->dl_perfile);
281 list_del_init(&dp->dl_perclnt); 269 list_del_init(&dp->dl_perclnt);
282 spin_lock(&recall_lock); 270 spin_lock(&recall_lock);
271 list_del_init(&dp->dl_perfile);
283 list_del_init(&dp->dl_recall_lru); 272 list_del_init(&dp->dl_recall_lru);
284 spin_unlock(&recall_lock); 273 spin_unlock(&recall_lock);
285 nfs4_close_delegation(dp); 274 nfs4_put_deleg_lease(dp->dl_file);
286 nfs4_put_delegation(dp); 275 nfs4_put_delegation(dp);
287} 276}
288 277
@@ -958,8 +947,6 @@ expire_client(struct nfs4_client *clp)
958 spin_lock(&recall_lock); 947 spin_lock(&recall_lock);
959 while (!list_empty(&clp->cl_delegations)) { 948 while (!list_empty(&clp->cl_delegations)) {
960 dp = list_entry(clp->cl_delegations.next, struct nfs4_delegation, dl_perclnt); 949 dp = list_entry(clp->cl_delegations.next, struct nfs4_delegation, dl_perclnt);
961 dprintk("NFSD: expire client. dp %p, fp %p\n", dp,
962 dp->dl_flock);
963 list_del_init(&dp->dl_perclnt); 950 list_del_init(&dp->dl_perclnt);
964 list_move(&dp->dl_recall_lru, &reaplist); 951 list_move(&dp->dl_recall_lru, &reaplist);
965 } 952 }
@@ -2078,6 +2065,7 @@ alloc_init_file(struct inode *ino)
2078 fp->fi_inode = igrab(ino); 2065 fp->fi_inode = igrab(ino);
2079 fp->fi_id = current_fileid++; 2066 fp->fi_id = current_fileid++;
2080 fp->fi_had_conflict = false; 2067 fp->fi_had_conflict = false;
2068 fp->fi_lease = NULL;
2081 memset(fp->fi_fds, 0, sizeof(fp->fi_fds)); 2069 memset(fp->fi_fds, 0, sizeof(fp->fi_fds));
2082 memset(fp->fi_access, 0, sizeof(fp->fi_access)); 2070 memset(fp->fi_access, 0, sizeof(fp->fi_access));
2083 spin_lock(&recall_lock); 2071 spin_lock(&recall_lock);
@@ -2329,23 +2317,8 @@ nfs4_file_downgrade(struct nfs4_file *fp, unsigned int share_access)
2329 nfs4_file_put_access(fp, O_RDONLY); 2317 nfs4_file_put_access(fp, O_RDONLY);
2330} 2318}
2331 2319
2332/* 2320static void nfsd_break_one_deleg(struct nfs4_delegation *dp)
2333 * Spawn a thread to perform a recall on the delegation represented
2334 * by the lease (file_lock)
2335 *
2336 * Called from break_lease() with lock_flocks() held.
2337 * Note: we assume break_lease will only call this *once* for any given
2338 * lease.
2339 */
2340static
2341void nfsd_break_deleg_cb(struct file_lock *fl)
2342{ 2321{
2343 struct nfs4_delegation *dp = (struct nfs4_delegation *)fl->fl_owner;
2344
2345 dprintk("NFSD nfsd_break_deleg_cb: dp %p fl %p\n",dp,fl);
2346 if (!dp)
2347 return;
2348
2349 /* We're assuming the state code never drops its reference 2322 /* We're assuming the state code never drops its reference
2350 * without first removing the lease. Since we're in this lease 2323 * without first removing the lease. Since we're in this lease
2351 * callback (and since the lease code is serialized by the kernel 2324 * callback (and since the lease code is serialized by the kernel
@@ -2353,22 +2326,35 @@ void nfsd_break_deleg_cb(struct file_lock *fl)
2353 * it's safe to take a reference: */ 2326 * it's safe to take a reference: */
2354 atomic_inc(&dp->dl_count); 2327 atomic_inc(&dp->dl_count);
2355 2328
2356 spin_lock(&recall_lock);
2357 list_add_tail(&dp->dl_recall_lru, &del_recall_lru); 2329 list_add_tail(&dp->dl_recall_lru, &del_recall_lru);
2358 spin_unlock(&recall_lock);
2359 2330
2360 /* only place dl_time is set. protected by lock_flocks*/ 2331 /* only place dl_time is set. protected by lock_flocks*/
2361 dp->dl_time = get_seconds(); 2332 dp->dl_time = get_seconds();
2362 2333
2334 nfsd4_cb_recall(dp);
2335}
2336
2337/* Called from break_lease() with lock_flocks() held. */
2338static void nfsd_break_deleg_cb(struct file_lock *fl)
2339{
2340 struct nfs4_file *fp = (struct nfs4_file *)fl->fl_owner;
2341 struct nfs4_delegation *dp;
2342
2343 BUG_ON(!fp);
2344 /* We assume break_lease is only called once per lease: */
2345 BUG_ON(fp->fi_had_conflict);
2363 /* 2346 /*
2364 * We don't want the locks code to timeout the lease for us; 2347 * We don't want the locks code to timeout the lease for us;
2365 * we'll remove it ourself if the delegation isn't returned 2348 * we'll remove it ourself if a delegation isn't returned
2366 * in time. 2349 * in time:
2367 */ 2350 */
2368 fl->fl_break_time = 0; 2351 fl->fl_break_time = 0;
2369 2352
2370 dp->dl_file->fi_had_conflict = true; 2353 spin_lock(&recall_lock);
2371 nfsd4_cb_recall(dp); 2354 fp->fi_had_conflict = true;
2355 list_for_each_entry(dp, &fp->fi_delegations, dl_perfile)
2356 nfsd_break_one_deleg(dp);
2357 spin_unlock(&recall_lock);
2372} 2358}
2373 2359
2374static 2360static
@@ -2459,13 +2445,15 @@ nfs4_check_delegmode(struct nfs4_delegation *dp, int flags)
2459static struct nfs4_delegation * 2445static struct nfs4_delegation *
2460find_delegation_file(struct nfs4_file *fp, stateid_t *stid) 2446find_delegation_file(struct nfs4_file *fp, stateid_t *stid)
2461{ 2447{
2462 struct nfs4_delegation *dp; 2448 struct nfs4_delegation *dp = NULL;
2463 2449
2450 spin_lock(&recall_lock);
2464 list_for_each_entry(dp, &fp->fi_delegations, dl_perfile) { 2451 list_for_each_entry(dp, &fp->fi_delegations, dl_perfile) {
2465 if (dp->dl_stateid.si_stateownerid == stid->si_stateownerid) 2452 if (dp->dl_stateid.si_stateownerid == stid->si_stateownerid)
2466 return dp; 2453 break;
2467 } 2454 }
2468 return NULL; 2455 spin_unlock(&recall_lock);
2456 return dp;
2469} 2457}
2470 2458
2471int share_access_to_flags(u32 share_access) 2459int share_access_to_flags(u32 share_access)
@@ -2641,6 +2629,66 @@ static bool nfsd4_cb_channel_good(struct nfs4_client *clp)
2641 return clp->cl_minorversion && clp->cl_cb_state == NFSD4_CB_UNKNOWN; 2629 return clp->cl_minorversion && clp->cl_cb_state == NFSD4_CB_UNKNOWN;
2642} 2630}
2643 2631
2632static struct file_lock *nfs4_alloc_init_lease(struct nfs4_delegation *dp, int flag)
2633{
2634 struct file_lock *fl;
2635
2636 fl = locks_alloc_lock();
2637 if (!fl)
2638 return NULL;
2639 locks_init_lock(fl);
2640 fl->fl_lmops = &nfsd_lease_mng_ops;
2641 fl->fl_flags = FL_LEASE;
2642 fl->fl_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK;
2643 fl->fl_end = OFFSET_MAX;
2644 fl->fl_owner = (fl_owner_t)(dp->dl_file);
2645 fl->fl_pid = current->tgid;
2646 return fl;
2647}
2648
2649static int nfs4_setlease(struct nfs4_delegation *dp, int flag)
2650{
2651 struct nfs4_file *fp = dp->dl_file;
2652 struct file_lock *fl;
2653 int status;
2654
2655 fl = nfs4_alloc_init_lease(dp, flag);
2656 if (!fl)
2657 return -ENOMEM;
2658 fl->fl_file = find_readable_file(fp);
2659 list_add(&dp->dl_perclnt, &dp->dl_client->cl_delegations);
2660 status = vfs_setlease(fl->fl_file, fl->fl_type, &fl);
2661 if (status) {
2662 list_del_init(&dp->dl_perclnt);
2663 locks_free_lock(fl);
2664 return -ENOMEM;
2665 }
2666 fp->fi_lease = fl;
2667 fp->fi_deleg_file = fl->fl_file;
2668 get_file(fp->fi_deleg_file);
2669 atomic_set(&fp->fi_delegees, 1);
2670 list_add(&dp->dl_perfile, &fp->fi_delegations);
2671 return 0;
2672}
2673
2674static int nfs4_set_delegation(struct nfs4_delegation *dp, int flag)
2675{
2676 struct nfs4_file *fp = dp->dl_file;
2677
2678 if (!fp->fi_lease)
2679 return nfs4_setlease(dp, flag);
2680 spin_lock(&recall_lock);
2681 if (fp->fi_had_conflict) {
2682 spin_unlock(&recall_lock);
2683 return -EAGAIN;
2684 }
2685 atomic_inc(&fp->fi_delegees);
2686 list_add(&dp->dl_perfile, &fp->fi_delegations);
2687 spin_unlock(&recall_lock);
2688 list_add(&dp->dl_perclnt, &dp->dl_client->cl_delegations);
2689 return 0;
2690}
2691
2644/* 2692/*
2645 * Attempt to hand out a delegation. 2693 * Attempt to hand out a delegation.
2646 */ 2694 */
@@ -2650,7 +2698,6 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta
2650 struct nfs4_delegation *dp; 2698 struct nfs4_delegation *dp;
2651 struct nfs4_stateowner *sop = stp->st_stateowner; 2699 struct nfs4_stateowner *sop = stp->st_stateowner;
2652 int cb_up; 2700 int cb_up;
2653 struct file_lock *fl;
2654 int status, flag = 0; 2701 int status, flag = 0;
2655 2702
2656 cb_up = nfsd4_cb_channel_good(sop->so_client); 2703 cb_up = nfsd4_cb_channel_good(sop->so_client);
@@ -2681,36 +2728,11 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta
2681 } 2728 }
2682 2729
2683 dp = alloc_init_deleg(sop->so_client, stp, fh, flag); 2730 dp = alloc_init_deleg(sop->so_client, stp, fh, flag);
2684 if (dp == NULL) { 2731 if (dp == NULL)
2685 flag = NFS4_OPEN_DELEGATE_NONE; 2732 goto out_no_deleg;
2686 goto out; 2733 status = nfs4_set_delegation(dp, flag);
2687 } 2734 if (status)
2688 status = -ENOMEM; 2735 goto out_free;
2689 fl = locks_alloc_lock();
2690 if (!fl)
2691 goto out;
2692 locks_init_lock(fl);
2693 fl->fl_lmops = &nfsd_lease_mng_ops;
2694 fl->fl_flags = FL_LEASE;
2695 fl->fl_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK;
2696 fl->fl_end = OFFSET_MAX;
2697 fl->fl_owner = (fl_owner_t)dp;
2698 fl->fl_file = find_readable_file(stp->st_file);
2699 BUG_ON(!fl->fl_file);
2700 fl->fl_pid = current->tgid;
2701 dp->dl_flock = fl;
2702
2703 /* vfs_setlease checks to see if delegation should be handed out.
2704 * the lock_manager callback fl_change is used
2705 */
2706 if ((status = vfs_setlease(fl->fl_file, fl->fl_type, &fl))) {
2707 dprintk("NFSD: setlease failed [%d], no delegation\n", status);
2708 dp->dl_flock = NULL;
2709 locks_free_lock(fl);
2710 unhash_delegation(dp);
2711 flag = NFS4_OPEN_DELEGATE_NONE;
2712 goto out;
2713 }
2714 2736
2715 memcpy(&open->op_delegate_stateid, &dp->dl_stateid, sizeof(dp->dl_stateid)); 2737 memcpy(&open->op_delegate_stateid, &dp->dl_stateid, sizeof(dp->dl_stateid));
2716 2738
@@ -2722,6 +2744,12 @@ out:
2722 && open->op_delegate_type != NFS4_OPEN_DELEGATE_NONE) 2744 && open->op_delegate_type != NFS4_OPEN_DELEGATE_NONE)
2723 dprintk("NFSD: WARNING: refusing delegation reclaim\n"); 2745 dprintk("NFSD: WARNING: refusing delegation reclaim\n");
2724 open->op_delegate_type = flag; 2746 open->op_delegate_type = flag;
2747 return;
2748out_free:
2749 nfs4_put_delegation(dp);
2750out_no_deleg:
2751 flag = NFS4_OPEN_DELEGATE_NONE;
2752 goto out;
2725} 2753}
2726 2754
2727/* 2755/*
@@ -2916,8 +2944,6 @@ nfs4_laundromat(void)
2916 test_val = u; 2944 test_val = u;
2917 break; 2945 break;
2918 } 2946 }
2919 dprintk("NFSD: purging unused delegation dp %p, fp %p\n",
2920 dp, dp->dl_flock);
2921 list_move(&dp->dl_recall_lru, &reaplist); 2947 list_move(&dp->dl_recall_lru, &reaplist);
2922 } 2948 }
2923 spin_unlock(&recall_lock); 2949 spin_unlock(&recall_lock);
@@ -3128,7 +3154,7 @@ nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate,
3128 goto out; 3154 goto out;
3129 renew_client(dp->dl_client); 3155 renew_client(dp->dl_client);
3130 if (filpp) { 3156 if (filpp) {
3131 *filpp = find_readable_file(dp->dl_file); 3157 *filpp = dp->dl_file->fi_deleg_file;
3132 BUG_ON(!*filpp); 3158 BUG_ON(!*filpp);
3133 } 3159 }
3134 } else { /* open or lock stateid */ 3160 } else { /* open or lock stateid */
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 956629b9cdc9..1275b8655070 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -317,8 +317,8 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
317 READ_BUF(dummy32); 317 READ_BUF(dummy32);
318 len += (XDR_QUADLEN(dummy32) << 2); 318 len += (XDR_QUADLEN(dummy32) << 2);
319 READMEM(buf, dummy32); 319 READMEM(buf, dummy32);
320 if ((host_err = nfsd_map_name_to_uid(argp->rqstp, buf, dummy32, &iattr->ia_uid))) 320 if ((status = nfsd_map_name_to_uid(argp->rqstp, buf, dummy32, &iattr->ia_uid)))
321 goto out_nfserr; 321 return status;
322 iattr->ia_valid |= ATTR_UID; 322 iattr->ia_valid |= ATTR_UID;
323 } 323 }
324 if (bmval[1] & FATTR4_WORD1_OWNER_GROUP) { 324 if (bmval[1] & FATTR4_WORD1_OWNER_GROUP) {
@@ -328,8 +328,8 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
328 READ_BUF(dummy32); 328 READ_BUF(dummy32);
329 len += (XDR_QUADLEN(dummy32) << 2); 329 len += (XDR_QUADLEN(dummy32) << 2);
330 READMEM(buf, dummy32); 330 READMEM(buf, dummy32);
331 if ((host_err = nfsd_map_name_to_gid(argp->rqstp, buf, dummy32, &iattr->ia_gid))) 331 if ((status = nfsd_map_name_to_gid(argp->rqstp, buf, dummy32, &iattr->ia_gid)))
332 goto out_nfserr; 332 return status;
333 iattr->ia_valid |= ATTR_GID; 333 iattr->ia_valid |= ATTR_GID;
334 } 334 }
335 if (bmval[1] & FATTR4_WORD1_TIME_ACCESS_SET) { 335 if (bmval[1] & FATTR4_WORD1_TIME_ACCESS_SET) {
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 3074656ba7bf..2d31224b07bf 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -83,8 +83,6 @@ struct nfs4_delegation {
83 atomic_t dl_count; /* ref count */ 83 atomic_t dl_count; /* ref count */
84 struct nfs4_client *dl_client; 84 struct nfs4_client *dl_client;
85 struct nfs4_file *dl_file; 85 struct nfs4_file *dl_file;
86 struct file *dl_vfs_file;
87 struct file_lock *dl_flock;
88 u32 dl_type; 86 u32 dl_type;
89 time_t dl_time; 87 time_t dl_time;
90/* For recall: */ 88/* For recall: */
@@ -379,6 +377,9 @@ struct nfs4_file {
379 */ 377 */
380 atomic_t fi_readers; 378 atomic_t fi_readers;
381 atomic_t fi_writers; 379 atomic_t fi_writers;
380 struct file *fi_deleg_file;
381 struct file_lock *fi_lease;
382 atomic_t fi_delegees;
382 struct inode *fi_inode; 383 struct inode *fi_inode;
383 u32 fi_id; /* used with stateowner->so_id 384 u32 fi_id; /* used with stateowner->so_id
384 * for stateid_hashtbl hash */ 385 * for stateid_hashtbl hash */
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 641117f2188d..da1d9701f8e4 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -808,7 +808,7 @@ nfsd_get_raparms(dev_t dev, ino_t ino)
808 if (ra->p_count == 0) 808 if (ra->p_count == 0)
809 frap = rap; 809 frap = rap;
810 } 810 }
811 depth = nfsdstats.ra_size*11/10; 811 depth = nfsdstats.ra_size;
812 if (!frap) { 812 if (!frap) {
813 spin_unlock(&rab->pb_lock); 813 spin_unlock(&rab->pb_lock);
814 return NULL; 814 return NULL;
@@ -1744,6 +1744,13 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
1744 host_err = nfsd_break_lease(odentry->d_inode); 1744 host_err = nfsd_break_lease(odentry->d_inode);
1745 if (host_err) 1745 if (host_err)
1746 goto out_drop_write; 1746 goto out_drop_write;
1747 if (ndentry->d_inode) {
1748 host_err = nfsd_break_lease(ndentry->d_inode);
1749 if (host_err)
1750 goto out_drop_write;
1751 }
1752 if (host_err)
1753 goto out_drop_write;
1747 host_err = vfs_rename(fdir, odentry, tdir, ndentry); 1754 host_err = vfs_rename(fdir, odentry, tdir, ndentry);
1748 if (!host_err) { 1755 if (!host_err) {
1749 host_err = commit_metadata(tfhp); 1756 host_err = commit_metadata(tfhp);
@@ -1812,22 +1819,22 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
1812 1819
1813 host_err = mnt_want_write(fhp->fh_export->ex_path.mnt); 1820 host_err = mnt_want_write(fhp->fh_export->ex_path.mnt);
1814 if (host_err) 1821 if (host_err)
1815 goto out_nfserr; 1822 goto out_put;
1816 1823
1817 host_err = nfsd_break_lease(rdentry->d_inode); 1824 host_err = nfsd_break_lease(rdentry->d_inode);
1818 if (host_err) 1825 if (host_err)
1819 goto out_put; 1826 goto out_drop_write;
1820 if (type != S_IFDIR) 1827 if (type != S_IFDIR)
1821 host_err = vfs_unlink(dirp, rdentry); 1828 host_err = vfs_unlink(dirp, rdentry);
1822 else 1829 else
1823 host_err = vfs_rmdir(dirp, rdentry); 1830 host_err = vfs_rmdir(dirp, rdentry);
1824out_put:
1825 dput(rdentry);
1826
1827 if (!host_err) 1831 if (!host_err)
1828 host_err = commit_metadata(fhp); 1832 host_err = commit_metadata(fhp);
1829 1833out_drop_write:
1830 mnt_drop_write(fhp->fh_export->ex_path.mnt); 1834 mnt_drop_write(fhp->fh_export->ex_path.mnt);
1835out_put:
1836 dput(rdentry);
1837
1831out_nfserr: 1838out_nfserr:
1832 err = nfserrno(host_err); 1839 err = nfserrno(host_err);
1833out: 1840out:
diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c
index 388e9e8f5286..85f7baa15f5d 100644
--- a/fs/nilfs2/btnode.c
+++ b/fs/nilfs2/btnode.c
@@ -35,11 +35,6 @@
35#include "btnode.h" 35#include "btnode.h"
36 36
37 37
38void nilfs_btnode_cache_init_once(struct address_space *btnc)
39{
40 nilfs_mapping_init_once(btnc);
41}
42
43static const struct address_space_operations def_btnode_aops = { 38static const struct address_space_operations def_btnode_aops = {
44 .sync_page = block_sync_page, 39 .sync_page = block_sync_page,
45}; 40};
diff --git a/fs/nilfs2/btnode.h b/fs/nilfs2/btnode.h
index 79037494f1e0..1b8ebd888c28 100644
--- a/fs/nilfs2/btnode.h
+++ b/fs/nilfs2/btnode.h
@@ -37,7 +37,6 @@ struct nilfs_btnode_chkey_ctxt {
37 struct buffer_head *newbh; 37 struct buffer_head *newbh;
38}; 38};
39 39
40void nilfs_btnode_cache_init_once(struct address_space *);
41void nilfs_btnode_cache_init(struct address_space *, struct backing_dev_info *); 40void nilfs_btnode_cache_init(struct address_space *, struct backing_dev_info *);
42void nilfs_btnode_cache_clear(struct address_space *); 41void nilfs_btnode_cache_clear(struct address_space *);
43struct buffer_head *nilfs_btnode_create_block(struct address_space *btnc, 42struct buffer_head *nilfs_btnode_create_block(struct address_space *btnc,
diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c
index 6a0e2a189f60..a0babd2bff6a 100644
--- a/fs/nilfs2/mdt.c
+++ b/fs/nilfs2/mdt.c
@@ -454,9 +454,9 @@ int nilfs_mdt_setup_shadow_map(struct inode *inode,
454 struct backing_dev_info *bdi = inode->i_sb->s_bdi; 454 struct backing_dev_info *bdi = inode->i_sb->s_bdi;
455 455
456 INIT_LIST_HEAD(&shadow->frozen_buffers); 456 INIT_LIST_HEAD(&shadow->frozen_buffers);
457 nilfs_mapping_init_once(&shadow->frozen_data); 457 address_space_init_once(&shadow->frozen_data);
458 nilfs_mapping_init(&shadow->frozen_data, bdi, &shadow_map_aops); 458 nilfs_mapping_init(&shadow->frozen_data, bdi, &shadow_map_aops);
459 nilfs_mapping_init_once(&shadow->frozen_btnodes); 459 address_space_init_once(&shadow->frozen_btnodes);
460 nilfs_mapping_init(&shadow->frozen_btnodes, bdi, &shadow_map_aops); 460 nilfs_mapping_init(&shadow->frozen_btnodes, bdi, &shadow_map_aops);
461 mi->mi_shadow = shadow; 461 mi->mi_shadow = shadow;
462 return 0; 462 return 0;
diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c
index 98034271cd02..161791d26458 100644
--- a/fs/nilfs2/namei.c
+++ b/fs/nilfs2/namei.c
@@ -397,7 +397,6 @@ static int nilfs_rename(struct inode *old_dir, struct dentry *old_dentry,
397 new_de = nilfs_find_entry(new_dir, &new_dentry->d_name, &new_page); 397 new_de = nilfs_find_entry(new_dir, &new_dentry->d_name, &new_page);
398 if (!new_de) 398 if (!new_de)
399 goto out_dir; 399 goto out_dir;
400 inc_nlink(old_inode);
401 nilfs_set_link(new_dir, new_de, new_page, old_inode); 400 nilfs_set_link(new_dir, new_de, new_page, old_inode);
402 nilfs_mark_inode_dirty(new_dir); 401 nilfs_mark_inode_dirty(new_dir);
403 new_inode->i_ctime = CURRENT_TIME; 402 new_inode->i_ctime = CURRENT_TIME;
@@ -411,13 +410,9 @@ static int nilfs_rename(struct inode *old_dir, struct dentry *old_dentry,
411 if (new_dir->i_nlink >= NILFS_LINK_MAX) 410 if (new_dir->i_nlink >= NILFS_LINK_MAX)
412 goto out_dir; 411 goto out_dir;
413 } 412 }
414 inc_nlink(old_inode);
415 err = nilfs_add_link(new_dentry, old_inode); 413 err = nilfs_add_link(new_dentry, old_inode);
416 if (err) { 414 if (err)
417 drop_nlink(old_inode);
418 nilfs_mark_inode_dirty(old_inode);
419 goto out_dir; 415 goto out_dir;
420 }
421 if (dir_de) { 416 if (dir_de) {
422 inc_nlink(new_dir); 417 inc_nlink(new_dir);
423 nilfs_mark_inode_dirty(new_dir); 418 nilfs_mark_inode_dirty(new_dir);
@@ -431,7 +426,6 @@ static int nilfs_rename(struct inode *old_dir, struct dentry *old_dentry,
431 old_inode->i_ctime = CURRENT_TIME; 426 old_inode->i_ctime = CURRENT_TIME;
432 427
433 nilfs_delete_entry(old_de, old_page); 428 nilfs_delete_entry(old_de, old_page);
434 drop_nlink(old_inode);
435 429
436 if (dir_de) { 430 if (dir_de) {
437 nilfs_set_link(old_inode, dir_de, dir_page, new_dir); 431 nilfs_set_link(old_inode, dir_de, dir_page, new_dir);
diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c
index 0c432416cfef..a585b35fd6bc 100644
--- a/fs/nilfs2/page.c
+++ b/fs/nilfs2/page.c
@@ -492,19 +492,6 @@ unsigned nilfs_page_count_clean_buffers(struct page *page,
492 return nc; 492 return nc;
493} 493}
494 494
495void nilfs_mapping_init_once(struct address_space *mapping)
496{
497 memset(mapping, 0, sizeof(*mapping));
498 INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC);
499 spin_lock_init(&mapping->tree_lock);
500 INIT_LIST_HEAD(&mapping->private_list);
501 spin_lock_init(&mapping->private_lock);
502
503 spin_lock_init(&mapping->i_mmap_lock);
504 INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap);
505 INIT_LIST_HEAD(&mapping->i_mmap_nonlinear);
506}
507
508void nilfs_mapping_init(struct address_space *mapping, 495void nilfs_mapping_init(struct address_space *mapping,
509 struct backing_dev_info *bdi, 496 struct backing_dev_info *bdi,
510 const struct address_space_operations *aops) 497 const struct address_space_operations *aops)
diff --git a/fs/nilfs2/page.h b/fs/nilfs2/page.h
index 622df27cd891..2a00953ebd5f 100644
--- a/fs/nilfs2/page.h
+++ b/fs/nilfs2/page.h
@@ -61,7 +61,6 @@ void nilfs_free_private_page(struct page *);
61int nilfs_copy_dirty_pages(struct address_space *, struct address_space *); 61int nilfs_copy_dirty_pages(struct address_space *, struct address_space *);
62void nilfs_copy_back_pages(struct address_space *, struct address_space *); 62void nilfs_copy_back_pages(struct address_space *, struct address_space *);
63void nilfs_clear_dirty_pages(struct address_space *); 63void nilfs_clear_dirty_pages(struct address_space *);
64void nilfs_mapping_init_once(struct address_space *mapping);
65void nilfs_mapping_init(struct address_space *mapping, 64void nilfs_mapping_init(struct address_space *mapping,
66 struct backing_dev_info *bdi, 65 struct backing_dev_info *bdi,
67 const struct address_space_operations *aops); 66 const struct address_space_operations *aops);
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 55ebae5c7f39..2de9f636792a 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -430,7 +430,8 @@ static void nilfs_segctor_begin_finfo(struct nilfs_sc_info *sci,
430 nilfs_segctor_map_segsum_entry( 430 nilfs_segctor_map_segsum_entry(
431 sci, &sci->sc_binfo_ptr, sizeof(struct nilfs_finfo)); 431 sci, &sci->sc_binfo_ptr, sizeof(struct nilfs_finfo));
432 432
433 if (inode->i_sb && !test_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags)) 433 if (NILFS_I(inode)->i_root &&
434 !test_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags))
434 set_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags); 435 set_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags);
435 /* skip finfo */ 436 /* skip finfo */
436} 437}
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 0994f6a76c07..1673b3d99842 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -704,7 +704,8 @@ skip_mount_setup:
704 sbp[0]->s_state = 704 sbp[0]->s_state =
705 cpu_to_le16(le16_to_cpu(sbp[0]->s_state) & ~NILFS_VALID_FS); 705 cpu_to_le16(le16_to_cpu(sbp[0]->s_state) & ~NILFS_VALID_FS);
706 /* synchronize sbp[1] with sbp[0] */ 706 /* synchronize sbp[1] with sbp[0] */
707 memcpy(sbp[1], sbp[0], nilfs->ns_sbsize); 707 if (sbp[1])
708 memcpy(sbp[1], sbp[0], nilfs->ns_sbsize);
708 return nilfs_commit_super(sbi, NILFS_SB_COMMIT_ALL); 709 return nilfs_commit_super(sbi, NILFS_SB_COMMIT_ALL);
709} 710}
710 711
@@ -1278,7 +1279,7 @@ static void nilfs_inode_init_once(void *obj)
1278#ifdef CONFIG_NILFS_XATTR 1279#ifdef CONFIG_NILFS_XATTR
1279 init_rwsem(&ii->xattr_sem); 1280 init_rwsem(&ii->xattr_sem);
1280#endif 1281#endif
1281 nilfs_btnode_cache_init_once(&ii->i_btnode_cache); 1282 address_space_init_once(&ii->i_btnode_cache);
1282 ii->i_bmap = &ii->i_bmap_data; 1283 ii->i_bmap = &ii->i_bmap_data;
1283 inode_init_once(&ii->vfs_inode); 1284 inode_init_once(&ii->vfs_inode);
1284} 1285}
diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c
index b572b6727181..326e7475a22a 100644
--- a/fs/ntfs/mft.c
+++ b/fs/ntfs/mft.c
@@ -1,7 +1,7 @@
1/** 1/**
2 * mft.c - NTFS kernel mft record operations. Part of the Linux-NTFS project. 2 * mft.c - NTFS kernel mft record operations. Part of the Linux-NTFS project.
3 * 3 *
4 * Copyright (c) 2001-2006 Anton Altaparmakov 4 * Copyright (c) 2001-2011 Anton Altaparmakov and Tuxera Inc.
5 * Copyright (c) 2002 Richard Russon 5 * Copyright (c) 2002 Richard Russon
6 * 6 *
7 * This program/include file is free software; you can redistribute it and/or 7 * This program/include file is free software; you can redistribute it and/or
@@ -2576,6 +2576,8 @@ mft_rec_already_initialized:
2576 flush_dcache_page(page); 2576 flush_dcache_page(page);
2577 SetPageUptodate(page); 2577 SetPageUptodate(page);
2578 if (base_ni) { 2578 if (base_ni) {
2579 MFT_RECORD *m_tmp;
2580
2579 /* 2581 /*
2580 * Setup the base mft record in the extent mft record. This 2582 * Setup the base mft record in the extent mft record. This
2581 * completes initialization of the allocated extent mft record 2583 * completes initialization of the allocated extent mft record
@@ -2588,11 +2590,11 @@ mft_rec_already_initialized:
2588 * attach it to the base inode @base_ni and map, pin, and lock 2590 * attach it to the base inode @base_ni and map, pin, and lock
2589 * its, i.e. the allocated, mft record. 2591 * its, i.e. the allocated, mft record.
2590 */ 2592 */
2591 m = map_extent_mft_record(base_ni, bit, &ni); 2593 m_tmp = map_extent_mft_record(base_ni, bit, &ni);
2592 if (IS_ERR(m)) { 2594 if (IS_ERR(m_tmp)) {
2593 ntfs_error(vol->sb, "Failed to map allocated extent " 2595 ntfs_error(vol->sb, "Failed to map allocated extent "
2594 "mft record 0x%llx.", (long long)bit); 2596 "mft record 0x%llx.", (long long)bit);
2595 err = PTR_ERR(m); 2597 err = PTR_ERR(m_tmp);
2596 /* Set the mft record itself not in use. */ 2598 /* Set the mft record itself not in use. */
2597 m->flags &= cpu_to_le16( 2599 m->flags &= cpu_to_le16(
2598 ~le16_to_cpu(MFT_RECORD_IN_USE)); 2600 ~le16_to_cpu(MFT_RECORD_IN_USE));
@@ -2603,6 +2605,7 @@ mft_rec_already_initialized:
2603 ntfs_unmap_page(page); 2605 ntfs_unmap_page(page);
2604 goto undo_mftbmp_alloc; 2606 goto undo_mftbmp_alloc;
2605 } 2607 }
2608 BUG_ON(m != m_tmp);
2606 /* 2609 /*
2607 * Make sure the allocated mft record is written out to disk. 2610 * Make sure the allocated mft record is written out to disk.
2608 * No need to set the inode dirty because the caller is going 2611 * No need to set the inode dirty because the caller is going
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index 43e56b97f9c0..6180da1e37e6 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -405,9 +405,9 @@ static inline int ocfs2_remove_extent_credits(struct super_block *sb)
405 ocfs2_quota_trans_credits(sb); 405 ocfs2_quota_trans_credits(sb);
406} 406}
407 407
408/* data block for new dir/symlink, 2 for bitmap updates (bitmap fe + 408/* data block for new dir/symlink, allocation of directory block, dx_root
409 * bitmap block for the new bit) dx_root update for free list */ 409 * update for free list */
410#define OCFS2_DIR_LINK_ADDITIONAL_CREDITS (1 + 2 + 1) 410#define OCFS2_DIR_LINK_ADDITIONAL_CREDITS (1 + OCFS2_SUBALLOC_ALLOC + 1)
411 411
412static inline int ocfs2_add_dir_index_credits(struct super_block *sb) 412static inline int ocfs2_add_dir_index_credits(struct super_block *sb)
413{ 413{
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index b5f9160e93e9..19ebc5aad391 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -3228,7 +3228,7 @@ static int ocfs2_make_clusters_writable(struct super_block *sb,
3228 u32 num_clusters, unsigned int e_flags) 3228 u32 num_clusters, unsigned int e_flags)
3229{ 3229{
3230 int ret, delete, index, credits = 0; 3230 int ret, delete, index, credits = 0;
3231 u32 new_bit, new_len; 3231 u32 new_bit, new_len, orig_num_clusters;
3232 unsigned int set_len; 3232 unsigned int set_len;
3233 struct ocfs2_super *osb = OCFS2_SB(sb); 3233 struct ocfs2_super *osb = OCFS2_SB(sb);
3234 handle_t *handle; 3234 handle_t *handle;
@@ -3261,6 +3261,8 @@ static int ocfs2_make_clusters_writable(struct super_block *sb,
3261 goto out; 3261 goto out;
3262 } 3262 }
3263 3263
3264 orig_num_clusters = num_clusters;
3265
3264 while (num_clusters) { 3266 while (num_clusters) {
3265 ret = ocfs2_get_refcount_rec(ref_ci, context->ref_root_bh, 3267 ret = ocfs2_get_refcount_rec(ref_ci, context->ref_root_bh,
3266 p_cluster, num_clusters, 3268 p_cluster, num_clusters,
@@ -3348,7 +3350,8 @@ static int ocfs2_make_clusters_writable(struct super_block *sb,
3348 * in write-back mode. 3350 * in write-back mode.
3349 */ 3351 */
3350 if (context->get_clusters == ocfs2_di_get_clusters) { 3352 if (context->get_clusters == ocfs2_di_get_clusters) {
3351 ret = ocfs2_cow_sync_writeback(sb, context, cpos, num_clusters); 3353 ret = ocfs2_cow_sync_writeback(sb, context, cpos,
3354 orig_num_clusters);
3352 if (ret) 3355 if (ret)
3353 mlog_errno(ret); 3356 mlog_errno(ret);
3354 } 3357 }
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 38f986d2447e..36c423fb0635 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1316,7 +1316,7 @@ static int ocfs2_parse_options(struct super_block *sb,
1316 struct mount_options *mopt, 1316 struct mount_options *mopt,
1317 int is_remount) 1317 int is_remount)
1318{ 1318{
1319 int status; 1319 int status, user_stack = 0;
1320 char *p; 1320 char *p;
1321 u32 tmp; 1321 u32 tmp;
1322 1322
@@ -1459,6 +1459,15 @@ static int ocfs2_parse_options(struct super_block *sb,
1459 memcpy(mopt->cluster_stack, args[0].from, 1459 memcpy(mopt->cluster_stack, args[0].from,
1460 OCFS2_STACK_LABEL_LEN); 1460 OCFS2_STACK_LABEL_LEN);
1461 mopt->cluster_stack[OCFS2_STACK_LABEL_LEN] = '\0'; 1461 mopt->cluster_stack[OCFS2_STACK_LABEL_LEN] = '\0';
1462 /*
1463 * Open code the memcmp here as we don't have
1464 * an osb to pass to
1465 * ocfs2_userspace_stack().
1466 */
1467 if (memcmp(mopt->cluster_stack,
1468 OCFS2_CLASSIC_CLUSTER_STACK,
1469 OCFS2_STACK_LABEL_LEN))
1470 user_stack = 1;
1462 break; 1471 break;
1463 case Opt_inode64: 1472 case Opt_inode64:
1464 mopt->mount_opt |= OCFS2_MOUNT_INODE64; 1473 mopt->mount_opt |= OCFS2_MOUNT_INODE64;
@@ -1514,13 +1523,16 @@ static int ocfs2_parse_options(struct super_block *sb,
1514 } 1523 }
1515 } 1524 }
1516 1525
1517 /* Ensure only one heartbeat mode */ 1526 if (user_stack == 0) {
1518 tmp = mopt->mount_opt & (OCFS2_MOUNT_HB_LOCAL | OCFS2_MOUNT_HB_GLOBAL | 1527 /* Ensure only one heartbeat mode */
1519 OCFS2_MOUNT_HB_NONE); 1528 tmp = mopt->mount_opt & (OCFS2_MOUNT_HB_LOCAL |
1520 if (hweight32(tmp) != 1) { 1529 OCFS2_MOUNT_HB_GLOBAL |
1521 mlog(ML_ERROR, "Invalid heartbeat mount options\n"); 1530 OCFS2_MOUNT_HB_NONE);
1522 status = 0; 1531 if (hweight32(tmp) != 1) {
1523 goto bail; 1532 mlog(ML_ERROR, "Invalid heartbeat mount options\n");
1533 status = 0;
1534 goto bail;
1535 }
1524 } 1536 }
1525 1537
1526 status = 1; 1538 status = 1;
diff --git a/fs/open.c b/fs/open.c
index e52389e1f05b..5a2c6ebc22b5 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -790,6 +790,8 @@ struct file *nameidata_to_filp(struct nameidata *nd)
790 790
791 /* Pick up the filp from the open intent */ 791 /* Pick up the filp from the open intent */
792 filp = nd->intent.open.file; 792 filp = nd->intent.open.file;
793 nd->intent.open.file = NULL;
794
793 /* Has the filesystem initialised the file for us? */ 795 /* Has the filesystem initialised the file for us? */
794 if (filp->f_path.dentry == NULL) { 796 if (filp->f_path.dentry == NULL) {
795 path_get(&nd->path); 797 path_get(&nd->path);
diff --git a/fs/partitions/ldm.c b/fs/partitions/ldm.c
index 789c625c7aa5..b10e3540d5b7 100644
--- a/fs/partitions/ldm.c
+++ b/fs/partitions/ldm.c
@@ -251,6 +251,11 @@ static bool ldm_parse_vmdb (const u8 *data, struct vmdb *vm)
251 } 251 }
252 252
253 vm->vblk_size = get_unaligned_be32(data + 0x08); 253 vm->vblk_size = get_unaligned_be32(data + 0x08);
254 if (vm->vblk_size == 0) {
255 ldm_error ("Illegal VBLK size");
256 return false;
257 }
258
254 vm->vblk_offset = get_unaligned_be32(data + 0x0C); 259 vm->vblk_offset = get_unaligned_be32(data + 0x0C);
255 vm->last_vblk_seq = get_unaligned_be32(data + 0x04); 260 vm->last_vblk_seq = get_unaligned_be32(data + 0x04);
256 261
diff --git a/fs/partitions/mac.c b/fs/partitions/mac.c
index 68d6a216ee79..11f688bd76c5 100644
--- a/fs/partitions/mac.c
+++ b/fs/partitions/mac.c
@@ -29,10 +29,9 @@ static inline void mac_fix_string(char *stg, int len)
29 29
30int mac_partition(struct parsed_partitions *state) 30int mac_partition(struct parsed_partitions *state)
31{ 31{
32 int slot = 1;
33 Sector sect; 32 Sector sect;
34 unsigned char *data; 33 unsigned char *data;
35 int blk, blocks_in_map; 34 int slot, blocks_in_map;
36 unsigned secsize; 35 unsigned secsize;
37#ifdef CONFIG_PPC_PMAC 36#ifdef CONFIG_PPC_PMAC
38 int found_root = 0; 37 int found_root = 0;
@@ -59,10 +58,14 @@ int mac_partition(struct parsed_partitions *state)
59 put_dev_sector(sect); 58 put_dev_sector(sect);
60 return 0; /* not a MacOS disk */ 59 return 0; /* not a MacOS disk */
61 } 60 }
62 strlcat(state->pp_buf, " [mac]", PAGE_SIZE);
63 blocks_in_map = be32_to_cpu(part->map_count); 61 blocks_in_map = be32_to_cpu(part->map_count);
64 for (blk = 1; blk <= blocks_in_map; ++blk) { 62 if (blocks_in_map < 0 || blocks_in_map >= DISK_MAX_PARTS) {
65 int pos = blk * secsize; 63 put_dev_sector(sect);
64 return 0;
65 }
66 strlcat(state->pp_buf, " [mac]", PAGE_SIZE);
67 for (slot = 1; slot <= blocks_in_map; ++slot) {
68 int pos = slot * secsize;
66 put_dev_sector(sect); 69 put_dev_sector(sect);
67 data = read_part_sector(state, pos/512, &sect); 70 data = read_part_sector(state, pos/512, &sect);
68 if (!data) 71 if (!data)
@@ -113,13 +116,11 @@ int mac_partition(struct parsed_partitions *state)
113 } 116 }
114 117
115 if (goodness > found_root_goodness) { 118 if (goodness > found_root_goodness) {
116 found_root = blk; 119 found_root = slot;
117 found_root_goodness = goodness; 120 found_root_goodness = goodness;
118 } 121 }
119 } 122 }
120#endif /* CONFIG_PPC_PMAC */ 123#endif /* CONFIG_PPC_PMAC */
121
122 ++slot;
123 } 124 }
124#ifdef CONFIG_PPC_PMAC 125#ifdef CONFIG_PPC_PMAC
125 if (found_root_goodness) 126 if (found_root_goodness)
diff --git a/fs/posix_acl.c b/fs/posix_acl.c
index 39df95a0ec25..b1cf6bf4b41d 100644
--- a/fs/posix_acl.c
+++ b/fs/posix_acl.c
@@ -22,6 +22,7 @@
22 22
23#include <linux/errno.h> 23#include <linux/errno.h>
24 24
25EXPORT_SYMBOL(posix_acl_init);
25EXPORT_SYMBOL(posix_acl_alloc); 26EXPORT_SYMBOL(posix_acl_alloc);
26EXPORT_SYMBOL(posix_acl_clone); 27EXPORT_SYMBOL(posix_acl_clone);
27EXPORT_SYMBOL(posix_acl_valid); 28EXPORT_SYMBOL(posix_acl_valid);
@@ -32,6 +33,16 @@ EXPORT_SYMBOL(posix_acl_chmod_masq);
32EXPORT_SYMBOL(posix_acl_permission); 33EXPORT_SYMBOL(posix_acl_permission);
33 34
34/* 35/*
36 * Init a fresh posix_acl
37 */
38void
39posix_acl_init(struct posix_acl *acl, int count)
40{
41 atomic_set(&acl->a_refcount, 1);
42 acl->a_count = count;
43}
44
45/*
35 * Allocate a new ACL with the specified number of entries. 46 * Allocate a new ACL with the specified number of entries.
36 */ 47 */
37struct posix_acl * 48struct posix_acl *
@@ -40,10 +51,8 @@ posix_acl_alloc(int count, gfp_t flags)
40 const size_t size = sizeof(struct posix_acl) + 51 const size_t size = sizeof(struct posix_acl) +
41 count * sizeof(struct posix_acl_entry); 52 count * sizeof(struct posix_acl_entry);
42 struct posix_acl *acl = kmalloc(size, flags); 53 struct posix_acl *acl = kmalloc(size, flags);
43 if (acl) { 54 if (acl)
44 atomic_set(&acl->a_refcount, 1); 55 posix_acl_init(acl, count);
45 acl->a_count = count;
46 }
47 return acl; 56 return acl;
48} 57}
49 58
diff --git a/fs/proc/array.c b/fs/proc/array.c
index df2b703b9d0f..7c99c1cf7e5c 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -353,9 +353,6 @@ int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
353 task_cap(m, task); 353 task_cap(m, task);
354 task_cpus_allowed(m, task); 354 task_cpus_allowed(m, task);
355 cpuset_task_status_allowed(m, task); 355 cpuset_task_status_allowed(m, task);
356#if defined(CONFIG_S390)
357 task_show_regs(m, task);
358#endif
359 task_context_switch_counts(m, task); 356 task_context_switch_counts(m, task);
360 return 0; 357 return 0;
361} 358}
diff --git a/fs/proc/consoles.c b/fs/proc/consoles.c
index eafc22ab1fdd..b701eaa482bf 100644
--- a/fs/proc/consoles.c
+++ b/fs/proc/consoles.c
@@ -67,7 +67,7 @@ static void *c_start(struct seq_file *m, loff_t *pos)
67 struct console *con; 67 struct console *con;
68 loff_t off = 0; 68 loff_t off = 0;
69 69
70 acquire_console_sem(); 70 console_lock();
71 for_each_console(con) 71 for_each_console(con)
72 if (off++ == *pos) 72 if (off++ == *pos)
73 break; 73 break;
@@ -84,7 +84,7 @@ static void *c_next(struct seq_file *m, void *v, loff_t *pos)
84 84
85static void c_stop(struct seq_file *m, void *v) 85static void c_stop(struct seq_file *m, void *v)
86{ 86{
87 release_console_sem(); 87 console_unlock();
88} 88}
89 89
90static const struct seq_operations consoles_op = { 90static const struct seq_operations consoles_op = {
diff --git a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c
index d9396a4fc7ff..927cbd115e53 100644
--- a/fs/proc/proc_devtree.c
+++ b/fs/proc/proc_devtree.c
@@ -233,7 +233,7 @@ void __init proc_device_tree_init(void)
233 return; 233 return;
234 root = of_find_node_by_path("/"); 234 root = of_find_node_by_path("/");
235 if (root == NULL) { 235 if (root == NULL) {
236 printk(KERN_ERR "/proc/device-tree: can't find root\n"); 236 pr_debug("/proc/device-tree: can't find root\n");
237 return; 237 return;
238 } 238 }
239 proc_device_tree_add_node(root, proc_device_tree); 239 proc_device_tree_add_node(root, proc_device_tree);
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index ba5f51ec3458..68fdf45cc6c9 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -771,7 +771,7 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
771 EMPTY_DIR_SIZE_V1 : EMPTY_DIR_SIZE, 771 EMPTY_DIR_SIZE_V1 : EMPTY_DIR_SIZE,
772 dentry, inode, &security); 772 dentry, inode, &security);
773 if (retval) { 773 if (retval) {
774 dir->i_nlink--; 774 DEC_DIR_INODE_NLINK(dir)
775 goto out_failed; 775 goto out_failed;
776 } 776 }
777 777
diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c
index 2fb2882f0fa7..8ab48bc2fa7d 100644
--- a/fs/squashfs/block.c
+++ b/fs/squashfs/block.c
@@ -63,6 +63,14 @@ static struct buffer_head *get_block_length(struct super_block *sb,
63 *length = (unsigned char) bh->b_data[*offset] | 63 *length = (unsigned char) bh->b_data[*offset] |
64 (unsigned char) bh->b_data[*offset + 1] << 8; 64 (unsigned char) bh->b_data[*offset + 1] << 8;
65 *offset += 2; 65 *offset += 2;
66
67 if (*offset == msblk->devblksize) {
68 put_bh(bh);
69 bh = sb_bread(sb, ++(*cur_index));
70 if (bh == NULL)
71 return NULL;
72 *offset = 0;
73 }
66 } 74 }
67 75
68 return bh; 76 return bh;
diff --git a/fs/squashfs/xz_wrapper.c b/fs/squashfs/xz_wrapper.c
index 856756ca5ee4..c4eb40018256 100644
--- a/fs/squashfs/xz_wrapper.c
+++ b/fs/squashfs/xz_wrapper.c
@@ -95,12 +95,6 @@ static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void **buffer,
95 if (!buffer_uptodate(bh[k])) 95 if (!buffer_uptodate(bh[k]))
96 goto release_mutex; 96 goto release_mutex;
97 97
98 if (avail == 0) {
99 offset = 0;
100 put_bh(bh[k++]);
101 continue;
102 }
103
104 stream->buf.in = bh[k]->b_data + offset; 98 stream->buf.in = bh[k]->b_data + offset;
105 stream->buf.in_size = avail; 99 stream->buf.in_size = avail;
106 stream->buf.in_pos = 0; 100 stream->buf.in_pos = 0;
diff --git a/fs/squashfs/zlib_wrapper.c b/fs/squashfs/zlib_wrapper.c
index 818a5e063faf..4661ae2b1cec 100644
--- a/fs/squashfs/zlib_wrapper.c
+++ b/fs/squashfs/zlib_wrapper.c
@@ -82,12 +82,6 @@ static int zlib_uncompress(struct squashfs_sb_info *msblk, void **buffer,
82 if (!buffer_uptodate(bh[k])) 82 if (!buffer_uptodate(bh[k]))
83 goto release_mutex; 83 goto release_mutex;
84 84
85 if (avail == 0) {
86 offset = 0;
87 put_bh(bh[k++]);
88 continue;
89 }
90
91 stream->next_in = bh[k]->b_data + offset; 85 stream->next_in = bh[k]->b_data + offset;
92 stream->avail_in = avail; 86 stream->avail_in = avail;
93 offset = 0; 87 offset = 0;
diff --git a/fs/super.c b/fs/super.c
index 74e149efed81..7e9dd4cc2c01 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -177,6 +177,11 @@ void deactivate_locked_super(struct super_block *s)
177 struct file_system_type *fs = s->s_type; 177 struct file_system_type *fs = s->s_type;
178 if (atomic_dec_and_test(&s->s_active)) { 178 if (atomic_dec_and_test(&s->s_active)) {
179 fs->kill_sb(s); 179 fs->kill_sb(s);
180 /*
181 * We need to call rcu_barrier so all the delayed rcu free
182 * inodes are flushed before we release the fs module.
183 */
184 rcu_barrier();
180 put_filesystem(fs); 185 put_filesystem(fs);
181 put_super(s); 186 put_super(s);
182 } else { 187 } else {
diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c
index b427b1208c26..e474fbcf8bde 100644
--- a/fs/sysv/namei.c
+++ b/fs/sysv/namei.c
@@ -245,7 +245,6 @@ static int sysv_rename(struct inode * old_dir, struct dentry * old_dentry,
245 new_de = sysv_find_entry(new_dentry, &new_page); 245 new_de = sysv_find_entry(new_dentry, &new_page);
246 if (!new_de) 246 if (!new_de)
247 goto out_dir; 247 goto out_dir;
248 inode_inc_link_count(old_inode);
249 sysv_set_link(new_de, new_page, old_inode); 248 sysv_set_link(new_de, new_page, old_inode);
250 new_inode->i_ctime = CURRENT_TIME_SEC; 249 new_inode->i_ctime = CURRENT_TIME_SEC;
251 if (dir_de) 250 if (dir_de)
@@ -257,18 +256,15 @@ static int sysv_rename(struct inode * old_dir, struct dentry * old_dentry,
257 if (new_dir->i_nlink >= SYSV_SB(new_dir->i_sb)->s_link_max) 256 if (new_dir->i_nlink >= SYSV_SB(new_dir->i_sb)->s_link_max)
258 goto out_dir; 257 goto out_dir;
259 } 258 }
260 inode_inc_link_count(old_inode);
261 err = sysv_add_link(new_dentry, old_inode); 259 err = sysv_add_link(new_dentry, old_inode);
262 if (err) { 260 if (err)
263 inode_dec_link_count(old_inode);
264 goto out_dir; 261 goto out_dir;
265 }
266 if (dir_de) 262 if (dir_de)
267 inode_inc_link_count(new_dir); 263 inode_inc_link_count(new_dir);
268 } 264 }
269 265
270 sysv_delete_entry(old_de, old_page); 266 sysv_delete_entry(old_de, old_page);
271 inode_dec_link_count(old_inode); 267 mark_inode_dirty(old_inode);
272 268
273 if (dir_de) { 269 if (dir_de) {
274 sysv_set_link(dir_de, dir_page, new_dir); 270 sysv_set_link(dir_de, dir_page, new_dir);
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index 2be0f9eb86d2..b7c338d5e9df 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -32,6 +32,8 @@
32#include <linux/crc-itu-t.h> 32#include <linux/crc-itu-t.h>
33#include <linux/exportfs.h> 33#include <linux/exportfs.h>
34 34
35enum { UDF_MAX_LINKS = 0xffff };
36
35static inline int udf_match(int len1, const unsigned char *name1, int len2, 37static inline int udf_match(int len1, const unsigned char *name1, int len2,
36 const unsigned char *name2) 38 const unsigned char *name2)
37{ 39{
@@ -650,7 +652,7 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, int mode)
650 struct udf_inode_info *iinfo; 652 struct udf_inode_info *iinfo;
651 653
652 err = -EMLINK; 654 err = -EMLINK;
653 if (dir->i_nlink >= (256 << sizeof(dir->i_nlink)) - 1) 655 if (dir->i_nlink >= UDF_MAX_LINKS)
654 goto out; 656 goto out;
655 657
656 err = -EIO; 658 err = -EIO;
@@ -1034,9 +1036,8 @@ static int udf_link(struct dentry *old_dentry, struct inode *dir,
1034 struct fileIdentDesc cfi, *fi; 1036 struct fileIdentDesc cfi, *fi;
1035 int err; 1037 int err;
1036 1038
1037 if (inode->i_nlink >= (256 << sizeof(inode->i_nlink)) - 1) { 1039 if (inode->i_nlink >= UDF_MAX_LINKS)
1038 return -EMLINK; 1040 return -EMLINK;
1039 }
1040 1041
1041 fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err); 1042 fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err);
1042 if (!fi) { 1043 if (!fi) {
@@ -1131,9 +1132,7 @@ static int udf_rename(struct inode *old_dir, struct dentry *old_dentry,
1131 goto end_rename; 1132 goto end_rename;
1132 1133
1133 retval = -EMLINK; 1134 retval = -EMLINK;
1134 if (!new_inode && 1135 if (!new_inode && new_dir->i_nlink >= UDF_MAX_LINKS)
1135 new_dir->i_nlink >=
1136 (256 << sizeof(new_dir->i_nlink)) - 1)
1137 goto end_rename; 1136 goto end_rename;
1138 } 1137 }
1139 if (!nfi) { 1138 if (!nfi) {
diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c
index 12f39b9e4437..d6f681535eb8 100644
--- a/fs/ufs/namei.c
+++ b/fs/ufs/namei.c
@@ -306,7 +306,6 @@ static int ufs_rename(struct inode *old_dir, struct dentry *old_dentry,
306 new_de = ufs_find_entry(new_dir, &new_dentry->d_name, &new_page); 306 new_de = ufs_find_entry(new_dir, &new_dentry->d_name, &new_page);
307 if (!new_de) 307 if (!new_de)
308 goto out_dir; 308 goto out_dir;
309 inode_inc_link_count(old_inode);
310 ufs_set_link(new_dir, new_de, new_page, old_inode); 309 ufs_set_link(new_dir, new_de, new_page, old_inode);
311 new_inode->i_ctime = CURRENT_TIME_SEC; 310 new_inode->i_ctime = CURRENT_TIME_SEC;
312 if (dir_de) 311 if (dir_de)
@@ -318,12 +317,9 @@ static int ufs_rename(struct inode *old_dir, struct dentry *old_dentry,
318 if (new_dir->i_nlink >= UFS_LINK_MAX) 317 if (new_dir->i_nlink >= UFS_LINK_MAX)
319 goto out_dir; 318 goto out_dir;
320 } 319 }
321 inode_inc_link_count(old_inode);
322 err = ufs_add_link(new_dentry, old_inode); 320 err = ufs_add_link(new_dentry, old_inode);
323 if (err) { 321 if (err)
324 inode_dec_link_count(old_inode);
325 goto out_dir; 322 goto out_dir;
326 }
327 if (dir_de) 323 if (dir_de)
328 inode_inc_link_count(new_dir); 324 inode_inc_link_count(new_dir);
329 } 325 }
@@ -331,12 +327,11 @@ static int ufs_rename(struct inode *old_dir, struct dentry *old_dentry,
331 /* 327 /*
332 * Like most other Unix systems, set the ctime for inodes on a 328 * Like most other Unix systems, set the ctime for inodes on a
333 * rename. 329 * rename.
334 * inode_dec_link_count() will mark the inode dirty.
335 */ 330 */
336 old_inode->i_ctime = CURRENT_TIME_SEC; 331 old_inode->i_ctime = CURRENT_TIME_SEC;
337 332
338 ufs_delete_entry(old_dir, old_de, old_page); 333 ufs_delete_entry(old_dir, old_de, old_page);
339 inode_dec_link_count(old_inode); 334 mark_inode_dirty(old_inode);
340 335
341 if (dir_de) { 336 if (dir_de) {
342 ufs_set_link(old_inode, dir_de, dir_page, new_dir); 337 ufs_set_link(old_inode, dir_de, dir_page, new_dir);
diff --git a/fs/xfs/linux-2.6/xfs_discard.c b/fs/xfs/linux-2.6/xfs_discard.c
index 05201ae719e5..d61611c88012 100644
--- a/fs/xfs/linux-2.6/xfs_discard.c
+++ b/fs/xfs/linux-2.6/xfs_discard.c
@@ -152,6 +152,8 @@ xfs_ioc_trim(
152 152
153 if (!capable(CAP_SYS_ADMIN)) 153 if (!capable(CAP_SYS_ADMIN))
154 return -XFS_ERROR(EPERM); 154 return -XFS_ERROR(EPERM);
155 if (!blk_queue_discard(q))
156 return -XFS_ERROR(EOPNOTSUPP);
155 if (copy_from_user(&range, urange, sizeof(range))) 157 if (copy_from_user(&range, urange, sizeof(range)))
156 return -XFS_ERROR(EFAULT); 158 return -XFS_ERROR(EFAULT);
157 159
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index b06ede1d0bed..0ca0e3c024d7 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -695,14 +695,19 @@ xfs_ioc_fsgeometry_v1(
695 xfs_mount_t *mp, 695 xfs_mount_t *mp,
696 void __user *arg) 696 void __user *arg)
697{ 697{
698 xfs_fsop_geom_v1_t fsgeo; 698 xfs_fsop_geom_t fsgeo;
699 int error; 699 int error;
700 700
701 error = xfs_fs_geometry(mp, (xfs_fsop_geom_t *)&fsgeo, 3); 701 error = xfs_fs_geometry(mp, &fsgeo, 3);
702 if (error) 702 if (error)
703 return -error; 703 return -error;
704 704
705 if (copy_to_user(arg, &fsgeo, sizeof(fsgeo))) 705 /*
706 * Caller should have passed an argument of type
707 * xfs_fsop_geom_v1_t. This is a proper subset of the
708 * xfs_fsop_geom_t that xfs_fs_geometry() fills in.
709 */
710 if (copy_to_user(arg, &fsgeo, sizeof(xfs_fsop_geom_v1_t)))
706 return -XFS_ERROR(EFAULT); 711 return -XFS_ERROR(EFAULT);
707 return 0; 712 return 0;
708} 713}
@@ -985,10 +990,22 @@ xfs_ioctl_setattr(
985 990
986 /* 991 /*
987 * Extent size must be a multiple of the appropriate block 992 * Extent size must be a multiple of the appropriate block
988 * size, if set at all. 993 * size, if set at all. It must also be smaller than the
994 * maximum extent size supported by the filesystem.
995 *
996 * Also, for non-realtime files, limit the extent size hint to
997 * half the size of the AGs in the filesystem so alignment
998 * doesn't result in extents larger than an AG.
989 */ 999 */
990 if (fa->fsx_extsize != 0) { 1000 if (fa->fsx_extsize != 0) {
991 xfs_extlen_t size; 1001 xfs_extlen_t size;
1002 xfs_fsblock_t extsize_fsb;
1003
1004 extsize_fsb = XFS_B_TO_FSB(mp, fa->fsx_extsize);
1005 if (extsize_fsb > MAXEXTLEN) {
1006 code = XFS_ERROR(EINVAL);
1007 goto error_return;
1008 }
992 1009
993 if (XFS_IS_REALTIME_INODE(ip) || 1010 if (XFS_IS_REALTIME_INODE(ip) ||
994 ((mask & FSX_XFLAGS) && 1011 ((mask & FSX_XFLAGS) &&
@@ -997,6 +1014,10 @@ xfs_ioctl_setattr(
997 mp->m_sb.sb_blocklog; 1014 mp->m_sb.sb_blocklog;
998 } else { 1015 } else {
999 size = mp->m_sb.sb_blocksize; 1016 size = mp->m_sb.sb_blocksize;
1017 if (extsize_fsb > mp->m_sb.sb_agblocks / 2) {
1018 code = XFS_ERROR(EINVAL);
1019 goto error_return;
1020 }
1000 } 1021 }
1001 1022
1002 if (fa->fsx_extsize % size) { 1023 if (fa->fsx_extsize % size) {
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index f8e854b4fde8..206a2815ced6 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -1863,12 +1863,14 @@ xfs_qm_dqreclaim_one(void)
1863 xfs_dquot_t *dqpout; 1863 xfs_dquot_t *dqpout;
1864 xfs_dquot_t *dqp; 1864 xfs_dquot_t *dqp;
1865 int restarts; 1865 int restarts;
1866 int startagain;
1866 1867
1867 restarts = 0; 1868 restarts = 0;
1868 dqpout = NULL; 1869 dqpout = NULL;
1869 1870
1870 /* lockorder: hashchainlock, freelistlock, mplistlock, dqlock, dqflock */ 1871 /* lockorder: hashchainlock, freelistlock, mplistlock, dqlock, dqflock */
1871startagain: 1872again:
1873 startagain = 0;
1872 mutex_lock(&xfs_Gqm->qm_dqfrlist_lock); 1874 mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
1873 1875
1874 list_for_each_entry(dqp, &xfs_Gqm->qm_dqfrlist, q_freelist) { 1876 list_for_each_entry(dqp, &xfs_Gqm->qm_dqfrlist, q_freelist) {
@@ -1885,13 +1887,10 @@ startagain:
1885 ASSERT(! (dqp->dq_flags & XFS_DQ_INACTIVE)); 1887 ASSERT(! (dqp->dq_flags & XFS_DQ_INACTIVE));
1886 1888
1887 trace_xfs_dqreclaim_want(dqp); 1889 trace_xfs_dqreclaim_want(dqp);
1888
1889 xfs_dqunlock(dqp);
1890 mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
1891 if (++restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
1892 return NULL;
1893 XQM_STATS_INC(xqmstats.xs_qm_dqwants); 1890 XQM_STATS_INC(xqmstats.xs_qm_dqwants);
1894 goto startagain; 1891 restarts++;
1892 startagain = 1;
1893 goto dqunlock;
1895 } 1894 }
1896 1895
1897 /* 1896 /*
@@ -1906,23 +1905,20 @@ startagain:
1906 ASSERT(list_empty(&dqp->q_mplist)); 1905 ASSERT(list_empty(&dqp->q_mplist));
1907 list_del_init(&dqp->q_freelist); 1906 list_del_init(&dqp->q_freelist);
1908 xfs_Gqm->qm_dqfrlist_cnt--; 1907 xfs_Gqm->qm_dqfrlist_cnt--;
1909 xfs_dqunlock(dqp);
1910 dqpout = dqp; 1908 dqpout = dqp;
1911 XQM_STATS_INC(xqmstats.xs_qm_dqinact_reclaims); 1909 XQM_STATS_INC(xqmstats.xs_qm_dqinact_reclaims);
1912 break; 1910 goto dqunlock;
1913 } 1911 }
1914 1912
1915 ASSERT(dqp->q_hash); 1913 ASSERT(dqp->q_hash);
1916 ASSERT(!list_empty(&dqp->q_mplist)); 1914 ASSERT(!list_empty(&dqp->q_mplist));
1917 1915
1918 /* 1916 /*
1919 * Try to grab the flush lock. If this dquot is in the process of 1917 * Try to grab the flush lock. If this dquot is in the process
1920 * getting flushed to disk, we don't want to reclaim it. 1918 * of getting flushed to disk, we don't want to reclaim it.
1921 */ 1919 */
1922 if (!xfs_dqflock_nowait(dqp)) { 1920 if (!xfs_dqflock_nowait(dqp))
1923 xfs_dqunlock(dqp); 1921 goto dqunlock;
1924 continue;
1925 }
1926 1922
1927 /* 1923 /*
1928 * We have the flush lock so we know that this is not in the 1924 * We have the flush lock so we know that this is not in the
@@ -1944,8 +1940,7 @@ startagain:
1944 xfs_fs_cmn_err(CE_WARN, mp, 1940 xfs_fs_cmn_err(CE_WARN, mp,
1945 "xfs_qm_dqreclaim: dquot %p flush failed", dqp); 1941 "xfs_qm_dqreclaim: dquot %p flush failed", dqp);
1946 } 1942 }
1947 xfs_dqunlock(dqp); /* dqflush unlocks dqflock */ 1943 goto dqunlock;
1948 continue;
1949 } 1944 }
1950 1945
1951 /* 1946 /*
@@ -1967,13 +1962,8 @@ startagain:
1967 */ 1962 */
1968 if (!mutex_trylock(&mp->m_quotainfo->qi_dqlist_lock)) { 1963 if (!mutex_trylock(&mp->m_quotainfo->qi_dqlist_lock)) {
1969 restarts++; 1964 restarts++;
1970 mutex_unlock(&dqp->q_hash->qh_lock); 1965 startagain = 1;
1971 xfs_dqfunlock(dqp); 1966 goto qhunlock;
1972 xfs_dqunlock(dqp);
1973 mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
1974 if (restarts++ >= XFS_QM_RECLAIM_MAX_RESTARTS)
1975 return NULL;
1976 goto startagain;
1977 } 1967 }
1978 1968
1979 ASSERT(dqp->q_nrefs == 0); 1969 ASSERT(dqp->q_nrefs == 0);
@@ -1986,14 +1976,20 @@ startagain:
1986 xfs_Gqm->qm_dqfrlist_cnt--; 1976 xfs_Gqm->qm_dqfrlist_cnt--;
1987 dqpout = dqp; 1977 dqpout = dqp;
1988 mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock); 1978 mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock);
1979qhunlock:
1989 mutex_unlock(&dqp->q_hash->qh_lock); 1980 mutex_unlock(&dqp->q_hash->qh_lock);
1990dqfunlock: 1981dqfunlock:
1991 xfs_dqfunlock(dqp); 1982 xfs_dqfunlock(dqp);
1983dqunlock:
1992 xfs_dqunlock(dqp); 1984 xfs_dqunlock(dqp);
1993 if (dqpout) 1985 if (dqpout)
1994 break; 1986 break;
1995 if (restarts >= XFS_QM_RECLAIM_MAX_RESTARTS) 1987 if (restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
1996 return NULL; 1988 break;
1989 if (startagain) {
1990 mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
1991 goto again;
1992 }
1997 } 1993 }
1998 mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); 1994 mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
1999 return dqpout; 1995 return dqpout;
diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/xfs_alloc.h
index 0ab56b32c7eb..d0b3bc72005b 100644
--- a/fs/xfs/xfs_alloc.h
+++ b/fs/xfs/xfs_alloc.h
@@ -75,6 +75,22 @@ typedef unsigned int xfs_alloctype_t;
75#define XFS_ALLOC_SET_ASIDE(mp) (4 + ((mp)->m_sb.sb_agcount * 4)) 75#define XFS_ALLOC_SET_ASIDE(mp) (4 + ((mp)->m_sb.sb_agcount * 4))
76 76
77/* 77/*
78 * When deciding how much space to allocate out of an AG, we limit the
79 * allocation maximum size to the size the AG. However, we cannot use all the
80 * blocks in the AG - some are permanently used by metadata. These
81 * blocks are generally:
82 * - the AG superblock, AGF, AGI and AGFL
83 * - the AGF (bno and cnt) and AGI btree root blocks
84 * - 4 blocks on the AGFL according to XFS_ALLOC_SET_ASIDE() limits
85 *
86 * The AG headers are sector sized, so the amount of space they take up is
87 * dependent on filesystem geometry. The others are all single blocks.
88 */
89#define XFS_ALLOC_AG_MAX_USABLE(mp) \
90 ((mp)->m_sb.sb_agblocks - XFS_BB_TO_FSB(mp, XFS_FSS_TO_BB(mp, 4)) - 7)
91
92
93/*
78 * Argument structure for xfs_alloc routines. 94 * Argument structure for xfs_alloc routines.
79 * This is turned into a structure to avoid having 20 arguments passed 95 * This is turned into a structure to avoid having 20 arguments passed
80 * down several levels of the stack. 96 * down several levels of the stack.
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 4111cd3966c7..dc3afd7739ff 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -1038,17 +1038,34 @@ xfs_bmap_add_extent_delay_real(
1038 * Filling in the middle part of a previous delayed allocation. 1038 * Filling in the middle part of a previous delayed allocation.
1039 * Contiguity is impossible here. 1039 * Contiguity is impossible here.
1040 * This case is avoided almost all the time. 1040 * This case is avoided almost all the time.
1041 *
1042 * We start with a delayed allocation:
1043 *
1044 * +ddddddddddddddddddddddddddddddddddddddddddddddddddddddd+
1045 * PREV @ idx
1046 *
1047 * and we are allocating:
1048 * +rrrrrrrrrrrrrrrrr+
1049 * new
1050 *
1051 * and we set it up for insertion as:
1052 * +ddddddddddddddddddd+rrrrrrrrrrrrrrrrr+ddddddddddddddddd+
1053 * new
1054 * PREV @ idx LEFT RIGHT
1055 * inserted at idx + 1
1041 */ 1056 */
1042 temp = new->br_startoff - PREV.br_startoff; 1057 temp = new->br_startoff - PREV.br_startoff;
1043 trace_xfs_bmap_pre_update(ip, idx, 0, _THIS_IP_);
1044 xfs_bmbt_set_blockcount(ep, temp);
1045 r[0] = *new;
1046 r[1].br_state = PREV.br_state;
1047 r[1].br_startblock = 0;
1048 r[1].br_startoff = new_endoff;
1049 temp2 = PREV.br_startoff + PREV.br_blockcount - new_endoff; 1058 temp2 = PREV.br_startoff + PREV.br_blockcount - new_endoff;
1050 r[1].br_blockcount = temp2; 1059 trace_xfs_bmap_pre_update(ip, idx, 0, _THIS_IP_);
1051 xfs_iext_insert(ip, idx + 1, 2, &r[0], state); 1060 xfs_bmbt_set_blockcount(ep, temp); /* truncate PREV */
1061 LEFT = *new;
1062 RIGHT.br_state = PREV.br_state;
1063 RIGHT.br_startblock = nullstartblock(
1064 (int)xfs_bmap_worst_indlen(ip, temp2));
1065 RIGHT.br_startoff = new_endoff;
1066 RIGHT.br_blockcount = temp2;
1067 /* insert LEFT (r[0]) and RIGHT (r[1]) at the same time */
1068 xfs_iext_insert(ip, idx + 1, 2, &LEFT, state);
1052 ip->i_df.if_lastex = idx + 1; 1069 ip->i_df.if_lastex = idx + 1;
1053 ip->i_d.di_nextents++; 1070 ip->i_d.di_nextents++;
1054 if (cur == NULL) 1071 if (cur == NULL)
@@ -2430,7 +2447,7 @@ xfs_bmap_btalloc_nullfb(
2430 startag = ag = 0; 2447 startag = ag = 0;
2431 2448
2432 pag = xfs_perag_get(mp, ag); 2449 pag = xfs_perag_get(mp, ag);
2433 while (*blen < ap->alen) { 2450 while (*blen < args->maxlen) {
2434 if (!pag->pagf_init) { 2451 if (!pag->pagf_init) {
2435 error = xfs_alloc_pagf_init(mp, args->tp, ag, 2452 error = xfs_alloc_pagf_init(mp, args->tp, ag,
2436 XFS_ALLOC_FLAG_TRYLOCK); 2453 XFS_ALLOC_FLAG_TRYLOCK);
@@ -2452,7 +2469,7 @@ xfs_bmap_btalloc_nullfb(
2452 notinit = 1; 2469 notinit = 1;
2453 2470
2454 if (xfs_inode_is_filestream(ap->ip)) { 2471 if (xfs_inode_is_filestream(ap->ip)) {
2455 if (*blen >= ap->alen) 2472 if (*blen >= args->maxlen)
2456 break; 2473 break;
2457 2474
2458 if (ap->userdata) { 2475 if (ap->userdata) {
@@ -2498,14 +2515,14 @@ xfs_bmap_btalloc_nullfb(
2498 * If the best seen length is less than the request 2515 * If the best seen length is less than the request
2499 * length, use the best as the minimum. 2516 * length, use the best as the minimum.
2500 */ 2517 */
2501 else if (*blen < ap->alen) 2518 else if (*blen < args->maxlen)
2502 args->minlen = *blen; 2519 args->minlen = *blen;
2503 /* 2520 /*
2504 * Otherwise we've seen an extent as big as alen, 2521 * Otherwise we've seen an extent as big as maxlen,
2505 * use that as the minimum. 2522 * use that as the minimum.
2506 */ 2523 */
2507 else 2524 else
2508 args->minlen = ap->alen; 2525 args->minlen = args->maxlen;
2509 2526
2510 /* 2527 /*
2511 * set the failure fallback case to look in the selected 2528 * set the failure fallback case to look in the selected
@@ -2573,7 +2590,9 @@ xfs_bmap_btalloc(
2573 args.tp = ap->tp; 2590 args.tp = ap->tp;
2574 args.mp = mp; 2591 args.mp = mp;
2575 args.fsbno = ap->rval; 2592 args.fsbno = ap->rval;
2576 args.maxlen = MIN(ap->alen, mp->m_sb.sb_agblocks); 2593
2594 /* Trim the allocation back to the maximum an AG can fit. */
2595 args.maxlen = MIN(ap->alen, XFS_ALLOC_AG_MAX_USABLE(mp));
2577 args.firstblock = ap->firstblock; 2596 args.firstblock = ap->firstblock;
2578 blen = 0; 2597 blen = 0;
2579 if (nullfb) { 2598 if (nullfb) {
@@ -2621,7 +2640,7 @@ xfs_bmap_btalloc(
2621 /* 2640 /*
2622 * Adjust for alignment 2641 * Adjust for alignment
2623 */ 2642 */
2624 if (blen > args.alignment && blen <= ap->alen) 2643 if (blen > args.alignment && blen <= args.maxlen)
2625 args.minlen = blen - args.alignment; 2644 args.minlen = blen - args.alignment;
2626 args.minalignslop = 0; 2645 args.minalignslop = 0;
2627 } else { 2646 } else {
@@ -2640,7 +2659,7 @@ xfs_bmap_btalloc(
2640 * of minlen+alignment+slop doesn't go up 2659 * of minlen+alignment+slop doesn't go up
2641 * between the calls. 2660 * between the calls.
2642 */ 2661 */
2643 if (blen > mp->m_dalign && blen <= ap->alen) 2662 if (blen > mp->m_dalign && blen <= args.maxlen)
2644 nextminlen = blen - mp->m_dalign; 2663 nextminlen = blen - mp->m_dalign;
2645 else 2664 else
2646 nextminlen = args.minlen; 2665 nextminlen = args.minlen;
@@ -4485,6 +4504,16 @@ xfs_bmapi(
4485 /* Figure out the extent size, adjust alen */ 4504 /* Figure out the extent size, adjust alen */
4486 extsz = xfs_get_extsz_hint(ip); 4505 extsz = xfs_get_extsz_hint(ip);
4487 if (extsz) { 4506 if (extsz) {
4507 /*
4508 * make sure we don't exceed a single
4509 * extent length when we align the
4510 * extent by reducing length we are
4511 * going to allocate by the maximum
4512 * amount extent size aligment may
4513 * require.
4514 */
4515 alen = XFS_FILBLKS_MIN(len,
4516 MAXEXTLEN - (2 * extsz - 1));
4488 error = xfs_bmap_extsize_align(mp, 4517 error = xfs_bmap_extsize_align(mp,
4489 &got, &prev, extsz, 4518 &got, &prev, extsz,
4490 rt, eof, 4519 rt, eof,
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 98c6f73b6752..6f8c21ce0d6d 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -427,13 +427,15 @@ xfs_buf_item_unpin(
427 427
428 if (remove) { 428 if (remove) {
429 /* 429 /*
430 * We have to remove the log item from the transaction 430 * If we are in a transaction context, we have to
431 * as we are about to release our reference to the 431 * remove the log item from the transaction as we are
432 * buffer. If we don't, the unlock that occurs later 432 * about to release our reference to the buffer. If we
433 * in xfs_trans_uncommit() will ry to reference the 433 * don't, the unlock that occurs later in
434 * xfs_trans_uncommit() will try to reference the
434 * buffer which we no longer have a hold on. 435 * buffer which we no longer have a hold on.
435 */ 436 */
436 xfs_trans_del_item(lip); 437 if (lip->li_desc)
438 xfs_trans_del_item(lip);
437 439
438 /* 440 /*
439 * Since the transaction no longer refers to the buffer, 441 * Since the transaction no longer refers to the buffer,
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index 75f2ef60e579..d22e62623437 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -138,7 +138,8 @@ xfs_efi_item_unpin(
138 138
139 if (remove) { 139 if (remove) {
140 ASSERT(!(lip->li_flags & XFS_LI_IN_AIL)); 140 ASSERT(!(lip->li_flags & XFS_LI_IN_AIL));
141 xfs_trans_del_item(lip); 141 if (lip->li_desc)
142 xfs_trans_del_item(lip);
142 xfs_efi_item_free(efip); 143 xfs_efi_item_free(efip);
143 return; 144 return;
144 } 145 }
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index cec89dd5d7d2..85668efb3e3e 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -53,6 +53,9 @@ xfs_fs_geometry(
53 xfs_fsop_geom_t *geo, 53 xfs_fsop_geom_t *geo,
54 int new_version) 54 int new_version)
55{ 55{
56
57 memset(geo, 0, sizeof(*geo));
58
56 geo->blocksize = mp->m_sb.sb_blocksize; 59 geo->blocksize = mp->m_sb.sb_blocksize;
57 geo->rtextsize = mp->m_sb.sb_rextsize; 60 geo->rtextsize = mp->m_sb.sb_rextsize;
58 geo->agblocks = mp->m_sb.sb_agblocks; 61 geo->agblocks = mp->m_sb.sb_agblocks;
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 55582bd66659..8a0f044750c3 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -337,7 +337,12 @@ xfs_iomap_prealloc_size(
337 int shift = 0; 337 int shift = 0;
338 int64_t freesp; 338 int64_t freesp;
339 339
340 alloc_blocks = XFS_B_TO_FSB(mp, ip->i_size); 340 /*
341 * rounddown_pow_of_two() returns an undefined result
342 * if we pass in alloc_blocks = 0. Hence the "+ 1" to
343 * ensure we always pass in a non-zero value.
344 */
345 alloc_blocks = XFS_B_TO_FSB(mp, ip->i_size) + 1;
341 alloc_blocks = XFS_FILEOFF_MIN(MAXEXTLEN, 346 alloc_blocks = XFS_FILEOFF_MIN(MAXEXTLEN,
342 rounddown_pow_of_two(alloc_blocks)); 347 rounddown_pow_of_two(alloc_blocks));
343 348
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index 916eb7db14d9..3bd3291ef8d2 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -191,7 +191,7 @@ void xfs_log_ticket_put(struct xlog_ticket *ticket);
191 191
192xlog_tid_t xfs_log_get_trans_ident(struct xfs_trans *tp); 192xlog_tid_t xfs_log_get_trans_ident(struct xfs_trans *tp);
193 193
194int xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp, 194void xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp,
195 struct xfs_log_vec *log_vector, 195 struct xfs_log_vec *log_vector,
196 xfs_lsn_t *commit_lsn, int flags); 196 xfs_lsn_t *commit_lsn, int flags);
197bool xfs_log_item_in_current_chkpt(struct xfs_log_item *lip); 197bool xfs_log_item_in_current_chkpt(struct xfs_log_item *lip);
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index 9dc8125d04e5..9ca59be08977 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -543,7 +543,7 @@ xlog_cil_push(
543 543
544 error = xlog_write(log, &lvhdr, tic, &ctx->start_lsn, NULL, 0); 544 error = xlog_write(log, &lvhdr, tic, &ctx->start_lsn, NULL, 0);
545 if (error) 545 if (error)
546 goto out_abort; 546 goto out_abort_free_ticket;
547 547
548 /* 548 /*
549 * now that we've written the checkpoint into the log, strictly 549 * now that we've written the checkpoint into the log, strictly
@@ -569,8 +569,9 @@ restart:
569 } 569 }
570 spin_unlock(&cil->xc_cil_lock); 570 spin_unlock(&cil->xc_cil_lock);
571 571
572 /* xfs_log_done always frees the ticket on error. */
572 commit_lsn = xfs_log_done(log->l_mp, tic, &commit_iclog, 0); 573 commit_lsn = xfs_log_done(log->l_mp, tic, &commit_iclog, 0);
573 if (error || commit_lsn == -1) 574 if (commit_lsn == -1)
574 goto out_abort; 575 goto out_abort;
575 576
576 /* attach all the transactions w/ busy extents to iclog */ 577 /* attach all the transactions w/ busy extents to iclog */
@@ -600,6 +601,8 @@ out_free_ticket:
600 kmem_free(new_ctx); 601 kmem_free(new_ctx);
601 return 0; 602 return 0;
602 603
604out_abort_free_ticket:
605 xfs_log_ticket_put(tic);
603out_abort: 606out_abort:
604 xlog_cil_committed(ctx, XFS_LI_ABORTED); 607 xlog_cil_committed(ctx, XFS_LI_ABORTED);
605 return XFS_ERROR(EIO); 608 return XFS_ERROR(EIO);
@@ -622,7 +625,7 @@ out_abort:
622 * background commit, returns without it held once background commits are 625 * background commit, returns without it held once background commits are
623 * allowed again. 626 * allowed again.
624 */ 627 */
625int 628void
626xfs_log_commit_cil( 629xfs_log_commit_cil(
627 struct xfs_mount *mp, 630 struct xfs_mount *mp,
628 struct xfs_trans *tp, 631 struct xfs_trans *tp,
@@ -637,11 +640,6 @@ xfs_log_commit_cil(
637 if (flags & XFS_TRANS_RELEASE_LOG_RES) 640 if (flags & XFS_TRANS_RELEASE_LOG_RES)
638 log_flags = XFS_LOG_REL_PERM_RESERV; 641 log_flags = XFS_LOG_REL_PERM_RESERV;
639 642
640 if (XLOG_FORCED_SHUTDOWN(log)) {
641 xlog_cil_free_logvec(log_vector);
642 return XFS_ERROR(EIO);
643 }
644
645 /* 643 /*
646 * do all the hard work of formatting items (including memory 644 * do all the hard work of formatting items (including memory
647 * allocation) outside the CIL context lock. This prevents stalling CIL 645 * allocation) outside the CIL context lock. This prevents stalling CIL
@@ -701,7 +699,6 @@ xfs_log_commit_cil(
701 */ 699 */
702 if (push) 700 if (push)
703 xlog_cil_push(log, 0); 701 xlog_cil_push(log, 0);
704 return 0;
705} 702}
706 703
707/* 704/*
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 33dbc4e0ad62..76922793f64f 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -1446,6 +1446,14 @@ xfs_log_item_batch_insert(
1446 * Bulk operation version of xfs_trans_committed that takes a log vector of 1446 * Bulk operation version of xfs_trans_committed that takes a log vector of
1447 * items to insert into the AIL. This uses bulk AIL insertion techniques to 1447 * items to insert into the AIL. This uses bulk AIL insertion techniques to
1448 * minimise lock traffic. 1448 * minimise lock traffic.
1449 *
1450 * If we are called with the aborted flag set, it is because a log write during
1451 * a CIL checkpoint commit has failed. In this case, all the items in the
1452 * checkpoint have already gone through IOP_COMMITTED and IOP_UNLOCK, which
1453 * means that checkpoint commit abort handling is treated exactly the same
1454 * as an iclog write error even though we haven't started any IO yet. Hence in
1455 * this case all we need to do is IOP_COMMITTED processing, followed by an
1456 * IOP_UNPIN(aborted) call.
1449 */ 1457 */
1450void 1458void
1451xfs_trans_committed_bulk( 1459xfs_trans_committed_bulk(
@@ -1472,6 +1480,16 @@ xfs_trans_committed_bulk(
1472 if (XFS_LSN_CMP(item_lsn, (xfs_lsn_t)-1) == 0) 1480 if (XFS_LSN_CMP(item_lsn, (xfs_lsn_t)-1) == 0)
1473 continue; 1481 continue;
1474 1482
1483 /*
1484 * if we are aborting the operation, no point in inserting the
1485 * object into the AIL as we are in a shutdown situation.
1486 */
1487 if (aborted) {
1488 ASSERT(XFS_FORCED_SHUTDOWN(ailp->xa_mount));
1489 IOP_UNPIN(lip, 1);
1490 continue;
1491 }
1492
1475 if (item_lsn != commit_lsn) { 1493 if (item_lsn != commit_lsn) {
1476 1494
1477 /* 1495 /*
@@ -1503,20 +1521,24 @@ xfs_trans_committed_bulk(
1503} 1521}
1504 1522
1505/* 1523/*
1506 * Called from the trans_commit code when we notice that 1524 * Called from the trans_commit code when we notice that the filesystem is in
1507 * the filesystem is in the middle of a forced shutdown. 1525 * the middle of a forced shutdown.
1526 *
1527 * When we are called here, we have already pinned all the items in the
1528 * transaction. However, neither IOP_COMMITTING nor IOP_UNLOCK has been called
1529 * so we can simply walk the items in the transaction, unpin them with an abort
1530 * flag and then free the items. Note that unpinning the items can result in
1531 * them being freed immediately, so we need to use a safe list traversal method
1532 * here.
1508 */ 1533 */
1509STATIC void 1534STATIC void
1510xfs_trans_uncommit( 1535xfs_trans_uncommit(
1511 struct xfs_trans *tp, 1536 struct xfs_trans *tp,
1512 uint flags) 1537 uint flags)
1513{ 1538{
1514 struct xfs_log_item_desc *lidp; 1539 struct xfs_log_item_desc *lidp, *n;
1515 1540
1516 list_for_each_entry(lidp, &tp->t_items, lid_trans) { 1541 list_for_each_entry_safe(lidp, n, &tp->t_items, lid_trans) {
1517 /*
1518 * Unpin all but those that aren't dirty.
1519 */
1520 if (lidp->lid_flags & XFS_LID_DIRTY) 1542 if (lidp->lid_flags & XFS_LID_DIRTY)
1521 IOP_UNPIN(lidp->lid_item, 1); 1543 IOP_UNPIN(lidp->lid_item, 1);
1522 } 1544 }
@@ -1733,7 +1755,6 @@ xfs_trans_commit_cil(
1733 int flags) 1755 int flags)
1734{ 1756{
1735 struct xfs_log_vec *log_vector; 1757 struct xfs_log_vec *log_vector;
1736 int error;
1737 1758
1738 /* 1759 /*
1739 * Get each log item to allocate a vector structure for 1760 * Get each log item to allocate a vector structure for
@@ -1744,9 +1765,7 @@ xfs_trans_commit_cil(
1744 if (!log_vector) 1765 if (!log_vector)
1745 return ENOMEM; 1766 return ENOMEM;
1746 1767
1747 error = xfs_log_commit_cil(mp, tp, log_vector, commit_lsn, flags); 1768 xfs_log_commit_cil(mp, tp, log_vector, commit_lsn, flags);
1748 if (error)
1749 return error;
1750 1769
1751 current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); 1770 current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
1752 xfs_trans_free(tp); 1771 xfs_trans_free(tp);