aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/afs/write.c1
-rw-r--r--fs/aio.c52
-rw-r--r--fs/block_dev.c30
-rw-r--r--fs/btrfs/ctree.h12
-rw-r--r--fs/btrfs/disk-io.c8
-rw-r--r--fs/btrfs/extent-tree.c46
-rw-r--r--fs/btrfs/extent_io.c213
-rw-r--r--fs/btrfs/extent_io.h2
-rw-r--r--fs/btrfs/extent_map.c4
-rw-r--r--fs/btrfs/file.c115
-rw-r--r--fs/btrfs/inode.c135
-rw-r--r--fs/btrfs/ioctl.c17
-rw-r--r--fs/btrfs/lzo.c21
-rw-r--r--fs/btrfs/relocation.c14
-rw-r--r--fs/btrfs/super.c7
-rw-r--r--fs/btrfs/volumes.c15
-rw-r--r--fs/ceph/dir.c27
-rw-r--r--fs/ceph/inode.c2
-rw-r--r--fs/ceph/snap.c14
-rw-r--r--fs/cifs/cifsfs.h2
-rw-r--r--fs/cifs/cifsglob.h4
-rw-r--r--fs/cifs/connect.c37
-rw-r--r--fs/cifs/netmisc.c8
-rw-r--r--fs/cifs/sess.c8
-rw-r--r--fs/cifs/transport.c3
-rw-r--r--fs/compat.c8
-rw-r--r--fs/dcache.c26
-rw-r--r--fs/dlm/lowcomms.c6
-rw-r--r--fs/ecryptfs/dentry.c22
-rw-r--r--fs/ecryptfs/ecryptfs_kernel.h3
-rw-r--r--fs/ecryptfs/file.c1
-rw-r--r--fs/ecryptfs/inode.c138
-rw-r--r--fs/eventfd.c12
-rw-r--r--fs/eventpoll.c95
-rw-r--r--fs/exofs/namei.c8
-rw-r--r--fs/ext2/namei.c9
-rw-r--r--fs/ext4/ext4.h10
-rw-r--r--fs/ext4/extents.c10
-rw-r--r--fs/ext4/file.c60
-rw-r--r--fs/ext4/mballoc.c100
-rw-r--r--fs/ext4/page-io.c36
-rw-r--r--fs/ext4/super.c66
-rw-r--r--fs/fat/namei_vfat.c4
-rw-r--r--fs/fuse/dir.c9
-rw-r--r--fs/fuse/file.c52
-rw-r--r--fs/fuse/fuse_i.h6
-rw-r--r--fs/gfs2/dentry.c2
-rw-r--r--fs/gfs2/glock.c4
-rw-r--r--fs/gfs2/main.c11
-rw-r--r--fs/hfs/dir.c50
-rw-r--r--fs/inode.c31
-rw-r--r--fs/internal.h2
-rw-r--r--fs/jbd2/journal.c9
-rw-r--r--fs/jbd2/transaction.c21
-rw-r--r--fs/jfs/namei.c2
-rw-r--r--fs/minix/namei.c8
-rw-r--r--fs/namei.c165
-rw-r--r--fs/namespace.c2
-rw-r--r--fs/nfs/inode.c7
-rw-r--r--fs/nfs/nfs4_fs.h10
-rw-r--r--fs/nfs/nfs4filelayoutdev.c4
-rw-r--r--fs/nfs/nfs4proc.c131
-rw-r--r--fs/nfs/nfs4state.c29
-rw-r--r--fs/nfs/nfs4xdr.c4
-rw-r--r--fs/nfs/nfsroot.c29
-rw-r--r--fs/nfs/unlink.c2
-rw-r--r--fs/nfs/write.c2
-rw-r--r--fs/nfsd/nfs4callback.c8
-rw-r--r--fs/nfsd/nfs4state.c187
-rw-r--r--fs/nfsd/nfs4xdr.c12
-rw-r--r--fs/nfsd/state.h5
-rw-r--r--fs/nfsd/vfs.c21
-rw-r--r--fs/nilfs2/btnode.c5
-rw-r--r--fs/nilfs2/btnode.h1
-rw-r--r--fs/nilfs2/mdt.c4
-rw-r--r--fs/nilfs2/namei.c8
-rw-r--r--fs/nilfs2/page.c13
-rw-r--r--fs/nilfs2/page.h1
-rw-r--r--fs/nilfs2/segment.c3
-rw-r--r--fs/nilfs2/super.c2
-rw-r--r--fs/ocfs2/dcache.c2
-rw-r--r--fs/ocfs2/journal.h6
-rw-r--r--fs/ocfs2/refcounttree.c7
-rw-r--r--fs/ocfs2/super.c28
-rw-r--r--fs/open.c10
-rw-r--r--fs/partitions/ldm.c5
-rw-r--r--fs/partitions/mac.c17
-rw-r--r--fs/partitions/osf.c12
-rw-r--r--fs/proc/array.c3
-rw-r--r--fs/proc/base.c30
-rw-r--r--fs/proc/inode.c8
-rw-r--r--fs/proc/proc_devtree.c2
-rw-r--r--fs/proc/proc_sysctl.c7
-rw-r--r--fs/reiserfs/namei.c2
-rw-r--r--fs/reiserfs/xattr.c2
-rw-r--r--fs/super.c5
-rw-r--r--fs/sysv/namei.c8
-rw-r--r--fs/udf/namei.c11
-rw-r--r--fs/ufs/namei.c9
-rw-r--r--fs/xfs/linux-2.6/xfs_discard.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl.c11
-rw-r--r--fs/xfs/xfs_fsops.c3
102 files changed, 1579 insertions, 884 deletions
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 15690bb1d3b5..789b3afb3423 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -140,6 +140,7 @@ int afs_write_begin(struct file *file, struct address_space *mapping,
140 candidate->first = candidate->last = index; 140 candidate->first = candidate->last = index;
141 candidate->offset_first = from; 141 candidate->offset_first = from;
142 candidate->to_last = to; 142 candidate->to_last = to;
143 INIT_LIST_HEAD(&candidate->link);
143 candidate->usage = 1; 144 candidate->usage = 1;
144 candidate->state = AFS_WBACK_PENDING; 145 candidate->state = AFS_WBACK_PENDING;
145 init_waitqueue_head(&candidate->waitq); 146 init_waitqueue_head(&candidate->waitq);
diff --git a/fs/aio.c b/fs/aio.c
index fc557a3be0a9..26869cde3953 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -239,15 +239,23 @@ static void __put_ioctx(struct kioctx *ctx)
239 call_rcu(&ctx->rcu_head, ctx_rcu_free); 239 call_rcu(&ctx->rcu_head, ctx_rcu_free);
240} 240}
241 241
242#define get_ioctx(kioctx) do { \ 242static inline void get_ioctx(struct kioctx *kioctx)
243 BUG_ON(atomic_read(&(kioctx)->users) <= 0); \ 243{
244 atomic_inc(&(kioctx)->users); \ 244 BUG_ON(atomic_read(&kioctx->users) <= 0);
245} while (0) 245 atomic_inc(&kioctx->users);
246#define put_ioctx(kioctx) do { \ 246}
247 BUG_ON(atomic_read(&(kioctx)->users) <= 0); \ 247
248 if (unlikely(atomic_dec_and_test(&(kioctx)->users))) \ 248static inline int try_get_ioctx(struct kioctx *kioctx)
249 __put_ioctx(kioctx); \ 249{
250} while (0) 250 return atomic_inc_not_zero(&kioctx->users);
251}
252
253static inline void put_ioctx(struct kioctx *kioctx)
254{
255 BUG_ON(atomic_read(&kioctx->users) <= 0);
256 if (unlikely(atomic_dec_and_test(&kioctx->users)))
257 __put_ioctx(kioctx);
258}
251 259
252/* ioctx_alloc 260/* ioctx_alloc
253 * Allocates and initializes an ioctx. Returns an ERR_PTR if it failed. 261 * Allocates and initializes an ioctx. Returns an ERR_PTR if it failed.
@@ -601,8 +609,13 @@ static struct kioctx *lookup_ioctx(unsigned long ctx_id)
601 rcu_read_lock(); 609 rcu_read_lock();
602 610
603 hlist_for_each_entry_rcu(ctx, n, &mm->ioctx_list, list) { 611 hlist_for_each_entry_rcu(ctx, n, &mm->ioctx_list, list) {
604 if (ctx->user_id == ctx_id && !ctx->dead) { 612 /*
605 get_ioctx(ctx); 613 * RCU protects us against accessing freed memory but
614 * we have to be careful not to get a reference when the
615 * reference count already dropped to 0 (ctx->dead test
616 * is unreliable because of races).
617 */
618 if (ctx->user_id == ctx_id && !ctx->dead && try_get_ioctx(ctx)){
606 ret = ctx; 619 ret = ctx;
607 break; 620 break;
608 } 621 }
@@ -1629,6 +1642,23 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
1629 goto out_put_req; 1642 goto out_put_req;
1630 1643
1631 spin_lock_irq(&ctx->ctx_lock); 1644 spin_lock_irq(&ctx->ctx_lock);
1645 /*
1646 * We could have raced with io_destroy() and are currently holding a
1647 * reference to ctx which should be destroyed. We cannot submit IO
1648 * since ctx gets freed as soon as io_submit() puts its reference. The
1649 * check here is reliable: io_destroy() sets ctx->dead before waiting
1650 * for outstanding IO and the barrier between these two is realized by
1651 * unlock of mm->ioctx_lock and lock of ctx->ctx_lock. Analogously we
1652 * increment ctx->reqs_active before checking for ctx->dead and the
1653 * barrier is realized by unlock and lock of ctx->ctx_lock. Thus if we
1654 * don't see ctx->dead set here, io_destroy() waits for our IO to
1655 * finish.
1656 */
1657 if (ctx->dead) {
1658 spin_unlock_irq(&ctx->ctx_lock);
1659 ret = -EINVAL;
1660 goto out_put_req;
1661 }
1632 aio_run_iocb(req); 1662 aio_run_iocb(req);
1633 if (!list_empty(&ctx->run_list)) { 1663 if (!list_empty(&ctx->run_list)) {
1634 /* drain the run list */ 1664 /* drain the run list */
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 333a7bb4cb9c..889287019599 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -873,6 +873,11 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
873 ret = add_symlink(bdev->bd_part->holder_dir, &disk_to_dev(disk)->kobj); 873 ret = add_symlink(bdev->bd_part->holder_dir, &disk_to_dev(disk)->kobj);
874 if (ret) 874 if (ret)
875 goto out_del; 875 goto out_del;
876 /*
877 * bdev could be deleted beneath us which would implicitly destroy
878 * the holder directory. Hold on to it.
879 */
880 kobject_get(bdev->bd_part->holder_dir);
876 881
877 list_add(&holder->list, &bdev->bd_holder_disks); 882 list_add(&holder->list, &bdev->bd_holder_disks);
878 goto out_unlock; 883 goto out_unlock;
@@ -909,6 +914,7 @@ void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk)
909 del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj); 914 del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
910 del_symlink(bdev->bd_part->holder_dir, 915 del_symlink(bdev->bd_part->holder_dir,
911 &disk_to_dev(disk)->kobj); 916 &disk_to_dev(disk)->kobj);
917 kobject_put(bdev->bd_part->holder_dir);
912 list_del_init(&holder->list); 918 list_del_init(&holder->list);
913 kfree(holder); 919 kfree(holder);
914 } 920 }
@@ -922,14 +928,15 @@ EXPORT_SYMBOL_GPL(bd_unlink_disk_holder);
922 * flush_disk - invalidates all buffer-cache entries on a disk 928 * flush_disk - invalidates all buffer-cache entries on a disk
923 * 929 *
924 * @bdev: struct block device to be flushed 930 * @bdev: struct block device to be flushed
931 * @kill_dirty: flag to guide handling of dirty inodes
925 * 932 *
926 * Invalidates all buffer-cache entries on a disk. It should be called 933 * Invalidates all buffer-cache entries on a disk. It should be called
927 * when a disk has been changed -- either by a media change or online 934 * when a disk has been changed -- either by a media change or online
928 * resize. 935 * resize.
929 */ 936 */
930static void flush_disk(struct block_device *bdev) 937static void flush_disk(struct block_device *bdev, bool kill_dirty)
931{ 938{
932 if (__invalidate_device(bdev)) { 939 if (__invalidate_device(bdev, kill_dirty)) {
933 char name[BDEVNAME_SIZE] = ""; 940 char name[BDEVNAME_SIZE] = "";
934 941
935 if (bdev->bd_disk) 942 if (bdev->bd_disk)
@@ -966,7 +973,7 @@ void check_disk_size_change(struct gendisk *disk, struct block_device *bdev)
966 "%s: detected capacity change from %lld to %lld\n", 973 "%s: detected capacity change from %lld to %lld\n",
967 name, bdev_size, disk_size); 974 name, bdev_size, disk_size);
968 i_size_write(bdev->bd_inode, disk_size); 975 i_size_write(bdev->bd_inode, disk_size);
969 flush_disk(bdev); 976 flush_disk(bdev, false);
970 } 977 }
971} 978}
972EXPORT_SYMBOL(check_disk_size_change); 979EXPORT_SYMBOL(check_disk_size_change);
@@ -1019,7 +1026,7 @@ int check_disk_change(struct block_device *bdev)
1019 if (!(events & DISK_EVENT_MEDIA_CHANGE)) 1026 if (!(events & DISK_EVENT_MEDIA_CHANGE))
1020 return 0; 1027 return 0;
1021 1028
1022 flush_disk(bdev); 1029 flush_disk(bdev, true);
1023 if (bdops->revalidate_disk) 1030 if (bdops->revalidate_disk)
1024 bdops->revalidate_disk(bdev->bd_disk); 1031 bdops->revalidate_disk(bdev->bd_disk);
1025 return 1; 1032 return 1;
@@ -1215,12 +1222,6 @@ int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder)
1215 1222
1216 res = __blkdev_get(bdev, mode, 0); 1223 res = __blkdev_get(bdev, mode, 0);
1217 1224
1218 /* __blkdev_get() may alter read only status, check it afterwards */
1219 if (!res && (mode & FMODE_WRITE) && bdev_read_only(bdev)) {
1220 __blkdev_put(bdev, mode, 0);
1221 res = -EACCES;
1222 }
1223
1224 if (whole) { 1225 if (whole) {
1225 /* finish claiming */ 1226 /* finish claiming */
1226 mutex_lock(&bdev->bd_mutex); 1227 mutex_lock(&bdev->bd_mutex);
@@ -1298,6 +1299,11 @@ struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
1298 if (err) 1299 if (err)
1299 return ERR_PTR(err); 1300 return ERR_PTR(err);
1300 1301
1302 if ((mode & FMODE_WRITE) && bdev_read_only(bdev)) {
1303 blkdev_put(bdev, mode);
1304 return ERR_PTR(-EACCES);
1305 }
1306
1301 return bdev; 1307 return bdev;
1302} 1308}
1303EXPORT_SYMBOL(blkdev_get_by_path); 1309EXPORT_SYMBOL(blkdev_get_by_path);
@@ -1601,7 +1607,7 @@ fail:
1601} 1607}
1602EXPORT_SYMBOL(lookup_bdev); 1608EXPORT_SYMBOL(lookup_bdev);
1603 1609
1604int __invalidate_device(struct block_device *bdev) 1610int __invalidate_device(struct block_device *bdev, bool kill_dirty)
1605{ 1611{
1606 struct super_block *sb = get_super(bdev); 1612 struct super_block *sb = get_super(bdev);
1607 int res = 0; 1613 int res = 0;
@@ -1614,7 +1620,7 @@ int __invalidate_device(struct block_device *bdev)
1614 * hold). 1620 * hold).
1615 */ 1621 */
1616 shrink_dcache_sb(sb); 1622 shrink_dcache_sb(sb);
1617 res = invalidate_inodes(sb); 1623 res = invalidate_inodes(sb, kill_dirty);
1618 drop_super(sb); 1624 drop_super(sb);
1619 } 1625 }
1620 invalidate_bdev(bdev); 1626 invalidate_bdev(bdev);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 2c98b3af6052..7f78cc78fdd0 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -729,6 +729,15 @@ struct btrfs_space_info {
729 u64 disk_total; /* total bytes on disk, takes mirrors into 729 u64 disk_total; /* total bytes on disk, takes mirrors into
730 account */ 730 account */
731 731
732 /*
733 * we bump reservation progress every time we decrement
734 * bytes_reserved. This way people waiting for reservations
735 * know something good has happened and they can check
736 * for progress. The number here isn't to be trusted, it
737 * just shows reclaim activity
738 */
739 unsigned long reservation_progress;
740
732 int full; /* indicates that we cannot allocate any more 741 int full; /* indicates that we cannot allocate any more
733 chunks for this space */ 742 chunks for this space */
734 int force_alloc; /* set if we need to force a chunk alloc for 743 int force_alloc; /* set if we need to force a chunk alloc for
@@ -1254,6 +1263,7 @@ struct btrfs_root {
1254#define BTRFS_MOUNT_SPACE_CACHE (1 << 12) 1263#define BTRFS_MOUNT_SPACE_CACHE (1 << 12)
1255#define BTRFS_MOUNT_CLEAR_CACHE (1 << 13) 1264#define BTRFS_MOUNT_CLEAR_CACHE (1 << 13)
1256#define BTRFS_MOUNT_USER_SUBVOL_RM_ALLOWED (1 << 14) 1265#define BTRFS_MOUNT_USER_SUBVOL_RM_ALLOWED (1 << 14)
1266#define BTRFS_MOUNT_ENOSPC_DEBUG (1 << 15)
1257 1267
1258#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) 1268#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt)
1259#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) 1269#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt)
@@ -2218,6 +2228,8 @@ int btrfs_error_unpin_extent_range(struct btrfs_root *root,
2218 u64 start, u64 end); 2228 u64 start, u64 end);
2219int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr, 2229int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr,
2220 u64 num_bytes); 2230 u64 num_bytes);
2231int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
2232 struct btrfs_root *root, u64 type);
2221 2233
2222/* ctree.c */ 2234/* ctree.c */
2223int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, 2235int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index fdce8799b98d..e1aa8d607bc7 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -359,10 +359,14 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
359 359
360 tree = &BTRFS_I(page->mapping->host)->io_tree; 360 tree = &BTRFS_I(page->mapping->host)->io_tree;
361 361
362 if (page->private == EXTENT_PAGE_PRIVATE) 362 if (page->private == EXTENT_PAGE_PRIVATE) {
363 WARN_ON(1);
363 goto out; 364 goto out;
364 if (!page->private) 365 }
366 if (!page->private) {
367 WARN_ON(1);
365 goto out; 368 goto out;
369 }
366 len = page->private >> 2; 370 len = page->private >> 2;
367 WARN_ON(len == 0); 371 WARN_ON(len == 0);
368 372
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 4e7e012ad667..7b3089b5c2df 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3342,15 +3342,16 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
3342 u64 max_reclaim; 3342 u64 max_reclaim;
3343 u64 reclaimed = 0; 3343 u64 reclaimed = 0;
3344 long time_left; 3344 long time_left;
3345 int pause = 1;
3346 int nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT; 3345 int nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT;
3347 int loops = 0; 3346 int loops = 0;
3347 unsigned long progress;
3348 3348
3349 block_rsv = &root->fs_info->delalloc_block_rsv; 3349 block_rsv = &root->fs_info->delalloc_block_rsv;
3350 space_info = block_rsv->space_info; 3350 space_info = block_rsv->space_info;
3351 3351
3352 smp_mb(); 3352 smp_mb();
3353 reserved = space_info->bytes_reserved; 3353 reserved = space_info->bytes_reserved;
3354 progress = space_info->reservation_progress;
3354 3355
3355 if (reserved == 0) 3356 if (reserved == 0)
3356 return 0; 3357 return 0;
@@ -3365,31 +3366,36 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
3365 writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages); 3366 writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages);
3366 3367
3367 spin_lock(&space_info->lock); 3368 spin_lock(&space_info->lock);
3368 if (reserved > space_info->bytes_reserved) { 3369 if (reserved > space_info->bytes_reserved)
3369 loops = 0;
3370 reclaimed += reserved - space_info->bytes_reserved; 3370 reclaimed += reserved - space_info->bytes_reserved;
3371 } else {
3372 loops++;
3373 }
3374 reserved = space_info->bytes_reserved; 3371 reserved = space_info->bytes_reserved;
3375 spin_unlock(&space_info->lock); 3372 spin_unlock(&space_info->lock);
3376 3373
3374 loops++;
3375
3377 if (reserved == 0 || reclaimed >= max_reclaim) 3376 if (reserved == 0 || reclaimed >= max_reclaim)
3378 break; 3377 break;
3379 3378
3380 if (trans && trans->transaction->blocked) 3379 if (trans && trans->transaction->blocked)
3381 return -EAGAIN; 3380 return -EAGAIN;
3382 3381
3383 __set_current_state(TASK_INTERRUPTIBLE); 3382 time_left = schedule_timeout_interruptible(1);
3384 time_left = schedule_timeout(pause);
3385 3383
3386 /* We were interrupted, exit */ 3384 /* We were interrupted, exit */
3387 if (time_left) 3385 if (time_left)
3388 break; 3386 break;
3389 3387
3390 pause <<= 1; 3388 /* we've kicked the IO a few times, if anything has been freed,
3391 if (pause > HZ / 10) 3389 * exit. There is no sense in looping here for a long time
3392 pause = HZ / 10; 3390 * when we really need to commit the transaction, or there are
3391 * just too many writers without enough free space
3392 */
3393
3394 if (loops > 3) {
3395 smp_mb();
3396 if (progress != space_info->reservation_progress)
3397 break;
3398 }
3393 3399
3394 } 3400 }
3395 return reclaimed >= to_reclaim; 3401 return reclaimed >= to_reclaim;
@@ -3612,6 +3618,7 @@ void block_rsv_release_bytes(struct btrfs_block_rsv *block_rsv,
3612 if (num_bytes) { 3618 if (num_bytes) {
3613 spin_lock(&space_info->lock); 3619 spin_lock(&space_info->lock);
3614 space_info->bytes_reserved -= num_bytes; 3620 space_info->bytes_reserved -= num_bytes;
3621 space_info->reservation_progress++;
3615 spin_unlock(&space_info->lock); 3622 spin_unlock(&space_info->lock);
3616 } 3623 }
3617 } 3624 }
@@ -3844,6 +3851,7 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
3844 if (block_rsv->reserved >= block_rsv->size) { 3851 if (block_rsv->reserved >= block_rsv->size) {
3845 num_bytes = block_rsv->reserved - block_rsv->size; 3852 num_bytes = block_rsv->reserved - block_rsv->size;
3846 sinfo->bytes_reserved -= num_bytes; 3853 sinfo->bytes_reserved -= num_bytes;
3854 sinfo->reservation_progress++;
3847 block_rsv->reserved = block_rsv->size; 3855 block_rsv->reserved = block_rsv->size;
3848 block_rsv->full = 1; 3856 block_rsv->full = 1;
3849 } 3857 }
@@ -4005,7 +4013,6 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
4005 to_reserve = 0; 4013 to_reserve = 0;
4006 } 4014 }
4007 spin_unlock(&BTRFS_I(inode)->accounting_lock); 4015 spin_unlock(&BTRFS_I(inode)->accounting_lock);
4008
4009 to_reserve += calc_csum_metadata_size(inode, num_bytes); 4016 to_reserve += calc_csum_metadata_size(inode, num_bytes);
4010 ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1); 4017 ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1);
4011 if (ret) 4018 if (ret)
@@ -4133,6 +4140,7 @@ static int update_block_group(struct btrfs_trans_handle *trans,
4133 btrfs_set_block_group_used(&cache->item, old_val); 4140 btrfs_set_block_group_used(&cache->item, old_val);
4134 cache->reserved -= num_bytes; 4141 cache->reserved -= num_bytes;
4135 cache->space_info->bytes_reserved -= num_bytes; 4142 cache->space_info->bytes_reserved -= num_bytes;
4143 cache->space_info->reservation_progress++;
4136 cache->space_info->bytes_used += num_bytes; 4144 cache->space_info->bytes_used += num_bytes;
4137 cache->space_info->disk_used += num_bytes * factor; 4145 cache->space_info->disk_used += num_bytes * factor;
4138 spin_unlock(&cache->lock); 4146 spin_unlock(&cache->lock);
@@ -4184,6 +4192,7 @@ static int pin_down_extent(struct btrfs_root *root,
4184 if (reserved) { 4192 if (reserved) {
4185 cache->reserved -= num_bytes; 4193 cache->reserved -= num_bytes;
4186 cache->space_info->bytes_reserved -= num_bytes; 4194 cache->space_info->bytes_reserved -= num_bytes;
4195 cache->space_info->reservation_progress++;
4187 } 4196 }
4188 spin_unlock(&cache->lock); 4197 spin_unlock(&cache->lock);
4189 spin_unlock(&cache->space_info->lock); 4198 spin_unlock(&cache->space_info->lock);
@@ -4234,6 +4243,7 @@ static int update_reserved_bytes(struct btrfs_block_group_cache *cache,
4234 space_info->bytes_readonly += num_bytes; 4243 space_info->bytes_readonly += num_bytes;
4235 cache->reserved -= num_bytes; 4244 cache->reserved -= num_bytes;
4236 space_info->bytes_reserved -= num_bytes; 4245 space_info->bytes_reserved -= num_bytes;
4246 space_info->reservation_progress++;
4237 } 4247 }
4238 spin_unlock(&cache->lock); 4248 spin_unlock(&cache->lock);
4239 spin_unlock(&space_info->lock); 4249 spin_unlock(&space_info->lock);
@@ -4712,6 +4722,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
4712 if (ret) { 4722 if (ret) {
4713 spin_lock(&cache->space_info->lock); 4723 spin_lock(&cache->space_info->lock);
4714 cache->space_info->bytes_reserved -= buf->len; 4724 cache->space_info->bytes_reserved -= buf->len;
4725 cache->space_info->reservation_progress++;
4715 spin_unlock(&cache->space_info->lock); 4726 spin_unlock(&cache->space_info->lock);
4716 } 4727 }
4717 goto out; 4728 goto out;
@@ -5376,7 +5387,7 @@ again:
5376 num_bytes, data, 1); 5387 num_bytes, data, 1);
5377 goto again; 5388 goto again;
5378 } 5389 }
5379 if (ret == -ENOSPC) { 5390 if (ret == -ENOSPC && btrfs_test_opt(root, ENOSPC_DEBUG)) {
5380 struct btrfs_space_info *sinfo; 5391 struct btrfs_space_info *sinfo;
5381 5392
5382 sinfo = __find_space_info(root->fs_info, data); 5393 sinfo = __find_space_info(root->fs_info, data);
@@ -6583,7 +6594,7 @@ static noinline int relocate_data_extent(struct inode *reloc_inode,
6583 u64 end = start + extent_key->offset - 1; 6594 u64 end = start + extent_key->offset - 1;
6584 6595
6585 em = alloc_extent_map(GFP_NOFS); 6596 em = alloc_extent_map(GFP_NOFS);
6586 BUG_ON(!em || IS_ERR(em)); 6597 BUG_ON(!em);
6587 6598
6588 em->start = start; 6599 em->start = start;
6589 em->len = extent_key->offset; 6600 em->len = extent_key->offset;
@@ -8065,6 +8076,13 @@ out:
8065 return ret; 8076 return ret;
8066} 8077}
8067 8078
8079int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
8080 struct btrfs_root *root, u64 type)
8081{
8082 u64 alloc_flags = get_alloc_profile(root, type);
8083 return do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1);
8084}
8085
8068/* 8086/*
8069 * helper to account the unused space of all the readonly block group in the 8087 * helper to account the unused space of all the readonly block group in the
8070 * list. takes mirrors into account. 8088 * list. takes mirrors into account.
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 5e76a474cb7e..714adc4ac4c2 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1433,12 +1433,13 @@ int extent_clear_unlock_delalloc(struct inode *inode,
1433 */ 1433 */
1434u64 count_range_bits(struct extent_io_tree *tree, 1434u64 count_range_bits(struct extent_io_tree *tree,
1435 u64 *start, u64 search_end, u64 max_bytes, 1435 u64 *start, u64 search_end, u64 max_bytes,
1436 unsigned long bits) 1436 unsigned long bits, int contig)
1437{ 1437{
1438 struct rb_node *node; 1438 struct rb_node *node;
1439 struct extent_state *state; 1439 struct extent_state *state;
1440 u64 cur_start = *start; 1440 u64 cur_start = *start;
1441 u64 total_bytes = 0; 1441 u64 total_bytes = 0;
1442 u64 last = 0;
1442 int found = 0; 1443 int found = 0;
1443 1444
1444 if (search_end <= cur_start) { 1445 if (search_end <= cur_start) {
@@ -1463,7 +1464,9 @@ u64 count_range_bits(struct extent_io_tree *tree,
1463 state = rb_entry(node, struct extent_state, rb_node); 1464 state = rb_entry(node, struct extent_state, rb_node);
1464 if (state->start > search_end) 1465 if (state->start > search_end)
1465 break; 1466 break;
1466 if (state->end >= cur_start && (state->state & bits)) { 1467 if (contig && found && state->start > last + 1)
1468 break;
1469 if (state->end >= cur_start && (state->state & bits) == bits) {
1467 total_bytes += min(search_end, state->end) + 1 - 1470 total_bytes += min(search_end, state->end) + 1 -
1468 max(cur_start, state->start); 1471 max(cur_start, state->start);
1469 if (total_bytes >= max_bytes) 1472 if (total_bytes >= max_bytes)
@@ -1472,6 +1475,9 @@ u64 count_range_bits(struct extent_io_tree *tree,
1472 *start = state->start; 1475 *start = state->start;
1473 found = 1; 1476 found = 1;
1474 } 1477 }
1478 last = state->end;
1479 } else if (contig && found) {
1480 break;
1475 } 1481 }
1476 node = rb_next(node); 1482 node = rb_next(node);
1477 if (!node) 1483 if (!node)
@@ -1946,6 +1952,7 @@ void set_page_extent_mapped(struct page *page)
1946 1952
1947static void set_page_extent_head(struct page *page, unsigned long len) 1953static void set_page_extent_head(struct page *page, unsigned long len)
1948{ 1954{
1955 WARN_ON(!PagePrivate(page));
1949 set_page_private(page, EXTENT_PAGE_PRIVATE_FIRST_PAGE | len << 2); 1956 set_page_private(page, EXTENT_PAGE_PRIVATE_FIRST_PAGE | len << 2);
1950} 1957}
1951 1958
@@ -2821,9 +2828,17 @@ int try_release_extent_state(struct extent_map_tree *map,
2821 * at this point we can safely clear everything except the 2828 * at this point we can safely clear everything except the
2822 * locked bit and the nodatasum bit 2829 * locked bit and the nodatasum bit
2823 */ 2830 */
2824 clear_extent_bit(tree, start, end, 2831 ret = clear_extent_bit(tree, start, end,
2825 ~(EXTENT_LOCKED | EXTENT_NODATASUM), 2832 ~(EXTENT_LOCKED | EXTENT_NODATASUM),
2826 0, 0, NULL, mask); 2833 0, 0, NULL, mask);
2834
2835 /* if clear_extent_bit failed for enomem reasons,
2836 * we can't allow the release to continue.
2837 */
2838 if (ret < 0)
2839 ret = 0;
2840 else
2841 ret = 1;
2827 } 2842 }
2828 return ret; 2843 return ret;
2829} 2844}
@@ -2903,6 +2918,46 @@ out:
2903 return sector; 2918 return sector;
2904} 2919}
2905 2920
2921/*
2922 * helper function for fiemap, which doesn't want to see any holes.
2923 * This maps until we find something past 'last'
2924 */
2925static struct extent_map *get_extent_skip_holes(struct inode *inode,
2926 u64 offset,
2927 u64 last,
2928 get_extent_t *get_extent)
2929{
2930 u64 sectorsize = BTRFS_I(inode)->root->sectorsize;
2931 struct extent_map *em;
2932 u64 len;
2933
2934 if (offset >= last)
2935 return NULL;
2936
2937 while(1) {
2938 len = last - offset;
2939 if (len == 0)
2940 break;
2941 len = (len + sectorsize - 1) & ~(sectorsize - 1);
2942 em = get_extent(inode, NULL, 0, offset, len, 0);
2943 if (!em || IS_ERR(em))
2944 return em;
2945
2946 /* if this isn't a hole return it */
2947 if (!test_bit(EXTENT_FLAG_VACANCY, &em->flags) &&
2948 em->block_start != EXTENT_MAP_HOLE) {
2949 return em;
2950 }
2951
2952 /* this is a hole, advance to the next extent */
2953 offset = extent_map_end(em);
2954 free_extent_map(em);
2955 if (offset >= last)
2956 break;
2957 }
2958 return NULL;
2959}
2960
2906int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 2961int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2907 __u64 start, __u64 len, get_extent_t *get_extent) 2962 __u64 start, __u64 len, get_extent_t *get_extent)
2908{ 2963{
@@ -2912,16 +2967,19 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2912 u32 flags = 0; 2967 u32 flags = 0;
2913 u32 found_type; 2968 u32 found_type;
2914 u64 last; 2969 u64 last;
2970 u64 last_for_get_extent = 0;
2915 u64 disko = 0; 2971 u64 disko = 0;
2972 u64 isize = i_size_read(inode);
2916 struct btrfs_key found_key; 2973 struct btrfs_key found_key;
2917 struct extent_map *em = NULL; 2974 struct extent_map *em = NULL;
2918 struct extent_state *cached_state = NULL; 2975 struct extent_state *cached_state = NULL;
2919 struct btrfs_path *path; 2976 struct btrfs_path *path;
2920 struct btrfs_file_extent_item *item; 2977 struct btrfs_file_extent_item *item;
2921 int end = 0; 2978 int end = 0;
2922 u64 em_start = 0, em_len = 0; 2979 u64 em_start = 0;
2980 u64 em_len = 0;
2981 u64 em_end = 0;
2923 unsigned long emflags; 2982 unsigned long emflags;
2924 int hole = 0;
2925 2983
2926 if (len == 0) 2984 if (len == 0)
2927 return -EINVAL; 2985 return -EINVAL;
@@ -2931,6 +2989,10 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2931 return -ENOMEM; 2989 return -ENOMEM;
2932 path->leave_spinning = 1; 2990 path->leave_spinning = 1;
2933 2991
2992 /*
2993 * lookup the last file extent. We're not using i_size here
2994 * because there might be preallocation past i_size
2995 */
2934 ret = btrfs_lookup_file_extent(NULL, BTRFS_I(inode)->root, 2996 ret = btrfs_lookup_file_extent(NULL, BTRFS_I(inode)->root,
2935 path, inode->i_ino, -1, 0); 2997 path, inode->i_ino, -1, 0);
2936 if (ret < 0) { 2998 if (ret < 0) {
@@ -2944,18 +3006,38 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2944 btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]); 3006 btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]);
2945 found_type = btrfs_key_type(&found_key); 3007 found_type = btrfs_key_type(&found_key);
2946 3008
2947 /* No extents, just return */ 3009 /* No extents, but there might be delalloc bits */
2948 if (found_key.objectid != inode->i_ino || 3010 if (found_key.objectid != inode->i_ino ||
2949 found_type != BTRFS_EXTENT_DATA_KEY) { 3011 found_type != BTRFS_EXTENT_DATA_KEY) {
2950 btrfs_free_path(path); 3012 /* have to trust i_size as the end */
2951 return 0; 3013 last = (u64)-1;
3014 last_for_get_extent = isize;
3015 } else {
3016 /*
3017 * remember the start of the last extent. There are a
3018 * bunch of different factors that go into the length of the
3019 * extent, so its much less complex to remember where it started
3020 */
3021 last = found_key.offset;
3022 last_for_get_extent = last + 1;
2952 } 3023 }
2953 last = found_key.offset;
2954 btrfs_free_path(path); 3024 btrfs_free_path(path);
2955 3025
3026 /*
3027 * we might have some extents allocated but more delalloc past those
3028 * extents. so, we trust isize unless the start of the last extent is
3029 * beyond isize
3030 */
3031 if (last < isize) {
3032 last = (u64)-1;
3033 last_for_get_extent = isize;
3034 }
3035
2956 lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0, 3036 lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0,
2957 &cached_state, GFP_NOFS); 3037 &cached_state, GFP_NOFS);
2958 em = get_extent(inode, NULL, 0, off, max - off, 0); 3038
3039 em = get_extent_skip_holes(inode, off, last_for_get_extent,
3040 get_extent);
2959 if (!em) 3041 if (!em)
2960 goto out; 3042 goto out;
2961 if (IS_ERR(em)) { 3043 if (IS_ERR(em)) {
@@ -2964,22 +3046,38 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2964 } 3046 }
2965 3047
2966 while (!end) { 3048 while (!end) {
2967 hole = 0; 3049 u64 offset_in_extent;
2968 off = em->start + em->len;
2969 if (off >= max)
2970 end = 1;
2971 3050
2972 if (em->block_start == EXTENT_MAP_HOLE) { 3051 /* break if the extent we found is outside the range */
2973 hole = 1; 3052 if (em->start >= max || extent_map_end(em) < off)
2974 goto next; 3053 break;
2975 }
2976 3054
2977 em_start = em->start; 3055 /*
2978 em_len = em->len; 3056 * get_extent may return an extent that starts before our
3057 * requested range. We have to make sure the ranges
3058 * we return to fiemap always move forward and don't
3059 * overlap, so adjust the offsets here
3060 */
3061 em_start = max(em->start, off);
2979 3062
3063 /*
3064 * record the offset from the start of the extent
3065 * for adjusting the disk offset below
3066 */
3067 offset_in_extent = em_start - em->start;
3068 em_end = extent_map_end(em);
3069 em_len = em_end - em_start;
3070 emflags = em->flags;
2980 disko = 0; 3071 disko = 0;
2981 flags = 0; 3072 flags = 0;
2982 3073
3074 /*
3075 * bump off for our next call to get_extent
3076 */
3077 off = extent_map_end(em);
3078 if (off >= max)
3079 end = 1;
3080
2983 if (em->block_start == EXTENT_MAP_LAST_BYTE) { 3081 if (em->block_start == EXTENT_MAP_LAST_BYTE) {
2984 end = 1; 3082 end = 1;
2985 flags |= FIEMAP_EXTENT_LAST; 3083 flags |= FIEMAP_EXTENT_LAST;
@@ -2990,42 +3088,34 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2990 flags |= (FIEMAP_EXTENT_DELALLOC | 3088 flags |= (FIEMAP_EXTENT_DELALLOC |
2991 FIEMAP_EXTENT_UNKNOWN); 3089 FIEMAP_EXTENT_UNKNOWN);
2992 } else { 3090 } else {
2993 disko = em->block_start; 3091 disko = em->block_start + offset_in_extent;
2994 } 3092 }
2995 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) 3093 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
2996 flags |= FIEMAP_EXTENT_ENCODED; 3094 flags |= FIEMAP_EXTENT_ENCODED;
2997 3095
2998next:
2999 emflags = em->flags;
3000 free_extent_map(em); 3096 free_extent_map(em);
3001 em = NULL; 3097 em = NULL;
3002 if (!end) { 3098 if ((em_start >= last) || em_len == (u64)-1 ||
3003 em = get_extent(inode, NULL, 0, off, max - off, 0); 3099 (last == (u64)-1 && isize <= em_end)) {
3004 if (!em)
3005 goto out;
3006 if (IS_ERR(em)) {
3007 ret = PTR_ERR(em);
3008 goto out;
3009 }
3010 emflags = em->flags;
3011 }
3012
3013 if (test_bit(EXTENT_FLAG_VACANCY, &emflags)) {
3014 flags |= FIEMAP_EXTENT_LAST; 3100 flags |= FIEMAP_EXTENT_LAST;
3015 end = 1; 3101 end = 1;
3016 } 3102 }
3017 3103
3018 if (em_start == last) { 3104 /* now scan forward to see if this is really the last extent. */
3105 em = get_extent_skip_holes(inode, off, last_for_get_extent,
3106 get_extent);
3107 if (IS_ERR(em)) {
3108 ret = PTR_ERR(em);
3109 goto out;
3110 }
3111 if (!em) {
3019 flags |= FIEMAP_EXTENT_LAST; 3112 flags |= FIEMAP_EXTENT_LAST;
3020 end = 1; 3113 end = 1;
3021 } 3114 }
3022 3115 ret = fiemap_fill_next_extent(fieinfo, em_start, disko,
3023 if (!hole) { 3116 em_len, flags);
3024 ret = fiemap_fill_next_extent(fieinfo, em_start, disko, 3117 if (ret)
3025 em_len, flags); 3118 goto out_free;
3026 if (ret)
3027 goto out_free;
3028 }
3029 } 3119 }
3030out_free: 3120out_free:
3031 free_extent_map(em); 3121 free_extent_map(em);
@@ -3194,7 +3284,13 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
3194 } 3284 }
3195 if (!PageUptodate(p)) 3285 if (!PageUptodate(p))
3196 uptodate = 0; 3286 uptodate = 0;
3197 unlock_page(p); 3287
3288 /*
3289 * see below about how we avoid a nasty race with release page
3290 * and why we unlock later
3291 */
3292 if (i != 0)
3293 unlock_page(p);
3198 } 3294 }
3199 if (uptodate) 3295 if (uptodate)
3200 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); 3296 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
@@ -3218,9 +3314,26 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
3218 atomic_inc(&eb->refs); 3314 atomic_inc(&eb->refs);
3219 spin_unlock(&tree->buffer_lock); 3315 spin_unlock(&tree->buffer_lock);
3220 radix_tree_preload_end(); 3316 radix_tree_preload_end();
3317
3318 /*
3319 * there is a race where release page may have
3320 * tried to find this extent buffer in the radix
3321 * but failed. It will tell the VM it is safe to
3322 * reclaim the, and it will clear the page private bit.
3323 * We must make sure to set the page private bit properly
3324 * after the extent buffer is in the radix tree so
3325 * it doesn't get lost
3326 */
3327 set_page_extent_mapped(eb->first_page);
3328 set_page_extent_head(eb->first_page, eb->len);
3329 if (!page0)
3330 unlock_page(eb->first_page);
3221 return eb; 3331 return eb;
3222 3332
3223free_eb: 3333free_eb:
3334 if (eb->first_page && !page0)
3335 unlock_page(eb->first_page);
3336
3224 if (!atomic_dec_and_test(&eb->refs)) 3337 if (!atomic_dec_and_test(&eb->refs))
3225 return exists; 3338 return exists;
3226 btrfs_release_extent_buffer(eb); 3339 btrfs_release_extent_buffer(eb);
@@ -3271,10 +3384,11 @@ int clear_extent_buffer_dirty(struct extent_io_tree *tree,
3271 continue; 3384 continue;
3272 3385
3273 lock_page(page); 3386 lock_page(page);
3387 WARN_ON(!PagePrivate(page));
3388
3389 set_page_extent_mapped(page);
3274 if (i == 0) 3390 if (i == 0)
3275 set_page_extent_head(page, eb->len); 3391 set_page_extent_head(page, eb->len);
3276 else
3277 set_page_private(page, EXTENT_PAGE_PRIVATE);
3278 3392
3279 clear_page_dirty_for_io(page); 3393 clear_page_dirty_for_io(page);
3280 spin_lock_irq(&page->mapping->tree_lock); 3394 spin_lock_irq(&page->mapping->tree_lock);
@@ -3464,6 +3578,13 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
3464 3578
3465 for (i = start_i; i < num_pages; i++) { 3579 for (i = start_i; i < num_pages; i++) {
3466 page = extent_buffer_page(eb, i); 3580 page = extent_buffer_page(eb, i);
3581
3582 WARN_ON(!PagePrivate(page));
3583
3584 set_page_extent_mapped(page);
3585 if (i == 0)
3586 set_page_extent_head(page, eb->len);
3587
3467 if (inc_all_pages) 3588 if (inc_all_pages)
3468 page_cache_get(page); 3589 page_cache_get(page);
3469 if (!PageUptodate(page)) { 3590 if (!PageUptodate(page)) {
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 7083cfafd061..9318dfefd59c 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -191,7 +191,7 @@ void extent_io_exit(void);
191 191
192u64 count_range_bits(struct extent_io_tree *tree, 192u64 count_range_bits(struct extent_io_tree *tree,
193 u64 *start, u64 search_end, 193 u64 *start, u64 search_end,
194 u64 max_bytes, unsigned long bits); 194 u64 max_bytes, unsigned long bits, int contig);
195 195
196void free_extent_state(struct extent_state *state); 196void free_extent_state(struct extent_state *state);
197int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, 197int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index b0e1fce12530..2b6c12e983b3 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -51,8 +51,8 @@ struct extent_map *alloc_extent_map(gfp_t mask)
51{ 51{
52 struct extent_map *em; 52 struct extent_map *em;
53 em = kmem_cache_alloc(extent_map_cache, mask); 53 em = kmem_cache_alloc(extent_map_cache, mask);
54 if (!em || IS_ERR(em)) 54 if (!em)
55 return em; 55 return NULL;
56 em->in_tree = 0; 56 em->in_tree = 0;
57 em->flags = 0; 57 em->flags = 0;
58 em->compress_type = BTRFS_COMPRESS_NONE; 58 em->compress_type = BTRFS_COMPRESS_NONE;
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index c1d3a818731a..f447b783bb84 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -70,6 +70,19 @@ static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
70 70
71 /* Flush processor's dcache for this page */ 71 /* Flush processor's dcache for this page */
72 flush_dcache_page(page); 72 flush_dcache_page(page);
73
74 /*
75 * if we get a partial write, we can end up with
76 * partially up to date pages. These add
77 * a lot of complexity, so make sure they don't
78 * happen by forcing this copy to be retried.
79 *
80 * The rest of the btrfs_file_write code will fall
81 * back to page at a time copies after we return 0.
82 */
83 if (!PageUptodate(page) && copied < count)
84 copied = 0;
85
73 iov_iter_advance(i, copied); 86 iov_iter_advance(i, copied);
74 write_bytes -= copied; 87 write_bytes -= copied;
75 total_copied += copied; 88 total_copied += copied;
@@ -186,6 +199,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
186 split = alloc_extent_map(GFP_NOFS); 199 split = alloc_extent_map(GFP_NOFS);
187 if (!split2) 200 if (!split2)
188 split2 = alloc_extent_map(GFP_NOFS); 201 split2 = alloc_extent_map(GFP_NOFS);
202 BUG_ON(!split || !split2);
189 203
190 write_lock(&em_tree->lock); 204 write_lock(&em_tree->lock);
191 em = lookup_extent_mapping(em_tree, start, len); 205 em = lookup_extent_mapping(em_tree, start, len);
@@ -762,6 +776,27 @@ out:
762} 776}
763 777
764/* 778/*
779 * on error we return an unlocked page and the error value
780 * on success we return a locked page and 0
781 */
782static int prepare_uptodate_page(struct page *page, u64 pos)
783{
784 int ret = 0;
785
786 if ((pos & (PAGE_CACHE_SIZE - 1)) && !PageUptodate(page)) {
787 ret = btrfs_readpage(NULL, page);
788 if (ret)
789 return ret;
790 lock_page(page);
791 if (!PageUptodate(page)) {
792 unlock_page(page);
793 return -EIO;
794 }
795 }
796 return 0;
797}
798
799/*
765 * this gets pages into the page cache and locks them down, it also properly 800 * this gets pages into the page cache and locks them down, it also properly
766 * waits for data=ordered extents to finish before allowing the pages to be 801 * waits for data=ordered extents to finish before allowing the pages to be
767 * modified. 802 * modified.
@@ -776,6 +811,7 @@ static noinline int prepare_pages(struct btrfs_root *root, struct file *file,
776 unsigned long index = pos >> PAGE_CACHE_SHIFT; 811 unsigned long index = pos >> PAGE_CACHE_SHIFT;
777 struct inode *inode = fdentry(file)->d_inode; 812 struct inode *inode = fdentry(file)->d_inode;
778 int err = 0; 813 int err = 0;
814 int faili = 0;
779 u64 start_pos; 815 u64 start_pos;
780 u64 last_pos; 816 u64 last_pos;
781 817
@@ -793,15 +829,24 @@ again:
793 for (i = 0; i < num_pages; i++) { 829 for (i = 0; i < num_pages; i++) {
794 pages[i] = grab_cache_page(inode->i_mapping, index + i); 830 pages[i] = grab_cache_page(inode->i_mapping, index + i);
795 if (!pages[i]) { 831 if (!pages[i]) {
796 int c; 832 faili = i - 1;
797 for (c = i - 1; c >= 0; c--) { 833 err = -ENOMEM;
798 unlock_page(pages[c]); 834 goto fail;
799 page_cache_release(pages[c]); 835 }
800 } 836
801 return -ENOMEM; 837 if (i == 0)
838 err = prepare_uptodate_page(pages[i], pos);
839 if (i == num_pages - 1)
840 err = prepare_uptodate_page(pages[i],
841 pos + write_bytes);
842 if (err) {
843 page_cache_release(pages[i]);
844 faili = i - 1;
845 goto fail;
802 } 846 }
803 wait_on_page_writeback(pages[i]); 847 wait_on_page_writeback(pages[i]);
804 } 848 }
849 err = 0;
805 if (start_pos < inode->i_size) { 850 if (start_pos < inode->i_size) {
806 struct btrfs_ordered_extent *ordered; 851 struct btrfs_ordered_extent *ordered;
807 lock_extent_bits(&BTRFS_I(inode)->io_tree, 852 lock_extent_bits(&BTRFS_I(inode)->io_tree,
@@ -841,6 +886,14 @@ again:
841 WARN_ON(!PageLocked(pages[i])); 886 WARN_ON(!PageLocked(pages[i]));
842 } 887 }
843 return 0; 888 return 0;
889fail:
890 while (faili >= 0) {
891 unlock_page(pages[faili]);
892 page_cache_release(pages[faili]);
893 faili--;
894 }
895 return err;
896
844} 897}
845 898
846static ssize_t btrfs_file_aio_write(struct kiocb *iocb, 899static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
@@ -850,7 +903,6 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
850 struct file *file = iocb->ki_filp; 903 struct file *file = iocb->ki_filp;
851 struct inode *inode = fdentry(file)->d_inode; 904 struct inode *inode = fdentry(file)->d_inode;
852 struct btrfs_root *root = BTRFS_I(inode)->root; 905 struct btrfs_root *root = BTRFS_I(inode)->root;
853 struct page *pinned[2];
854 struct page **pages = NULL; 906 struct page **pages = NULL;
855 struct iov_iter i; 907 struct iov_iter i;
856 loff_t *ppos = &iocb->ki_pos; 908 loff_t *ppos = &iocb->ki_pos;
@@ -871,9 +923,6 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
871 will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) || 923 will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) ||
872 (file->f_flags & O_DIRECT)); 924 (file->f_flags & O_DIRECT));
873 925
874 pinned[0] = NULL;
875 pinned[1] = NULL;
876
877 start_pos = pos; 926 start_pos = pos;
878 927
879 vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); 928 vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
@@ -961,32 +1010,6 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
961 first_index = pos >> PAGE_CACHE_SHIFT; 1010 first_index = pos >> PAGE_CACHE_SHIFT;
962 last_index = (pos + iov_iter_count(&i)) >> PAGE_CACHE_SHIFT; 1011 last_index = (pos + iov_iter_count(&i)) >> PAGE_CACHE_SHIFT;
963 1012
964 /*
965 * there are lots of better ways to do this, but this code
966 * makes sure the first and last page in the file range are
967 * up to date and ready for cow
968 */
969 if ((pos & (PAGE_CACHE_SIZE - 1))) {
970 pinned[0] = grab_cache_page(inode->i_mapping, first_index);
971 if (!PageUptodate(pinned[0])) {
972 ret = btrfs_readpage(NULL, pinned[0]);
973 BUG_ON(ret);
974 wait_on_page_locked(pinned[0]);
975 } else {
976 unlock_page(pinned[0]);
977 }
978 }
979 if ((pos + iov_iter_count(&i)) & (PAGE_CACHE_SIZE - 1)) {
980 pinned[1] = grab_cache_page(inode->i_mapping, last_index);
981 if (!PageUptodate(pinned[1])) {
982 ret = btrfs_readpage(NULL, pinned[1]);
983 BUG_ON(ret);
984 wait_on_page_locked(pinned[1]);
985 } else {
986 unlock_page(pinned[1]);
987 }
988 }
989
990 while (iov_iter_count(&i) > 0) { 1013 while (iov_iter_count(&i) > 0) {
991 size_t offset = pos & (PAGE_CACHE_SIZE - 1); 1014 size_t offset = pos & (PAGE_CACHE_SIZE - 1);
992 size_t write_bytes = min(iov_iter_count(&i), 1015 size_t write_bytes = min(iov_iter_count(&i),
@@ -1023,8 +1046,20 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
1023 1046
1024 copied = btrfs_copy_from_user(pos, num_pages, 1047 copied = btrfs_copy_from_user(pos, num_pages,
1025 write_bytes, pages, &i); 1048 write_bytes, pages, &i);
1026 dirty_pages = (copied + offset + PAGE_CACHE_SIZE - 1) >> 1049
1027 PAGE_CACHE_SHIFT; 1050 /*
1051 * if we have trouble faulting in the pages, fall
1052 * back to one page at a time
1053 */
1054 if (copied < write_bytes)
1055 nrptrs = 1;
1056
1057 if (copied == 0)
1058 dirty_pages = 0;
1059 else
1060 dirty_pages = (copied + offset +
1061 PAGE_CACHE_SIZE - 1) >>
1062 PAGE_CACHE_SHIFT;
1028 1063
1029 if (num_pages > dirty_pages) { 1064 if (num_pages > dirty_pages) {
1030 if (copied > 0) 1065 if (copied > 0)
@@ -1068,10 +1103,6 @@ out:
1068 err = ret; 1103 err = ret;
1069 1104
1070 kfree(pages); 1105 kfree(pages);
1071 if (pinned[0])
1072 page_cache_release(pinned[0]);
1073 if (pinned[1])
1074 page_cache_release(pinned[1]);
1075 *ppos = pos; 1106 *ppos = pos;
1076 1107
1077 /* 1108 /*
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index bcc461a9695f..9007bbd01dbf 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -644,6 +644,7 @@ retry:
644 async_extent->ram_size - 1, 0); 644 async_extent->ram_size - 1, 0);
645 645
646 em = alloc_extent_map(GFP_NOFS); 646 em = alloc_extent_map(GFP_NOFS);
647 BUG_ON(!em);
647 em->start = async_extent->start; 648 em->start = async_extent->start;
648 em->len = async_extent->ram_size; 649 em->len = async_extent->ram_size;
649 em->orig_start = em->start; 650 em->orig_start = em->start;
@@ -820,6 +821,7 @@ static noinline int cow_file_range(struct inode *inode,
820 BUG_ON(ret); 821 BUG_ON(ret);
821 822
822 em = alloc_extent_map(GFP_NOFS); 823 em = alloc_extent_map(GFP_NOFS);
824 BUG_ON(!em);
823 em->start = start; 825 em->start = start;
824 em->orig_start = em->start; 826 em->orig_start = em->start;
825 ram_size = ins.offset; 827 ram_size = ins.offset;
@@ -1169,6 +1171,7 @@ out_check:
1169 struct extent_map_tree *em_tree; 1171 struct extent_map_tree *em_tree;
1170 em_tree = &BTRFS_I(inode)->extent_tree; 1172 em_tree = &BTRFS_I(inode)->extent_tree;
1171 em = alloc_extent_map(GFP_NOFS); 1173 em = alloc_extent_map(GFP_NOFS);
1174 BUG_ON(!em);
1172 em->start = cur_offset; 1175 em->start = cur_offset;
1173 em->orig_start = em->start; 1176 em->orig_start = em->start;
1174 em->len = num_bytes; 1177 em->len = num_bytes;
@@ -1910,7 +1913,7 @@ static int btrfs_clean_io_failures(struct inode *inode, u64 start)
1910 1913
1911 private = 0; 1914 private = 0;
1912 if (count_range_bits(&BTRFS_I(inode)->io_failure_tree, &private, 1915 if (count_range_bits(&BTRFS_I(inode)->io_failure_tree, &private,
1913 (u64)-1, 1, EXTENT_DIRTY)) { 1916 (u64)-1, 1, EXTENT_DIRTY, 0)) {
1914 ret = get_state_private(&BTRFS_I(inode)->io_failure_tree, 1917 ret = get_state_private(&BTRFS_I(inode)->io_failure_tree,
1915 start, &private_failure); 1918 start, &private_failure);
1916 if (ret == 0) { 1919 if (ret == 0) {
@@ -4818,10 +4821,11 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
4818 goto fail; 4821 goto fail;
4819 4822
4820 /* 4823 /*
4821 * 1 item for inode ref 4824 * 2 items for inode and inode ref
4822 * 2 items for dir items 4825 * 2 items for dir items
4826 * 1 item for parent inode
4823 */ 4827 */
4824 trans = btrfs_start_transaction(root, 3); 4828 trans = btrfs_start_transaction(root, 5);
4825 if (IS_ERR(trans)) { 4829 if (IS_ERR(trans)) {
4826 err = PTR_ERR(trans); 4830 err = PTR_ERR(trans);
4827 goto fail; 4831 goto fail;
@@ -5277,6 +5281,128 @@ out:
5277 return em; 5281 return em;
5278} 5282}
5279 5283
5284struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *page,
5285 size_t pg_offset, u64 start, u64 len,
5286 int create)
5287{
5288 struct extent_map *em;
5289 struct extent_map *hole_em = NULL;
5290 u64 range_start = start;
5291 u64 end;
5292 u64 found;
5293 u64 found_end;
5294 int err = 0;
5295
5296 em = btrfs_get_extent(inode, page, pg_offset, start, len, create);
5297 if (IS_ERR(em))
5298 return em;
5299 if (em) {
5300 /*
5301 * if our em maps to a hole, there might
5302 * actually be delalloc bytes behind it
5303 */
5304 if (em->block_start != EXTENT_MAP_HOLE)
5305 return em;
5306 else
5307 hole_em = em;
5308 }
5309
5310 /* check to see if we've wrapped (len == -1 or similar) */
5311 end = start + len;
5312 if (end < start)
5313 end = (u64)-1;
5314 else
5315 end -= 1;
5316
5317 em = NULL;
5318
5319 /* ok, we didn't find anything, lets look for delalloc */
5320 found = count_range_bits(&BTRFS_I(inode)->io_tree, &range_start,
5321 end, len, EXTENT_DELALLOC, 1);
5322 found_end = range_start + found;
5323 if (found_end < range_start)
5324 found_end = (u64)-1;
5325
5326 /*
5327 * we didn't find anything useful, return
5328 * the original results from get_extent()
5329 */
5330 if (range_start > end || found_end <= start) {
5331 em = hole_em;
5332 hole_em = NULL;
5333 goto out;
5334 }
5335
5336 /* adjust the range_start to make sure it doesn't
5337 * go backwards from the start they passed in
5338 */
5339 range_start = max(start,range_start);
5340 found = found_end - range_start;
5341
5342 if (found > 0) {
5343 u64 hole_start = start;
5344 u64 hole_len = len;
5345
5346 em = alloc_extent_map(GFP_NOFS);
5347 if (!em) {
5348 err = -ENOMEM;
5349 goto out;
5350 }
5351 /*
5352 * when btrfs_get_extent can't find anything it
5353 * returns one huge hole
5354 *
5355 * make sure what it found really fits our range, and
5356 * adjust to make sure it is based on the start from
5357 * the caller
5358 */
5359 if (hole_em) {
5360 u64 calc_end = extent_map_end(hole_em);
5361
5362 if (calc_end <= start || (hole_em->start > end)) {
5363 free_extent_map(hole_em);
5364 hole_em = NULL;
5365 } else {
5366 hole_start = max(hole_em->start, start);
5367 hole_len = calc_end - hole_start;
5368 }
5369 }
5370 em->bdev = NULL;
5371 if (hole_em && range_start > hole_start) {
5372 /* our hole starts before our delalloc, so we
5373 * have to return just the parts of the hole
5374 * that go until the delalloc starts
5375 */
5376 em->len = min(hole_len,
5377 range_start - hole_start);
5378 em->start = hole_start;
5379 em->orig_start = hole_start;
5380 /*
5381 * don't adjust block start at all,
5382 * it is fixed at EXTENT_MAP_HOLE
5383 */
5384 em->block_start = hole_em->block_start;
5385 em->block_len = hole_len;
5386 } else {
5387 em->start = range_start;
5388 em->len = found;
5389 em->orig_start = range_start;
5390 em->block_start = EXTENT_MAP_DELALLOC;
5391 em->block_len = found;
5392 }
5393 } else if (hole_em) {
5394 return hole_em;
5395 }
5396out:
5397
5398 free_extent_map(hole_em);
5399 if (err) {
5400 free_extent_map(em);
5401 return ERR_PTR(err);
5402 }
5403 return em;
5404}
5405
5280static struct extent_map *btrfs_new_extent_direct(struct inode *inode, 5406static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
5281 u64 start, u64 len) 5407 u64 start, u64 len)
5282{ 5408{
@@ -5931,6 +6057,7 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
5931 if (!skip_sum) { 6057 if (!skip_sum) {
5932 dip->csums = kmalloc(sizeof(u32) * bio->bi_vcnt, GFP_NOFS); 6058 dip->csums = kmalloc(sizeof(u32) * bio->bi_vcnt, GFP_NOFS);
5933 if (!dip->csums) { 6059 if (!dip->csums) {
6060 kfree(dip);
5934 ret = -ENOMEM; 6061 ret = -ENOMEM;
5935 goto free_ordered; 6062 goto free_ordered;
5936 } 6063 }
@@ -6099,7 +6226,7 @@ out:
6099static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 6226static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
6100 __u64 start, __u64 len) 6227 __u64 start, __u64 len)
6101{ 6228{
6102 return extent_fiemap(inode, fieinfo, start, len, btrfs_get_extent); 6229 return extent_fiemap(inode, fieinfo, start, len, btrfs_get_extent_fiemap);
6103} 6230}
6104 6231
6105int btrfs_readpage(struct file *file, struct page *page) 6232int btrfs_readpage(struct file *file, struct page *page)
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 02d224e8c83f..5fdb2abc4fa7 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1071,12 +1071,15 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
1071 if (copy_from_user(&flags, arg, sizeof(flags))) 1071 if (copy_from_user(&flags, arg, sizeof(flags)))
1072 return -EFAULT; 1072 return -EFAULT;
1073 1073
1074 if (flags & ~BTRFS_SUBVOL_CREATE_ASYNC) 1074 if (flags & BTRFS_SUBVOL_CREATE_ASYNC)
1075 return -EINVAL; 1075 return -EINVAL;
1076 1076
1077 if (flags & ~BTRFS_SUBVOL_RDONLY) 1077 if (flags & ~BTRFS_SUBVOL_RDONLY)
1078 return -EOPNOTSUPP; 1078 return -EOPNOTSUPP;
1079 1079
1080 if (!is_owner_or_cap(inode))
1081 return -EACCES;
1082
1080 down_write(&root->fs_info->subvol_sem); 1083 down_write(&root->fs_info->subvol_sem);
1081 1084
1082 /* nothing to do */ 1085 /* nothing to do */
@@ -1097,7 +1100,7 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
1097 goto out_reset; 1100 goto out_reset;
1098 } 1101 }
1099 1102
1100 ret = btrfs_update_root(trans, root, 1103 ret = btrfs_update_root(trans, root->fs_info->tree_root,
1101 &root->root_key, &root->root_item); 1104 &root->root_key, &root->root_item);
1102 1105
1103 btrfs_commit_transaction(trans, root); 1106 btrfs_commit_transaction(trans, root);
@@ -2208,7 +2211,7 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
2208 int num_types = 4; 2211 int num_types = 4;
2209 int alloc_size; 2212 int alloc_size;
2210 int ret = 0; 2213 int ret = 0;
2211 int slot_count = 0; 2214 u64 slot_count = 0;
2212 int i, c; 2215 int i, c;
2213 2216
2214 if (copy_from_user(&space_args, 2217 if (copy_from_user(&space_args,
@@ -2247,7 +2250,7 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
2247 goto out; 2250 goto out;
2248 } 2251 }
2249 2252
2250 slot_count = min_t(int, space_args.space_slots, slot_count); 2253 slot_count = min_t(u64, space_args.space_slots, slot_count);
2251 2254
2252 alloc_size = sizeof(*dest) * slot_count; 2255 alloc_size = sizeof(*dest) * slot_count;
2253 2256
@@ -2267,6 +2270,9 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
2267 for (i = 0; i < num_types; i++) { 2270 for (i = 0; i < num_types; i++) {
2268 struct btrfs_space_info *tmp; 2271 struct btrfs_space_info *tmp;
2269 2272
2273 if (!slot_count)
2274 break;
2275
2270 info = NULL; 2276 info = NULL;
2271 rcu_read_lock(); 2277 rcu_read_lock();
2272 list_for_each_entry_rcu(tmp, &root->fs_info->space_info, 2278 list_for_each_entry_rcu(tmp, &root->fs_info->space_info,
@@ -2288,7 +2294,10 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
2288 memcpy(dest, &space, sizeof(space)); 2294 memcpy(dest, &space, sizeof(space));
2289 dest++; 2295 dest++;
2290 space_args.total_spaces++; 2296 space_args.total_spaces++;
2297 slot_count--;
2291 } 2298 }
2299 if (!slot_count)
2300 break;
2292 } 2301 }
2293 up_read(&info->groups_sem); 2302 up_read(&info->groups_sem);
2294 } 2303 }
diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c
index cc9b450399df..a178f5ebea78 100644
--- a/fs/btrfs/lzo.c
+++ b/fs/btrfs/lzo.c
@@ -280,6 +280,7 @@ static int lzo_decompress_biovec(struct list_head *ws,
280 unsigned long tot_out; 280 unsigned long tot_out;
281 unsigned long tot_len; 281 unsigned long tot_len;
282 char *buf; 282 char *buf;
283 bool may_late_unmap, need_unmap;
283 284
284 data_in = kmap(pages_in[0]); 285 data_in = kmap(pages_in[0]);
285 tot_len = read_compress_length(data_in); 286 tot_len = read_compress_length(data_in);
@@ -300,11 +301,13 @@ static int lzo_decompress_biovec(struct list_head *ws,
300 301
301 tot_in += in_len; 302 tot_in += in_len;
302 working_bytes = in_len; 303 working_bytes = in_len;
304 may_late_unmap = need_unmap = false;
303 305
304 /* fast path: avoid using the working buffer */ 306 /* fast path: avoid using the working buffer */
305 if (in_page_bytes_left >= in_len) { 307 if (in_page_bytes_left >= in_len) {
306 buf = data_in + in_offset; 308 buf = data_in + in_offset;
307 bytes = in_len; 309 bytes = in_len;
310 may_late_unmap = true;
308 goto cont; 311 goto cont;
309 } 312 }
310 313
@@ -329,14 +332,17 @@ cont:
329 if (working_bytes == 0 && tot_in >= tot_len) 332 if (working_bytes == 0 && tot_in >= tot_len)
330 break; 333 break;
331 334
332 kunmap(pages_in[page_in_index]); 335 if (page_in_index + 1 >= total_pages_in) {
333 page_in_index++;
334 if (page_in_index >= total_pages_in) {
335 ret = -1; 336 ret = -1;
336 data_in = NULL;
337 goto done; 337 goto done;
338 } 338 }
339 data_in = kmap(pages_in[page_in_index]); 339
340 if (may_late_unmap)
341 need_unmap = true;
342 else
343 kunmap(pages_in[page_in_index]);
344
345 data_in = kmap(pages_in[++page_in_index]);
340 346
341 in_page_bytes_left = PAGE_CACHE_SIZE; 347 in_page_bytes_left = PAGE_CACHE_SIZE;
342 in_offset = 0; 348 in_offset = 0;
@@ -346,6 +352,8 @@ cont:
346 out_len = lzo1x_worst_compress(PAGE_CACHE_SIZE); 352 out_len = lzo1x_worst_compress(PAGE_CACHE_SIZE);
347 ret = lzo1x_decompress_safe(buf, in_len, workspace->buf, 353 ret = lzo1x_decompress_safe(buf, in_len, workspace->buf,
348 &out_len); 354 &out_len);
355 if (need_unmap)
356 kunmap(pages_in[page_in_index - 1]);
349 if (ret != LZO_E_OK) { 357 if (ret != LZO_E_OK) {
350 printk(KERN_WARNING "btrfs decompress failed\n"); 358 printk(KERN_WARNING "btrfs decompress failed\n");
351 ret = -1; 359 ret = -1;
@@ -363,8 +371,7 @@ cont:
363 break; 371 break;
364 } 372 }
365done: 373done:
366 if (data_in) 374 kunmap(pages_in[page_in_index]);
367 kunmap(pages_in[page_in_index]);
368 return ret; 375 return ret;
369} 376}
370 377
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 1f5556acb530..31ade5802ae8 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -1157,6 +1157,7 @@ static int clone_backref_node(struct btrfs_trans_handle *trans,
1157 new_node->bytenr = dest->node->start; 1157 new_node->bytenr = dest->node->start;
1158 new_node->level = node->level; 1158 new_node->level = node->level;
1159 new_node->lowest = node->lowest; 1159 new_node->lowest = node->lowest;
1160 new_node->checked = 1;
1160 new_node->root = dest; 1161 new_node->root = dest;
1161 1162
1162 if (!node->lowest) { 1163 if (!node->lowest) {
@@ -3653,6 +3654,7 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
3653 u32 item_size; 3654 u32 item_size;
3654 int ret; 3655 int ret;
3655 int err = 0; 3656 int err = 0;
3657 int progress = 0;
3656 3658
3657 path = btrfs_alloc_path(); 3659 path = btrfs_alloc_path();
3658 if (!path) 3660 if (!path)
@@ -3665,9 +3667,10 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
3665 } 3667 }
3666 3668
3667 while (1) { 3669 while (1) {
3670 progress++;
3668 trans = btrfs_start_transaction(rc->extent_root, 0); 3671 trans = btrfs_start_transaction(rc->extent_root, 0);
3669 BUG_ON(IS_ERR(trans)); 3672 BUG_ON(IS_ERR(trans));
3670 3673restart:
3671 if (update_backref_cache(trans, &rc->backref_cache)) { 3674 if (update_backref_cache(trans, &rc->backref_cache)) {
3672 btrfs_end_transaction(trans, rc->extent_root); 3675 btrfs_end_transaction(trans, rc->extent_root);
3673 continue; 3676 continue;
@@ -3780,6 +3783,15 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
3780 } 3783 }
3781 } 3784 }
3782 } 3785 }
3786 if (trans && progress && err == -ENOSPC) {
3787 ret = btrfs_force_chunk_alloc(trans, rc->extent_root,
3788 rc->block_group->flags);
3789 if (ret == 0) {
3790 err = 0;
3791 progress = 0;
3792 goto restart;
3793 }
3794 }
3783 3795
3784 btrfs_release_path(rc->extent_root, path); 3796 btrfs_release_path(rc->extent_root, path);
3785 clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY, 3797 clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY,
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index a004008f7d28..d39a9895d932 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -155,7 +155,8 @@ enum {
155 Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress, 155 Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress,
156 Opt_compress_type, Opt_compress_force, Opt_compress_force_type, 156 Opt_compress_type, Opt_compress_force, Opt_compress_force_type,
157 Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard, 157 Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard,
158 Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed, Opt_err, 158 Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed,
159 Opt_enospc_debug, Opt_err,
159}; 160};
160 161
161static match_table_t tokens = { 162static match_table_t tokens = {
@@ -184,6 +185,7 @@ static match_table_t tokens = {
184 {Opt_space_cache, "space_cache"}, 185 {Opt_space_cache, "space_cache"},
185 {Opt_clear_cache, "clear_cache"}, 186 {Opt_clear_cache, "clear_cache"},
186 {Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"}, 187 {Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"},
188 {Opt_enospc_debug, "enospc_debug"},
187 {Opt_err, NULL}, 189 {Opt_err, NULL},
188}; 190};
189 191
@@ -358,6 +360,9 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
358 case Opt_user_subvol_rm_allowed: 360 case Opt_user_subvol_rm_allowed:
359 btrfs_set_opt(info->mount_opt, USER_SUBVOL_RM_ALLOWED); 361 btrfs_set_opt(info->mount_opt, USER_SUBVOL_RM_ALLOWED);
360 break; 362 break;
363 case Opt_enospc_debug:
364 btrfs_set_opt(info->mount_opt, ENOSPC_DEBUG);
365 break;
361 case Opt_err: 366 case Opt_err:
362 printk(KERN_INFO "btrfs: unrecognized mount option " 367 printk(KERN_INFO "btrfs: unrecognized mount option "
363 "'%s'\n", p); 368 "'%s'\n", p);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 2636a051e4b2..dd13eb81ee40 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1338,11 +1338,11 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
1338 1338
1339 ret = btrfs_shrink_device(device, 0); 1339 ret = btrfs_shrink_device(device, 0);
1340 if (ret) 1340 if (ret)
1341 goto error_brelse; 1341 goto error_undo;
1342 1342
1343 ret = btrfs_rm_dev_item(root->fs_info->chunk_root, device); 1343 ret = btrfs_rm_dev_item(root->fs_info->chunk_root, device);
1344 if (ret) 1344 if (ret)
1345 goto error_brelse; 1345 goto error_undo;
1346 1346
1347 device->in_fs_metadata = 0; 1347 device->in_fs_metadata = 0;
1348 1348
@@ -1416,6 +1416,13 @@ out:
1416 mutex_unlock(&root->fs_info->volume_mutex); 1416 mutex_unlock(&root->fs_info->volume_mutex);
1417 mutex_unlock(&uuid_mutex); 1417 mutex_unlock(&uuid_mutex);
1418 return ret; 1418 return ret;
1419error_undo:
1420 if (device->writeable) {
1421 list_add(&device->dev_alloc_list,
1422 &root->fs_info->fs_devices->alloc_list);
1423 root->fs_info->fs_devices->rw_devices++;
1424 }
1425 goto error_brelse;
1419} 1426}
1420 1427
1421/* 1428/*
@@ -1605,12 +1612,14 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
1605 1612
1606 ret = find_next_devid(root, &device->devid); 1613 ret = find_next_devid(root, &device->devid);
1607 if (ret) { 1614 if (ret) {
1615 kfree(device->name);
1608 kfree(device); 1616 kfree(device);
1609 goto error; 1617 goto error;
1610 } 1618 }
1611 1619
1612 trans = btrfs_start_transaction(root, 0); 1620 trans = btrfs_start_transaction(root, 0);
1613 if (IS_ERR(trans)) { 1621 if (IS_ERR(trans)) {
1622 kfree(device->name);
1614 kfree(device); 1623 kfree(device);
1615 ret = PTR_ERR(trans); 1624 ret = PTR_ERR(trans);
1616 goto error; 1625 goto error;
@@ -1631,7 +1640,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
1631 device->dev_root = root->fs_info->dev_root; 1640 device->dev_root = root->fs_info->dev_root;
1632 device->bdev = bdev; 1641 device->bdev = bdev;
1633 device->in_fs_metadata = 1; 1642 device->in_fs_metadata = 1;
1634 device->mode = 0; 1643 device->mode = FMODE_EXCL;
1635 set_blocksize(device->bdev, 4096); 1644 set_blocksize(device->bdev, 4096);
1636 1645
1637 if (seeding_dev) { 1646 if (seeding_dev) {
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 0bc68de8edd7..ebafa65a29b6 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -409,7 +409,7 @@ more:
409 spin_lock(&inode->i_lock); 409 spin_lock(&inode->i_lock);
410 if (ci->i_release_count == fi->dir_release_count) { 410 if (ci->i_release_count == fi->dir_release_count) {
411 dout(" marking %p complete\n", inode); 411 dout(" marking %p complete\n", inode);
412 ci->i_ceph_flags |= CEPH_I_COMPLETE; 412 /* ci->i_ceph_flags |= CEPH_I_COMPLETE; */
413 ci->i_max_offset = filp->f_pos; 413 ci->i_max_offset = filp->f_pos;
414 } 414 }
415 spin_unlock(&inode->i_lock); 415 spin_unlock(&inode->i_lock);
@@ -496,6 +496,7 @@ struct dentry *ceph_finish_lookup(struct ceph_mds_request *req,
496 496
497 /* .snap dir? */ 497 /* .snap dir? */
498 if (err == -ENOENT && 498 if (err == -ENOENT &&
499 ceph_snap(parent) == CEPH_NOSNAP &&
499 strcmp(dentry->d_name.name, 500 strcmp(dentry->d_name.name,
500 fsc->mount_options->snapdir_name) == 0) { 501 fsc->mount_options->snapdir_name) == 0) {
501 struct inode *inode = ceph_get_snapdir(parent); 502 struct inode *inode = ceph_get_snapdir(parent);
@@ -992,7 +993,7 @@ static int ceph_d_revalidate(struct dentry *dentry, struct nameidata *nd)
992{ 993{
993 struct inode *dir; 994 struct inode *dir;
994 995
995 if (nd->flags & LOOKUP_RCU) 996 if (nd && nd->flags & LOOKUP_RCU)
996 return -ECHILD; 997 return -ECHILD;
997 998
998 dir = dentry->d_parent->d_inode; 999 dir = dentry->d_parent->d_inode;
@@ -1029,28 +1030,8 @@ out_touch:
1029static void ceph_dentry_release(struct dentry *dentry) 1030static void ceph_dentry_release(struct dentry *dentry)
1030{ 1031{
1031 struct ceph_dentry_info *di = ceph_dentry(dentry); 1032 struct ceph_dentry_info *di = ceph_dentry(dentry);
1032 struct inode *parent_inode = NULL;
1033 u64 snapid = CEPH_NOSNAP;
1034 1033
1035 if (!IS_ROOT(dentry)) { 1034 dout("dentry_release %p\n", dentry);
1036 parent_inode = dentry->d_parent->d_inode;
1037 if (parent_inode)
1038 snapid = ceph_snap(parent_inode);
1039 }
1040 dout("dentry_release %p parent %p\n", dentry, parent_inode);
1041 if (parent_inode && snapid != CEPH_SNAPDIR) {
1042 struct ceph_inode_info *ci = ceph_inode(parent_inode);
1043
1044 spin_lock(&parent_inode->i_lock);
1045 if (ci->i_shared_gen == di->lease_shared_gen ||
1046 snapid <= CEPH_MAXSNAP) {
1047 dout(" clearing %p complete (d_release)\n",
1048 parent_inode);
1049 ci->i_ceph_flags &= ~CEPH_I_COMPLETE;
1050 ci->i_release_count++;
1051 }
1052 spin_unlock(&parent_inode->i_lock);
1053 }
1054 if (di) { 1035 if (di) {
1055 ceph_dentry_lru_del(dentry); 1036 ceph_dentry_lru_del(dentry);
1056 if (di->lease_session) 1037 if (di->lease_session)
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 5625463aa479..193bfa5e9cbd 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -707,7 +707,7 @@ static int fill_inode(struct inode *inode,
707 (issued & CEPH_CAP_FILE_EXCL) == 0 && 707 (issued & CEPH_CAP_FILE_EXCL) == 0 &&
708 (ci->i_ceph_flags & CEPH_I_COMPLETE) == 0) { 708 (ci->i_ceph_flags & CEPH_I_COMPLETE) == 0) {
709 dout(" marking %p complete (empty)\n", inode); 709 dout(" marking %p complete (empty)\n", inode);
710 ci->i_ceph_flags |= CEPH_I_COMPLETE; 710 /* ci->i_ceph_flags |= CEPH_I_COMPLETE; */
711 ci->i_max_offset = 2; 711 ci->i_max_offset = 2;
712 } 712 }
713 break; 713 break;
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index 39c243acd062..f40b9139e437 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -584,10 +584,14 @@ static void queue_realm_cap_snaps(struct ceph_snap_realm *realm)
584 if (lastinode) 584 if (lastinode)
585 iput(lastinode); 585 iput(lastinode);
586 586
587 dout("queue_realm_cap_snaps %p %llx children\n", realm, realm->ino); 587 list_for_each_entry(child, &realm->children, child_item) {
588 list_for_each_entry(child, &realm->children, child_item) 588 dout("queue_realm_cap_snaps %p %llx queue child %p %llx\n",
589 queue_realm_cap_snaps(child); 589 realm, realm->ino, child, child->ino);
590 list_del_init(&child->dirty_item);
591 list_add(&child->dirty_item, &realm->dirty_item);
592 }
590 593
594 list_del_init(&realm->dirty_item);
591 dout("queue_realm_cap_snaps %p %llx done\n", realm, realm->ino); 595 dout("queue_realm_cap_snaps %p %llx done\n", realm, realm->ino);
592} 596}
593 597
@@ -683,7 +687,9 @@ more:
683 * queue cap snaps _after_ we've built the new snap contexts, 687 * queue cap snaps _after_ we've built the new snap contexts,
684 * so that i_head_snapc can be set appropriately. 688 * so that i_head_snapc can be set appropriately.
685 */ 689 */
686 list_for_each_entry(realm, &dirty_realms, dirty_item) { 690 while (!list_empty(&dirty_realms)) {
691 realm = list_first_entry(&dirty_realms, struct ceph_snap_realm,
692 dirty_item);
687 queue_realm_cap_snaps(realm); 693 queue_realm_cap_snaps(realm);
688 } 694 }
689 695
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 4a3330235d55..a9371b6578c0 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -127,5 +127,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
127extern const struct export_operations cifs_export_ops; 127extern const struct export_operations cifs_export_ops;
128#endif /* EXPERIMENTAL */ 128#endif /* EXPERIMENTAL */
129 129
130#define CIFS_VERSION "1.70" 130#define CIFS_VERSION "1.71"
131#endif /* _CIFSFS_H */ 131#endif /* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index edd5b29b53c9..17afb0fbcaed 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -188,6 +188,8 @@ struct TCP_Server_Info {
188 /* multiplexed reads or writes */ 188 /* multiplexed reads or writes */
189 unsigned int maxBuf; /* maxBuf specifies the maximum */ 189 unsigned int maxBuf; /* maxBuf specifies the maximum */
190 /* message size the server can send or receive for non-raw SMBs */ 190 /* message size the server can send or receive for non-raw SMBs */
191 /* maxBuf is returned by SMB NegotiateProtocol so maxBuf is only 0 */
192 /* when socket is setup (and during reconnect) before NegProt sent */
191 unsigned int max_rw; /* maxRw specifies the maximum */ 193 unsigned int max_rw; /* maxRw specifies the maximum */
192 /* message size the server can send or receive for */ 194 /* message size the server can send or receive for */
193 /* SMB_COM_WRITE_RAW or SMB_COM_READ_RAW. */ 195 /* SMB_COM_WRITE_RAW or SMB_COM_READ_RAW. */
@@ -652,7 +654,7 @@ static inline void free_dfs_info_array(struct dfs_info3_param *param,
652#define MID_REQUEST_SUBMITTED 2 654#define MID_REQUEST_SUBMITTED 2
653#define MID_RESPONSE_RECEIVED 4 655#define MID_RESPONSE_RECEIVED 4
654#define MID_RETRY_NEEDED 8 /* session closed while this request out */ 656#define MID_RETRY_NEEDED 8 /* session closed while this request out */
655#define MID_NO_RESP_NEEDED 0x10 657#define MID_RESPONSE_MALFORMED 0x10
656 658
657/* Types of response buffer returned from SendReceive2 */ 659/* Types of response buffer returned from SendReceive2 */
658#define CIFS_NO_BUFFER 0 /* Response buffer not returned */ 660#define CIFS_NO_BUFFER 0 /* Response buffer not returned */
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 257b6d895e20..8d6c17ab593d 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -338,10 +338,11 @@ cifs_echo_request(struct work_struct *work)
338 struct TCP_Server_Info, echo.work); 338 struct TCP_Server_Info, echo.work);
339 339
340 /* 340 /*
341 * We cannot send an echo until the NEGOTIATE_PROTOCOL request is done. 341 * We cannot send an echo until the NEGOTIATE_PROTOCOL request is
342 * Also, no need to ping if we got a response recently 342 * done, which is indicated by maxBuf != 0. Also, no need to ping if
343 * we got a response recently
343 */ 344 */
344 if (server->tcpStatus != CifsGood || 345 if (server->maxBuf == 0 ||
345 time_before(jiffies, server->lstrp + SMB_ECHO_INTERVAL - HZ)) 346 time_before(jiffies, server->lstrp + SMB_ECHO_INTERVAL - HZ))
346 goto requeue_echo; 347 goto requeue_echo;
347 348
@@ -585,11 +586,20 @@ incomplete_rcv:
585 total_read += 4; /* account for rfc1002 hdr */ 586 total_read += 4; /* account for rfc1002 hdr */
586 587
587 dump_smb(smb_buffer, total_read); 588 dump_smb(smb_buffer, total_read);
588 if (checkSMB(smb_buffer, smb_buffer->Mid, total_read)) { 589
590 /*
591 * We know that we received enough to get to the MID as we
592 * checked the pdu_length earlier. Now check to see
593 * if the rest of the header is OK. We borrow the length
594 * var for the rest of the loop to avoid a new stack var.
595 *
596 * 48 bytes is enough to display the header and a little bit
597 * into the payload for debugging purposes.
598 */
599 length = checkSMB(smb_buffer, smb_buffer->Mid, total_read);
600 if (length != 0)
589 cifs_dump_mem("Bad SMB: ", smb_buffer, 601 cifs_dump_mem("Bad SMB: ", smb_buffer,
590 total_read < 48 ? total_read : 48); 602 min_t(unsigned int, total_read, 48));
591 continue;
592 }
593 603
594 mid_entry = NULL; 604 mid_entry = NULL;
595 server->lstrp = jiffies; 605 server->lstrp = jiffies;
@@ -601,7 +611,8 @@ incomplete_rcv:
601 if ((mid_entry->mid == smb_buffer->Mid) && 611 if ((mid_entry->mid == smb_buffer->Mid) &&
602 (mid_entry->midState == MID_REQUEST_SUBMITTED) && 612 (mid_entry->midState == MID_REQUEST_SUBMITTED) &&
603 (mid_entry->command == smb_buffer->Command)) { 613 (mid_entry->command == smb_buffer->Command)) {
604 if (check2ndT2(smb_buffer,server->maxBuf) > 0) { 614 if (length == 0 &&
615 check2ndT2(smb_buffer, server->maxBuf) > 0) {
605 /* We have a multipart transact2 resp */ 616 /* We have a multipart transact2 resp */
606 isMultiRsp = true; 617 isMultiRsp = true;
607 if (mid_entry->resp_buf) { 618 if (mid_entry->resp_buf) {
@@ -636,7 +647,12 @@ incomplete_rcv:
636 mid_entry->resp_buf = smb_buffer; 647 mid_entry->resp_buf = smb_buffer;
637 mid_entry->largeBuf = isLargeBuf; 648 mid_entry->largeBuf = isLargeBuf;
638multi_t2_fnd: 649multi_t2_fnd:
639 mid_entry->midState = MID_RESPONSE_RECEIVED; 650 if (length == 0)
651 mid_entry->midState =
652 MID_RESPONSE_RECEIVED;
653 else
654 mid_entry->midState =
655 MID_RESPONSE_MALFORMED;
640#ifdef CONFIG_CIFS_STATS2 656#ifdef CONFIG_CIFS_STATS2
641 mid_entry->when_received = jiffies; 657 mid_entry->when_received = jiffies;
642#endif 658#endif
@@ -657,6 +673,9 @@ multi_t2_fnd:
657 else 673 else
658 smallbuf = NULL; 674 smallbuf = NULL;
659 } 675 }
676 } else if (length != 0) {
677 /* response sanity checks failed */
678 continue;
660 } else if (!is_valid_oplock_break(smb_buffer, server) && 679 } else if (!is_valid_oplock_break(smb_buffer, server) &&
661 !isMultiRsp) { 680 !isMultiRsp) {
662 cERROR(1, "No task to wake, unknown frame received! " 681 cERROR(1, "No task to wake, unknown frame received! "
diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c
index 8d9189f64477..79f641eeda30 100644
--- a/fs/cifs/netmisc.c
+++ b/fs/cifs/netmisc.c
@@ -170,7 +170,7 @@ cifs_convert_address(struct sockaddr *dst, const char *src, int len)
170{ 170{
171 int rc, alen, slen; 171 int rc, alen, slen;
172 const char *pct; 172 const char *pct;
173 char *endp, scope_id[13]; 173 char scope_id[13];
174 struct sockaddr_in *s4 = (struct sockaddr_in *) dst; 174 struct sockaddr_in *s4 = (struct sockaddr_in *) dst;
175 struct sockaddr_in6 *s6 = (struct sockaddr_in6 *) dst; 175 struct sockaddr_in6 *s6 = (struct sockaddr_in6 *) dst;
176 176
@@ -197,9 +197,9 @@ cifs_convert_address(struct sockaddr *dst, const char *src, int len)
197 memcpy(scope_id, pct + 1, slen); 197 memcpy(scope_id, pct + 1, slen);
198 scope_id[slen] = '\0'; 198 scope_id[slen] = '\0';
199 199
200 s6->sin6_scope_id = (u32) simple_strtoul(pct, &endp, 0); 200 rc = strict_strtoul(scope_id, 0,
201 if (endp != scope_id + slen) 201 (unsigned long *)&s6->sin6_scope_id);
202 return 0; 202 rc = (rc == 0) ? 1 : 0;
203 } 203 }
204 204
205 return rc; 205 return rc;
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 1adc9625a344..16765703131b 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -656,13 +656,13 @@ ssetup_ntlmssp_authenticate:
656 656
657 if (type == LANMAN) { 657 if (type == LANMAN) {
658#ifdef CONFIG_CIFS_WEAK_PW_HASH 658#ifdef CONFIG_CIFS_WEAK_PW_HASH
659 char lnm_session_key[CIFS_SESS_KEY_SIZE]; 659 char lnm_session_key[CIFS_AUTH_RESP_SIZE];
660 660
661 pSMB->req.hdr.Flags2 &= ~SMBFLG2_UNICODE; 661 pSMB->req.hdr.Flags2 &= ~SMBFLG2_UNICODE;
662 662
663 /* no capabilities flags in old lanman negotiation */ 663 /* no capabilities flags in old lanman negotiation */
664 664
665 pSMB->old_req.PasswordLength = cpu_to_le16(CIFS_SESS_KEY_SIZE); 665 pSMB->old_req.PasswordLength = cpu_to_le16(CIFS_AUTH_RESP_SIZE);
666 666
667 /* Calculate hash with password and copy into bcc_ptr. 667 /* Calculate hash with password and copy into bcc_ptr.
668 * Encryption Key (stored as in cryptkey) gets used if the 668 * Encryption Key (stored as in cryptkey) gets used if the
@@ -675,8 +675,8 @@ ssetup_ntlmssp_authenticate:
675 true : false, lnm_session_key); 675 true : false, lnm_session_key);
676 676
677 ses->flags |= CIFS_SES_LANMAN; 677 ses->flags |= CIFS_SES_LANMAN;
678 memcpy(bcc_ptr, (char *)lnm_session_key, CIFS_SESS_KEY_SIZE); 678 memcpy(bcc_ptr, (char *)lnm_session_key, CIFS_AUTH_RESP_SIZE);
679 bcc_ptr += CIFS_SESS_KEY_SIZE; 679 bcc_ptr += CIFS_AUTH_RESP_SIZE;
680 680
681 /* can not sign if LANMAN negotiated so no need 681 /* can not sign if LANMAN negotiated so no need
682 to calculate signing key? but what if server 682 to calculate signing key? but what if server
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index fbc5aace54b1..46d8756f2b24 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -457,6 +457,9 @@ sync_mid_result(struct mid_q_entry *mid, struct TCP_Server_Info *server)
457 case MID_RETRY_NEEDED: 457 case MID_RETRY_NEEDED:
458 rc = -EAGAIN; 458 rc = -EAGAIN;
459 break; 459 break;
460 case MID_RESPONSE_MALFORMED:
461 rc = -EIO;
462 break;
460 default: 463 default:
461 cERROR(1, "%s: invalid mid state mid=%d state=%d", __func__, 464 cERROR(1, "%s: invalid mid state mid=%d state=%d", __func__,
462 mid->mid, mid->midState); 465 mid->mid, mid->midState);
diff --git a/fs/compat.c b/fs/compat.c
index f6fd0a00e6cc..691c3fd8ce1d 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1228,7 +1228,9 @@ compat_sys_preadv(unsigned long fd, const struct compat_iovec __user *vec,
1228 file = fget_light(fd, &fput_needed); 1228 file = fget_light(fd, &fput_needed);
1229 if (!file) 1229 if (!file)
1230 return -EBADF; 1230 return -EBADF;
1231 ret = compat_readv(file, vec, vlen, &pos); 1231 ret = -ESPIPE;
1232 if (file->f_mode & FMODE_PREAD)
1233 ret = compat_readv(file, vec, vlen, &pos);
1232 fput_light(file, fput_needed); 1234 fput_light(file, fput_needed);
1233 return ret; 1235 return ret;
1234} 1236}
@@ -1285,7 +1287,9 @@ compat_sys_pwritev(unsigned long fd, const struct compat_iovec __user *vec,
1285 file = fget_light(fd, &fput_needed); 1287 file = fget_light(fd, &fput_needed);
1286 if (!file) 1288 if (!file)
1287 return -EBADF; 1289 return -EBADF;
1288 ret = compat_writev(file, vec, vlen, &pos); 1290 ret = -ESPIPE;
1291 if (file->f_mode & FMODE_PWRITE)
1292 ret = compat_writev(file, vec, vlen, &pos);
1289 fput_light(file, fput_needed); 1293 fput_light(file, fput_needed);
1290 return ret; 1294 return ret;
1291} 1295}
diff --git a/fs/dcache.c b/fs/dcache.c
index 2a6bd9a4ae97..611ffe928c03 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1523,6 +1523,28 @@ struct dentry * d_alloc_root(struct inode * root_inode)
1523} 1523}
1524EXPORT_SYMBOL(d_alloc_root); 1524EXPORT_SYMBOL(d_alloc_root);
1525 1525
1526static struct dentry * __d_find_any_alias(struct inode *inode)
1527{
1528 struct dentry *alias;
1529
1530 if (list_empty(&inode->i_dentry))
1531 return NULL;
1532 alias = list_first_entry(&inode->i_dentry, struct dentry, d_alias);
1533 __dget(alias);
1534 return alias;
1535}
1536
1537static struct dentry * d_find_any_alias(struct inode *inode)
1538{
1539 struct dentry *de;
1540
1541 spin_lock(&inode->i_lock);
1542 de = __d_find_any_alias(inode);
1543 spin_unlock(&inode->i_lock);
1544 return de;
1545}
1546
1547
1526/** 1548/**
1527 * d_obtain_alias - find or allocate a dentry for a given inode 1549 * d_obtain_alias - find or allocate a dentry for a given inode
1528 * @inode: inode to allocate the dentry for 1550 * @inode: inode to allocate the dentry for
@@ -1552,7 +1574,7 @@ struct dentry *d_obtain_alias(struct inode *inode)
1552 if (IS_ERR(inode)) 1574 if (IS_ERR(inode))
1553 return ERR_CAST(inode); 1575 return ERR_CAST(inode);
1554 1576
1555 res = d_find_alias(inode); 1577 res = d_find_any_alias(inode);
1556 if (res) 1578 if (res)
1557 goto out_iput; 1579 goto out_iput;
1558 1580
@@ -1565,7 +1587,7 @@ struct dentry *d_obtain_alias(struct inode *inode)
1565 1587
1566 1588
1567 spin_lock(&inode->i_lock); 1589 spin_lock(&inode->i_lock);
1568 res = __d_find_alias(inode, 0); 1590 res = __d_find_any_alias(inode);
1569 if (res) { 1591 if (res) {
1570 spin_unlock(&inode->i_lock); 1592 spin_unlock(&inode->i_lock);
1571 dput(tmp); 1593 dput(tmp);
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 9c64ae9e4c1a..2d8c87b951c2 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -1468,15 +1468,13 @@ static void work_stop(void)
1468 1468
1469static int work_start(void) 1469static int work_start(void)
1470{ 1470{
1471 recv_workqueue = alloc_workqueue("dlm_recv", WQ_MEM_RECLAIM | 1471 recv_workqueue = create_singlethread_workqueue("dlm_recv");
1472 WQ_HIGHPRI | WQ_FREEZEABLE, 0);
1473 if (!recv_workqueue) { 1472 if (!recv_workqueue) {
1474 log_print("can't start dlm_recv"); 1473 log_print("can't start dlm_recv");
1475 return -ENOMEM; 1474 return -ENOMEM;
1476 } 1475 }
1477 1476
1478 send_workqueue = alloc_workqueue("dlm_send", WQ_MEM_RECLAIM | 1477 send_workqueue = create_singlethread_workqueue("dlm_send");
1479 WQ_HIGHPRI | WQ_FREEZEABLE, 0);
1480 if (!send_workqueue) { 1478 if (!send_workqueue) {
1481 log_print("can't start dlm_send"); 1479 log_print("can't start dlm_send");
1482 destroy_workqueue(recv_workqueue); 1480 destroy_workqueue(recv_workqueue);
diff --git a/fs/ecryptfs/dentry.c b/fs/ecryptfs/dentry.c
index 6fc4f319b550..534c1d46e69e 100644
--- a/fs/ecryptfs/dentry.c
+++ b/fs/ecryptfs/dentry.c
@@ -46,24 +46,28 @@ static int ecryptfs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
46{ 46{
47 struct dentry *lower_dentry; 47 struct dentry *lower_dentry;
48 struct vfsmount *lower_mnt; 48 struct vfsmount *lower_mnt;
49 struct dentry *dentry_save; 49 struct dentry *dentry_save = NULL;
50 struct vfsmount *vfsmount_save; 50 struct vfsmount *vfsmount_save = NULL;
51 int rc = 1; 51 int rc = 1;
52 52
53 if (nd->flags & LOOKUP_RCU) 53 if (nd && nd->flags & LOOKUP_RCU)
54 return -ECHILD; 54 return -ECHILD;
55 55
56 lower_dentry = ecryptfs_dentry_to_lower(dentry); 56 lower_dentry = ecryptfs_dentry_to_lower(dentry);
57 lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry); 57 lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry);
58 if (!lower_dentry->d_op || !lower_dentry->d_op->d_revalidate) 58 if (!lower_dentry->d_op || !lower_dentry->d_op->d_revalidate)
59 goto out; 59 goto out;
60 dentry_save = nd->path.dentry; 60 if (nd) {
61 vfsmount_save = nd->path.mnt; 61 dentry_save = nd->path.dentry;
62 nd->path.dentry = lower_dentry; 62 vfsmount_save = nd->path.mnt;
63 nd->path.mnt = lower_mnt; 63 nd->path.dentry = lower_dentry;
64 nd->path.mnt = lower_mnt;
65 }
64 rc = lower_dentry->d_op->d_revalidate(lower_dentry, nd); 66 rc = lower_dentry->d_op->d_revalidate(lower_dentry, nd);
65 nd->path.dentry = dentry_save; 67 if (nd) {
66 nd->path.mnt = vfsmount_save; 68 nd->path.dentry = dentry_save;
69 nd->path.mnt = vfsmount_save;
70 }
67 if (dentry->d_inode) { 71 if (dentry->d_inode) {
68 struct inode *lower_inode = 72 struct inode *lower_inode =
69 ecryptfs_inode_to_lower(dentry->d_inode); 73 ecryptfs_inode_to_lower(dentry->d_inode);
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index dbc84ed96336..e00753496e3e 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -632,8 +632,7 @@ int ecryptfs_interpose(struct dentry *hidden_dentry,
632 u32 flags); 632 u32 flags);
633int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry, 633int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry,
634 struct dentry *lower_dentry, 634 struct dentry *lower_dentry,
635 struct inode *ecryptfs_dir_inode, 635 struct inode *ecryptfs_dir_inode);
636 struct nameidata *ecryptfs_nd);
637int ecryptfs_decode_and_decrypt_filename(char **decrypted_name, 636int ecryptfs_decode_and_decrypt_filename(char **decrypted_name,
638 size_t *decrypted_name_size, 637 size_t *decrypted_name_size,
639 struct dentry *ecryptfs_dentry, 638 struct dentry *ecryptfs_dentry,
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index 81e10e6a9443..7d1050e254f9 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -317,6 +317,7 @@ ecryptfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
317 317
318const struct file_operations ecryptfs_dir_fops = { 318const struct file_operations ecryptfs_dir_fops = {
319 .readdir = ecryptfs_readdir, 319 .readdir = ecryptfs_readdir,
320 .read = generic_read_dir,
320 .unlocked_ioctl = ecryptfs_unlocked_ioctl, 321 .unlocked_ioctl = ecryptfs_unlocked_ioctl,
321#ifdef CONFIG_COMPAT 322#ifdef CONFIG_COMPAT
322 .compat_ioctl = ecryptfs_compat_ioctl, 323 .compat_ioctl = ecryptfs_compat_ioctl,
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index bd33f87a1907..b592938a84bc 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -74,16 +74,20 @@ ecryptfs_create_underlying_file(struct inode *lower_dir_inode,
74 unsigned int flags_save; 74 unsigned int flags_save;
75 int rc; 75 int rc;
76 76
77 dentry_save = nd->path.dentry; 77 if (nd) {
78 vfsmount_save = nd->path.mnt; 78 dentry_save = nd->path.dentry;
79 flags_save = nd->flags; 79 vfsmount_save = nd->path.mnt;
80 nd->path.dentry = lower_dentry; 80 flags_save = nd->flags;
81 nd->path.mnt = lower_mnt; 81 nd->path.dentry = lower_dentry;
82 nd->flags &= ~LOOKUP_OPEN; 82 nd->path.mnt = lower_mnt;
83 nd->flags &= ~LOOKUP_OPEN;
84 }
83 rc = vfs_create(lower_dir_inode, lower_dentry, mode, nd); 85 rc = vfs_create(lower_dir_inode, lower_dentry, mode, nd);
84 nd->path.dentry = dentry_save; 86 if (nd) {
85 nd->path.mnt = vfsmount_save; 87 nd->path.dentry = dentry_save;
86 nd->flags = flags_save; 88 nd->path.mnt = vfsmount_save;
89 nd->flags = flags_save;
90 }
87 return rc; 91 return rc;
88} 92}
89 93
@@ -241,8 +245,7 @@ out:
241 */ 245 */
242int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry, 246int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry,
243 struct dentry *lower_dentry, 247 struct dentry *lower_dentry,
244 struct inode *ecryptfs_dir_inode, 248 struct inode *ecryptfs_dir_inode)
245 struct nameidata *ecryptfs_nd)
246{ 249{
247 struct dentry *lower_dir_dentry; 250 struct dentry *lower_dir_dentry;
248 struct vfsmount *lower_mnt; 251 struct vfsmount *lower_mnt;
@@ -290,8 +293,6 @@ int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry,
290 goto out; 293 goto out;
291 if (special_file(lower_inode->i_mode)) 294 if (special_file(lower_inode->i_mode))
292 goto out; 295 goto out;
293 if (!ecryptfs_nd)
294 goto out;
295 /* Released in this function */ 296 /* Released in this function */
296 page_virt = kmem_cache_zalloc(ecryptfs_header_cache_2, GFP_USER); 297 page_virt = kmem_cache_zalloc(ecryptfs_header_cache_2, GFP_USER);
297 if (!page_virt) { 298 if (!page_virt) {
@@ -349,75 +350,6 @@ out:
349} 350}
350 351
351/** 352/**
352 * ecryptfs_new_lower_dentry
353 * @name: The name of the new dentry.
354 * @lower_dir_dentry: Parent directory of the new dentry.
355 * @nd: nameidata from last lookup.
356 *
357 * Create a new dentry or get it from lower parent dir.
358 */
359static struct dentry *
360ecryptfs_new_lower_dentry(struct qstr *name, struct dentry *lower_dir_dentry,
361 struct nameidata *nd)
362{
363 struct dentry *new_dentry;
364 struct dentry *tmp;
365 struct inode *lower_dir_inode;
366
367 lower_dir_inode = lower_dir_dentry->d_inode;
368
369 tmp = d_alloc(lower_dir_dentry, name);
370 if (!tmp)
371 return ERR_PTR(-ENOMEM);
372
373 mutex_lock(&lower_dir_inode->i_mutex);
374 new_dentry = lower_dir_inode->i_op->lookup(lower_dir_inode, tmp, nd);
375 mutex_unlock(&lower_dir_inode->i_mutex);
376
377 if (!new_dentry)
378 new_dentry = tmp;
379 else
380 dput(tmp);
381
382 return new_dentry;
383}
384
385
386/**
387 * ecryptfs_lookup_one_lower
388 * @ecryptfs_dentry: The eCryptfs dentry that we are looking up
389 * @lower_dir_dentry: lower parent directory
390 * @name: lower file name
391 *
392 * Get the lower dentry from vfs. If lower dentry does not exist yet,
393 * create it.
394 */
395static struct dentry *
396ecryptfs_lookup_one_lower(struct dentry *ecryptfs_dentry,
397 struct dentry *lower_dir_dentry, struct qstr *name)
398{
399 struct nameidata nd;
400 struct vfsmount *lower_mnt;
401 int err;
402
403 lower_mnt = mntget(ecryptfs_dentry_to_lower_mnt(
404 ecryptfs_dentry->d_parent));
405 err = vfs_path_lookup(lower_dir_dentry, lower_mnt, name->name , 0, &nd);
406 mntput(lower_mnt);
407
408 if (!err) {
409 /* we dont need the mount */
410 mntput(nd.path.mnt);
411 return nd.path.dentry;
412 }
413 if (err != -ENOENT)
414 return ERR_PTR(err);
415
416 /* create a new lower dentry */
417 return ecryptfs_new_lower_dentry(name, lower_dir_dentry, &nd);
418}
419
420/**
421 * ecryptfs_lookup 353 * ecryptfs_lookup
422 * @ecryptfs_dir_inode: The eCryptfs directory inode 354 * @ecryptfs_dir_inode: The eCryptfs directory inode
423 * @ecryptfs_dentry: The eCryptfs dentry that we are looking up 355 * @ecryptfs_dentry: The eCryptfs dentry that we are looking up
@@ -434,7 +366,6 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
434 size_t encrypted_and_encoded_name_size; 366 size_t encrypted_and_encoded_name_size;
435 struct ecryptfs_mount_crypt_stat *mount_crypt_stat = NULL; 367 struct ecryptfs_mount_crypt_stat *mount_crypt_stat = NULL;
436 struct dentry *lower_dir_dentry, *lower_dentry; 368 struct dentry *lower_dir_dentry, *lower_dentry;
437 struct qstr lower_name;
438 int rc = 0; 369 int rc = 0;
439 370
440 if ((ecryptfs_dentry->d_name.len == 1 371 if ((ecryptfs_dentry->d_name.len == 1
@@ -444,20 +375,14 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
444 goto out_d_drop; 375 goto out_d_drop;
445 } 376 }
446 lower_dir_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry->d_parent); 377 lower_dir_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry->d_parent);
447 lower_name.name = ecryptfs_dentry->d_name.name; 378 mutex_lock(&lower_dir_dentry->d_inode->i_mutex);
448 lower_name.len = ecryptfs_dentry->d_name.len; 379 lower_dentry = lookup_one_len(ecryptfs_dentry->d_name.name,
449 lower_name.hash = ecryptfs_dentry->d_name.hash; 380 lower_dir_dentry,
450 if (lower_dir_dentry->d_op && lower_dir_dentry->d_op->d_hash) { 381 ecryptfs_dentry->d_name.len);
451 rc = lower_dir_dentry->d_op->d_hash(lower_dir_dentry, 382 mutex_unlock(&lower_dir_dentry->d_inode->i_mutex);
452 lower_dir_dentry->d_inode, &lower_name);
453 if (rc < 0)
454 goto out_d_drop;
455 }
456 lower_dentry = ecryptfs_lookup_one_lower(ecryptfs_dentry,
457 lower_dir_dentry, &lower_name);
458 if (IS_ERR(lower_dentry)) { 383 if (IS_ERR(lower_dentry)) {
459 rc = PTR_ERR(lower_dentry); 384 rc = PTR_ERR(lower_dentry);
460 ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_lower() returned " 385 ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_len() returned "
461 "[%d] on lower_dentry = [%s]\n", __func__, rc, 386 "[%d] on lower_dentry = [%s]\n", __func__, rc,
462 encrypted_and_encoded_name); 387 encrypted_and_encoded_name);
463 goto out_d_drop; 388 goto out_d_drop;
@@ -479,28 +404,21 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
479 "filename; rc = [%d]\n", __func__, rc); 404 "filename; rc = [%d]\n", __func__, rc);
480 goto out_d_drop; 405 goto out_d_drop;
481 } 406 }
482 lower_name.name = encrypted_and_encoded_name; 407 mutex_lock(&lower_dir_dentry->d_inode->i_mutex);
483 lower_name.len = encrypted_and_encoded_name_size; 408 lower_dentry = lookup_one_len(encrypted_and_encoded_name,
484 lower_name.hash = full_name_hash(lower_name.name, lower_name.len); 409 lower_dir_dentry,
485 if (lower_dir_dentry->d_op && lower_dir_dentry->d_op->d_hash) { 410 encrypted_and_encoded_name_size);
486 rc = lower_dir_dentry->d_op->d_hash(lower_dir_dentry, 411 mutex_unlock(&lower_dir_dentry->d_inode->i_mutex);
487 lower_dir_dentry->d_inode, &lower_name);
488 if (rc < 0)
489 goto out_d_drop;
490 }
491 lower_dentry = ecryptfs_lookup_one_lower(ecryptfs_dentry,
492 lower_dir_dentry, &lower_name);
493 if (IS_ERR(lower_dentry)) { 412 if (IS_ERR(lower_dentry)) {
494 rc = PTR_ERR(lower_dentry); 413 rc = PTR_ERR(lower_dentry);
495 ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_lower() returned " 414 ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_len() returned "
496 "[%d] on lower_dentry = [%s]\n", __func__, rc, 415 "[%d] on lower_dentry = [%s]\n", __func__, rc,
497 encrypted_and_encoded_name); 416 encrypted_and_encoded_name);
498 goto out_d_drop; 417 goto out_d_drop;
499 } 418 }
500lookup_and_interpose: 419lookup_and_interpose:
501 rc = ecryptfs_lookup_and_interpose_lower(ecryptfs_dentry, lower_dentry, 420 rc = ecryptfs_lookup_and_interpose_lower(ecryptfs_dentry, lower_dentry,
502 ecryptfs_dir_inode, 421 ecryptfs_dir_inode);
503 ecryptfs_nd);
504 goto out; 422 goto out;
505out_d_drop: 423out_d_drop:
506 d_drop(ecryptfs_dentry); 424 d_drop(ecryptfs_dentry);
@@ -1092,6 +1010,8 @@ int ecryptfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
1092 rc = vfs_getattr(ecryptfs_dentry_to_lower_mnt(dentry), 1010 rc = vfs_getattr(ecryptfs_dentry_to_lower_mnt(dentry),
1093 ecryptfs_dentry_to_lower(dentry), &lower_stat); 1011 ecryptfs_dentry_to_lower(dentry), &lower_stat);
1094 if (!rc) { 1012 if (!rc) {
1013 fsstack_copy_attr_all(dentry->d_inode,
1014 ecryptfs_inode_to_lower(dentry->d_inode));
1095 generic_fillattr(dentry->d_inode, stat); 1015 generic_fillattr(dentry->d_inode, stat);
1096 stat->blocks = lower_stat.blocks; 1016 stat->blocks = lower_stat.blocks;
1097 } 1017 }
diff --git a/fs/eventfd.c b/fs/eventfd.c
index e0194b3e14d6..d9a591773919 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -99,7 +99,7 @@ EXPORT_SYMBOL_GPL(eventfd_ctx_get);
99 * @ctx: [in] Pointer to eventfd context. 99 * @ctx: [in] Pointer to eventfd context.
100 * 100 *
101 * The eventfd context reference must have been previously acquired either 101 * The eventfd context reference must have been previously acquired either
102 * with eventfd_ctx_get() or eventfd_ctx_fdget()). 102 * with eventfd_ctx_get() or eventfd_ctx_fdget().
103 */ 103 */
104void eventfd_ctx_put(struct eventfd_ctx *ctx) 104void eventfd_ctx_put(struct eventfd_ctx *ctx)
105{ 105{
@@ -146,9 +146,9 @@ static void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt)
146 * eventfd_ctx_remove_wait_queue - Read the current counter and removes wait queue. 146 * eventfd_ctx_remove_wait_queue - Read the current counter and removes wait queue.
147 * @ctx: [in] Pointer to eventfd context. 147 * @ctx: [in] Pointer to eventfd context.
148 * @wait: [in] Wait queue to be removed. 148 * @wait: [in] Wait queue to be removed.
149 * @cnt: [out] Pointer to the 64bit conter value. 149 * @cnt: [out] Pointer to the 64-bit counter value.
150 * 150 *
151 * Returns zero if successful, or the following error codes: 151 * Returns %0 if successful, or the following error codes:
152 * 152 *
153 * -EAGAIN : The operation would have blocked. 153 * -EAGAIN : The operation would have blocked.
154 * 154 *
@@ -175,11 +175,11 @@ EXPORT_SYMBOL_GPL(eventfd_ctx_remove_wait_queue);
175 * eventfd_ctx_read - Reads the eventfd counter or wait if it is zero. 175 * eventfd_ctx_read - Reads the eventfd counter or wait if it is zero.
176 * @ctx: [in] Pointer to eventfd context. 176 * @ctx: [in] Pointer to eventfd context.
177 * @no_wait: [in] Different from zero if the operation should not block. 177 * @no_wait: [in] Different from zero if the operation should not block.
178 * @cnt: [out] Pointer to the 64bit conter value. 178 * @cnt: [out] Pointer to the 64-bit counter value.
179 * 179 *
180 * Returns zero if successful, or the following error codes: 180 * Returns %0 if successful, or the following error codes:
181 * 181 *
182 * -EAGAIN : The operation would have blocked but @no_wait was nonzero. 182 * -EAGAIN : The operation would have blocked but @no_wait was non-zero.
183 * -ERESTARTSYS : A signal interrupted the wait operation. 183 * -ERESTARTSYS : A signal interrupted the wait operation.
184 * 184 *
185 * If @no_wait is zero, the function might sleep until the eventfd internal 185 * If @no_wait is zero, the function might sleep until the eventfd internal
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 267d0ada4541..4a09af9e9a63 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -63,6 +63,13 @@
63 * cleanup path and it is also acquired by eventpoll_release_file() 63 * cleanup path and it is also acquired by eventpoll_release_file()
64 * if a file has been pushed inside an epoll set and it is then 64 * if a file has been pushed inside an epoll set and it is then
65 * close()d without a previous call toepoll_ctl(EPOLL_CTL_DEL). 65 * close()d without a previous call toepoll_ctl(EPOLL_CTL_DEL).
66 * It is also acquired when inserting an epoll fd onto another epoll
67 * fd. We do this so that we walk the epoll tree and ensure that this
68 * insertion does not create a cycle of epoll file descriptors, which
69 * could lead to deadlock. We need a global mutex to prevent two
70 * simultaneous inserts (A into B and B into A) from racing and
71 * constructing a cycle without either insert observing that it is
72 * going to.
66 * It is possible to drop the "ep->mtx" and to use the global 73 * It is possible to drop the "ep->mtx" and to use the global
67 * mutex "epmutex" (together with "ep->lock") to have it working, 74 * mutex "epmutex" (together with "ep->lock") to have it working,
68 * but having "ep->mtx" will make the interface more scalable. 75 * but having "ep->mtx" will make the interface more scalable.
@@ -224,6 +231,9 @@ static long max_user_watches __read_mostly;
224 */ 231 */
225static DEFINE_MUTEX(epmutex); 232static DEFINE_MUTEX(epmutex);
226 233
234/* Used to check for epoll file descriptor inclusion loops */
235static struct nested_calls poll_loop_ncalls;
236
227/* Used for safe wake up implementation */ 237/* Used for safe wake up implementation */
228static struct nested_calls poll_safewake_ncalls; 238static struct nested_calls poll_safewake_ncalls;
229 239
@@ -1198,6 +1208,62 @@ retry:
1198 return res; 1208 return res;
1199} 1209}
1200 1210
1211/**
1212 * ep_loop_check_proc - Callback function to be passed to the @ep_call_nested()
1213 * API, to verify that adding an epoll file inside another
1214 * epoll structure, does not violate the constraints, in
1215 * terms of closed loops, or too deep chains (which can
1216 * result in excessive stack usage).
1217 *
1218 * @priv: Pointer to the epoll file to be currently checked.
1219 * @cookie: Original cookie for this call. This is the top-of-the-chain epoll
1220 * data structure pointer.
1221 * @call_nests: Current dept of the @ep_call_nested() call stack.
1222 *
1223 * Returns: Returns zero if adding the epoll @file inside current epoll
1224 * structure @ep does not violate the constraints, or -1 otherwise.
1225 */
1226static int ep_loop_check_proc(void *priv, void *cookie, int call_nests)
1227{
1228 int error = 0;
1229 struct file *file = priv;
1230 struct eventpoll *ep = file->private_data;
1231 struct rb_node *rbp;
1232 struct epitem *epi;
1233
1234 mutex_lock(&ep->mtx);
1235 for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) {
1236 epi = rb_entry(rbp, struct epitem, rbn);
1237 if (unlikely(is_file_epoll(epi->ffd.file))) {
1238 error = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
1239 ep_loop_check_proc, epi->ffd.file,
1240 epi->ffd.file->private_data, current);
1241 if (error != 0)
1242 break;
1243 }
1244 }
1245 mutex_unlock(&ep->mtx);
1246
1247 return error;
1248}
1249
1250/**
1251 * ep_loop_check - Performs a check to verify that adding an epoll file (@file)
1252 * another epoll file (represented by @ep) does not create
1253 * closed loops or too deep chains.
1254 *
1255 * @ep: Pointer to the epoll private data structure.
1256 * @file: Pointer to the epoll file to be checked.
1257 *
1258 * Returns: Returns zero if adding the epoll @file inside current epoll
1259 * structure @ep does not violate the constraints, or -1 otherwise.
1260 */
1261static int ep_loop_check(struct eventpoll *ep, struct file *file)
1262{
1263 return ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
1264 ep_loop_check_proc, file, ep, current);
1265}
1266
1201/* 1267/*
1202 * Open an eventpoll file descriptor. 1268 * Open an eventpoll file descriptor.
1203 */ 1269 */
@@ -1246,6 +1312,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
1246 struct epoll_event __user *, event) 1312 struct epoll_event __user *, event)
1247{ 1313{
1248 int error; 1314 int error;
1315 int did_lock_epmutex = 0;
1249 struct file *file, *tfile; 1316 struct file *file, *tfile;
1250 struct eventpoll *ep; 1317 struct eventpoll *ep;
1251 struct epitem *epi; 1318 struct epitem *epi;
@@ -1287,6 +1354,25 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
1287 */ 1354 */
1288 ep = file->private_data; 1355 ep = file->private_data;
1289 1356
1357 /*
1358 * When we insert an epoll file descriptor, inside another epoll file
1359 * descriptor, there is the change of creating closed loops, which are
1360 * better be handled here, than in more critical paths.
1361 *
1362 * We hold epmutex across the loop check and the insert in this case, in
1363 * order to prevent two separate inserts from racing and each doing the
1364 * insert "at the same time" such that ep_loop_check passes on both
1365 * before either one does the insert, thereby creating a cycle.
1366 */
1367 if (unlikely(is_file_epoll(tfile) && op == EPOLL_CTL_ADD)) {
1368 mutex_lock(&epmutex);
1369 did_lock_epmutex = 1;
1370 error = -ELOOP;
1371 if (ep_loop_check(ep, tfile) != 0)
1372 goto error_tgt_fput;
1373 }
1374
1375
1290 mutex_lock(&ep->mtx); 1376 mutex_lock(&ep->mtx);
1291 1377
1292 /* 1378 /*
@@ -1322,6 +1408,9 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
1322 mutex_unlock(&ep->mtx); 1408 mutex_unlock(&ep->mtx);
1323 1409
1324error_tgt_fput: 1410error_tgt_fput:
1411 if (unlikely(did_lock_epmutex))
1412 mutex_unlock(&epmutex);
1413
1325 fput(tfile); 1414 fput(tfile);
1326error_fput: 1415error_fput:
1327 fput(file); 1416 fput(file);
@@ -1441,6 +1530,12 @@ static int __init eventpoll_init(void)
1441 EP_ITEM_COST; 1530 EP_ITEM_COST;
1442 BUG_ON(max_user_watches < 0); 1531 BUG_ON(max_user_watches < 0);
1443 1532
1533 /*
1534 * Initialize the structure used to perform epoll file descriptor
1535 * inclusion loops checks.
1536 */
1537 ep_nested_calls_init(&poll_loop_ncalls);
1538
1444 /* Initialize the structure used to perform safe poll wait head wake ups */ 1539 /* Initialize the structure used to perform safe poll wait head wake ups */
1445 ep_nested_calls_init(&poll_safewake_ncalls); 1540 ep_nested_calls_init(&poll_safewake_ncalls);
1446 1541
diff --git a/fs/exofs/namei.c b/fs/exofs/namei.c
index 264e95d02830..4d70db110cfc 100644
--- a/fs/exofs/namei.c
+++ b/fs/exofs/namei.c
@@ -272,7 +272,6 @@ static int exofs_rename(struct inode *old_dir, struct dentry *old_dentry,
272 new_de = exofs_find_entry(new_dir, new_dentry, &new_page); 272 new_de = exofs_find_entry(new_dir, new_dentry, &new_page);
273 if (!new_de) 273 if (!new_de)
274 goto out_dir; 274 goto out_dir;
275 inode_inc_link_count(old_inode);
276 err = exofs_set_link(new_dir, new_de, new_page, old_inode); 275 err = exofs_set_link(new_dir, new_de, new_page, old_inode);
277 new_inode->i_ctime = CURRENT_TIME; 276 new_inode->i_ctime = CURRENT_TIME;
278 if (dir_de) 277 if (dir_de)
@@ -286,12 +285,9 @@ static int exofs_rename(struct inode *old_dir, struct dentry *old_dentry,
286 if (new_dir->i_nlink >= EXOFS_LINK_MAX) 285 if (new_dir->i_nlink >= EXOFS_LINK_MAX)
287 goto out_dir; 286 goto out_dir;
288 } 287 }
289 inode_inc_link_count(old_inode);
290 err = exofs_add_link(new_dentry, old_inode); 288 err = exofs_add_link(new_dentry, old_inode);
291 if (err) { 289 if (err)
292 inode_dec_link_count(old_inode);
293 goto out_dir; 290 goto out_dir;
294 }
295 if (dir_de) 291 if (dir_de)
296 inode_inc_link_count(new_dir); 292 inode_inc_link_count(new_dir);
297 } 293 }
@@ -299,7 +295,7 @@ static int exofs_rename(struct inode *old_dir, struct dentry *old_dentry,
299 old_inode->i_ctime = CURRENT_TIME; 295 old_inode->i_ctime = CURRENT_TIME;
300 296
301 exofs_delete_entry(old_de, old_page); 297 exofs_delete_entry(old_de, old_page);
302 inode_dec_link_count(old_inode); 298 mark_inode_dirty(old_inode);
303 299
304 if (dir_de) { 300 if (dir_de) {
305 err = exofs_set_link(old_inode, dir_de, dir_page, new_dir); 301 err = exofs_set_link(old_inode, dir_de, dir_page, new_dir);
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index 2e1d8341d827..adb91855ccd0 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -344,7 +344,6 @@ static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry,
344 new_de = ext2_find_entry (new_dir, &new_dentry->d_name, &new_page); 344 new_de = ext2_find_entry (new_dir, &new_dentry->d_name, &new_page);
345 if (!new_de) 345 if (!new_de)
346 goto out_dir; 346 goto out_dir;
347 inode_inc_link_count(old_inode);
348 ext2_set_link(new_dir, new_de, new_page, old_inode, 1); 347 ext2_set_link(new_dir, new_de, new_page, old_inode, 1);
349 new_inode->i_ctime = CURRENT_TIME_SEC; 348 new_inode->i_ctime = CURRENT_TIME_SEC;
350 if (dir_de) 349 if (dir_de)
@@ -356,12 +355,9 @@ static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry,
356 if (new_dir->i_nlink >= EXT2_LINK_MAX) 355 if (new_dir->i_nlink >= EXT2_LINK_MAX)
357 goto out_dir; 356 goto out_dir;
358 } 357 }
359 inode_inc_link_count(old_inode);
360 err = ext2_add_link(new_dentry, old_inode); 358 err = ext2_add_link(new_dentry, old_inode);
361 if (err) { 359 if (err)
362 inode_dec_link_count(old_inode);
363 goto out_dir; 360 goto out_dir;
364 }
365 if (dir_de) 361 if (dir_de)
366 inode_inc_link_count(new_dir); 362 inode_inc_link_count(new_dir);
367 } 363 }
@@ -369,12 +365,11 @@ static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry,
369 /* 365 /*
370 * Like most other Unix systems, set the ctime for inodes on a 366 * Like most other Unix systems, set the ctime for inodes on a
371 * rename. 367 * rename.
372 * inode_dec_link_count() will mark the inode dirty.
373 */ 368 */
374 old_inode->i_ctime = CURRENT_TIME_SEC; 369 old_inode->i_ctime = CURRENT_TIME_SEC;
370 mark_inode_dirty(old_inode);
375 371
376 ext2_delete_entry (old_de, old_page); 372 ext2_delete_entry (old_de, old_page);
377 inode_dec_link_count(old_inode);
378 373
379 if (dir_de) { 374 if (dir_de) {
380 if (old_dir != new_dir) 375 if (old_dir != new_dir)
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 0c8d97b56f34..3aa0b72b3b94 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -848,6 +848,7 @@ struct ext4_inode_info {
848 atomic_t i_ioend_count; /* Number of outstanding io_end structs */ 848 atomic_t i_ioend_count; /* Number of outstanding io_end structs */
849 /* current io_end structure for async DIO write*/ 849 /* current io_end structure for async DIO write*/
850 ext4_io_end_t *cur_aio_dio; 850 ext4_io_end_t *cur_aio_dio;
851 atomic_t i_aiodio_unwritten; /* Nr. of inflight conversions pending */
851 852
852 spinlock_t i_block_reservation_lock; 853 spinlock_t i_block_reservation_lock;
853 854
@@ -2119,6 +2120,15 @@ static inline void set_bitmap_uptodate(struct buffer_head *bh)
2119 2120
2120#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) 2121#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1)
2121 2122
2123/* For ioend & aio unwritten conversion wait queues */
2124#define EXT4_WQ_HASH_SZ 37
2125#define ext4_ioend_wq(v) (&ext4__ioend_wq[((unsigned long)(v)) %\
2126 EXT4_WQ_HASH_SZ])
2127#define ext4_aio_mutex(v) (&ext4__aio_mutex[((unsigned long)(v)) %\
2128 EXT4_WQ_HASH_SZ])
2129extern wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ];
2130extern struct mutex ext4__aio_mutex[EXT4_WQ_HASH_SZ];
2131
2122#endif /* __KERNEL__ */ 2132#endif /* __KERNEL__ */
2123 2133
2124#endif /* _EXT4_H */ 2134#endif /* _EXT4_H */
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 63a75810b7c3..ccce8a7e94ed 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -3174,9 +3174,10 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
3174 * that this IO needs to convertion to written when IO is 3174 * that this IO needs to convertion to written when IO is
3175 * completed 3175 * completed
3176 */ 3176 */
3177 if (io) 3177 if (io && !(io->flag & EXT4_IO_END_UNWRITTEN)) {
3178 io->flag = EXT4_IO_END_UNWRITTEN; 3178 io->flag = EXT4_IO_END_UNWRITTEN;
3179 else 3179 atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten);
3180 } else
3180 ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN); 3181 ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN);
3181 if (ext4_should_dioread_nolock(inode)) 3182 if (ext4_should_dioread_nolock(inode))
3182 map->m_flags |= EXT4_MAP_UNINIT; 3183 map->m_flags |= EXT4_MAP_UNINIT;
@@ -3463,9 +3464,10 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3463 * that we need to perform convertion when IO is done. 3464 * that we need to perform convertion when IO is done.
3464 */ 3465 */
3465 if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { 3466 if ((flags & EXT4_GET_BLOCKS_PRE_IO)) {
3466 if (io) 3467 if (io && !(io->flag & EXT4_IO_END_UNWRITTEN)) {
3467 io->flag = EXT4_IO_END_UNWRITTEN; 3468 io->flag = EXT4_IO_END_UNWRITTEN;
3468 else 3469 atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten);
3470 } else
3469 ext4_set_inode_state(inode, 3471 ext4_set_inode_state(inode,
3470 EXT4_STATE_DIO_UNWRITTEN); 3472 EXT4_STATE_DIO_UNWRITTEN);
3471 } 3473 }
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 2e8322c8aa88..7b80d543b89e 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -55,11 +55,47 @@ static int ext4_release_file(struct inode *inode, struct file *filp)
55 return 0; 55 return 0;
56} 56}
57 57
58static void ext4_aiodio_wait(struct inode *inode)
59{
60 wait_queue_head_t *wq = ext4_ioend_wq(inode);
61
62 wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_aiodio_unwritten) == 0));
63}
64
65/*
66 * This tests whether the IO in question is block-aligned or not.
67 * Ext4 utilizes unwritten extents when hole-filling during direct IO, and they
68 * are converted to written only after the IO is complete. Until they are
69 * mapped, these blocks appear as holes, so dio_zero_block() will assume that
70 * it needs to zero out portions of the start and/or end block. If 2 AIO
71 * threads are at work on the same unwritten block, they must be synchronized
72 * or one thread will zero the other's data, causing corruption.
73 */
74static int
75ext4_unaligned_aio(struct inode *inode, const struct iovec *iov,
76 unsigned long nr_segs, loff_t pos)
77{
78 struct super_block *sb = inode->i_sb;
79 int blockmask = sb->s_blocksize - 1;
80 size_t count = iov_length(iov, nr_segs);
81 loff_t final_size = pos + count;
82
83 if (pos >= inode->i_size)
84 return 0;
85
86 if ((pos & blockmask) || (final_size & blockmask))
87 return 1;
88
89 return 0;
90}
91
58static ssize_t 92static ssize_t
59ext4_file_write(struct kiocb *iocb, const struct iovec *iov, 93ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
60 unsigned long nr_segs, loff_t pos) 94 unsigned long nr_segs, loff_t pos)
61{ 95{
62 struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode; 96 struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
97 int unaligned_aio = 0;
98 int ret;
63 99
64 /* 100 /*
65 * If we have encountered a bitmap-format file, the size limit 101 * If we have encountered a bitmap-format file, the size limit
@@ -78,9 +114,31 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
78 nr_segs = iov_shorten((struct iovec *)iov, nr_segs, 114 nr_segs = iov_shorten((struct iovec *)iov, nr_segs,
79 sbi->s_bitmap_maxbytes - pos); 115 sbi->s_bitmap_maxbytes - pos);
80 } 116 }
117 } else if (unlikely((iocb->ki_filp->f_flags & O_DIRECT) &&
118 !is_sync_kiocb(iocb))) {
119 unaligned_aio = ext4_unaligned_aio(inode, iov, nr_segs, pos);
81 } 120 }
82 121
83 return generic_file_aio_write(iocb, iov, nr_segs, pos); 122 /* Unaligned direct AIO must be serialized; see comment above */
123 if (unaligned_aio) {
124 static unsigned long unaligned_warn_time;
125
126 /* Warn about this once per day */
127 if (printk_timed_ratelimit(&unaligned_warn_time, 60*60*24*HZ))
128 ext4_msg(inode->i_sb, KERN_WARNING,
129 "Unaligned AIO/DIO on inode %ld by %s; "
130 "performance will be poor.",
131 inode->i_ino, current->comm);
132 mutex_lock(ext4_aio_mutex(inode));
133 ext4_aiodio_wait(inode);
134 }
135
136 ret = generic_file_aio_write(iocb, iov, nr_segs, pos);
137
138 if (unaligned_aio)
139 mutex_unlock(ext4_aio_mutex(inode));
140
141 return ret;
84} 142}
85 143
86static const struct vm_operations_struct ext4_file_vm_ops = { 144static const struct vm_operations_struct ext4_file_vm_ops = {
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 851f49b2f9d2..d1fe09aea73d 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -342,10 +342,15 @@ static struct kmem_cache *ext4_free_ext_cachep;
342/* We create slab caches for groupinfo data structures based on the 342/* We create slab caches for groupinfo data structures based on the
343 * superblock block size. There will be one per mounted filesystem for 343 * superblock block size. There will be one per mounted filesystem for
344 * each unique s_blocksize_bits */ 344 * each unique s_blocksize_bits */
345#define NR_GRPINFO_CACHES \ 345#define NR_GRPINFO_CACHES 8
346 (EXT4_MAX_BLOCK_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE + 1)
347static struct kmem_cache *ext4_groupinfo_caches[NR_GRPINFO_CACHES]; 346static struct kmem_cache *ext4_groupinfo_caches[NR_GRPINFO_CACHES];
348 347
348static const char *ext4_groupinfo_slab_names[NR_GRPINFO_CACHES] = {
349 "ext4_groupinfo_1k", "ext4_groupinfo_2k", "ext4_groupinfo_4k",
350 "ext4_groupinfo_8k", "ext4_groupinfo_16k", "ext4_groupinfo_32k",
351 "ext4_groupinfo_64k", "ext4_groupinfo_128k"
352};
353
349static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, 354static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
350 ext4_group_t group); 355 ext4_group_t group);
351static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, 356static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
@@ -2414,6 +2419,55 @@ err_freesgi:
2414 return -ENOMEM; 2419 return -ENOMEM;
2415} 2420}
2416 2421
2422static void ext4_groupinfo_destroy_slabs(void)
2423{
2424 int i;
2425
2426 for (i = 0; i < NR_GRPINFO_CACHES; i++) {
2427 if (ext4_groupinfo_caches[i])
2428 kmem_cache_destroy(ext4_groupinfo_caches[i]);
2429 ext4_groupinfo_caches[i] = NULL;
2430 }
2431}
2432
2433static int ext4_groupinfo_create_slab(size_t size)
2434{
2435 static DEFINE_MUTEX(ext4_grpinfo_slab_create_mutex);
2436 int slab_size;
2437 int blocksize_bits = order_base_2(size);
2438 int cache_index = blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE;
2439 struct kmem_cache *cachep;
2440
2441 if (cache_index >= NR_GRPINFO_CACHES)
2442 return -EINVAL;
2443
2444 if (unlikely(cache_index < 0))
2445 cache_index = 0;
2446
2447 mutex_lock(&ext4_grpinfo_slab_create_mutex);
2448 if (ext4_groupinfo_caches[cache_index]) {
2449 mutex_unlock(&ext4_grpinfo_slab_create_mutex);
2450 return 0; /* Already created */
2451 }
2452
2453 slab_size = offsetof(struct ext4_group_info,
2454 bb_counters[blocksize_bits + 2]);
2455
2456 cachep = kmem_cache_create(ext4_groupinfo_slab_names[cache_index],
2457 slab_size, 0, SLAB_RECLAIM_ACCOUNT,
2458 NULL);
2459
2460 mutex_unlock(&ext4_grpinfo_slab_create_mutex);
2461 if (!cachep) {
2462 printk(KERN_EMERG "EXT4: no memory for groupinfo slab cache\n");
2463 return -ENOMEM;
2464 }
2465
2466 ext4_groupinfo_caches[cache_index] = cachep;
2467
2468 return 0;
2469}
2470
2417int ext4_mb_init(struct super_block *sb, int needs_recovery) 2471int ext4_mb_init(struct super_block *sb, int needs_recovery)
2418{ 2472{
2419 struct ext4_sb_info *sbi = EXT4_SB(sb); 2473 struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -2421,9 +2475,6 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
2421 unsigned offset; 2475 unsigned offset;
2422 unsigned max; 2476 unsigned max;
2423 int ret; 2477 int ret;
2424 int cache_index;
2425 struct kmem_cache *cachep;
2426 char *namep = NULL;
2427 2478
2428 i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_offsets); 2479 i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_offsets);
2429 2480
@@ -2440,30 +2491,9 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
2440 goto out; 2491 goto out;
2441 } 2492 }
2442 2493
2443 cache_index = sb->s_blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE; 2494 ret = ext4_groupinfo_create_slab(sb->s_blocksize);
2444 cachep = ext4_groupinfo_caches[cache_index]; 2495 if (ret < 0)
2445 if (!cachep) { 2496 goto out;
2446 char name[32];
2447 int len = offsetof(struct ext4_group_info,
2448 bb_counters[sb->s_blocksize_bits + 2]);
2449
2450 sprintf(name, "ext4_groupinfo_%d", sb->s_blocksize_bits);
2451 namep = kstrdup(name, GFP_KERNEL);
2452 if (!namep) {
2453 ret = -ENOMEM;
2454 goto out;
2455 }
2456
2457 /* Need to free the kmem_cache_name() when we
2458 * destroy the slab */
2459 cachep = kmem_cache_create(namep, len, 0,
2460 SLAB_RECLAIM_ACCOUNT, NULL);
2461 if (!cachep) {
2462 ret = -ENOMEM;
2463 goto out;
2464 }
2465 ext4_groupinfo_caches[cache_index] = cachep;
2466 }
2467 2497
2468 /* order 0 is regular bitmap */ 2498 /* order 0 is regular bitmap */
2469 sbi->s_mb_maxs[0] = sb->s_blocksize << 3; 2499 sbi->s_mb_maxs[0] = sb->s_blocksize << 3;
@@ -2520,7 +2550,6 @@ out:
2520 if (ret) { 2550 if (ret) {
2521 kfree(sbi->s_mb_offsets); 2551 kfree(sbi->s_mb_offsets);
2522 kfree(sbi->s_mb_maxs); 2552 kfree(sbi->s_mb_maxs);
2523 kfree(namep);
2524 } 2553 }
2525 return ret; 2554 return ret;
2526} 2555}
@@ -2734,7 +2763,6 @@ int __init ext4_init_mballoc(void)
2734 2763
2735void ext4_exit_mballoc(void) 2764void ext4_exit_mballoc(void)
2736{ 2765{
2737 int i;
2738 /* 2766 /*
2739 * Wait for completion of call_rcu()'s on ext4_pspace_cachep 2767 * Wait for completion of call_rcu()'s on ext4_pspace_cachep
2740 * before destroying the slab cache. 2768 * before destroying the slab cache.
@@ -2743,15 +2771,7 @@ void ext4_exit_mballoc(void)
2743 kmem_cache_destroy(ext4_pspace_cachep); 2771 kmem_cache_destroy(ext4_pspace_cachep);
2744 kmem_cache_destroy(ext4_ac_cachep); 2772 kmem_cache_destroy(ext4_ac_cachep);
2745 kmem_cache_destroy(ext4_free_ext_cachep); 2773 kmem_cache_destroy(ext4_free_ext_cachep);
2746 2774 ext4_groupinfo_destroy_slabs();
2747 for (i = 0; i < NR_GRPINFO_CACHES; i++) {
2748 struct kmem_cache *cachep = ext4_groupinfo_caches[i];
2749 if (cachep) {
2750 char *name = (char *)kmem_cache_name(cachep);
2751 kmem_cache_destroy(cachep);
2752 kfree(name);
2753 }
2754 }
2755 ext4_remove_debugfs_entry(); 2775 ext4_remove_debugfs_entry();
2756} 2776}
2757 2777
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 7270dcfca92a..955cc309142f 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -32,14 +32,8 @@
32 32
33static struct kmem_cache *io_page_cachep, *io_end_cachep; 33static struct kmem_cache *io_page_cachep, *io_end_cachep;
34 34
35#define WQ_HASH_SZ 37
36#define to_ioend_wq(v) (&ioend_wq[((unsigned long)v) % WQ_HASH_SZ])
37static wait_queue_head_t ioend_wq[WQ_HASH_SZ];
38
39int __init ext4_init_pageio(void) 35int __init ext4_init_pageio(void)
40{ 36{
41 int i;
42
43 io_page_cachep = KMEM_CACHE(ext4_io_page, SLAB_RECLAIM_ACCOUNT); 37 io_page_cachep = KMEM_CACHE(ext4_io_page, SLAB_RECLAIM_ACCOUNT);
44 if (io_page_cachep == NULL) 38 if (io_page_cachep == NULL)
45 return -ENOMEM; 39 return -ENOMEM;
@@ -48,9 +42,6 @@ int __init ext4_init_pageio(void)
48 kmem_cache_destroy(io_page_cachep); 42 kmem_cache_destroy(io_page_cachep);
49 return -ENOMEM; 43 return -ENOMEM;
50 } 44 }
51 for (i = 0; i < WQ_HASH_SZ; i++)
52 init_waitqueue_head(&ioend_wq[i]);
53
54 return 0; 45 return 0;
55} 46}
56 47
@@ -62,7 +53,7 @@ void ext4_exit_pageio(void)
62 53
63void ext4_ioend_wait(struct inode *inode) 54void ext4_ioend_wait(struct inode *inode)
64{ 55{
65 wait_queue_head_t *wq = to_ioend_wq(inode); 56 wait_queue_head_t *wq = ext4_ioend_wq(inode);
66 57
67 wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_ioend_count) == 0)); 58 wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_ioend_count) == 0));
68} 59}
@@ -87,7 +78,7 @@ void ext4_free_io_end(ext4_io_end_t *io)
87 for (i = 0; i < io->num_io_pages; i++) 78 for (i = 0; i < io->num_io_pages; i++)
88 put_io_page(io->pages[i]); 79 put_io_page(io->pages[i]);
89 io->num_io_pages = 0; 80 io->num_io_pages = 0;
90 wq = to_ioend_wq(io->inode); 81 wq = ext4_ioend_wq(io->inode);
91 if (atomic_dec_and_test(&EXT4_I(io->inode)->i_ioend_count) && 82 if (atomic_dec_and_test(&EXT4_I(io->inode)->i_ioend_count) &&
92 waitqueue_active(wq)) 83 waitqueue_active(wq))
93 wake_up_all(wq); 84 wake_up_all(wq);
@@ -102,6 +93,7 @@ int ext4_end_io_nolock(ext4_io_end_t *io)
102 struct inode *inode = io->inode; 93 struct inode *inode = io->inode;
103 loff_t offset = io->offset; 94 loff_t offset = io->offset;
104 ssize_t size = io->size; 95 ssize_t size = io->size;
96 wait_queue_head_t *wq;
105 int ret = 0; 97 int ret = 0;
106 98
107 ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p," 99 ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p,"
@@ -126,7 +118,16 @@ int ext4_end_io_nolock(ext4_io_end_t *io)
126 if (io->iocb) 118 if (io->iocb)
127 aio_complete(io->iocb, io->result, 0); 119 aio_complete(io->iocb, io->result, 0);
128 /* clear the DIO AIO unwritten flag */ 120 /* clear the DIO AIO unwritten flag */
129 io->flag &= ~EXT4_IO_END_UNWRITTEN; 121 if (io->flag & EXT4_IO_END_UNWRITTEN) {
122 io->flag &= ~EXT4_IO_END_UNWRITTEN;
123 /* Wake up anyone waiting on unwritten extent conversion */
124 wq = ext4_ioend_wq(io->inode);
125 if (atomic_dec_and_test(&EXT4_I(inode)->i_aiodio_unwritten) &&
126 waitqueue_active(wq)) {
127 wake_up_all(wq);
128 }
129 }
130
130 return ret; 131 return ret;
131} 132}
132 133
@@ -190,6 +191,7 @@ static void ext4_end_bio(struct bio *bio, int error)
190 struct inode *inode; 191 struct inode *inode;
191 unsigned long flags; 192 unsigned long flags;
192 int i; 193 int i;
194 sector_t bi_sector = bio->bi_sector;
193 195
194 BUG_ON(!io_end); 196 BUG_ON(!io_end);
195 bio->bi_private = NULL; 197 bio->bi_private = NULL;
@@ -207,9 +209,7 @@ static void ext4_end_bio(struct bio *bio, int error)
207 if (error) 209 if (error)
208 SetPageError(page); 210 SetPageError(page);
209 BUG_ON(!head); 211 BUG_ON(!head);
210 if (head->b_size == PAGE_CACHE_SIZE) 212 if (head->b_size != PAGE_CACHE_SIZE) {
211 clear_buffer_dirty(head);
212 else {
213 loff_t offset; 213 loff_t offset;
214 loff_t io_end_offset = io_end->offset + io_end->size; 214 loff_t io_end_offset = io_end->offset + io_end->size;
215 215
@@ -221,7 +221,6 @@ static void ext4_end_bio(struct bio *bio, int error)
221 if (error) 221 if (error)
222 buffer_io_error(bh); 222 buffer_io_error(bh);
223 223
224 clear_buffer_dirty(bh);
225 } 224 }
226 if (buffer_delay(bh)) 225 if (buffer_delay(bh))
227 partial_write = 1; 226 partial_write = 1;
@@ -257,7 +256,7 @@ static void ext4_end_bio(struct bio *bio, int error)
257 (unsigned long long) io_end->offset, 256 (unsigned long long) io_end->offset,
258 (long) io_end->size, 257 (long) io_end->size,
259 (unsigned long long) 258 (unsigned long long)
260 bio->bi_sector >> (inode->i_blkbits - 9)); 259 bi_sector >> (inode->i_blkbits - 9));
261 } 260 }
262 261
263 /* Add the io_end to per-inode completed io list*/ 262 /* Add the io_end to per-inode completed io list*/
@@ -380,6 +379,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
380 379
381 blocksize = 1 << inode->i_blkbits; 380 blocksize = 1 << inode->i_blkbits;
382 381
382 BUG_ON(!PageLocked(page));
383 BUG_ON(PageWriteback(page)); 383 BUG_ON(PageWriteback(page));
384 set_page_writeback(page); 384 set_page_writeback(page);
385 ClearPageError(page); 385 ClearPageError(page);
@@ -397,12 +397,14 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
397 for (bh = head = page_buffers(page), block_start = 0; 397 for (bh = head = page_buffers(page), block_start = 0;
398 bh != head || !block_start; 398 bh != head || !block_start;
399 block_start = block_end, bh = bh->b_this_page) { 399 block_start = block_end, bh = bh->b_this_page) {
400
400 block_end = block_start + blocksize; 401 block_end = block_start + blocksize;
401 if (block_start >= len) { 402 if (block_start >= len) {
402 clear_buffer_dirty(bh); 403 clear_buffer_dirty(bh);
403 set_buffer_uptodate(bh); 404 set_buffer_uptodate(bh);
404 continue; 405 continue;
405 } 406 }
407 clear_buffer_dirty(bh);
406 ret = io_submit_add_bh(io, io_page, inode, wbc, bh); 408 ret = io_submit_add_bh(io, io_page, inode, wbc, bh);
407 if (ret) { 409 if (ret) {
408 /* 410 /*
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 48ce561fafac..f6a318f836b2 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -77,6 +77,7 @@ static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags,
77 const char *dev_name, void *data); 77 const char *dev_name, void *data);
78static void ext4_destroy_lazyinit_thread(void); 78static void ext4_destroy_lazyinit_thread(void);
79static void ext4_unregister_li_request(struct super_block *sb); 79static void ext4_unregister_li_request(struct super_block *sb);
80static void ext4_clear_request_list(void);
80 81
81#if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) 82#if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
82static struct file_system_type ext3_fs_type = { 83static struct file_system_type ext3_fs_type = {
@@ -832,6 +833,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
832 ei->i_sync_tid = 0; 833 ei->i_sync_tid = 0;
833 ei->i_datasync_tid = 0; 834 ei->i_datasync_tid = 0;
834 atomic_set(&ei->i_ioend_count, 0); 835 atomic_set(&ei->i_ioend_count, 0);
836 atomic_set(&ei->i_aiodio_unwritten, 0);
835 837
836 return &ei->vfs_inode; 838 return &ei->vfs_inode;
837} 839}
@@ -2716,6 +2718,8 @@ static void ext4_unregister_li_request(struct super_block *sb)
2716 mutex_unlock(&ext4_li_info->li_list_mtx); 2718 mutex_unlock(&ext4_li_info->li_list_mtx);
2717} 2719}
2718 2720
2721static struct task_struct *ext4_lazyinit_task;
2722
2719/* 2723/*
2720 * This is the function where ext4lazyinit thread lives. It walks 2724 * This is the function where ext4lazyinit thread lives. It walks
2721 * through the request list searching for next scheduled filesystem. 2725 * through the request list searching for next scheduled filesystem.
@@ -2784,6 +2788,10 @@ cont_thread:
2784 if (time_before(jiffies, next_wakeup)) 2788 if (time_before(jiffies, next_wakeup))
2785 schedule(); 2789 schedule();
2786 finish_wait(&eli->li_wait_daemon, &wait); 2790 finish_wait(&eli->li_wait_daemon, &wait);
2791 if (kthread_should_stop()) {
2792 ext4_clear_request_list();
2793 goto exit_thread;
2794 }
2787 } 2795 }
2788 2796
2789exit_thread: 2797exit_thread:
@@ -2808,6 +2816,7 @@ exit_thread:
2808 wake_up(&eli->li_wait_task); 2816 wake_up(&eli->li_wait_task);
2809 2817
2810 kfree(ext4_li_info); 2818 kfree(ext4_li_info);
2819 ext4_lazyinit_task = NULL;
2811 ext4_li_info = NULL; 2820 ext4_li_info = NULL;
2812 mutex_unlock(&ext4_li_mtx); 2821 mutex_unlock(&ext4_li_mtx);
2813 2822
@@ -2830,11 +2839,10 @@ static void ext4_clear_request_list(void)
2830 2839
2831static int ext4_run_lazyinit_thread(void) 2840static int ext4_run_lazyinit_thread(void)
2832{ 2841{
2833 struct task_struct *t; 2842 ext4_lazyinit_task = kthread_run(ext4_lazyinit_thread,
2834 2843 ext4_li_info, "ext4lazyinit");
2835 t = kthread_run(ext4_lazyinit_thread, ext4_li_info, "ext4lazyinit"); 2844 if (IS_ERR(ext4_lazyinit_task)) {
2836 if (IS_ERR(t)) { 2845 int err = PTR_ERR(ext4_lazyinit_task);
2837 int err = PTR_ERR(t);
2838 ext4_clear_request_list(); 2846 ext4_clear_request_list();
2839 del_timer_sync(&ext4_li_info->li_timer); 2847 del_timer_sync(&ext4_li_info->li_timer);
2840 kfree(ext4_li_info); 2848 kfree(ext4_li_info);
@@ -2985,16 +2993,10 @@ static void ext4_destroy_lazyinit_thread(void)
2985 * If thread exited earlier 2993 * If thread exited earlier
2986 * there's nothing to be done. 2994 * there's nothing to be done.
2987 */ 2995 */
2988 if (!ext4_li_info) 2996 if (!ext4_li_info || !ext4_lazyinit_task)
2989 return; 2997 return;
2990 2998
2991 ext4_clear_request_list(); 2999 kthread_stop(ext4_lazyinit_task);
2992
2993 while (ext4_li_info->li_task) {
2994 wake_up(&ext4_li_info->li_wait_daemon);
2995 wait_event(ext4_li_info->li_wait_task,
2996 ext4_li_info->li_task == NULL);
2997 }
2998} 3000}
2999 3001
3000static int ext4_fill_super(struct super_block *sb, void *data, int silent) 3002static int ext4_fill_super(struct super_block *sb, void *data, int silent)
@@ -4768,7 +4770,7 @@ static struct file_system_type ext4_fs_type = {
4768 .fs_flags = FS_REQUIRES_DEV, 4770 .fs_flags = FS_REQUIRES_DEV,
4769}; 4771};
4770 4772
4771int __init ext4_init_feat_adverts(void) 4773static int __init ext4_init_feat_adverts(void)
4772{ 4774{
4773 struct ext4_features *ef; 4775 struct ext4_features *ef;
4774 int ret = -ENOMEM; 4776 int ret = -ENOMEM;
@@ -4792,23 +4794,44 @@ out:
4792 return ret; 4794 return ret;
4793} 4795}
4794 4796
4797static void ext4_exit_feat_adverts(void)
4798{
4799 kobject_put(&ext4_feat->f_kobj);
4800 wait_for_completion(&ext4_feat->f_kobj_unregister);
4801 kfree(ext4_feat);
4802}
4803
4804/* Shared across all ext4 file systems */
4805wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ];
4806struct mutex ext4__aio_mutex[EXT4_WQ_HASH_SZ];
4807
4795static int __init ext4_init_fs(void) 4808static int __init ext4_init_fs(void)
4796{ 4809{
4797 int err; 4810 int i, err;
4798 4811
4799 ext4_check_flag_values(); 4812 ext4_check_flag_values();
4813
4814 for (i = 0; i < EXT4_WQ_HASH_SZ; i++) {
4815 mutex_init(&ext4__aio_mutex[i]);
4816 init_waitqueue_head(&ext4__ioend_wq[i]);
4817 }
4818
4800 err = ext4_init_pageio(); 4819 err = ext4_init_pageio();
4801 if (err) 4820 if (err)
4802 return err; 4821 return err;
4803 err = ext4_init_system_zone(); 4822 err = ext4_init_system_zone();
4804 if (err) 4823 if (err)
4805 goto out5; 4824 goto out7;
4806 ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj); 4825 ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj);
4807 if (!ext4_kset) 4826 if (!ext4_kset)
4808 goto out4; 4827 goto out6;
4809 ext4_proc_root = proc_mkdir("fs/ext4", NULL); 4828 ext4_proc_root = proc_mkdir("fs/ext4", NULL);
4829 if (!ext4_proc_root)
4830 goto out5;
4810 4831
4811 err = ext4_init_feat_adverts(); 4832 err = ext4_init_feat_adverts();
4833 if (err)
4834 goto out4;
4812 4835
4813 err = ext4_init_mballoc(); 4836 err = ext4_init_mballoc();
4814 if (err) 4837 if (err)
@@ -4838,12 +4861,14 @@ out1:
4838out2: 4861out2:
4839 ext4_exit_mballoc(); 4862 ext4_exit_mballoc();
4840out3: 4863out3:
4841 kfree(ext4_feat); 4864 ext4_exit_feat_adverts();
4865out4:
4842 remove_proc_entry("fs/ext4", NULL); 4866 remove_proc_entry("fs/ext4", NULL);
4867out5:
4843 kset_unregister(ext4_kset); 4868 kset_unregister(ext4_kset);
4844out4: 4869out6:
4845 ext4_exit_system_zone(); 4870 ext4_exit_system_zone();
4846out5: 4871out7:
4847 ext4_exit_pageio(); 4872 ext4_exit_pageio();
4848 return err; 4873 return err;
4849} 4874}
@@ -4857,6 +4882,7 @@ static void __exit ext4_exit_fs(void)
4857 destroy_inodecache(); 4882 destroy_inodecache();
4858 ext4_exit_xattr(); 4883 ext4_exit_xattr();
4859 ext4_exit_mballoc(); 4884 ext4_exit_mballoc();
4885 ext4_exit_feat_adverts();
4860 remove_proc_entry("fs/ext4", NULL); 4886 remove_proc_entry("fs/ext4", NULL);
4861 kset_unregister(ext4_kset); 4887 kset_unregister(ext4_kset);
4862 ext4_exit_system_zone(); 4888 ext4_exit_system_zone();
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index f88f752babd9..adae3fb7451a 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -43,7 +43,7 @@ static int vfat_revalidate_shortname(struct dentry *dentry)
43 43
44static int vfat_revalidate(struct dentry *dentry, struct nameidata *nd) 44static int vfat_revalidate(struct dentry *dentry, struct nameidata *nd)
45{ 45{
46 if (nd->flags & LOOKUP_RCU) 46 if (nd && nd->flags & LOOKUP_RCU)
47 return -ECHILD; 47 return -ECHILD;
48 48
49 /* This is not negative dentry. Always valid. */ 49 /* This is not negative dentry. Always valid. */
@@ -54,7 +54,7 @@ static int vfat_revalidate(struct dentry *dentry, struct nameidata *nd)
54 54
55static int vfat_revalidate_ci(struct dentry *dentry, struct nameidata *nd) 55static int vfat_revalidate_ci(struct dentry *dentry, struct nameidata *nd)
56{ 56{
57 if (nd->flags & LOOKUP_RCU) 57 if (nd && nd->flags & LOOKUP_RCU)
58 return -ECHILD; 58 return -ECHILD;
59 59
60 /* 60 /*
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index bfed8447ed80..8bd0ef9286c3 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -158,7 +158,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
158{ 158{
159 struct inode *inode; 159 struct inode *inode;
160 160
161 if (nd->flags & LOOKUP_RCU) 161 if (nd && nd->flags & LOOKUP_RCU)
162 return -ECHILD; 162 return -ECHILD;
163 163
164 inode = entry->d_inode; 164 inode = entry->d_inode;
@@ -1283,8 +1283,11 @@ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr,
1283 if (err) 1283 if (err)
1284 return err; 1284 return err;
1285 1285
1286 if ((attr->ia_valid & ATTR_OPEN) && fc->atomic_o_trunc) 1286 if (attr->ia_valid & ATTR_OPEN) {
1287 return 0; 1287 if (fc->atomic_o_trunc)
1288 return 0;
1289 file = NULL;
1290 }
1288 1291
1289 if (attr->ia_valid & ATTR_SIZE) 1292 if (attr->ia_valid & ATTR_SIZE)
1290 is_truncate = true; 1293 is_truncate = true;
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 95da1bc1c826..9e0832dbb1e3 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -86,18 +86,52 @@ struct fuse_file *fuse_file_get(struct fuse_file *ff)
86 return ff; 86 return ff;
87} 87}
88 88
89static void fuse_release_async(struct work_struct *work)
90{
91 struct fuse_req *req;
92 struct fuse_conn *fc;
93 struct path path;
94
95 req = container_of(work, struct fuse_req, misc.release.work);
96 path = req->misc.release.path;
97 fc = get_fuse_conn(path.dentry->d_inode);
98
99 fuse_put_request(fc, req);
100 path_put(&path);
101}
102
89static void fuse_release_end(struct fuse_conn *fc, struct fuse_req *req) 103static void fuse_release_end(struct fuse_conn *fc, struct fuse_req *req)
90{ 104{
91 path_put(&req->misc.release.path); 105 if (fc->destroy_req) {
106 /*
107 * If this is a fuseblk mount, then it's possible that
108 * releasing the path will result in releasing the
109 * super block and sending the DESTROY request. If
110 * the server is single threaded, this would hang.
111 * For this reason do the path_put() in a separate
112 * thread.
113 */
114 atomic_inc(&req->count);
115 INIT_WORK(&req->misc.release.work, fuse_release_async);
116 schedule_work(&req->misc.release.work);
117 } else {
118 path_put(&req->misc.release.path);
119 }
92} 120}
93 121
94static void fuse_file_put(struct fuse_file *ff) 122static void fuse_file_put(struct fuse_file *ff, bool sync)
95{ 123{
96 if (atomic_dec_and_test(&ff->count)) { 124 if (atomic_dec_and_test(&ff->count)) {
97 struct fuse_req *req = ff->reserved_req; 125 struct fuse_req *req = ff->reserved_req;
98 126
99 req->end = fuse_release_end; 127 if (sync) {
100 fuse_request_send_background(ff->fc, req); 128 fuse_request_send(ff->fc, req);
129 path_put(&req->misc.release.path);
130 fuse_put_request(ff->fc, req);
131 } else {
132 req->end = fuse_release_end;
133 fuse_request_send_background(ff->fc, req);
134 }
101 kfree(ff); 135 kfree(ff);
102 } 136 }
103} 137}
@@ -219,8 +253,12 @@ void fuse_release_common(struct file *file, int opcode)
219 * Normally this will send the RELEASE request, however if 253 * Normally this will send the RELEASE request, however if
220 * some asynchronous READ or WRITE requests are outstanding, 254 * some asynchronous READ or WRITE requests are outstanding,
221 * the sending will be delayed. 255 * the sending will be delayed.
256 *
257 * Make the release synchronous if this is a fuseblk mount,
258 * synchronous RELEASE is allowed (and desirable) in this case
259 * because the server can be trusted not to screw up.
222 */ 260 */
223 fuse_file_put(ff); 261 fuse_file_put(ff, ff->fc->destroy_req != NULL);
224} 262}
225 263
226static int fuse_open(struct inode *inode, struct file *file) 264static int fuse_open(struct inode *inode, struct file *file)
@@ -558,7 +596,7 @@ static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_req *req)
558 page_cache_release(page); 596 page_cache_release(page);
559 } 597 }
560 if (req->ff) 598 if (req->ff)
561 fuse_file_put(req->ff); 599 fuse_file_put(req->ff, false);
562} 600}
563 601
564static void fuse_send_readpages(struct fuse_req *req, struct file *file) 602static void fuse_send_readpages(struct fuse_req *req, struct file *file)
@@ -1137,7 +1175,7 @@ static ssize_t fuse_direct_write(struct file *file, const char __user *buf,
1137static void fuse_writepage_free(struct fuse_conn *fc, struct fuse_req *req) 1175static void fuse_writepage_free(struct fuse_conn *fc, struct fuse_req *req)
1138{ 1176{
1139 __free_page(req->pages[0]); 1177 __free_page(req->pages[0]);
1140 fuse_file_put(req->ff); 1178 fuse_file_put(req->ff, false);
1141} 1179}
1142 1180
1143static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req) 1181static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req)
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index ae5744a2f9e9..d4286947bc2c 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -21,6 +21,7 @@
21#include <linux/rwsem.h> 21#include <linux/rwsem.h>
22#include <linux/rbtree.h> 22#include <linux/rbtree.h>
23#include <linux/poll.h> 23#include <linux/poll.h>
24#include <linux/workqueue.h>
24 25
25/** Max number of pages that can be used in a single read request */ 26/** Max number of pages that can be used in a single read request */
26#define FUSE_MAX_PAGES_PER_REQ 32 27#define FUSE_MAX_PAGES_PER_REQ 32
@@ -262,7 +263,10 @@ struct fuse_req {
262 /** Data for asynchronous requests */ 263 /** Data for asynchronous requests */
263 union { 264 union {
264 struct { 265 struct {
265 struct fuse_release_in in; 266 union {
267 struct fuse_release_in in;
268 struct work_struct work;
269 };
266 struct path path; 270 struct path path;
267 } release; 271 } release;
268 struct fuse_init_in init_in; 272 struct fuse_init_in init_in;
diff --git a/fs/gfs2/dentry.c b/fs/gfs2/dentry.c
index 4a456338b873..0da8da2c991d 100644
--- a/fs/gfs2/dentry.c
+++ b/fs/gfs2/dentry.c
@@ -44,7 +44,7 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd)
44 int error; 44 int error;
45 int had_lock = 0; 45 int had_lock = 0;
46 46
47 if (nd->flags & LOOKUP_RCU) 47 if (nd && nd->flags & LOOKUP_RCU)
48 return -ECHILD; 48 return -ECHILD;
49 49
50 parent = dget_parent(dentry); 50 parent = dget_parent(dentry);
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 08a8beb152e6..7cd9a5a68d59 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1779,11 +1779,11 @@ int __init gfs2_glock_init(void)
1779#endif 1779#endif
1780 1780
1781 glock_workqueue = alloc_workqueue("glock_workqueue", WQ_MEM_RECLAIM | 1781 glock_workqueue = alloc_workqueue("glock_workqueue", WQ_MEM_RECLAIM |
1782 WQ_HIGHPRI | WQ_FREEZEABLE, 0); 1782 WQ_HIGHPRI | WQ_FREEZABLE, 0);
1783 if (IS_ERR(glock_workqueue)) 1783 if (IS_ERR(glock_workqueue))
1784 return PTR_ERR(glock_workqueue); 1784 return PTR_ERR(glock_workqueue);
1785 gfs2_delete_workqueue = alloc_workqueue("delete_workqueue", 1785 gfs2_delete_workqueue = alloc_workqueue("delete_workqueue",
1786 WQ_MEM_RECLAIM | WQ_FREEZEABLE, 1786 WQ_MEM_RECLAIM | WQ_FREEZABLE,
1787 0); 1787 0);
1788 if (IS_ERR(gfs2_delete_workqueue)) { 1788 if (IS_ERR(gfs2_delete_workqueue)) {
1789 destroy_workqueue(glock_workqueue); 1789 destroy_workqueue(glock_workqueue);
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index ebef7ab6e17e..72c31a315d96 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -59,14 +59,7 @@ static void gfs2_init_gl_aspace_once(void *foo)
59 struct address_space *mapping = (struct address_space *)(gl + 1); 59 struct address_space *mapping = (struct address_space *)(gl + 1);
60 60
61 gfs2_init_glock_once(gl); 61 gfs2_init_glock_once(gl);
62 memset(mapping, 0, sizeof(*mapping)); 62 address_space_init_once(mapping);
63 INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC);
64 spin_lock_init(&mapping->tree_lock);
65 spin_lock_init(&mapping->i_mmap_lock);
66 INIT_LIST_HEAD(&mapping->private_list);
67 spin_lock_init(&mapping->private_lock);
68 INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap);
69 INIT_LIST_HEAD(&mapping->i_mmap_nonlinear);
70} 63}
71 64
72/** 65/**
@@ -144,7 +137,7 @@ static int __init init_gfs2_fs(void)
144 137
145 error = -ENOMEM; 138 error = -ENOMEM;
146 gfs_recovery_wq = alloc_workqueue("gfs_recovery", 139 gfs_recovery_wq = alloc_workqueue("gfs_recovery",
147 WQ_MEM_RECLAIM | WQ_FREEZEABLE, 0); 140 WQ_MEM_RECLAIM | WQ_FREEZABLE, 0);
148 if (!gfs_recovery_wq) 141 if (!gfs_recovery_wq)
149 goto fail_wq; 142 goto fail_wq;
150 143
diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c
index afa66aaa2237..b4d70b13be92 100644
--- a/fs/hfs/dir.c
+++ b/fs/hfs/dir.c
@@ -238,46 +238,22 @@ static int hfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
238} 238}
239 239
240/* 240/*
241 * hfs_unlink() 241 * hfs_remove()
242 * 242 *
243 * This is the unlink() entry in the inode_operations structure for 243 * This serves as both unlink() and rmdir() in the inode_operations
244 * regular HFS directories. The purpose is to delete an existing 244 * structure for regular HFS directories. The purpose is to delete
245 * file, given the inode for the parent directory and the name 245 * an existing child, given the inode for the parent directory and
246 * (and its length) of the existing file. 246 * the name (and its length) of the existing directory.
247 */
248static int hfs_unlink(struct inode *dir, struct dentry *dentry)
249{
250 struct inode *inode;
251 int res;
252
253 inode = dentry->d_inode;
254 res = hfs_cat_delete(inode->i_ino, dir, &dentry->d_name);
255 if (res)
256 return res;
257
258 drop_nlink(inode);
259 hfs_delete_inode(inode);
260 inode->i_ctime = CURRENT_TIME_SEC;
261 mark_inode_dirty(inode);
262
263 return res;
264}
265
266/*
267 * hfs_rmdir()
268 * 247 *
269 * This is the rmdir() entry in the inode_operations structure for 248 * HFS does not have hardlinks, so both rmdir and unlink set the
270 * regular HFS directories. The purpose is to delete an existing 249 * link count to 0. The only difference is the emptiness check.
271 * directory, given the inode for the parent directory and the name
272 * (and its length) of the existing directory.
273 */ 250 */
274static int hfs_rmdir(struct inode *dir, struct dentry *dentry) 251static int hfs_remove(struct inode *dir, struct dentry *dentry)
275{ 252{
276 struct inode *inode; 253 struct inode *inode = dentry->d_inode;
277 int res; 254 int res;
278 255
279 inode = dentry->d_inode; 256 if (S_ISDIR(inode->i_mode) && inode->i_size != 2)
280 if (inode->i_size != 2)
281 return -ENOTEMPTY; 257 return -ENOTEMPTY;
282 res = hfs_cat_delete(inode->i_ino, dir, &dentry->d_name); 258 res = hfs_cat_delete(inode->i_ino, dir, &dentry->d_name);
283 if (res) 259 if (res)
@@ -307,7 +283,7 @@ static int hfs_rename(struct inode *old_dir, struct dentry *old_dentry,
307 283
308 /* Unlink destination if it already exists */ 284 /* Unlink destination if it already exists */
309 if (new_dentry->d_inode) { 285 if (new_dentry->d_inode) {
310 res = hfs_unlink(new_dir, new_dentry); 286 res = hfs_remove(new_dir, new_dentry);
311 if (res) 287 if (res)
312 return res; 288 return res;
313 } 289 }
@@ -332,9 +308,9 @@ const struct file_operations hfs_dir_operations = {
332const struct inode_operations hfs_dir_inode_operations = { 308const struct inode_operations hfs_dir_inode_operations = {
333 .create = hfs_create, 309 .create = hfs_create,
334 .lookup = hfs_lookup, 310 .lookup = hfs_lookup,
335 .unlink = hfs_unlink, 311 .unlink = hfs_remove,
336 .mkdir = hfs_mkdir, 312 .mkdir = hfs_mkdir,
337 .rmdir = hfs_rmdir, 313 .rmdir = hfs_remove,
338 .rename = hfs_rename, 314 .rename = hfs_rename,
339 .setattr = hfs_inode_setattr, 315 .setattr = hfs_inode_setattr,
340}; 316};
diff --git a/fs/inode.c b/fs/inode.c
index da85e56378f3..0647d80accf6 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -295,6 +295,20 @@ static void destroy_inode(struct inode *inode)
295 call_rcu(&inode->i_rcu, i_callback); 295 call_rcu(&inode->i_rcu, i_callback);
296} 296}
297 297
298void address_space_init_once(struct address_space *mapping)
299{
300 memset(mapping, 0, sizeof(*mapping));
301 INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC);
302 spin_lock_init(&mapping->tree_lock);
303 spin_lock_init(&mapping->i_mmap_lock);
304 INIT_LIST_HEAD(&mapping->private_list);
305 spin_lock_init(&mapping->private_lock);
306 INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap);
307 INIT_LIST_HEAD(&mapping->i_mmap_nonlinear);
308 mutex_init(&mapping->unmap_mutex);
309}
310EXPORT_SYMBOL(address_space_init_once);
311
298/* 312/*
299 * These are initializations that only need to be done 313 * These are initializations that only need to be done
300 * once, because the fields are idempotent across use 314 * once, because the fields are idempotent across use
@@ -308,13 +322,7 @@ void inode_init_once(struct inode *inode)
308 INIT_LIST_HEAD(&inode->i_devices); 322 INIT_LIST_HEAD(&inode->i_devices);
309 INIT_LIST_HEAD(&inode->i_wb_list); 323 INIT_LIST_HEAD(&inode->i_wb_list);
310 INIT_LIST_HEAD(&inode->i_lru); 324 INIT_LIST_HEAD(&inode->i_lru);
311 INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC); 325 address_space_init_once(&inode->i_data);
312 spin_lock_init(&inode->i_data.tree_lock);
313 spin_lock_init(&inode->i_data.i_mmap_lock);
314 INIT_LIST_HEAD(&inode->i_data.private_list);
315 spin_lock_init(&inode->i_data.private_lock);
316 INIT_RAW_PRIO_TREE_ROOT(&inode->i_data.i_mmap);
317 INIT_LIST_HEAD(&inode->i_data.i_mmap_nonlinear);
318 i_size_ordered_init(inode); 326 i_size_ordered_init(inode);
319#ifdef CONFIG_FSNOTIFY 327#ifdef CONFIG_FSNOTIFY
320 INIT_HLIST_HEAD(&inode->i_fsnotify_marks); 328 INIT_HLIST_HEAD(&inode->i_fsnotify_marks);
@@ -540,11 +548,14 @@ void evict_inodes(struct super_block *sb)
540/** 548/**
541 * invalidate_inodes - attempt to free all inodes on a superblock 549 * invalidate_inodes - attempt to free all inodes on a superblock
542 * @sb: superblock to operate on 550 * @sb: superblock to operate on
551 * @kill_dirty: flag to guide handling of dirty inodes
543 * 552 *
544 * Attempts to free all inodes for a given superblock. If there were any 553 * Attempts to free all inodes for a given superblock. If there were any
545 * busy inodes return a non-zero value, else zero. 554 * busy inodes return a non-zero value, else zero.
555 * If @kill_dirty is set, discard dirty inodes too, otherwise treat
556 * them as busy.
546 */ 557 */
547int invalidate_inodes(struct super_block *sb) 558int invalidate_inodes(struct super_block *sb, bool kill_dirty)
548{ 559{
549 int busy = 0; 560 int busy = 0;
550 struct inode *inode, *next; 561 struct inode *inode, *next;
@@ -556,6 +567,10 @@ int invalidate_inodes(struct super_block *sb)
556 list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) { 567 list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
557 if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) 568 if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE))
558 continue; 569 continue;
570 if (inode->i_state & I_DIRTY && !kill_dirty) {
571 busy = 1;
572 continue;
573 }
559 if (atomic_read(&inode->i_count)) { 574 if (atomic_read(&inode->i_count)) {
560 busy = 1; 575 busy = 1;
561 continue; 576 continue;
diff --git a/fs/internal.h b/fs/internal.h
index 0663568b1247..9b976b57d7fe 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -112,4 +112,4 @@ extern void release_open_intent(struct nameidata *);
112 */ 112 */
113extern int get_nr_dirty_inodes(void); 113extern int get_nr_dirty_inodes(void);
114extern void evict_inodes(struct super_block *); 114extern void evict_inodes(struct super_block *);
115extern int invalidate_inodes(struct super_block *); 115extern int invalidate_inodes(struct super_block *, bool);
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 9e4686900f18..97e73469b2c4 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -473,7 +473,8 @@ int __jbd2_log_space_left(journal_t *journal)
473} 473}
474 474
475/* 475/*
476 * Called under j_state_lock. Returns true if a transaction commit was started. 476 * Called with j_state_lock locked for writing.
477 * Returns true if a transaction commit was started.
477 */ 478 */
478int __jbd2_log_start_commit(journal_t *journal, tid_t target) 479int __jbd2_log_start_commit(journal_t *journal, tid_t target)
479{ 480{
@@ -520,11 +521,13 @@ int jbd2_journal_force_commit_nested(journal_t *journal)
520{ 521{
521 transaction_t *transaction = NULL; 522 transaction_t *transaction = NULL;
522 tid_t tid; 523 tid_t tid;
524 int need_to_start = 0;
523 525
524 read_lock(&journal->j_state_lock); 526 read_lock(&journal->j_state_lock);
525 if (journal->j_running_transaction && !current->journal_info) { 527 if (journal->j_running_transaction && !current->journal_info) {
526 transaction = journal->j_running_transaction; 528 transaction = journal->j_running_transaction;
527 __jbd2_log_start_commit(journal, transaction->t_tid); 529 if (!tid_geq(journal->j_commit_request, transaction->t_tid))
530 need_to_start = 1;
528 } else if (journal->j_committing_transaction) 531 } else if (journal->j_committing_transaction)
529 transaction = journal->j_committing_transaction; 532 transaction = journal->j_committing_transaction;
530 533
@@ -535,6 +538,8 @@ int jbd2_journal_force_commit_nested(journal_t *journal)
535 538
536 tid = transaction->t_tid; 539 tid = transaction->t_tid;
537 read_unlock(&journal->j_state_lock); 540 read_unlock(&journal->j_state_lock);
541 if (need_to_start)
542 jbd2_log_start_commit(journal, tid);
538 jbd2_log_wait_commit(journal, tid); 543 jbd2_log_wait_commit(journal, tid);
539 return 1; 544 return 1;
540} 545}
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index faad2bd787c7..1d1191050f99 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -117,10 +117,10 @@ static inline void update_t_max_wait(transaction_t *transaction)
117static int start_this_handle(journal_t *journal, handle_t *handle, 117static int start_this_handle(journal_t *journal, handle_t *handle,
118 int gfp_mask) 118 int gfp_mask)
119{ 119{
120 transaction_t *transaction; 120 transaction_t *transaction, *new_transaction = NULL;
121 int needed; 121 tid_t tid;
122 int nblocks = handle->h_buffer_credits; 122 int needed, need_to_start;
123 transaction_t *new_transaction = NULL; 123 int nblocks = handle->h_buffer_credits;
124 124
125 if (nblocks > journal->j_max_transaction_buffers) { 125 if (nblocks > journal->j_max_transaction_buffers) {
126 printk(KERN_ERR "JBD: %s wants too many credits (%d > %d)\n", 126 printk(KERN_ERR "JBD: %s wants too many credits (%d > %d)\n",
@@ -222,8 +222,11 @@ repeat:
222 atomic_sub(nblocks, &transaction->t_outstanding_credits); 222 atomic_sub(nblocks, &transaction->t_outstanding_credits);
223 prepare_to_wait(&journal->j_wait_transaction_locked, &wait, 223 prepare_to_wait(&journal->j_wait_transaction_locked, &wait,
224 TASK_UNINTERRUPTIBLE); 224 TASK_UNINTERRUPTIBLE);
225 __jbd2_log_start_commit(journal, transaction->t_tid); 225 tid = transaction->t_tid;
226 need_to_start = !tid_geq(journal->j_commit_request, tid);
226 read_unlock(&journal->j_state_lock); 227 read_unlock(&journal->j_state_lock);
228 if (need_to_start)
229 jbd2_log_start_commit(journal, tid);
227 schedule(); 230 schedule();
228 finish_wait(&journal->j_wait_transaction_locked, &wait); 231 finish_wait(&journal->j_wait_transaction_locked, &wait);
229 goto repeat; 232 goto repeat;
@@ -442,7 +445,8 @@ int jbd2__journal_restart(handle_t *handle, int nblocks, int gfp_mask)
442{ 445{
443 transaction_t *transaction = handle->h_transaction; 446 transaction_t *transaction = handle->h_transaction;
444 journal_t *journal = transaction->t_journal; 447 journal_t *journal = transaction->t_journal;
445 int ret; 448 tid_t tid;
449 int need_to_start, ret;
446 450
447 /* If we've had an abort of any type, don't even think about 451 /* If we've had an abort of any type, don't even think about
448 * actually doing the restart! */ 452 * actually doing the restart! */
@@ -465,8 +469,11 @@ int jbd2__journal_restart(handle_t *handle, int nblocks, int gfp_mask)
465 spin_unlock(&transaction->t_handle_lock); 469 spin_unlock(&transaction->t_handle_lock);
466 470
467 jbd_debug(2, "restarting handle %p\n", handle); 471 jbd_debug(2, "restarting handle %p\n", handle);
468 __jbd2_log_start_commit(journal, transaction->t_tid); 472 tid = transaction->t_tid;
473 need_to_start = !tid_geq(journal->j_commit_request, tid);
469 read_unlock(&journal->j_state_lock); 474 read_unlock(&journal->j_state_lock);
475 if (need_to_start)
476 jbd2_log_start_commit(journal, tid);
470 477
471 lock_map_release(&handle->h_lockdep_map); 478 lock_map_release(&handle->h_lockdep_map);
472 handle->h_buffer_credits = nblocks; 479 handle->h_buffer_credits = nblocks;
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index 81ead850ddb6..5a2b269428a6 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -1600,7 +1600,7 @@ out:
1600 1600
1601static int jfs_ci_revalidate(struct dentry *dentry, struct nameidata *nd) 1601static int jfs_ci_revalidate(struct dentry *dentry, struct nameidata *nd)
1602{ 1602{
1603 if (nd->flags & LOOKUP_RCU) 1603 if (nd && nd->flags & LOOKUP_RCU)
1604 return -ECHILD; 1604 return -ECHILD;
1605 /* 1605 /*
1606 * This is not negative dentry. Always valid. 1606 * This is not negative dentry. Always valid.
diff --git a/fs/minix/namei.c b/fs/minix/namei.c
index ce7337ddfdbf..6e6777f1b4b2 100644
--- a/fs/minix/namei.c
+++ b/fs/minix/namei.c
@@ -213,7 +213,6 @@ static int minix_rename(struct inode * old_dir, struct dentry *old_dentry,
213 new_de = minix_find_entry(new_dentry, &new_page); 213 new_de = minix_find_entry(new_dentry, &new_page);
214 if (!new_de) 214 if (!new_de)
215 goto out_dir; 215 goto out_dir;
216 inode_inc_link_count(old_inode);
217 minix_set_link(new_de, new_page, old_inode); 216 minix_set_link(new_de, new_page, old_inode);
218 new_inode->i_ctime = CURRENT_TIME_SEC; 217 new_inode->i_ctime = CURRENT_TIME_SEC;
219 if (dir_de) 218 if (dir_de)
@@ -225,18 +224,15 @@ static int minix_rename(struct inode * old_dir, struct dentry *old_dentry,
225 if (new_dir->i_nlink >= info->s_link_max) 224 if (new_dir->i_nlink >= info->s_link_max)
226 goto out_dir; 225 goto out_dir;
227 } 226 }
228 inode_inc_link_count(old_inode);
229 err = minix_add_link(new_dentry, old_inode); 227 err = minix_add_link(new_dentry, old_inode);
230 if (err) { 228 if (err)
231 inode_dec_link_count(old_inode);
232 goto out_dir; 229 goto out_dir;
233 }
234 if (dir_de) 230 if (dir_de)
235 inode_inc_link_count(new_dir); 231 inode_inc_link_count(new_dir);
236 } 232 }
237 233
238 minix_delete_entry(old_de, old_page); 234 minix_delete_entry(old_de, old_page);
239 inode_dec_link_count(old_inode); 235 mark_inode_dirty(old_inode);
240 236
241 if (dir_de) { 237 if (dir_de) {
242 minix_set_link(dir_de, dir_page, new_dir); 238 minix_set_link(dir_de, dir_page, new_dir);
diff --git a/fs/namei.c b/fs/namei.c
index 7d77f24d32a9..a4689eb2df28 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -455,14 +455,6 @@ static int nameidata_dentry_drop_rcu(struct nameidata *nd, struct dentry *dentry
455 struct fs_struct *fs = current->fs; 455 struct fs_struct *fs = current->fs;
456 struct dentry *parent = nd->path.dentry; 456 struct dentry *parent = nd->path.dentry;
457 457
458 /*
459 * It can be possible to revalidate the dentry that we started
460 * the path walk with. force_reval_path may also revalidate the
461 * dentry already committed to the nameidata.
462 */
463 if (unlikely(parent == dentry))
464 return nameidata_drop_rcu(nd);
465
466 BUG_ON(!(nd->flags & LOOKUP_RCU)); 458 BUG_ON(!(nd->flags & LOOKUP_RCU));
467 if (nd->root.mnt) { 459 if (nd->root.mnt) {
468 spin_lock(&fs->lock); 460 spin_lock(&fs->lock);
@@ -561,39 +553,25 @@ static inline int nameidata_drop_rcu_last_maybe(struct nameidata *nd)
561 */ 553 */
562void release_open_intent(struct nameidata *nd) 554void release_open_intent(struct nameidata *nd)
563{ 555{
564 if (nd->intent.open.file->f_path.dentry == NULL) 556 struct file *file = nd->intent.open.file;
565 put_filp(nd->intent.open.file);
566 else
567 fput(nd->intent.open.file);
568}
569
570/*
571 * Call d_revalidate and handle filesystems that request rcu-walk
572 * to be dropped. This may be called and return in rcu-walk mode,
573 * regardless of success or error. If -ECHILD is returned, the caller
574 * must return -ECHILD back up the path walk stack so path walk may
575 * be restarted in ref-walk mode.
576 */
577static int d_revalidate(struct dentry *dentry, struct nameidata *nd)
578{
579 int status;
580 557
581 status = dentry->d_op->d_revalidate(dentry, nd); 558 if (file && !IS_ERR(file)) {
582 if (status == -ECHILD) { 559 if (file->f_path.dentry == NULL)
583 if (nameidata_dentry_drop_rcu(nd, dentry)) 560 put_filp(file);
584 return status; 561 else
585 status = dentry->d_op->d_revalidate(dentry, nd); 562 fput(file);
586 } 563 }
564}
587 565
588 return status; 566static inline int d_revalidate(struct dentry *dentry, struct nameidata *nd)
567{
568 return dentry->d_op->d_revalidate(dentry, nd);
589} 569}
590 570
591static inline struct dentry * 571static struct dentry *
592do_revalidate(struct dentry *dentry, struct nameidata *nd) 572do_revalidate(struct dentry *dentry, struct nameidata *nd)
593{ 573{
594 int status; 574 int status = d_revalidate(dentry, nd);
595
596 status = d_revalidate(dentry, nd);
597 if (unlikely(status <= 0)) { 575 if (unlikely(status <= 0)) {
598 /* 576 /*
599 * The dentry failed validation. 577 * The dentry failed validation.
@@ -602,24 +580,39 @@ do_revalidate(struct dentry *dentry, struct nameidata *nd)
602 * to return a fail status. 580 * to return a fail status.
603 */ 581 */
604 if (status < 0) { 582 if (status < 0) {
605 /* If we're in rcu-walk, we don't have a ref */ 583 dput(dentry);
606 if (!(nd->flags & LOOKUP_RCU))
607 dput(dentry);
608 dentry = ERR_PTR(status); 584 dentry = ERR_PTR(status);
609 585 } else if (!d_invalidate(dentry)) {
610 } else { 586 dput(dentry);
611 /* Don't d_invalidate in rcu-walk mode */ 587 dentry = NULL;
612 if (nameidata_dentry_drop_rcu_maybe(nd, dentry))
613 return ERR_PTR(-ECHILD);
614 if (!d_invalidate(dentry)) {
615 dput(dentry);
616 dentry = NULL;
617 }
618 } 588 }
619 } 589 }
620 return dentry; 590 return dentry;
621} 591}
622 592
593static inline struct dentry *
594do_revalidate_rcu(struct dentry *dentry, struct nameidata *nd)
595{
596 int status = d_revalidate(dentry, nd);
597 if (likely(status > 0))
598 return dentry;
599 if (status == -ECHILD) {
600 if (nameidata_dentry_drop_rcu(nd, dentry))
601 return ERR_PTR(-ECHILD);
602 return do_revalidate(dentry, nd);
603 }
604 if (status < 0)
605 return ERR_PTR(status);
606 /* Don't d_invalidate in rcu-walk mode */
607 if (nameidata_dentry_drop_rcu(nd, dentry))
608 return ERR_PTR(-ECHILD);
609 if (!d_invalidate(dentry)) {
610 dput(dentry);
611 dentry = NULL;
612 }
613 return dentry;
614}
615
623static inline int need_reval_dot(struct dentry *dentry) 616static inline int need_reval_dot(struct dentry *dentry)
624{ 617{
625 if (likely(!(dentry->d_flags & DCACHE_OP_REVALIDATE))) 618 if (likely(!(dentry->d_flags & DCACHE_OP_REVALIDATE)))
@@ -664,9 +657,6 @@ force_reval_path(struct path *path, struct nameidata *nd)
664 return 0; 657 return 0;
665 658
666 if (!status) { 659 if (!status) {
667 /* Don't d_invalidate in rcu-walk mode */
668 if (nameidata_drop_rcu(nd))
669 return -ECHILD;
670 d_invalidate(dentry); 660 d_invalidate(dentry);
671 status = -ESTALE; 661 status = -ESTALE;
672 } 662 }
@@ -773,6 +763,8 @@ __do_follow_link(const struct path *link, struct nameidata *nd, void **p)
773 int error; 763 int error;
774 struct dentry *dentry = link->dentry; 764 struct dentry *dentry = link->dentry;
775 765
766 BUG_ON(nd->flags & LOOKUP_RCU);
767
776 touch_atime(link->mnt, dentry); 768 touch_atime(link->mnt, dentry);
777 nd_set_link(nd, NULL); 769 nd_set_link(nd, NULL);
778 770
@@ -803,10 +795,16 @@ __do_follow_link(const struct path *link, struct nameidata *nd, void **p)
803 * Without that kind of total limit, nasty chains of consecutive 795 * Without that kind of total limit, nasty chains of consecutive
804 * symlinks can cause almost arbitrarily long lookups. 796 * symlinks can cause almost arbitrarily long lookups.
805 */ 797 */
806static inline int do_follow_link(struct path *path, struct nameidata *nd) 798static inline int do_follow_link(struct inode *inode, struct path *path, struct nameidata *nd)
807{ 799{
808 void *cookie; 800 void *cookie;
809 int err = -ELOOP; 801 int err = -ELOOP;
802
803 /* We drop rcu-walk here */
804 if (nameidata_dentry_drop_rcu_maybe(nd, path->dentry))
805 return -ECHILD;
806 BUG_ON(inode != path->dentry->d_inode);
807
810 if (current->link_count >= MAX_NESTED_LINKS) 808 if (current->link_count >= MAX_NESTED_LINKS)
811 goto loop; 809 goto loop;
812 if (current->total_link_count >= 40) 810 if (current->total_link_count >= 40)
@@ -1251,9 +1249,15 @@ static int do_lookup(struct nameidata *nd, struct qstr *name,
1251 return -ECHILD; 1249 return -ECHILD;
1252 1250
1253 nd->seq = seq; 1251 nd->seq = seq;
1254 if (dentry->d_flags & DCACHE_OP_REVALIDATE) 1252 if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) {
1255 goto need_revalidate; 1253 dentry = do_revalidate_rcu(dentry, nd);
1256done2: 1254 if (!dentry)
1255 goto need_lookup;
1256 if (IS_ERR(dentry))
1257 goto fail;
1258 if (!(nd->flags & LOOKUP_RCU))
1259 goto done;
1260 }
1257 path->mnt = mnt; 1261 path->mnt = mnt;
1258 path->dentry = dentry; 1262 path->dentry = dentry;
1259 if (likely(__follow_mount_rcu(nd, path, inode, false))) 1263 if (likely(__follow_mount_rcu(nd, path, inode, false)))
@@ -1266,8 +1270,13 @@ done2:
1266 if (!dentry) 1270 if (!dentry)
1267 goto need_lookup; 1271 goto need_lookup;
1268found: 1272found:
1269 if (dentry->d_flags & DCACHE_OP_REVALIDATE) 1273 if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) {
1270 goto need_revalidate; 1274 dentry = do_revalidate(dentry, nd);
1275 if (!dentry)
1276 goto need_lookup;
1277 if (IS_ERR(dentry))
1278 goto fail;
1279 }
1271done: 1280done:
1272 path->mnt = mnt; 1281 path->mnt = mnt;
1273 path->dentry = dentry; 1282 path->dentry = dentry;
@@ -1309,16 +1318,6 @@ need_lookup:
1309 mutex_unlock(&dir->i_mutex); 1318 mutex_unlock(&dir->i_mutex);
1310 goto found; 1319 goto found;
1311 1320
1312need_revalidate:
1313 dentry = do_revalidate(dentry, nd);
1314 if (!dentry)
1315 goto need_lookup;
1316 if (IS_ERR(dentry))
1317 goto fail;
1318 if (nd->flags & LOOKUP_RCU)
1319 goto done2;
1320 goto done;
1321
1322fail: 1321fail:
1323 return PTR_ERR(dentry); 1322 return PTR_ERR(dentry);
1324} 1323}
@@ -1415,11 +1414,7 @@ exec_again:
1415 goto out_dput; 1414 goto out_dput;
1416 1415
1417 if (inode->i_op->follow_link) { 1416 if (inode->i_op->follow_link) {
1418 /* We commonly drop rcu-walk here */ 1417 err = do_follow_link(inode, &next, nd);
1419 if (nameidata_dentry_drop_rcu_maybe(nd, next.dentry))
1420 return -ECHILD;
1421 BUG_ON(inode != next.dentry->d_inode);
1422 err = do_follow_link(&next, nd);
1423 if (err) 1418 if (err)
1424 goto return_err; 1419 goto return_err;
1425 nd->inode = nd->path.dentry->d_inode; 1420 nd->inode = nd->path.dentry->d_inode;
@@ -1463,10 +1458,7 @@ last_component:
1463 break; 1458 break;
1464 if (inode && unlikely(inode->i_op->follow_link) && 1459 if (inode && unlikely(inode->i_op->follow_link) &&
1465 (lookup_flags & LOOKUP_FOLLOW)) { 1460 (lookup_flags & LOOKUP_FOLLOW)) {
1466 if (nameidata_dentry_drop_rcu_maybe(nd, next.dentry)) 1461 err = do_follow_link(inode, &next, nd);
1467 return -ECHILD;
1468 BUG_ON(inode != next.dentry->d_inode);
1469 err = do_follow_link(&next, nd);
1470 if (err) 1462 if (err)
1471 goto return_err; 1463 goto return_err;
1472 nd->inode = nd->path.dentry->d_inode; 1464 nd->inode = nd->path.dentry->d_inode;
@@ -1500,12 +1492,15 @@ return_reval:
1500 * We may need to check the cached dentry for staleness. 1492 * We may need to check the cached dentry for staleness.
1501 */ 1493 */
1502 if (need_reval_dot(nd->path.dentry)) { 1494 if (need_reval_dot(nd->path.dentry)) {
1495 if (nameidata_drop_rcu_last_maybe(nd))
1496 return -ECHILD;
1503 /* Note: we do not d_invalidate() */ 1497 /* Note: we do not d_invalidate() */
1504 err = d_revalidate(nd->path.dentry, nd); 1498 err = d_revalidate(nd->path.dentry, nd);
1505 if (!err) 1499 if (!err)
1506 err = -ESTALE; 1500 err = -ESTALE;
1507 if (err < 0) 1501 if (err < 0)
1508 break; 1502 break;
1503 return 0;
1509 } 1504 }
1510return_base: 1505return_base:
1511 if (nameidata_drop_rcu_last_maybe(nd)) 1506 if (nameidata_drop_rcu_last_maybe(nd))
@@ -1551,6 +1546,7 @@ static int path_walk(const char *name, struct nameidata *nd)
1551 /* nd->path had been dropped */ 1546 /* nd->path had been dropped */
1552 current->total_link_count = 0; 1547 current->total_link_count = 0;
1553 nd->path = save; 1548 nd->path = save;
1549 nd->inode = save.dentry->d_inode;
1554 path_get(&nd->path); 1550 path_get(&nd->path);
1555 nd->flags |= LOOKUP_REVAL; 1551 nd->flags |= LOOKUP_REVAL;
1556 result = link_path_walk(name, nd); 1552 result = link_path_walk(name, nd);
@@ -2265,8 +2261,6 @@ static struct file *finish_open(struct nameidata *nd,
2265 return filp; 2261 return filp;
2266 2262
2267exit: 2263exit:
2268 if (!IS_ERR(nd->intent.open.file))
2269 release_open_intent(nd);
2270 path_put(&nd->path); 2264 path_put(&nd->path);
2271 return ERR_PTR(error); 2265 return ERR_PTR(error);
2272} 2266}
@@ -2389,8 +2383,6 @@ exit_mutex_unlock:
2389exit_dput: 2383exit_dput:
2390 path_put_conditional(path, nd); 2384 path_put_conditional(path, nd);
2391exit: 2385exit:
2392 if (!IS_ERR(nd->intent.open.file))
2393 release_open_intent(nd);
2394 path_put(&nd->path); 2386 path_put(&nd->path);
2395 return ERR_PTR(error); 2387 return ERR_PTR(error);
2396} 2388}
@@ -2464,21 +2456,29 @@ struct file *do_filp_open(int dfd, const char *pathname,
2464 /* !O_CREAT, simple open */ 2456 /* !O_CREAT, simple open */
2465 error = do_path_lookup(dfd, pathname, flags, &nd); 2457 error = do_path_lookup(dfd, pathname, flags, &nd);
2466 if (unlikely(error)) 2458 if (unlikely(error))
2467 goto out_filp; 2459 goto out_filp2;
2468 error = -ELOOP; 2460 error = -ELOOP;
2469 if (!(nd.flags & LOOKUP_FOLLOW)) { 2461 if (!(nd.flags & LOOKUP_FOLLOW)) {
2470 if (nd.inode->i_op->follow_link) 2462 if (nd.inode->i_op->follow_link)
2471 goto out_path; 2463 goto out_path2;
2472 } 2464 }
2473 error = -ENOTDIR; 2465 error = -ENOTDIR;
2474 if (nd.flags & LOOKUP_DIRECTORY) { 2466 if (nd.flags & LOOKUP_DIRECTORY) {
2475 if (!nd.inode->i_op->lookup) 2467 if (!nd.inode->i_op->lookup)
2476 goto out_path; 2468 goto out_path2;
2477 } 2469 }
2478 audit_inode(pathname, nd.path.dentry); 2470 audit_inode(pathname, nd.path.dentry);
2479 filp = finish_open(&nd, open_flag, acc_mode); 2471 filp = finish_open(&nd, open_flag, acc_mode);
2472out2:
2473 release_open_intent(&nd);
2480 return filp; 2474 return filp;
2481 2475
2476out_path2:
2477 path_put(&nd.path);
2478out_filp2:
2479 filp = ERR_PTR(error);
2480 goto out2;
2481
2482creat: 2482creat:
2483 /* OK, have to create the file. Find the parent. */ 2483 /* OK, have to create the file. Find the parent. */
2484 error = path_init_rcu(dfd, pathname, 2484 error = path_init_rcu(dfd, pathname,
@@ -2553,6 +2553,7 @@ out:
2553 path_put(&nd.root); 2553 path_put(&nd.root);
2554 if (filp == ERR_PTR(-ESTALE) && !(flags & LOOKUP_REVAL)) 2554 if (filp == ERR_PTR(-ESTALE) && !(flags & LOOKUP_REVAL))
2555 goto reval; 2555 goto reval;
2556 release_open_intent(&nd);
2556 return filp; 2557 return filp;
2557 2558
2558exit_dput: 2559exit_dput:
@@ -2560,8 +2561,6 @@ exit_dput:
2560out_path: 2561out_path:
2561 path_put(&nd.path); 2562 path_put(&nd.path);
2562out_filp: 2563out_filp:
2563 if (!IS_ERR(nd.intent.open.file))
2564 release_open_intent(&nd);
2565 filp = ERR_PTR(error); 2564 filp = ERR_PTR(error);
2566 goto out; 2565 goto out;
2567} 2566}
diff --git a/fs/namespace.c b/fs/namespace.c
index 7b0b95371696..d1edf26025dc 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1244,7 +1244,7 @@ static int do_umount(struct vfsmount *mnt, int flags)
1244 */ 1244 */
1245 br_write_lock(vfsmount_lock); 1245 br_write_lock(vfsmount_lock);
1246 if (mnt_get_count(mnt) != 2) { 1246 if (mnt_get_count(mnt) != 2) {
1247 br_write_lock(vfsmount_lock); 1247 br_write_unlock(vfsmount_lock);
1248 return -EBUSY; 1248 return -EBUSY;
1249 } 1249 }
1250 br_write_unlock(vfsmount_lock); 1250 br_write_unlock(vfsmount_lock);
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 1cc600e77bb4..2f8e61816d75 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -37,6 +37,7 @@
37#include <linux/inet.h> 37#include <linux/inet.h>
38#include <linux/nfs_xdr.h> 38#include <linux/nfs_xdr.h>
39#include <linux/slab.h> 39#include <linux/slab.h>
40#include <linux/compat.h>
40 41
41#include <asm/system.h> 42#include <asm/system.h>
42#include <asm/uaccess.h> 43#include <asm/uaccess.h>
@@ -89,7 +90,11 @@ int nfs_wait_bit_killable(void *word)
89 */ 90 */
90u64 nfs_compat_user_ino64(u64 fileid) 91u64 nfs_compat_user_ino64(u64 fileid)
91{ 92{
92 int ino; 93#ifdef CONFIG_COMPAT
94 compat_ulong_t ino;
95#else
96 unsigned long ino;
97#endif
93 98
94 if (enable_ino64) 99 if (enable_ino64)
95 return fileid; 100 return fileid;
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 7a7474073148..1be36cf65bfc 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -298,6 +298,11 @@ struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp);
298#if defined(CONFIG_NFS_V4_1) 298#if defined(CONFIG_NFS_V4_1)
299struct rpc_cred *nfs4_get_machine_cred_locked(struct nfs_client *clp); 299struct rpc_cred *nfs4_get_machine_cred_locked(struct nfs_client *clp);
300struct rpc_cred *nfs4_get_exchange_id_cred(struct nfs_client *clp); 300struct rpc_cred *nfs4_get_exchange_id_cred(struct nfs_client *clp);
301extern void nfs4_schedule_session_recovery(struct nfs4_session *);
302#else
303static inline void nfs4_schedule_session_recovery(struct nfs4_session *session)
304{
305}
301#endif /* CONFIG_NFS_V4_1 */ 306#endif /* CONFIG_NFS_V4_1 */
302 307
303extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *); 308extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *);
@@ -307,10 +312,9 @@ extern void nfs4_put_open_state(struct nfs4_state *);
307extern void nfs4_close_state(struct path *, struct nfs4_state *, fmode_t); 312extern void nfs4_close_state(struct path *, struct nfs4_state *, fmode_t);
308extern void nfs4_close_sync(struct path *, struct nfs4_state *, fmode_t); 313extern void nfs4_close_sync(struct path *, struct nfs4_state *, fmode_t);
309extern void nfs4_state_set_mode_locked(struct nfs4_state *, fmode_t); 314extern void nfs4_state_set_mode_locked(struct nfs4_state *, fmode_t);
310extern void nfs4_schedule_state_recovery(struct nfs_client *); 315extern void nfs4_schedule_lease_recovery(struct nfs_client *);
311extern void nfs4_schedule_state_manager(struct nfs_client *); 316extern void nfs4_schedule_state_manager(struct nfs_client *);
312extern int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state); 317extern void nfs4_schedule_stateid_recovery(const struct nfs_server *, struct nfs4_state *);
313extern int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state);
314extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags); 318extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags);
315extern void nfs41_handle_recall_slot(struct nfs_client *clp); 319extern void nfs41_handle_recall_slot(struct nfs_client *clp);
316extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp); 320extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp);
diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c
index f5c9b125e8cc..b73c34375f60 100644
--- a/fs/nfs/nfs4filelayoutdev.c
+++ b/fs/nfs/nfs4filelayoutdev.c
@@ -219,6 +219,10 @@ decode_and_add_ds(__be32 **pp, struct inode *inode)
219 goto out_err; 219 goto out_err;
220 } 220 }
221 buf = kmalloc(rlen + 1, GFP_KERNEL); 221 buf = kmalloc(rlen + 1, GFP_KERNEL);
222 if (!buf) {
223 dprintk("%s: Not enough memory\n", __func__);
224 goto out_err;
225 }
222 buf[rlen] = '\0'; 226 buf[rlen] = '\0';
223 memcpy(buf, r_addr, rlen); 227 memcpy(buf, r_addr, rlen);
224 228
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 78936a8f40ab..0a07e353a961 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -256,12 +256,13 @@ static int nfs4_handle_exception(const struct nfs_server *server, int errorcode,
256 case -NFS4ERR_OPENMODE: 256 case -NFS4ERR_OPENMODE:
257 if (state == NULL) 257 if (state == NULL)
258 break; 258 break;
259 nfs4_state_mark_reclaim_nograce(clp, state); 259 nfs4_schedule_stateid_recovery(server, state);
260 goto do_state_recovery; 260 goto wait_on_recovery;
261 case -NFS4ERR_STALE_STATEID: 261 case -NFS4ERR_STALE_STATEID:
262 case -NFS4ERR_STALE_CLIENTID: 262 case -NFS4ERR_STALE_CLIENTID:
263 case -NFS4ERR_EXPIRED: 263 case -NFS4ERR_EXPIRED:
264 goto do_state_recovery; 264 nfs4_schedule_lease_recovery(clp);
265 goto wait_on_recovery;
265#if defined(CONFIG_NFS_V4_1) 266#if defined(CONFIG_NFS_V4_1)
266 case -NFS4ERR_BADSESSION: 267 case -NFS4ERR_BADSESSION:
267 case -NFS4ERR_BADSLOT: 268 case -NFS4ERR_BADSLOT:
@@ -272,7 +273,7 @@ static int nfs4_handle_exception(const struct nfs_server *server, int errorcode,
272 case -NFS4ERR_SEQ_MISORDERED: 273 case -NFS4ERR_SEQ_MISORDERED:
273 dprintk("%s ERROR: %d Reset session\n", __func__, 274 dprintk("%s ERROR: %d Reset session\n", __func__,
274 errorcode); 275 errorcode);
275 nfs4_schedule_state_recovery(clp); 276 nfs4_schedule_session_recovery(clp->cl_session);
276 exception->retry = 1; 277 exception->retry = 1;
277 break; 278 break;
278#endif /* defined(CONFIG_NFS_V4_1) */ 279#endif /* defined(CONFIG_NFS_V4_1) */
@@ -295,8 +296,7 @@ static int nfs4_handle_exception(const struct nfs_server *server, int errorcode,
295 } 296 }
296 /* We failed to handle the error */ 297 /* We failed to handle the error */
297 return nfs4_map_errors(ret); 298 return nfs4_map_errors(ret);
298do_state_recovery: 299wait_on_recovery:
299 nfs4_schedule_state_recovery(clp);
300 ret = nfs4_wait_clnt_recover(clp); 300 ret = nfs4_wait_clnt_recover(clp);
301 if (ret == 0) 301 if (ret == 0)
302 exception->retry = 1; 302 exception->retry = 1;
@@ -435,8 +435,8 @@ static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *
435 clp = res->sr_session->clp; 435 clp = res->sr_session->clp;
436 do_renew_lease(clp, timestamp); 436 do_renew_lease(clp, timestamp);
437 /* Check sequence flags */ 437 /* Check sequence flags */
438 if (atomic_read(&clp->cl_count) > 1) 438 if (res->sr_status_flags != 0)
439 nfs41_handle_sequence_flag_errors(clp, res->sr_status_flags); 439 nfs4_schedule_lease_recovery(clp);
440 break; 440 break;
441 case -NFS4ERR_DELAY: 441 case -NFS4ERR_DELAY:
442 /* The server detected a resend of the RPC call and 442 /* The server detected a resend of the RPC call and
@@ -1255,14 +1255,13 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state
1255 case -NFS4ERR_BAD_HIGH_SLOT: 1255 case -NFS4ERR_BAD_HIGH_SLOT:
1256 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: 1256 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
1257 case -NFS4ERR_DEADSESSION: 1257 case -NFS4ERR_DEADSESSION:
1258 nfs4_schedule_state_recovery( 1258 nfs4_schedule_session_recovery(server->nfs_client->cl_session);
1259 server->nfs_client);
1260 goto out; 1259 goto out;
1261 case -NFS4ERR_STALE_CLIENTID: 1260 case -NFS4ERR_STALE_CLIENTID:
1262 case -NFS4ERR_STALE_STATEID: 1261 case -NFS4ERR_STALE_STATEID:
1263 case -NFS4ERR_EXPIRED: 1262 case -NFS4ERR_EXPIRED:
1264 /* Don't recall a delegation if it was lost */ 1263 /* Don't recall a delegation if it was lost */
1265 nfs4_schedule_state_recovery(server->nfs_client); 1264 nfs4_schedule_lease_recovery(server->nfs_client);
1266 goto out; 1265 goto out;
1267 case -ERESTARTSYS: 1266 case -ERESTARTSYS:
1268 /* 1267 /*
@@ -1271,7 +1270,7 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state
1271 */ 1270 */
1272 case -NFS4ERR_ADMIN_REVOKED: 1271 case -NFS4ERR_ADMIN_REVOKED:
1273 case -NFS4ERR_BAD_STATEID: 1272 case -NFS4ERR_BAD_STATEID:
1274 nfs4_state_mark_reclaim_nograce(server->nfs_client, state); 1273 nfs4_schedule_stateid_recovery(server, state);
1275 case -EKEYEXPIRED: 1274 case -EKEYEXPIRED:
1276 /* 1275 /*
1277 * User RPCSEC_GSS context has expired. 1276 * User RPCSEC_GSS context has expired.
@@ -1587,7 +1586,7 @@ static int nfs4_recover_expired_lease(struct nfs_server *server)
1587 if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) && 1586 if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) &&
1588 !test_bit(NFS4CLNT_CHECK_LEASE,&clp->cl_state)) 1587 !test_bit(NFS4CLNT_CHECK_LEASE,&clp->cl_state))
1589 break; 1588 break;
1590 nfs4_schedule_state_recovery(clp); 1589 nfs4_schedule_state_manager(clp);
1591 ret = -EIO; 1590 ret = -EIO;
1592 } 1591 }
1593 return ret; 1592 return ret;
@@ -3178,7 +3177,7 @@ static void nfs4_renew_done(struct rpc_task *task, void *calldata)
3178 if (task->tk_status < 0) { 3177 if (task->tk_status < 0) {
3179 /* Unless we're shutting down, schedule state recovery! */ 3178 /* Unless we're shutting down, schedule state recovery! */
3180 if (test_bit(NFS_CS_RENEWD, &clp->cl_res_state) != 0) 3179 if (test_bit(NFS_CS_RENEWD, &clp->cl_res_state) != 0)
3181 nfs4_schedule_state_recovery(clp); 3180 nfs4_schedule_lease_recovery(clp);
3182 return; 3181 return;
3183 } 3182 }
3184 do_renew_lease(clp, timestamp); 3183 do_renew_lease(clp, timestamp);
@@ -3252,6 +3251,35 @@ static void buf_to_pages(const void *buf, size_t buflen,
3252 } 3251 }
3253} 3252}
3254 3253
3254static int buf_to_pages_noslab(const void *buf, size_t buflen,
3255 struct page **pages, unsigned int *pgbase)
3256{
3257 struct page *newpage, **spages;
3258 int rc = 0;
3259 size_t len;
3260 spages = pages;
3261
3262 do {
3263 len = min_t(size_t, PAGE_CACHE_SIZE, buflen);
3264 newpage = alloc_page(GFP_KERNEL);
3265
3266 if (newpage == NULL)
3267 goto unwind;
3268 memcpy(page_address(newpage), buf, len);
3269 buf += len;
3270 buflen -= len;
3271 *pages++ = newpage;
3272 rc++;
3273 } while (buflen != 0);
3274
3275 return rc;
3276
3277unwind:
3278 for(; rc > 0; rc--)
3279 __free_page(spages[rc-1]);
3280 return -ENOMEM;
3281}
3282
3255struct nfs4_cached_acl { 3283struct nfs4_cached_acl {
3256 int cached; 3284 int cached;
3257 size_t len; 3285 size_t len;
@@ -3420,13 +3448,23 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl
3420 .rpc_argp = &arg, 3448 .rpc_argp = &arg,
3421 .rpc_resp = &res, 3449 .rpc_resp = &res,
3422 }; 3450 };
3423 int ret; 3451 int ret, i;
3424 3452
3425 if (!nfs4_server_supports_acls(server)) 3453 if (!nfs4_server_supports_acls(server))
3426 return -EOPNOTSUPP; 3454 return -EOPNOTSUPP;
3455 i = buf_to_pages_noslab(buf, buflen, arg.acl_pages, &arg.acl_pgbase);
3456 if (i < 0)
3457 return i;
3427 nfs_inode_return_delegation(inode); 3458 nfs_inode_return_delegation(inode);
3428 buf_to_pages(buf, buflen, arg.acl_pages, &arg.acl_pgbase);
3429 ret = nfs4_call_sync(server, &msg, &arg, &res, 1); 3459 ret = nfs4_call_sync(server, &msg, &arg, &res, 1);
3460
3461 /*
3462 * Free each page after tx, so the only ref left is
3463 * held by the network stack
3464 */
3465 for (; i > 0; i--)
3466 put_page(pages[i-1]);
3467
3430 /* 3468 /*
3431 * Acl update can result in inode attribute update. 3469 * Acl update can result in inode attribute update.
3432 * so mark the attribute cache invalid. 3470 * so mark the attribute cache invalid.
@@ -3464,12 +3502,13 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
3464 case -NFS4ERR_OPENMODE: 3502 case -NFS4ERR_OPENMODE:
3465 if (state == NULL) 3503 if (state == NULL)
3466 break; 3504 break;
3467 nfs4_state_mark_reclaim_nograce(clp, state); 3505 nfs4_schedule_stateid_recovery(server, state);
3468 goto do_state_recovery; 3506 goto wait_on_recovery;
3469 case -NFS4ERR_STALE_STATEID: 3507 case -NFS4ERR_STALE_STATEID:
3470 case -NFS4ERR_STALE_CLIENTID: 3508 case -NFS4ERR_STALE_CLIENTID:
3471 case -NFS4ERR_EXPIRED: 3509 case -NFS4ERR_EXPIRED:
3472 goto do_state_recovery; 3510 nfs4_schedule_lease_recovery(clp);
3511 goto wait_on_recovery;
3473#if defined(CONFIG_NFS_V4_1) 3512#if defined(CONFIG_NFS_V4_1)
3474 case -NFS4ERR_BADSESSION: 3513 case -NFS4ERR_BADSESSION:
3475 case -NFS4ERR_BADSLOT: 3514 case -NFS4ERR_BADSLOT:
@@ -3480,7 +3519,7 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
3480 case -NFS4ERR_SEQ_MISORDERED: 3519 case -NFS4ERR_SEQ_MISORDERED:
3481 dprintk("%s ERROR %d, Reset session\n", __func__, 3520 dprintk("%s ERROR %d, Reset session\n", __func__,
3482 task->tk_status); 3521 task->tk_status);
3483 nfs4_schedule_state_recovery(clp); 3522 nfs4_schedule_session_recovery(clp->cl_session);
3484 task->tk_status = 0; 3523 task->tk_status = 0;
3485 return -EAGAIN; 3524 return -EAGAIN;
3486#endif /* CONFIG_NFS_V4_1 */ 3525#endif /* CONFIG_NFS_V4_1 */
@@ -3497,9 +3536,8 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
3497 } 3536 }
3498 task->tk_status = nfs4_map_errors(task->tk_status); 3537 task->tk_status = nfs4_map_errors(task->tk_status);
3499 return 0; 3538 return 0;
3500do_state_recovery: 3539wait_on_recovery:
3501 rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL); 3540 rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL);
3502 nfs4_schedule_state_recovery(clp);
3503 if (test_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) == 0) 3541 if (test_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) == 0)
3504 rpc_wake_up_queued_task(&clp->cl_rpcwaitq, task); 3542 rpc_wake_up_queued_task(&clp->cl_rpcwaitq, task);
3505 task->tk_status = 0; 3543 task->tk_status = 0;
@@ -4110,7 +4148,7 @@ static void nfs4_lock_release(void *calldata)
4110 task = nfs4_do_unlck(&data->fl, data->ctx, data->lsp, 4148 task = nfs4_do_unlck(&data->fl, data->ctx, data->lsp,
4111 data->arg.lock_seqid); 4149 data->arg.lock_seqid);
4112 if (!IS_ERR(task)) 4150 if (!IS_ERR(task))
4113 rpc_put_task(task); 4151 rpc_put_task_async(task);
4114 dprintk("%s: cancelling lock!\n", __func__); 4152 dprintk("%s: cancelling lock!\n", __func__);
4115 } else 4153 } else
4116 nfs_free_seqid(data->arg.lock_seqid); 4154 nfs_free_seqid(data->arg.lock_seqid);
@@ -4134,23 +4172,18 @@ static const struct rpc_call_ops nfs4_recover_lock_ops = {
4134 4172
4135static void nfs4_handle_setlk_error(struct nfs_server *server, struct nfs4_lock_state *lsp, int new_lock_owner, int error) 4173static void nfs4_handle_setlk_error(struct nfs_server *server, struct nfs4_lock_state *lsp, int new_lock_owner, int error)
4136{ 4174{
4137 struct nfs_client *clp = server->nfs_client;
4138 struct nfs4_state *state = lsp->ls_state;
4139
4140 switch (error) { 4175 switch (error) {
4141 case -NFS4ERR_ADMIN_REVOKED: 4176 case -NFS4ERR_ADMIN_REVOKED:
4142 case -NFS4ERR_BAD_STATEID: 4177 case -NFS4ERR_BAD_STATEID:
4143 case -NFS4ERR_EXPIRED: 4178 lsp->ls_seqid.flags &= ~NFS_SEQID_CONFIRMED;
4144 if (new_lock_owner != 0 || 4179 if (new_lock_owner != 0 ||
4145 (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0) 4180 (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0)
4146 nfs4_state_mark_reclaim_nograce(clp, state); 4181 nfs4_schedule_stateid_recovery(server, lsp->ls_state);
4147 lsp->ls_seqid.flags &= ~NFS_SEQID_CONFIRMED;
4148 break; 4182 break;
4149 case -NFS4ERR_STALE_STATEID: 4183 case -NFS4ERR_STALE_STATEID:
4150 if (new_lock_owner != 0 ||
4151 (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0)
4152 nfs4_state_mark_reclaim_reboot(clp, state);
4153 lsp->ls_seqid.flags &= ~NFS_SEQID_CONFIRMED; 4184 lsp->ls_seqid.flags &= ~NFS_SEQID_CONFIRMED;
4185 case -NFS4ERR_EXPIRED:
4186 nfs4_schedule_lease_recovery(server->nfs_client);
4154 }; 4187 };
4155} 4188}
4156 4189
@@ -4366,12 +4399,14 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl)
4366 case -NFS4ERR_EXPIRED: 4399 case -NFS4ERR_EXPIRED:
4367 case -NFS4ERR_STALE_CLIENTID: 4400 case -NFS4ERR_STALE_CLIENTID:
4368 case -NFS4ERR_STALE_STATEID: 4401 case -NFS4ERR_STALE_STATEID:
4402 nfs4_schedule_lease_recovery(server->nfs_client);
4403 goto out;
4369 case -NFS4ERR_BADSESSION: 4404 case -NFS4ERR_BADSESSION:
4370 case -NFS4ERR_BADSLOT: 4405 case -NFS4ERR_BADSLOT:
4371 case -NFS4ERR_BAD_HIGH_SLOT: 4406 case -NFS4ERR_BAD_HIGH_SLOT:
4372 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: 4407 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
4373 case -NFS4ERR_DEADSESSION: 4408 case -NFS4ERR_DEADSESSION:
4374 nfs4_schedule_state_recovery(server->nfs_client); 4409 nfs4_schedule_session_recovery(server->nfs_client->cl_session);
4375 goto out; 4410 goto out;
4376 case -ERESTARTSYS: 4411 case -ERESTARTSYS:
4377 /* 4412 /*
@@ -4381,7 +4416,7 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl)
4381 case -NFS4ERR_ADMIN_REVOKED: 4416 case -NFS4ERR_ADMIN_REVOKED:
4382 case -NFS4ERR_BAD_STATEID: 4417 case -NFS4ERR_BAD_STATEID:
4383 case -NFS4ERR_OPENMODE: 4418 case -NFS4ERR_OPENMODE:
4384 nfs4_state_mark_reclaim_nograce(server->nfs_client, state); 4419 nfs4_schedule_stateid_recovery(server, state);
4385 err = 0; 4420 err = 0;
4386 goto out; 4421 goto out;
4387 case -EKEYEXPIRED: 4422 case -EKEYEXPIRED:
@@ -4988,10 +5023,20 @@ int nfs4_proc_create_session(struct nfs_client *clp)
4988 int status; 5023 int status;
4989 unsigned *ptr; 5024 unsigned *ptr;
4990 struct nfs4_session *session = clp->cl_session; 5025 struct nfs4_session *session = clp->cl_session;
5026 long timeout = 0;
5027 int err;
4991 5028
4992 dprintk("--> %s clp=%p session=%p\n", __func__, clp, session); 5029 dprintk("--> %s clp=%p session=%p\n", __func__, clp, session);
4993 5030
4994 status = _nfs4_proc_create_session(clp); 5031 do {
5032 status = _nfs4_proc_create_session(clp);
5033 if (status == -NFS4ERR_DELAY) {
5034 err = nfs4_delay(clp->cl_rpcclient, &timeout);
5035 if (err)
5036 status = err;
5037 }
5038 } while (status == -NFS4ERR_DELAY);
5039
4995 if (status) 5040 if (status)
4996 goto out; 5041 goto out;
4997 5042
@@ -5100,7 +5145,7 @@ static int nfs41_sequence_handle_errors(struct rpc_task *task, struct nfs_client
5100 rpc_delay(task, NFS4_POLL_RETRY_MAX); 5145 rpc_delay(task, NFS4_POLL_RETRY_MAX);
5101 return -EAGAIN; 5146 return -EAGAIN;
5102 default: 5147 default:
5103 nfs4_schedule_state_recovery(clp); 5148 nfs4_schedule_lease_recovery(clp);
5104 } 5149 }
5105 return 0; 5150 return 0;
5106} 5151}
@@ -5187,7 +5232,7 @@ static int nfs41_proc_async_sequence(struct nfs_client *clp, struct rpc_cred *cr
5187 if (IS_ERR(task)) 5232 if (IS_ERR(task))
5188 ret = PTR_ERR(task); 5233 ret = PTR_ERR(task);
5189 else 5234 else
5190 rpc_put_task(task); 5235 rpc_put_task_async(task);
5191 dprintk("<-- %s status=%d\n", __func__, ret); 5236 dprintk("<-- %s status=%d\n", __func__, ret);
5192 return ret; 5237 return ret;
5193} 5238}
@@ -5203,8 +5248,13 @@ static int nfs4_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred)
5203 goto out; 5248 goto out;
5204 } 5249 }
5205 ret = rpc_wait_for_completion_task(task); 5250 ret = rpc_wait_for_completion_task(task);
5206 if (!ret) 5251 if (!ret) {
5252 struct nfs4_sequence_res *res = task->tk_msg.rpc_resp;
5253
5254 if (task->tk_status == 0)
5255 nfs41_handle_sequence_flag_errors(clp, res->sr_status_flags);
5207 ret = task->tk_status; 5256 ret = task->tk_status;
5257 }
5208 rpc_put_task(task); 5258 rpc_put_task(task);
5209out: 5259out:
5210 dprintk("<-- %s status=%d\n", __func__, ret); 5260 dprintk("<-- %s status=%d\n", __func__, ret);
@@ -5241,7 +5291,7 @@ static int nfs41_reclaim_complete_handle_errors(struct rpc_task *task, struct nf
5241 rpc_delay(task, NFS4_POLL_RETRY_MAX); 5291 rpc_delay(task, NFS4_POLL_RETRY_MAX);
5242 return -EAGAIN; 5292 return -EAGAIN;
5243 default: 5293 default:
5244 nfs4_schedule_state_recovery(clp); 5294 nfs4_schedule_lease_recovery(clp);
5245 } 5295 }
5246 return 0; 5296 return 0;
5247} 5297}
@@ -5309,6 +5359,9 @@ static int nfs41_proc_reclaim_complete(struct nfs_client *clp)
5309 status = PTR_ERR(task); 5359 status = PTR_ERR(task);
5310 goto out; 5360 goto out;
5311 } 5361 }
5362 status = nfs4_wait_for_completion_rpc_task(task);
5363 if (status == 0)
5364 status = task->tk_status;
5312 rpc_put_task(task); 5365 rpc_put_task(task);
5313 return 0; 5366 return 0;
5314out: 5367out:
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index e6742b57a04c..0592288f9f06 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1007,9 +1007,9 @@ void nfs4_schedule_state_manager(struct nfs_client *clp)
1007} 1007}
1008 1008
1009/* 1009/*
1010 * Schedule a state recovery attempt 1010 * Schedule a lease recovery attempt
1011 */ 1011 */
1012void nfs4_schedule_state_recovery(struct nfs_client *clp) 1012void nfs4_schedule_lease_recovery(struct nfs_client *clp)
1013{ 1013{
1014 if (!clp) 1014 if (!clp)
1015 return; 1015 return;
@@ -1018,7 +1018,7 @@ void nfs4_schedule_state_recovery(struct nfs_client *clp)
1018 nfs4_schedule_state_manager(clp); 1018 nfs4_schedule_state_manager(clp);
1019} 1019}
1020 1020
1021int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state) 1021static int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state)
1022{ 1022{
1023 1023
1024 set_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags); 1024 set_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags);
@@ -1032,7 +1032,7 @@ int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *st
1032 return 1; 1032 return 1;
1033} 1033}
1034 1034
1035int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state) 1035static int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state)
1036{ 1036{
1037 set_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags); 1037 set_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags);
1038 clear_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags); 1038 clear_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags);
@@ -1041,6 +1041,14 @@ int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *s
1041 return 1; 1041 return 1;
1042} 1042}
1043 1043
1044void nfs4_schedule_stateid_recovery(const struct nfs_server *server, struct nfs4_state *state)
1045{
1046 struct nfs_client *clp = server->nfs_client;
1047
1048 nfs4_state_mark_reclaim_nograce(clp, state);
1049 nfs4_schedule_state_manager(clp);
1050}
1051
1044static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_recovery_ops *ops) 1052static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_recovery_ops *ops)
1045{ 1053{
1046 struct inode *inode = state->inode; 1054 struct inode *inode = state->inode;
@@ -1436,10 +1444,15 @@ static int nfs4_reclaim_lease(struct nfs_client *clp)
1436} 1444}
1437 1445
1438#ifdef CONFIG_NFS_V4_1 1446#ifdef CONFIG_NFS_V4_1
1447void nfs4_schedule_session_recovery(struct nfs4_session *session)
1448{
1449 nfs4_schedule_lease_recovery(session->clp);
1450}
1451
1439void nfs41_handle_recall_slot(struct nfs_client *clp) 1452void nfs41_handle_recall_slot(struct nfs_client *clp)
1440{ 1453{
1441 set_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state); 1454 set_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state);
1442 nfs4_schedule_state_recovery(clp); 1455 nfs4_schedule_state_manager(clp);
1443} 1456}
1444 1457
1445static void nfs4_reset_all_state(struct nfs_client *clp) 1458static void nfs4_reset_all_state(struct nfs_client *clp)
@@ -1447,7 +1460,7 @@ static void nfs4_reset_all_state(struct nfs_client *clp)
1447 if (test_and_set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) { 1460 if (test_and_set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) {
1448 clp->cl_boot_time = CURRENT_TIME; 1461 clp->cl_boot_time = CURRENT_TIME;
1449 nfs4_state_start_reclaim_nograce(clp); 1462 nfs4_state_start_reclaim_nograce(clp);
1450 nfs4_schedule_state_recovery(clp); 1463 nfs4_schedule_state_manager(clp);
1451 } 1464 }
1452} 1465}
1453 1466
@@ -1455,7 +1468,7 @@ static void nfs41_handle_server_reboot(struct nfs_client *clp)
1455{ 1468{
1456 if (test_and_set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) { 1469 if (test_and_set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) {
1457 nfs4_state_start_reclaim_reboot(clp); 1470 nfs4_state_start_reclaim_reboot(clp);
1458 nfs4_schedule_state_recovery(clp); 1471 nfs4_schedule_state_manager(clp);
1459 } 1472 }
1460} 1473}
1461 1474
@@ -1475,7 +1488,7 @@ static void nfs41_handle_cb_path_down(struct nfs_client *clp)
1475{ 1488{
1476 nfs_expire_all_delegations(clp); 1489 nfs_expire_all_delegations(clp);
1477 if (test_and_set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state) == 0) 1490 if (test_and_set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state) == 0)
1478 nfs4_schedule_state_recovery(clp); 1491 nfs4_schedule_state_manager(clp);
1479} 1492}
1480 1493
1481void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags) 1494void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags)
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 4e2c168b6ee9..94d50e86a124 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -1660,7 +1660,7 @@ static void encode_create_session(struct xdr_stream *xdr,
1660 1660
1661 p = reserve_space(xdr, 20 + 2*28 + 20 + len + 12); 1661 p = reserve_space(xdr, 20 + 2*28 + 20 + len + 12);
1662 *p++ = cpu_to_be32(OP_CREATE_SESSION); 1662 *p++ = cpu_to_be32(OP_CREATE_SESSION);
1663 p = xdr_encode_hyper(p, clp->cl_ex_clid); 1663 p = xdr_encode_hyper(p, clp->cl_clientid);
1664 *p++ = cpu_to_be32(clp->cl_seqid); /*Sequence id */ 1664 *p++ = cpu_to_be32(clp->cl_seqid); /*Sequence id */
1665 *p++ = cpu_to_be32(args->flags); /*flags */ 1665 *p++ = cpu_to_be32(args->flags); /*flags */
1666 1666
@@ -4694,7 +4694,7 @@ static int decode_exchange_id(struct xdr_stream *xdr,
4694 p = xdr_inline_decode(xdr, 8); 4694 p = xdr_inline_decode(xdr, 8);
4695 if (unlikely(!p)) 4695 if (unlikely(!p))
4696 goto out_overflow; 4696 goto out_overflow;
4697 xdr_decode_hyper(p, &clp->cl_ex_clid); 4697 xdr_decode_hyper(p, &clp->cl_clientid);
4698 p = xdr_inline_decode(xdr, 12); 4698 p = xdr_inline_decode(xdr, 12);
4699 if (unlikely(!p)) 4699 if (unlikely(!p))
4700 goto out_overflow; 4700 goto out_overflow;
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c
index 903908a20023..c541093a5bf2 100644
--- a/fs/nfs/nfsroot.c
+++ b/fs/nfs/nfsroot.c
@@ -86,11 +86,14 @@
86/* Default path we try to mount. "%s" gets replaced by our IP address */ 86/* Default path we try to mount. "%s" gets replaced by our IP address */
87#define NFS_ROOT "/tftpboot/%s" 87#define NFS_ROOT "/tftpboot/%s"
88 88
89/* Default NFSROOT mount options. */
90#define NFS_DEF_OPTIONS "udp"
91
89/* Parameters passed from the kernel command line */ 92/* Parameters passed from the kernel command line */
90static char nfs_root_parms[256] __initdata = ""; 93static char nfs_root_parms[256] __initdata = "";
91 94
92/* Text-based mount options passed to super.c */ 95/* Text-based mount options passed to super.c */
93static char nfs_root_options[256] __initdata = ""; 96static char nfs_root_options[256] __initdata = NFS_DEF_OPTIONS;
94 97
95/* Address of NFS server */ 98/* Address of NFS server */
96static __be32 servaddr __initdata = htonl(INADDR_NONE); 99static __be32 servaddr __initdata = htonl(INADDR_NONE);
@@ -160,8 +163,14 @@ static int __init root_nfs_copy(char *dest, const char *src,
160} 163}
161 164
162static int __init root_nfs_cat(char *dest, const char *src, 165static int __init root_nfs_cat(char *dest, const char *src,
163 const size_t destlen) 166 const size_t destlen)
164{ 167{
168 size_t len = strlen(dest);
169
170 if (len && dest[len - 1] != ',')
171 if (strlcat(dest, ",", destlen) > destlen)
172 return -1;
173
165 if (strlcat(dest, src, destlen) > destlen) 174 if (strlcat(dest, src, destlen) > destlen)
166 return -1; 175 return -1;
167 return 0; 176 return 0;
@@ -194,16 +203,6 @@ static int __init root_nfs_parse_options(char *incoming, char *exppath,
194 if (root_nfs_cat(nfs_root_options, incoming, 203 if (root_nfs_cat(nfs_root_options, incoming,
195 sizeof(nfs_root_options))) 204 sizeof(nfs_root_options)))
196 return -1; 205 return -1;
197
198 /*
199 * Possibly prepare for more options to be appended
200 */
201 if (nfs_root_options[0] != '\0' &&
202 nfs_root_options[strlen(nfs_root_options)] != ',')
203 if (root_nfs_cat(nfs_root_options, ",",
204 sizeof(nfs_root_options)))
205 return -1;
206
207 return 0; 206 return 0;
208} 207}
209 208
@@ -217,7 +216,7 @@ static int __init root_nfs_parse_options(char *incoming, char *exppath,
217 */ 216 */
218static int __init root_nfs_data(char *cmdline) 217static int __init root_nfs_data(char *cmdline)
219{ 218{
220 char addr_option[sizeof("nolock,addr=") + INET_ADDRSTRLEN + 1]; 219 char mand_options[sizeof("nolock,addr=") + INET_ADDRSTRLEN + 1];
221 int len, retval = -1; 220 int len, retval = -1;
222 char *tmp = NULL; 221 char *tmp = NULL;
223 const size_t tmplen = sizeof(nfs_export_path); 222 const size_t tmplen = sizeof(nfs_export_path);
@@ -244,9 +243,9 @@ static int __init root_nfs_data(char *cmdline)
244 * Append mandatory options for nfsroot so they override 243 * Append mandatory options for nfsroot so they override
245 * what has come before 244 * what has come before
246 */ 245 */
247 snprintf(addr_option, sizeof(addr_option), "nolock,addr=%pI4", 246 snprintf(mand_options, sizeof(mand_options), "nolock,addr=%pI4",
248 &servaddr); 247 &servaddr);
249 if (root_nfs_cat(nfs_root_options, addr_option, 248 if (root_nfs_cat(nfs_root_options, mand_options,
250 sizeof(nfs_root_options))) 249 sizeof(nfs_root_options)))
251 goto out_optionstoolong; 250 goto out_optionstoolong;
252 251
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index e313a51acdd1..6481d537d69d 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -180,7 +180,7 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n
180 task_setup_data.rpc_client = NFS_CLIENT(dir); 180 task_setup_data.rpc_client = NFS_CLIENT(dir);
181 task = rpc_run_task(&task_setup_data); 181 task = rpc_run_task(&task_setup_data);
182 if (!IS_ERR(task)) 182 if (!IS_ERR(task))
183 rpc_put_task(task); 183 rpc_put_task_async(task);
184 return 1; 184 return 1;
185} 185}
186 186
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index c8278f4046cb..42b92d7a9cc4 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1292,6 +1292,8 @@ static int nfs_commit_rpcsetup(struct list_head *head,
1292 task = rpc_run_task(&task_setup_data); 1292 task = rpc_run_task(&task_setup_data);
1293 if (IS_ERR(task)) 1293 if (IS_ERR(task))
1294 return PTR_ERR(task); 1294 return PTR_ERR(task);
1295 if (how & FLUSH_SYNC)
1296 rpc_wait_for_completion_task(task);
1295 rpc_put_task(task); 1297 rpc_put_task(task);
1296 return 0; 1298 return 0;
1297} 1299}
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 3be975e18919..02eb4edf0ece 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -432,7 +432,7 @@ static int decode_cb_sequence4resok(struct xdr_stream *xdr,
432 * If the server returns different values for sessionID, slotID or 432 * If the server returns different values for sessionID, slotID or
433 * sequence number, the server is looney tunes. 433 * sequence number, the server is looney tunes.
434 */ 434 */
435 p = xdr_inline_decode(xdr, NFS4_MAX_SESSIONID_LEN + 4 + 4); 435 p = xdr_inline_decode(xdr, NFS4_MAX_SESSIONID_LEN + 4 + 4 + 4 + 4);
436 if (unlikely(p == NULL)) 436 if (unlikely(p == NULL))
437 goto out_overflow; 437 goto out_overflow;
438 memcpy(id.data, p, NFS4_MAX_SESSIONID_LEN); 438 memcpy(id.data, p, NFS4_MAX_SESSIONID_LEN);
@@ -484,7 +484,7 @@ static int decode_cb_sequence4res(struct xdr_stream *xdr,
484out: 484out:
485 return status; 485 return status;
486out_default: 486out_default:
487 return nfs_cb_stat_to_errno(status); 487 return nfs_cb_stat_to_errno(nfserr);
488} 488}
489 489
490/* 490/*
@@ -564,11 +564,9 @@ static int nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp,
564 if (unlikely(status)) 564 if (unlikely(status))
565 goto out; 565 goto out;
566 if (unlikely(nfserr != NFS4_OK)) 566 if (unlikely(nfserr != NFS4_OK))
567 goto out_default; 567 status = nfs_cb_stat_to_errno(nfserr);
568out: 568out:
569 return status; 569 return status;
570out_default:
571 return nfs_cb_stat_to_errno(status);
572} 570}
573 571
574/* 572/*
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index d98d0213285d..7b566ec14e18 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -230,9 +230,6 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f
230 dp->dl_client = clp; 230 dp->dl_client = clp;
231 get_nfs4_file(fp); 231 get_nfs4_file(fp);
232 dp->dl_file = fp; 232 dp->dl_file = fp;
233 dp->dl_vfs_file = find_readable_file(fp);
234 get_file(dp->dl_vfs_file);
235 dp->dl_flock = NULL;
236 dp->dl_type = type; 233 dp->dl_type = type;
237 dp->dl_stateid.si_boot = boot_time; 234 dp->dl_stateid.si_boot = boot_time;
238 dp->dl_stateid.si_stateownerid = current_delegid++; 235 dp->dl_stateid.si_stateownerid = current_delegid++;
@@ -241,8 +238,6 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f
241 fh_copy_shallow(&dp->dl_fh, &current_fh->fh_handle); 238 fh_copy_shallow(&dp->dl_fh, &current_fh->fh_handle);
242 dp->dl_time = 0; 239 dp->dl_time = 0;
243 atomic_set(&dp->dl_count, 1); 240 atomic_set(&dp->dl_count, 1);
244 list_add(&dp->dl_perfile, &fp->fi_delegations);
245 list_add(&dp->dl_perclnt, &clp->cl_delegations);
246 INIT_WORK(&dp->dl_recall.cb_work, nfsd4_do_callback_rpc); 241 INIT_WORK(&dp->dl_recall.cb_work, nfsd4_do_callback_rpc);
247 return dp; 242 return dp;
248} 243}
@@ -253,36 +248,30 @@ nfs4_put_delegation(struct nfs4_delegation *dp)
253 if (atomic_dec_and_test(&dp->dl_count)) { 248 if (atomic_dec_and_test(&dp->dl_count)) {
254 dprintk("NFSD: freeing dp %p\n",dp); 249 dprintk("NFSD: freeing dp %p\n",dp);
255 put_nfs4_file(dp->dl_file); 250 put_nfs4_file(dp->dl_file);
256 fput(dp->dl_vfs_file);
257 kmem_cache_free(deleg_slab, dp); 251 kmem_cache_free(deleg_slab, dp);
258 num_delegations--; 252 num_delegations--;
259 } 253 }
260} 254}
261 255
262/* Remove the associated file_lock first, then remove the delegation. 256static void nfs4_put_deleg_lease(struct nfs4_file *fp)
263 * lease_modify() is called to remove the FS_LEASE file_lock from
264 * the i_flock list, eventually calling nfsd's lock_manager
265 * fl_release_callback.
266 */
267static void
268nfs4_close_delegation(struct nfs4_delegation *dp)
269{ 257{
270 dprintk("NFSD: close_delegation dp %p\n",dp); 258 if (atomic_dec_and_test(&fp->fi_delegees)) {
271 /* XXX: do we even need this check?: */ 259 vfs_setlease(fp->fi_deleg_file, F_UNLCK, &fp->fi_lease);
272 if (dp->dl_flock) 260 fp->fi_lease = NULL;
273 vfs_setlease(dp->dl_vfs_file, F_UNLCK, &dp->dl_flock); 261 fp->fi_deleg_file = NULL;
262 }
274} 263}
275 264
276/* Called under the state lock. */ 265/* Called under the state lock. */
277static void 266static void
278unhash_delegation(struct nfs4_delegation *dp) 267unhash_delegation(struct nfs4_delegation *dp)
279{ 268{
280 list_del_init(&dp->dl_perfile);
281 list_del_init(&dp->dl_perclnt); 269 list_del_init(&dp->dl_perclnt);
282 spin_lock(&recall_lock); 270 spin_lock(&recall_lock);
271 list_del_init(&dp->dl_perfile);
283 list_del_init(&dp->dl_recall_lru); 272 list_del_init(&dp->dl_recall_lru);
284 spin_unlock(&recall_lock); 273 spin_unlock(&recall_lock);
285 nfs4_close_delegation(dp); 274 nfs4_put_deleg_lease(dp->dl_file);
286 nfs4_put_delegation(dp); 275 nfs4_put_delegation(dp);
287} 276}
288 277
@@ -958,8 +947,6 @@ expire_client(struct nfs4_client *clp)
958 spin_lock(&recall_lock); 947 spin_lock(&recall_lock);
959 while (!list_empty(&clp->cl_delegations)) { 948 while (!list_empty(&clp->cl_delegations)) {
960 dp = list_entry(clp->cl_delegations.next, struct nfs4_delegation, dl_perclnt); 949 dp = list_entry(clp->cl_delegations.next, struct nfs4_delegation, dl_perclnt);
961 dprintk("NFSD: expire client. dp %p, fp %p\n", dp,
962 dp->dl_flock);
963 list_del_init(&dp->dl_perclnt); 950 list_del_init(&dp->dl_perclnt);
964 list_move(&dp->dl_recall_lru, &reaplist); 951 list_move(&dp->dl_recall_lru, &reaplist);
965 } 952 }
@@ -2078,6 +2065,7 @@ alloc_init_file(struct inode *ino)
2078 fp->fi_inode = igrab(ino); 2065 fp->fi_inode = igrab(ino);
2079 fp->fi_id = current_fileid++; 2066 fp->fi_id = current_fileid++;
2080 fp->fi_had_conflict = false; 2067 fp->fi_had_conflict = false;
2068 fp->fi_lease = NULL;
2081 memset(fp->fi_fds, 0, sizeof(fp->fi_fds)); 2069 memset(fp->fi_fds, 0, sizeof(fp->fi_fds));
2082 memset(fp->fi_access, 0, sizeof(fp->fi_access)); 2070 memset(fp->fi_access, 0, sizeof(fp->fi_access));
2083 spin_lock(&recall_lock); 2071 spin_lock(&recall_lock);
@@ -2329,23 +2317,8 @@ nfs4_file_downgrade(struct nfs4_file *fp, unsigned int share_access)
2329 nfs4_file_put_access(fp, O_RDONLY); 2317 nfs4_file_put_access(fp, O_RDONLY);
2330} 2318}
2331 2319
2332/* 2320static void nfsd_break_one_deleg(struct nfs4_delegation *dp)
2333 * Spawn a thread to perform a recall on the delegation represented
2334 * by the lease (file_lock)
2335 *
2336 * Called from break_lease() with lock_flocks() held.
2337 * Note: we assume break_lease will only call this *once* for any given
2338 * lease.
2339 */
2340static
2341void nfsd_break_deleg_cb(struct file_lock *fl)
2342{ 2321{
2343 struct nfs4_delegation *dp = (struct nfs4_delegation *)fl->fl_owner;
2344
2345 dprintk("NFSD nfsd_break_deleg_cb: dp %p fl %p\n",dp,fl);
2346 if (!dp)
2347 return;
2348
2349 /* We're assuming the state code never drops its reference 2322 /* We're assuming the state code never drops its reference
2350 * without first removing the lease. Since we're in this lease 2323 * without first removing the lease. Since we're in this lease
2351 * callback (and since the lease code is serialized by the kernel 2324 * callback (and since the lease code is serialized by the kernel
@@ -2353,22 +2326,35 @@ void nfsd_break_deleg_cb(struct file_lock *fl)
2353 * it's safe to take a reference: */ 2326 * it's safe to take a reference: */
2354 atomic_inc(&dp->dl_count); 2327 atomic_inc(&dp->dl_count);
2355 2328
2356 spin_lock(&recall_lock);
2357 list_add_tail(&dp->dl_recall_lru, &del_recall_lru); 2329 list_add_tail(&dp->dl_recall_lru, &del_recall_lru);
2358 spin_unlock(&recall_lock);
2359 2330
2360 /* only place dl_time is set. protected by lock_flocks*/ 2331 /* only place dl_time is set. protected by lock_flocks*/
2361 dp->dl_time = get_seconds(); 2332 dp->dl_time = get_seconds();
2362 2333
2334 nfsd4_cb_recall(dp);
2335}
2336
2337/* Called from break_lease() with lock_flocks() held. */
2338static void nfsd_break_deleg_cb(struct file_lock *fl)
2339{
2340 struct nfs4_file *fp = (struct nfs4_file *)fl->fl_owner;
2341 struct nfs4_delegation *dp;
2342
2343 BUG_ON(!fp);
2344 /* We assume break_lease is only called once per lease: */
2345 BUG_ON(fp->fi_had_conflict);
2363 /* 2346 /*
2364 * We don't want the locks code to timeout the lease for us; 2347 * We don't want the locks code to timeout the lease for us;
2365 * we'll remove it ourself if the delegation isn't returned 2348 * we'll remove it ourself if a delegation isn't returned
2366 * in time. 2349 * in time:
2367 */ 2350 */
2368 fl->fl_break_time = 0; 2351 fl->fl_break_time = 0;
2369 2352
2370 dp->dl_file->fi_had_conflict = true; 2353 spin_lock(&recall_lock);
2371 nfsd4_cb_recall(dp); 2354 fp->fi_had_conflict = true;
2355 list_for_each_entry(dp, &fp->fi_delegations, dl_perfile)
2356 nfsd_break_one_deleg(dp);
2357 spin_unlock(&recall_lock);
2372} 2358}
2373 2359
2374static 2360static
@@ -2461,10 +2447,13 @@ find_delegation_file(struct nfs4_file *fp, stateid_t *stid)
2461{ 2447{
2462 struct nfs4_delegation *dp; 2448 struct nfs4_delegation *dp;
2463 2449
2464 list_for_each_entry(dp, &fp->fi_delegations, dl_perfile) { 2450 spin_lock(&recall_lock);
2465 if (dp->dl_stateid.si_stateownerid == stid->si_stateownerid) 2451 list_for_each_entry(dp, &fp->fi_delegations, dl_perfile)
2452 if (dp->dl_stateid.si_stateownerid == stid->si_stateownerid) {
2453 spin_unlock(&recall_lock);
2466 return dp; 2454 return dp;
2467 } 2455 }
2456 spin_unlock(&recall_lock);
2468 return NULL; 2457 return NULL;
2469} 2458}
2470 2459
@@ -2641,6 +2630,66 @@ static bool nfsd4_cb_channel_good(struct nfs4_client *clp)
2641 return clp->cl_minorversion && clp->cl_cb_state == NFSD4_CB_UNKNOWN; 2630 return clp->cl_minorversion && clp->cl_cb_state == NFSD4_CB_UNKNOWN;
2642} 2631}
2643 2632
2633static struct file_lock *nfs4_alloc_init_lease(struct nfs4_delegation *dp, int flag)
2634{
2635 struct file_lock *fl;
2636
2637 fl = locks_alloc_lock();
2638 if (!fl)
2639 return NULL;
2640 locks_init_lock(fl);
2641 fl->fl_lmops = &nfsd_lease_mng_ops;
2642 fl->fl_flags = FL_LEASE;
2643 fl->fl_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK;
2644 fl->fl_end = OFFSET_MAX;
2645 fl->fl_owner = (fl_owner_t)(dp->dl_file);
2646 fl->fl_pid = current->tgid;
2647 return fl;
2648}
2649
2650static int nfs4_setlease(struct nfs4_delegation *dp, int flag)
2651{
2652 struct nfs4_file *fp = dp->dl_file;
2653 struct file_lock *fl;
2654 int status;
2655
2656 fl = nfs4_alloc_init_lease(dp, flag);
2657 if (!fl)
2658 return -ENOMEM;
2659 fl->fl_file = find_readable_file(fp);
2660 list_add(&dp->dl_perclnt, &dp->dl_client->cl_delegations);
2661 status = vfs_setlease(fl->fl_file, fl->fl_type, &fl);
2662 if (status) {
2663 list_del_init(&dp->dl_perclnt);
2664 locks_free_lock(fl);
2665 return -ENOMEM;
2666 }
2667 fp->fi_lease = fl;
2668 fp->fi_deleg_file = fl->fl_file;
2669 get_file(fp->fi_deleg_file);
2670 atomic_set(&fp->fi_delegees, 1);
2671 list_add(&dp->dl_perfile, &fp->fi_delegations);
2672 return 0;
2673}
2674
2675static int nfs4_set_delegation(struct nfs4_delegation *dp, int flag)
2676{
2677 struct nfs4_file *fp = dp->dl_file;
2678
2679 if (!fp->fi_lease)
2680 return nfs4_setlease(dp, flag);
2681 spin_lock(&recall_lock);
2682 if (fp->fi_had_conflict) {
2683 spin_unlock(&recall_lock);
2684 return -EAGAIN;
2685 }
2686 atomic_inc(&fp->fi_delegees);
2687 list_add(&dp->dl_perfile, &fp->fi_delegations);
2688 spin_unlock(&recall_lock);
2689 list_add(&dp->dl_perclnt, &dp->dl_client->cl_delegations);
2690 return 0;
2691}
2692
2644/* 2693/*
2645 * Attempt to hand out a delegation. 2694 * Attempt to hand out a delegation.
2646 */ 2695 */
@@ -2650,7 +2699,6 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta
2650 struct nfs4_delegation *dp; 2699 struct nfs4_delegation *dp;
2651 struct nfs4_stateowner *sop = stp->st_stateowner; 2700 struct nfs4_stateowner *sop = stp->st_stateowner;
2652 int cb_up; 2701 int cb_up;
2653 struct file_lock *fl;
2654 int status, flag = 0; 2702 int status, flag = 0;
2655 2703
2656 cb_up = nfsd4_cb_channel_good(sop->so_client); 2704 cb_up = nfsd4_cb_channel_good(sop->so_client);
@@ -2681,36 +2729,11 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta
2681 } 2729 }
2682 2730
2683 dp = alloc_init_deleg(sop->so_client, stp, fh, flag); 2731 dp = alloc_init_deleg(sop->so_client, stp, fh, flag);
2684 if (dp == NULL) { 2732 if (dp == NULL)
2685 flag = NFS4_OPEN_DELEGATE_NONE; 2733 goto out_no_deleg;
2686 goto out; 2734 status = nfs4_set_delegation(dp, flag);
2687 } 2735 if (status)
2688 status = -ENOMEM; 2736 goto out_free;
2689 fl = locks_alloc_lock();
2690 if (!fl)
2691 goto out;
2692 locks_init_lock(fl);
2693 fl->fl_lmops = &nfsd_lease_mng_ops;
2694 fl->fl_flags = FL_LEASE;
2695 fl->fl_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK;
2696 fl->fl_end = OFFSET_MAX;
2697 fl->fl_owner = (fl_owner_t)dp;
2698 fl->fl_file = find_readable_file(stp->st_file);
2699 BUG_ON(!fl->fl_file);
2700 fl->fl_pid = current->tgid;
2701 dp->dl_flock = fl;
2702
2703 /* vfs_setlease checks to see if delegation should be handed out.
2704 * the lock_manager callback fl_change is used
2705 */
2706 if ((status = vfs_setlease(fl->fl_file, fl->fl_type, &fl))) {
2707 dprintk("NFSD: setlease failed [%d], no delegation\n", status);
2708 dp->dl_flock = NULL;
2709 locks_free_lock(fl);
2710 unhash_delegation(dp);
2711 flag = NFS4_OPEN_DELEGATE_NONE;
2712 goto out;
2713 }
2714 2737
2715 memcpy(&open->op_delegate_stateid, &dp->dl_stateid, sizeof(dp->dl_stateid)); 2738 memcpy(&open->op_delegate_stateid, &dp->dl_stateid, sizeof(dp->dl_stateid));
2716 2739
@@ -2722,6 +2745,12 @@ out:
2722 && open->op_delegate_type != NFS4_OPEN_DELEGATE_NONE) 2745 && open->op_delegate_type != NFS4_OPEN_DELEGATE_NONE)
2723 dprintk("NFSD: WARNING: refusing delegation reclaim\n"); 2746 dprintk("NFSD: WARNING: refusing delegation reclaim\n");
2724 open->op_delegate_type = flag; 2747 open->op_delegate_type = flag;
2748 return;
2749out_free:
2750 nfs4_put_delegation(dp);
2751out_no_deleg:
2752 flag = NFS4_OPEN_DELEGATE_NONE;
2753 goto out;
2725} 2754}
2726 2755
2727/* 2756/*
@@ -2916,8 +2945,6 @@ nfs4_laundromat(void)
2916 test_val = u; 2945 test_val = u;
2917 break; 2946 break;
2918 } 2947 }
2919 dprintk("NFSD: purging unused delegation dp %p, fp %p\n",
2920 dp, dp->dl_flock);
2921 list_move(&dp->dl_recall_lru, &reaplist); 2948 list_move(&dp->dl_recall_lru, &reaplist);
2922 } 2949 }
2923 spin_unlock(&recall_lock); 2950 spin_unlock(&recall_lock);
@@ -3128,7 +3155,7 @@ nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate,
3128 goto out; 3155 goto out;
3129 renew_client(dp->dl_client); 3156 renew_client(dp->dl_client);
3130 if (filpp) { 3157 if (filpp) {
3131 *filpp = find_readable_file(dp->dl_file); 3158 *filpp = dp->dl_file->fi_deleg_file;
3132 BUG_ON(!*filpp); 3159 BUG_ON(!*filpp);
3133 } 3160 }
3134 } else { /* open or lock stateid */ 3161 } else { /* open or lock stateid */
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 956629b9cdc9..615f0a9f0600 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -317,8 +317,8 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
317 READ_BUF(dummy32); 317 READ_BUF(dummy32);
318 len += (XDR_QUADLEN(dummy32) << 2); 318 len += (XDR_QUADLEN(dummy32) << 2);
319 READMEM(buf, dummy32); 319 READMEM(buf, dummy32);
320 if ((host_err = nfsd_map_name_to_uid(argp->rqstp, buf, dummy32, &iattr->ia_uid))) 320 if ((status = nfsd_map_name_to_uid(argp->rqstp, buf, dummy32, &iattr->ia_uid)))
321 goto out_nfserr; 321 return status;
322 iattr->ia_valid |= ATTR_UID; 322 iattr->ia_valid |= ATTR_UID;
323 } 323 }
324 if (bmval[1] & FATTR4_WORD1_OWNER_GROUP) { 324 if (bmval[1] & FATTR4_WORD1_OWNER_GROUP) {
@@ -328,8 +328,8 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
328 READ_BUF(dummy32); 328 READ_BUF(dummy32);
329 len += (XDR_QUADLEN(dummy32) << 2); 329 len += (XDR_QUADLEN(dummy32) << 2);
330 READMEM(buf, dummy32); 330 READMEM(buf, dummy32);
331 if ((host_err = nfsd_map_name_to_gid(argp->rqstp, buf, dummy32, &iattr->ia_gid))) 331 if ((status = nfsd_map_name_to_gid(argp->rqstp, buf, dummy32, &iattr->ia_gid)))
332 goto out_nfserr; 332 return status;
333 iattr->ia_valid |= ATTR_GID; 333 iattr->ia_valid |= ATTR_GID;
334 } 334 }
335 if (bmval[1] & FATTR4_WORD1_TIME_ACCESS_SET) { 335 if (bmval[1] & FATTR4_WORD1_TIME_ACCESS_SET) {
@@ -1142,7 +1142,7 @@ nfsd4_decode_create_session(struct nfsd4_compoundargs *argp,
1142 1142
1143 u32 dummy; 1143 u32 dummy;
1144 char *machine_name; 1144 char *machine_name;
1145 int i; 1145 int i, j;
1146 int nr_secflavs; 1146 int nr_secflavs;
1147 1147
1148 READ_BUF(16); 1148 READ_BUF(16);
@@ -1215,7 +1215,7 @@ nfsd4_decode_create_session(struct nfsd4_compoundargs *argp,
1215 READ_BUF(4); 1215 READ_BUF(4);
1216 READ32(dummy); 1216 READ32(dummy);
1217 READ_BUF(dummy * 4); 1217 READ_BUF(dummy * 4);
1218 for (i = 0; i < dummy; ++i) 1218 for (j = 0; j < dummy; ++j)
1219 READ32(dummy); 1219 READ32(dummy);
1220 break; 1220 break;
1221 case RPC_AUTH_GSS: 1221 case RPC_AUTH_GSS:
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 3074656ba7bf..2d31224b07bf 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -83,8 +83,6 @@ struct nfs4_delegation {
83 atomic_t dl_count; /* ref count */ 83 atomic_t dl_count; /* ref count */
84 struct nfs4_client *dl_client; 84 struct nfs4_client *dl_client;
85 struct nfs4_file *dl_file; 85 struct nfs4_file *dl_file;
86 struct file *dl_vfs_file;
87 struct file_lock *dl_flock;
88 u32 dl_type; 86 u32 dl_type;
89 time_t dl_time; 87 time_t dl_time;
90/* For recall: */ 88/* For recall: */
@@ -379,6 +377,9 @@ struct nfs4_file {
379 */ 377 */
380 atomic_t fi_readers; 378 atomic_t fi_readers;
381 atomic_t fi_writers; 379 atomic_t fi_writers;
380 struct file *fi_deleg_file;
381 struct file_lock *fi_lease;
382 atomic_t fi_delegees;
382 struct inode *fi_inode; 383 struct inode *fi_inode;
383 u32 fi_id; /* used with stateowner->so_id 384 u32 fi_id; /* used with stateowner->so_id
384 * for stateid_hashtbl hash */ 385 * for stateid_hashtbl hash */
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 641117f2188d..da1d9701f8e4 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -808,7 +808,7 @@ nfsd_get_raparms(dev_t dev, ino_t ino)
808 if (ra->p_count == 0) 808 if (ra->p_count == 0)
809 frap = rap; 809 frap = rap;
810 } 810 }
811 depth = nfsdstats.ra_size*11/10; 811 depth = nfsdstats.ra_size;
812 if (!frap) { 812 if (!frap) {
813 spin_unlock(&rab->pb_lock); 813 spin_unlock(&rab->pb_lock);
814 return NULL; 814 return NULL;
@@ -1744,6 +1744,13 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
1744 host_err = nfsd_break_lease(odentry->d_inode); 1744 host_err = nfsd_break_lease(odentry->d_inode);
1745 if (host_err) 1745 if (host_err)
1746 goto out_drop_write; 1746 goto out_drop_write;
1747 if (ndentry->d_inode) {
1748 host_err = nfsd_break_lease(ndentry->d_inode);
1749 if (host_err)
1750 goto out_drop_write;
1751 }
1752 if (host_err)
1753 goto out_drop_write;
1747 host_err = vfs_rename(fdir, odentry, tdir, ndentry); 1754 host_err = vfs_rename(fdir, odentry, tdir, ndentry);
1748 if (!host_err) { 1755 if (!host_err) {
1749 host_err = commit_metadata(tfhp); 1756 host_err = commit_metadata(tfhp);
@@ -1812,22 +1819,22 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
1812 1819
1813 host_err = mnt_want_write(fhp->fh_export->ex_path.mnt); 1820 host_err = mnt_want_write(fhp->fh_export->ex_path.mnt);
1814 if (host_err) 1821 if (host_err)
1815 goto out_nfserr; 1822 goto out_put;
1816 1823
1817 host_err = nfsd_break_lease(rdentry->d_inode); 1824 host_err = nfsd_break_lease(rdentry->d_inode);
1818 if (host_err) 1825 if (host_err)
1819 goto out_put; 1826 goto out_drop_write;
1820 if (type != S_IFDIR) 1827 if (type != S_IFDIR)
1821 host_err = vfs_unlink(dirp, rdentry); 1828 host_err = vfs_unlink(dirp, rdentry);
1822 else 1829 else
1823 host_err = vfs_rmdir(dirp, rdentry); 1830 host_err = vfs_rmdir(dirp, rdentry);
1824out_put:
1825 dput(rdentry);
1826
1827 if (!host_err) 1831 if (!host_err)
1828 host_err = commit_metadata(fhp); 1832 host_err = commit_metadata(fhp);
1829 1833out_drop_write:
1830 mnt_drop_write(fhp->fh_export->ex_path.mnt); 1834 mnt_drop_write(fhp->fh_export->ex_path.mnt);
1835out_put:
1836 dput(rdentry);
1837
1831out_nfserr: 1838out_nfserr:
1832 err = nfserrno(host_err); 1839 err = nfserrno(host_err);
1833out: 1840out:
diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c
index 388e9e8f5286..85f7baa15f5d 100644
--- a/fs/nilfs2/btnode.c
+++ b/fs/nilfs2/btnode.c
@@ -35,11 +35,6 @@
35#include "btnode.h" 35#include "btnode.h"
36 36
37 37
38void nilfs_btnode_cache_init_once(struct address_space *btnc)
39{
40 nilfs_mapping_init_once(btnc);
41}
42
43static const struct address_space_operations def_btnode_aops = { 38static const struct address_space_operations def_btnode_aops = {
44 .sync_page = block_sync_page, 39 .sync_page = block_sync_page,
45}; 40};
diff --git a/fs/nilfs2/btnode.h b/fs/nilfs2/btnode.h
index 79037494f1e0..1b8ebd888c28 100644
--- a/fs/nilfs2/btnode.h
+++ b/fs/nilfs2/btnode.h
@@ -37,7 +37,6 @@ struct nilfs_btnode_chkey_ctxt {
37 struct buffer_head *newbh; 37 struct buffer_head *newbh;
38}; 38};
39 39
40void nilfs_btnode_cache_init_once(struct address_space *);
41void nilfs_btnode_cache_init(struct address_space *, struct backing_dev_info *); 40void nilfs_btnode_cache_init(struct address_space *, struct backing_dev_info *);
42void nilfs_btnode_cache_clear(struct address_space *); 41void nilfs_btnode_cache_clear(struct address_space *);
43struct buffer_head *nilfs_btnode_create_block(struct address_space *btnc, 42struct buffer_head *nilfs_btnode_create_block(struct address_space *btnc,
diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c
index 6a0e2a189f60..a0babd2bff6a 100644
--- a/fs/nilfs2/mdt.c
+++ b/fs/nilfs2/mdt.c
@@ -454,9 +454,9 @@ int nilfs_mdt_setup_shadow_map(struct inode *inode,
454 struct backing_dev_info *bdi = inode->i_sb->s_bdi; 454 struct backing_dev_info *bdi = inode->i_sb->s_bdi;
455 455
456 INIT_LIST_HEAD(&shadow->frozen_buffers); 456 INIT_LIST_HEAD(&shadow->frozen_buffers);
457 nilfs_mapping_init_once(&shadow->frozen_data); 457 address_space_init_once(&shadow->frozen_data);
458 nilfs_mapping_init(&shadow->frozen_data, bdi, &shadow_map_aops); 458 nilfs_mapping_init(&shadow->frozen_data, bdi, &shadow_map_aops);
459 nilfs_mapping_init_once(&shadow->frozen_btnodes); 459 address_space_init_once(&shadow->frozen_btnodes);
460 nilfs_mapping_init(&shadow->frozen_btnodes, bdi, &shadow_map_aops); 460 nilfs_mapping_init(&shadow->frozen_btnodes, bdi, &shadow_map_aops);
461 mi->mi_shadow = shadow; 461 mi->mi_shadow = shadow;
462 return 0; 462 return 0;
diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c
index 98034271cd02..161791d26458 100644
--- a/fs/nilfs2/namei.c
+++ b/fs/nilfs2/namei.c
@@ -397,7 +397,6 @@ static int nilfs_rename(struct inode *old_dir, struct dentry *old_dentry,
397 new_de = nilfs_find_entry(new_dir, &new_dentry->d_name, &new_page); 397 new_de = nilfs_find_entry(new_dir, &new_dentry->d_name, &new_page);
398 if (!new_de) 398 if (!new_de)
399 goto out_dir; 399 goto out_dir;
400 inc_nlink(old_inode);
401 nilfs_set_link(new_dir, new_de, new_page, old_inode); 400 nilfs_set_link(new_dir, new_de, new_page, old_inode);
402 nilfs_mark_inode_dirty(new_dir); 401 nilfs_mark_inode_dirty(new_dir);
403 new_inode->i_ctime = CURRENT_TIME; 402 new_inode->i_ctime = CURRENT_TIME;
@@ -411,13 +410,9 @@ static int nilfs_rename(struct inode *old_dir, struct dentry *old_dentry,
411 if (new_dir->i_nlink >= NILFS_LINK_MAX) 410 if (new_dir->i_nlink >= NILFS_LINK_MAX)
412 goto out_dir; 411 goto out_dir;
413 } 412 }
414 inc_nlink(old_inode);
415 err = nilfs_add_link(new_dentry, old_inode); 413 err = nilfs_add_link(new_dentry, old_inode);
416 if (err) { 414 if (err)
417 drop_nlink(old_inode);
418 nilfs_mark_inode_dirty(old_inode);
419 goto out_dir; 415 goto out_dir;
420 }
421 if (dir_de) { 416 if (dir_de) {
422 inc_nlink(new_dir); 417 inc_nlink(new_dir);
423 nilfs_mark_inode_dirty(new_dir); 418 nilfs_mark_inode_dirty(new_dir);
@@ -431,7 +426,6 @@ static int nilfs_rename(struct inode *old_dir, struct dentry *old_dentry,
431 old_inode->i_ctime = CURRENT_TIME; 426 old_inode->i_ctime = CURRENT_TIME;
432 427
433 nilfs_delete_entry(old_de, old_page); 428 nilfs_delete_entry(old_de, old_page);
434 drop_nlink(old_inode);
435 429
436 if (dir_de) { 430 if (dir_de) {
437 nilfs_set_link(old_inode, dir_de, dir_page, new_dir); 431 nilfs_set_link(old_inode, dir_de, dir_page, new_dir);
diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c
index 0c432416cfef..a585b35fd6bc 100644
--- a/fs/nilfs2/page.c
+++ b/fs/nilfs2/page.c
@@ -492,19 +492,6 @@ unsigned nilfs_page_count_clean_buffers(struct page *page,
492 return nc; 492 return nc;
493} 493}
494 494
495void nilfs_mapping_init_once(struct address_space *mapping)
496{
497 memset(mapping, 0, sizeof(*mapping));
498 INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC);
499 spin_lock_init(&mapping->tree_lock);
500 INIT_LIST_HEAD(&mapping->private_list);
501 spin_lock_init(&mapping->private_lock);
502
503 spin_lock_init(&mapping->i_mmap_lock);
504 INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap);
505 INIT_LIST_HEAD(&mapping->i_mmap_nonlinear);
506}
507
508void nilfs_mapping_init(struct address_space *mapping, 495void nilfs_mapping_init(struct address_space *mapping,
509 struct backing_dev_info *bdi, 496 struct backing_dev_info *bdi,
510 const struct address_space_operations *aops) 497 const struct address_space_operations *aops)
diff --git a/fs/nilfs2/page.h b/fs/nilfs2/page.h
index 622df27cd891..2a00953ebd5f 100644
--- a/fs/nilfs2/page.h
+++ b/fs/nilfs2/page.h
@@ -61,7 +61,6 @@ void nilfs_free_private_page(struct page *);
61int nilfs_copy_dirty_pages(struct address_space *, struct address_space *); 61int nilfs_copy_dirty_pages(struct address_space *, struct address_space *);
62void nilfs_copy_back_pages(struct address_space *, struct address_space *); 62void nilfs_copy_back_pages(struct address_space *, struct address_space *);
63void nilfs_clear_dirty_pages(struct address_space *); 63void nilfs_clear_dirty_pages(struct address_space *);
64void nilfs_mapping_init_once(struct address_space *mapping);
65void nilfs_mapping_init(struct address_space *mapping, 64void nilfs_mapping_init(struct address_space *mapping,
66 struct backing_dev_info *bdi, 65 struct backing_dev_info *bdi,
67 const struct address_space_operations *aops); 66 const struct address_space_operations *aops);
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 55ebae5c7f39..2de9f636792a 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -430,7 +430,8 @@ static void nilfs_segctor_begin_finfo(struct nilfs_sc_info *sci,
430 nilfs_segctor_map_segsum_entry( 430 nilfs_segctor_map_segsum_entry(
431 sci, &sci->sc_binfo_ptr, sizeof(struct nilfs_finfo)); 431 sci, &sci->sc_binfo_ptr, sizeof(struct nilfs_finfo));
432 432
433 if (inode->i_sb && !test_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags)) 433 if (NILFS_I(inode)->i_root &&
434 !test_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags))
434 set_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags); 435 set_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags);
435 /* skip finfo */ 436 /* skip finfo */
436} 437}
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 58fd707174e1..1673b3d99842 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -1279,7 +1279,7 @@ static void nilfs_inode_init_once(void *obj)
1279#ifdef CONFIG_NILFS_XATTR 1279#ifdef CONFIG_NILFS_XATTR
1280 init_rwsem(&ii->xattr_sem); 1280 init_rwsem(&ii->xattr_sem);
1281#endif 1281#endif
1282 nilfs_btnode_cache_init_once(&ii->i_btnode_cache); 1282 address_space_init_once(&ii->i_btnode_cache);
1283 ii->i_bmap = &ii->i_bmap_data; 1283 ii->i_bmap = &ii->i_bmap_data;
1284 inode_init_once(&ii->vfs_inode); 1284 inode_init_once(&ii->vfs_inode);
1285} 1285}
diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c
index 6d80ecc7834f..7eb90403fc8a 100644
--- a/fs/ocfs2/dcache.c
+++ b/fs/ocfs2/dcache.c
@@ -56,7 +56,7 @@ static int ocfs2_dentry_revalidate(struct dentry *dentry,
56 int ret = 0; /* if all else fails, just return false */ 56 int ret = 0; /* if all else fails, just return false */
57 struct ocfs2_super *osb; 57 struct ocfs2_super *osb;
58 58
59 if (nd->flags & LOOKUP_RCU) 59 if (nd && nd->flags & LOOKUP_RCU)
60 return -ECHILD; 60 return -ECHILD;
61 61
62 inode = dentry->d_inode; 62 inode = dentry->d_inode;
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index 43e56b97f9c0..6180da1e37e6 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -405,9 +405,9 @@ static inline int ocfs2_remove_extent_credits(struct super_block *sb)
405 ocfs2_quota_trans_credits(sb); 405 ocfs2_quota_trans_credits(sb);
406} 406}
407 407
408/* data block for new dir/symlink, 2 for bitmap updates (bitmap fe + 408/* data block for new dir/symlink, allocation of directory block, dx_root
409 * bitmap block for the new bit) dx_root update for free list */ 409 * update for free list */
410#define OCFS2_DIR_LINK_ADDITIONAL_CREDITS (1 + 2 + 1) 410#define OCFS2_DIR_LINK_ADDITIONAL_CREDITS (1 + OCFS2_SUBALLOC_ALLOC + 1)
411 411
412static inline int ocfs2_add_dir_index_credits(struct super_block *sb) 412static inline int ocfs2_add_dir_index_credits(struct super_block *sb)
413{ 413{
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index b5f9160e93e9..19ebc5aad391 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -3228,7 +3228,7 @@ static int ocfs2_make_clusters_writable(struct super_block *sb,
3228 u32 num_clusters, unsigned int e_flags) 3228 u32 num_clusters, unsigned int e_flags)
3229{ 3229{
3230 int ret, delete, index, credits = 0; 3230 int ret, delete, index, credits = 0;
3231 u32 new_bit, new_len; 3231 u32 new_bit, new_len, orig_num_clusters;
3232 unsigned int set_len; 3232 unsigned int set_len;
3233 struct ocfs2_super *osb = OCFS2_SB(sb); 3233 struct ocfs2_super *osb = OCFS2_SB(sb);
3234 handle_t *handle; 3234 handle_t *handle;
@@ -3261,6 +3261,8 @@ static int ocfs2_make_clusters_writable(struct super_block *sb,
3261 goto out; 3261 goto out;
3262 } 3262 }
3263 3263
3264 orig_num_clusters = num_clusters;
3265
3264 while (num_clusters) { 3266 while (num_clusters) {
3265 ret = ocfs2_get_refcount_rec(ref_ci, context->ref_root_bh, 3267 ret = ocfs2_get_refcount_rec(ref_ci, context->ref_root_bh,
3266 p_cluster, num_clusters, 3268 p_cluster, num_clusters,
@@ -3348,7 +3350,8 @@ static int ocfs2_make_clusters_writable(struct super_block *sb,
3348 * in write-back mode. 3350 * in write-back mode.
3349 */ 3351 */
3350 if (context->get_clusters == ocfs2_di_get_clusters) { 3352 if (context->get_clusters == ocfs2_di_get_clusters) {
3351 ret = ocfs2_cow_sync_writeback(sb, context, cpos, num_clusters); 3353 ret = ocfs2_cow_sync_writeback(sb, context, cpos,
3354 orig_num_clusters);
3352 if (ret) 3355 if (ret)
3353 mlog_errno(ret); 3356 mlog_errno(ret);
3354 } 3357 }
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 38f986d2447e..36c423fb0635 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1316,7 +1316,7 @@ static int ocfs2_parse_options(struct super_block *sb,
1316 struct mount_options *mopt, 1316 struct mount_options *mopt,
1317 int is_remount) 1317 int is_remount)
1318{ 1318{
1319 int status; 1319 int status, user_stack = 0;
1320 char *p; 1320 char *p;
1321 u32 tmp; 1321 u32 tmp;
1322 1322
@@ -1459,6 +1459,15 @@ static int ocfs2_parse_options(struct super_block *sb,
1459 memcpy(mopt->cluster_stack, args[0].from, 1459 memcpy(mopt->cluster_stack, args[0].from,
1460 OCFS2_STACK_LABEL_LEN); 1460 OCFS2_STACK_LABEL_LEN);
1461 mopt->cluster_stack[OCFS2_STACK_LABEL_LEN] = '\0'; 1461 mopt->cluster_stack[OCFS2_STACK_LABEL_LEN] = '\0';
1462 /*
1463 * Open code the memcmp here as we don't have
1464 * an osb to pass to
1465 * ocfs2_userspace_stack().
1466 */
1467 if (memcmp(mopt->cluster_stack,
1468 OCFS2_CLASSIC_CLUSTER_STACK,
1469 OCFS2_STACK_LABEL_LEN))
1470 user_stack = 1;
1462 break; 1471 break;
1463 case Opt_inode64: 1472 case Opt_inode64:
1464 mopt->mount_opt |= OCFS2_MOUNT_INODE64; 1473 mopt->mount_opt |= OCFS2_MOUNT_INODE64;
@@ -1514,13 +1523,16 @@ static int ocfs2_parse_options(struct super_block *sb,
1514 } 1523 }
1515 } 1524 }
1516 1525
1517 /* Ensure only one heartbeat mode */ 1526 if (user_stack == 0) {
1518 tmp = mopt->mount_opt & (OCFS2_MOUNT_HB_LOCAL | OCFS2_MOUNT_HB_GLOBAL | 1527 /* Ensure only one heartbeat mode */
1519 OCFS2_MOUNT_HB_NONE); 1528 tmp = mopt->mount_opt & (OCFS2_MOUNT_HB_LOCAL |
1520 if (hweight32(tmp) != 1) { 1529 OCFS2_MOUNT_HB_GLOBAL |
1521 mlog(ML_ERROR, "Invalid heartbeat mount options\n"); 1530 OCFS2_MOUNT_HB_NONE);
1522 status = 0; 1531 if (hweight32(tmp) != 1) {
1523 goto bail; 1532 mlog(ML_ERROR, "Invalid heartbeat mount options\n");
1533 status = 0;
1534 goto bail;
1535 }
1524 } 1536 }
1525 1537
1526 status = 1; 1538 status = 1;
diff --git a/fs/open.c b/fs/open.c
index e52389e1f05b..b47aab39c057 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -233,6 +233,14 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
233 233
234 if (!(file->f_mode & FMODE_WRITE)) 234 if (!(file->f_mode & FMODE_WRITE))
235 return -EBADF; 235 return -EBADF;
236
237 /* It's not possible punch hole on append only file */
238 if (mode & FALLOC_FL_PUNCH_HOLE && IS_APPEND(inode))
239 return -EPERM;
240
241 if (IS_IMMUTABLE(inode))
242 return -EPERM;
243
236 /* 244 /*
237 * Revalidate the write permissions, in case security policy has 245 * Revalidate the write permissions, in case security policy has
238 * changed since the files were opened. 246 * changed since the files were opened.
@@ -790,6 +798,8 @@ struct file *nameidata_to_filp(struct nameidata *nd)
790 798
791 /* Pick up the filp from the open intent */ 799 /* Pick up the filp from the open intent */
792 filp = nd->intent.open.file; 800 filp = nd->intent.open.file;
801 nd->intent.open.file = NULL;
802
793 /* Has the filesystem initialised the file for us? */ 803 /* Has the filesystem initialised the file for us? */
794 if (filp->f_path.dentry == NULL) { 804 if (filp->f_path.dentry == NULL) {
795 path_get(&nd->path); 805 path_get(&nd->path);
diff --git a/fs/partitions/ldm.c b/fs/partitions/ldm.c
index 789c625c7aa5..b10e3540d5b7 100644
--- a/fs/partitions/ldm.c
+++ b/fs/partitions/ldm.c
@@ -251,6 +251,11 @@ static bool ldm_parse_vmdb (const u8 *data, struct vmdb *vm)
251 } 251 }
252 252
253 vm->vblk_size = get_unaligned_be32(data + 0x08); 253 vm->vblk_size = get_unaligned_be32(data + 0x08);
254 if (vm->vblk_size == 0) {
255 ldm_error ("Illegal VBLK size");
256 return false;
257 }
258
254 vm->vblk_offset = get_unaligned_be32(data + 0x0C); 259 vm->vblk_offset = get_unaligned_be32(data + 0x0C);
255 vm->last_vblk_seq = get_unaligned_be32(data + 0x04); 260 vm->last_vblk_seq = get_unaligned_be32(data + 0x04);
256 261
diff --git a/fs/partitions/mac.c b/fs/partitions/mac.c
index 68d6a216ee79..11f688bd76c5 100644
--- a/fs/partitions/mac.c
+++ b/fs/partitions/mac.c
@@ -29,10 +29,9 @@ static inline void mac_fix_string(char *stg, int len)
29 29
30int mac_partition(struct parsed_partitions *state) 30int mac_partition(struct parsed_partitions *state)
31{ 31{
32 int slot = 1;
33 Sector sect; 32 Sector sect;
34 unsigned char *data; 33 unsigned char *data;
35 int blk, blocks_in_map; 34 int slot, blocks_in_map;
36 unsigned secsize; 35 unsigned secsize;
37#ifdef CONFIG_PPC_PMAC 36#ifdef CONFIG_PPC_PMAC
38 int found_root = 0; 37 int found_root = 0;
@@ -59,10 +58,14 @@ int mac_partition(struct parsed_partitions *state)
59 put_dev_sector(sect); 58 put_dev_sector(sect);
60 return 0; /* not a MacOS disk */ 59 return 0; /* not a MacOS disk */
61 } 60 }
62 strlcat(state->pp_buf, " [mac]", PAGE_SIZE);
63 blocks_in_map = be32_to_cpu(part->map_count); 61 blocks_in_map = be32_to_cpu(part->map_count);
64 for (blk = 1; blk <= blocks_in_map; ++blk) { 62 if (blocks_in_map < 0 || blocks_in_map >= DISK_MAX_PARTS) {
65 int pos = blk * secsize; 63 put_dev_sector(sect);
64 return 0;
65 }
66 strlcat(state->pp_buf, " [mac]", PAGE_SIZE);
67 for (slot = 1; slot <= blocks_in_map; ++slot) {
68 int pos = slot * secsize;
66 put_dev_sector(sect); 69 put_dev_sector(sect);
67 data = read_part_sector(state, pos/512, &sect); 70 data = read_part_sector(state, pos/512, &sect);
68 if (!data) 71 if (!data)
@@ -113,13 +116,11 @@ int mac_partition(struct parsed_partitions *state)
113 } 116 }
114 117
115 if (goodness > found_root_goodness) { 118 if (goodness > found_root_goodness) {
116 found_root = blk; 119 found_root = slot;
117 found_root_goodness = goodness; 120 found_root_goodness = goodness;
118 } 121 }
119 } 122 }
120#endif /* CONFIG_PPC_PMAC */ 123#endif /* CONFIG_PPC_PMAC */
121
122 ++slot;
123 } 124 }
124#ifdef CONFIG_PPC_PMAC 125#ifdef CONFIG_PPC_PMAC
125 if (found_root_goodness) 126 if (found_root_goodness)
diff --git a/fs/partitions/osf.c b/fs/partitions/osf.c
index 48cec7cbca17..be03a0b08b47 100644
--- a/fs/partitions/osf.c
+++ b/fs/partitions/osf.c
@@ -10,10 +10,13 @@
10#include "check.h" 10#include "check.h"
11#include "osf.h" 11#include "osf.h"
12 12
13#define MAX_OSF_PARTITIONS 8
14
13int osf_partition(struct parsed_partitions *state) 15int osf_partition(struct parsed_partitions *state)
14{ 16{
15 int i; 17 int i;
16 int slot = 1; 18 int slot = 1;
19 unsigned int npartitions;
17 Sector sect; 20 Sector sect;
18 unsigned char *data; 21 unsigned char *data;
19 struct disklabel { 22 struct disklabel {
@@ -45,7 +48,7 @@ int osf_partition(struct parsed_partitions *state)
45 u8 p_fstype; 48 u8 p_fstype;
46 u8 p_frag; 49 u8 p_frag;
47 __le16 p_cpg; 50 __le16 p_cpg;
48 } d_partitions[8]; 51 } d_partitions[MAX_OSF_PARTITIONS];
49 } * label; 52 } * label;
50 struct d_partition * partition; 53 struct d_partition * partition;
51 54
@@ -63,7 +66,12 @@ int osf_partition(struct parsed_partitions *state)
63 put_dev_sector(sect); 66 put_dev_sector(sect);
64 return 0; 67 return 0;
65 } 68 }
66 for (i = 0 ; i < le16_to_cpu(label->d_npartitions); i++, partition++) { 69 npartitions = le16_to_cpu(label->d_npartitions);
70 if (npartitions > MAX_OSF_PARTITIONS) {
71 put_dev_sector(sect);
72 return 0;
73 }
74 for (i = 0 ; i < npartitions; i++, partition++) {
67 if (slot == state->limit) 75 if (slot == state->limit)
68 break; 76 break;
69 if (le32_to_cpu(partition->p_size)) 77 if (le32_to_cpu(partition->p_size))
diff --git a/fs/proc/array.c b/fs/proc/array.c
index df2b703b9d0f..7c99c1cf7e5c 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -353,9 +353,6 @@ int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
353 task_cap(m, task); 353 task_cap(m, task);
354 task_cpus_allowed(m, task); 354 task_cpus_allowed(m, task);
355 cpuset_task_status_allowed(m, task); 355 cpuset_task_status_allowed(m, task);
356#if defined(CONFIG_S390)
357 task_show_regs(m, task);
358#endif
359 task_context_switch_counts(m, task); 356 task_context_switch_counts(m, task);
360 return 0; 357 return 0;
361} 358}
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 9d096e82b201..d49c4b5d2c3e 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2620,35 +2620,6 @@ static const struct pid_entry proc_base_stuff[] = {
2620 &proc_self_inode_operations, NULL, {}), 2620 &proc_self_inode_operations, NULL, {}),
2621}; 2621};
2622 2622
2623/*
2624 * Exceptional case: normally we are not allowed to unhash a busy
2625 * directory. In this case, however, we can do it - no aliasing problems
2626 * due to the way we treat inodes.
2627 */
2628static int proc_base_revalidate(struct dentry *dentry, struct nameidata *nd)
2629{
2630 struct inode *inode;
2631 struct task_struct *task;
2632
2633 if (nd->flags & LOOKUP_RCU)
2634 return -ECHILD;
2635
2636 inode = dentry->d_inode;
2637 task = get_proc_task(inode);
2638 if (task) {
2639 put_task_struct(task);
2640 return 1;
2641 }
2642 d_drop(dentry);
2643 return 0;
2644}
2645
2646static const struct dentry_operations proc_base_dentry_operations =
2647{
2648 .d_revalidate = proc_base_revalidate,
2649 .d_delete = pid_delete_dentry,
2650};
2651
2652static struct dentry *proc_base_instantiate(struct inode *dir, 2623static struct dentry *proc_base_instantiate(struct inode *dir,
2653 struct dentry *dentry, struct task_struct *task, const void *ptr) 2624 struct dentry *dentry, struct task_struct *task, const void *ptr)
2654{ 2625{
@@ -2685,7 +2656,6 @@ static struct dentry *proc_base_instantiate(struct inode *dir,
2685 if (p->fop) 2656 if (p->fop)
2686 inode->i_fop = p->fop; 2657 inode->i_fop = p->fop;
2687 ei->op = p->op; 2658 ei->op = p->op;
2688 d_set_d_op(dentry, &proc_base_dentry_operations);
2689 d_add(dentry, inode); 2659 d_add(dentry, inode);
2690 error = NULL; 2660 error = NULL;
2691out: 2661out:
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 176ce4cda68a..d6a7ca1fdac5 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -27,6 +27,7 @@
27static void proc_evict_inode(struct inode *inode) 27static void proc_evict_inode(struct inode *inode)
28{ 28{
29 struct proc_dir_entry *de; 29 struct proc_dir_entry *de;
30 struct ctl_table_header *head;
30 31
31 truncate_inode_pages(&inode->i_data, 0); 32 truncate_inode_pages(&inode->i_data, 0);
32 end_writeback(inode); 33 end_writeback(inode);
@@ -38,8 +39,11 @@ static void proc_evict_inode(struct inode *inode)
38 de = PROC_I(inode)->pde; 39 de = PROC_I(inode)->pde;
39 if (de) 40 if (de)
40 pde_put(de); 41 pde_put(de);
41 if (PROC_I(inode)->sysctl) 42 head = PROC_I(inode)->sysctl;
42 sysctl_head_put(PROC_I(inode)->sysctl); 43 if (head) {
44 rcu_assign_pointer(PROC_I(inode)->sysctl, NULL);
45 sysctl_head_put(head);
46 }
43} 47}
44 48
45struct vfsmount *proc_mnt; 49struct vfsmount *proc_mnt;
diff --git a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c
index d9396a4fc7ff..927cbd115e53 100644
--- a/fs/proc/proc_devtree.c
+++ b/fs/proc/proc_devtree.c
@@ -233,7 +233,7 @@ void __init proc_device_tree_init(void)
233 return; 233 return;
234 root = of_find_node_by_path("/"); 234 root = of_find_node_by_path("/");
235 if (root == NULL) { 235 if (root == NULL) {
236 printk(KERN_ERR "/proc/device-tree: can't find root\n"); 236 pr_debug("/proc/device-tree: can't find root\n");
237 return; 237 return;
238 } 238 }
239 proc_device_tree_add_node(root, proc_device_tree); 239 proc_device_tree_add_node(root, proc_device_tree);
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 09a1f92a34ef..8eb2522111c5 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -408,15 +408,18 @@ static int proc_sys_compare(const struct dentry *parent,
408 const struct dentry *dentry, const struct inode *inode, 408 const struct dentry *dentry, const struct inode *inode,
409 unsigned int len, const char *str, const struct qstr *name) 409 unsigned int len, const char *str, const struct qstr *name)
410{ 410{
411 struct ctl_table_header *head;
411 /* Although proc doesn't have negative dentries, rcu-walk means 412 /* Although proc doesn't have negative dentries, rcu-walk means
412 * that inode here can be NULL */ 413 * that inode here can be NULL */
414 /* AV: can it, indeed? */
413 if (!inode) 415 if (!inode)
414 return 0; 416 return 1;
415 if (name->len != len) 417 if (name->len != len)
416 return 1; 418 return 1;
417 if (memcmp(name->name, str, len)) 419 if (memcmp(name->name, str, len))
418 return 1; 420 return 1;
419 return !sysctl_is_seen(PROC_I(inode)->sysctl); 421 head = rcu_dereference(PROC_I(inode)->sysctl);
422 return !head || !sysctl_is_seen(head);
420} 423}
421 424
422static const struct dentry_operations proc_sys_dentry_operations = { 425static const struct dentry_operations proc_sys_dentry_operations = {
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index ba5f51ec3458..68fdf45cc6c9 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -771,7 +771,7 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
771 EMPTY_DIR_SIZE_V1 : EMPTY_DIR_SIZE, 771 EMPTY_DIR_SIZE_V1 : EMPTY_DIR_SIZE,
772 dentry, inode, &security); 772 dentry, inode, &security);
773 if (retval) { 773 if (retval) {
774 dir->i_nlink--; 774 DEC_DIR_INODE_NLINK(dir)
775 goto out_failed; 775 goto out_failed;
776 } 776 }
777 777
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 3cfb2e933644..5c11ca82b782 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -978,8 +978,6 @@ int reiserfs_permission(struct inode *inode, int mask, unsigned int flags)
978 978
979static int xattr_hide_revalidate(struct dentry *dentry, struct nameidata *nd) 979static int xattr_hide_revalidate(struct dentry *dentry, struct nameidata *nd)
980{ 980{
981 if (nd->flags & LOOKUP_RCU)
982 return -ECHILD;
983 return -EPERM; 981 return -EPERM;
984} 982}
985 983
diff --git a/fs/super.c b/fs/super.c
index 74e149efed81..7e9dd4cc2c01 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -177,6 +177,11 @@ void deactivate_locked_super(struct super_block *s)
177 struct file_system_type *fs = s->s_type; 177 struct file_system_type *fs = s->s_type;
178 if (atomic_dec_and_test(&s->s_active)) { 178 if (atomic_dec_and_test(&s->s_active)) {
179 fs->kill_sb(s); 179 fs->kill_sb(s);
180 /*
181 * We need to call rcu_barrier so all the delayed rcu free
182 * inodes are flushed before we release the fs module.
183 */
184 rcu_barrier();
180 put_filesystem(fs); 185 put_filesystem(fs);
181 put_super(s); 186 put_super(s);
182 } else { 187 } else {
diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c
index b427b1208c26..e474fbcf8bde 100644
--- a/fs/sysv/namei.c
+++ b/fs/sysv/namei.c
@@ -245,7 +245,6 @@ static int sysv_rename(struct inode * old_dir, struct dentry * old_dentry,
245 new_de = sysv_find_entry(new_dentry, &new_page); 245 new_de = sysv_find_entry(new_dentry, &new_page);
246 if (!new_de) 246 if (!new_de)
247 goto out_dir; 247 goto out_dir;
248 inode_inc_link_count(old_inode);
249 sysv_set_link(new_de, new_page, old_inode); 248 sysv_set_link(new_de, new_page, old_inode);
250 new_inode->i_ctime = CURRENT_TIME_SEC; 249 new_inode->i_ctime = CURRENT_TIME_SEC;
251 if (dir_de) 250 if (dir_de)
@@ -257,18 +256,15 @@ static int sysv_rename(struct inode * old_dir, struct dentry * old_dentry,
257 if (new_dir->i_nlink >= SYSV_SB(new_dir->i_sb)->s_link_max) 256 if (new_dir->i_nlink >= SYSV_SB(new_dir->i_sb)->s_link_max)
258 goto out_dir; 257 goto out_dir;
259 } 258 }
260 inode_inc_link_count(old_inode);
261 err = sysv_add_link(new_dentry, old_inode); 259 err = sysv_add_link(new_dentry, old_inode);
262 if (err) { 260 if (err)
263 inode_dec_link_count(old_inode);
264 goto out_dir; 261 goto out_dir;
265 }
266 if (dir_de) 262 if (dir_de)
267 inode_inc_link_count(new_dir); 263 inode_inc_link_count(new_dir);
268 } 264 }
269 265
270 sysv_delete_entry(old_de, old_page); 266 sysv_delete_entry(old_de, old_page);
271 inode_dec_link_count(old_inode); 267 mark_inode_dirty(old_inode);
272 268
273 if (dir_de) { 269 if (dir_de) {
274 sysv_set_link(dir_de, dir_page, new_dir); 270 sysv_set_link(dir_de, dir_page, new_dir);
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index 2be0f9eb86d2..b7c338d5e9df 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -32,6 +32,8 @@
32#include <linux/crc-itu-t.h> 32#include <linux/crc-itu-t.h>
33#include <linux/exportfs.h> 33#include <linux/exportfs.h>
34 34
35enum { UDF_MAX_LINKS = 0xffff };
36
35static inline int udf_match(int len1, const unsigned char *name1, int len2, 37static inline int udf_match(int len1, const unsigned char *name1, int len2,
36 const unsigned char *name2) 38 const unsigned char *name2)
37{ 39{
@@ -650,7 +652,7 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, int mode)
650 struct udf_inode_info *iinfo; 652 struct udf_inode_info *iinfo;
651 653
652 err = -EMLINK; 654 err = -EMLINK;
653 if (dir->i_nlink >= (256 << sizeof(dir->i_nlink)) - 1) 655 if (dir->i_nlink >= UDF_MAX_LINKS)
654 goto out; 656 goto out;
655 657
656 err = -EIO; 658 err = -EIO;
@@ -1034,9 +1036,8 @@ static int udf_link(struct dentry *old_dentry, struct inode *dir,
1034 struct fileIdentDesc cfi, *fi; 1036 struct fileIdentDesc cfi, *fi;
1035 int err; 1037 int err;
1036 1038
1037 if (inode->i_nlink >= (256 << sizeof(inode->i_nlink)) - 1) { 1039 if (inode->i_nlink >= UDF_MAX_LINKS)
1038 return -EMLINK; 1040 return -EMLINK;
1039 }
1040 1041
1041 fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err); 1042 fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err);
1042 if (!fi) { 1043 if (!fi) {
@@ -1131,9 +1132,7 @@ static int udf_rename(struct inode *old_dir, struct dentry *old_dentry,
1131 goto end_rename; 1132 goto end_rename;
1132 1133
1133 retval = -EMLINK; 1134 retval = -EMLINK;
1134 if (!new_inode && 1135 if (!new_inode && new_dir->i_nlink >= UDF_MAX_LINKS)
1135 new_dir->i_nlink >=
1136 (256 << sizeof(new_dir->i_nlink)) - 1)
1137 goto end_rename; 1136 goto end_rename;
1138 } 1137 }
1139 if (!nfi) { 1138 if (!nfi) {
diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c
index 12f39b9e4437..d6f681535eb8 100644
--- a/fs/ufs/namei.c
+++ b/fs/ufs/namei.c
@@ -306,7 +306,6 @@ static int ufs_rename(struct inode *old_dir, struct dentry *old_dentry,
306 new_de = ufs_find_entry(new_dir, &new_dentry->d_name, &new_page); 306 new_de = ufs_find_entry(new_dir, &new_dentry->d_name, &new_page);
307 if (!new_de) 307 if (!new_de)
308 goto out_dir; 308 goto out_dir;
309 inode_inc_link_count(old_inode);
310 ufs_set_link(new_dir, new_de, new_page, old_inode); 309 ufs_set_link(new_dir, new_de, new_page, old_inode);
311 new_inode->i_ctime = CURRENT_TIME_SEC; 310 new_inode->i_ctime = CURRENT_TIME_SEC;
312 if (dir_de) 311 if (dir_de)
@@ -318,12 +317,9 @@ static int ufs_rename(struct inode *old_dir, struct dentry *old_dentry,
318 if (new_dir->i_nlink >= UFS_LINK_MAX) 317 if (new_dir->i_nlink >= UFS_LINK_MAX)
319 goto out_dir; 318 goto out_dir;
320 } 319 }
321 inode_inc_link_count(old_inode);
322 err = ufs_add_link(new_dentry, old_inode); 320 err = ufs_add_link(new_dentry, old_inode);
323 if (err) { 321 if (err)
324 inode_dec_link_count(old_inode);
325 goto out_dir; 322 goto out_dir;
326 }
327 if (dir_de) 323 if (dir_de)
328 inode_inc_link_count(new_dir); 324 inode_inc_link_count(new_dir);
329 } 325 }
@@ -331,12 +327,11 @@ static int ufs_rename(struct inode *old_dir, struct dentry *old_dentry,
331 /* 327 /*
332 * Like most other Unix systems, set the ctime for inodes on a 328 * Like most other Unix systems, set the ctime for inodes on a
333 * rename. 329 * rename.
334 * inode_dec_link_count() will mark the inode dirty.
335 */ 330 */
336 old_inode->i_ctime = CURRENT_TIME_SEC; 331 old_inode->i_ctime = CURRENT_TIME_SEC;
337 332
338 ufs_delete_entry(old_dir, old_de, old_page); 333 ufs_delete_entry(old_dir, old_de, old_page);
339 inode_dec_link_count(old_inode); 334 mark_inode_dirty(old_inode);
340 335
341 if (dir_de) { 336 if (dir_de) {
342 ufs_set_link(old_inode, dir_de, dir_page, new_dir); 337 ufs_set_link(old_inode, dir_de, dir_page, new_dir);
diff --git a/fs/xfs/linux-2.6/xfs_discard.c b/fs/xfs/linux-2.6/xfs_discard.c
index 05201ae719e5..d61611c88012 100644
--- a/fs/xfs/linux-2.6/xfs_discard.c
+++ b/fs/xfs/linux-2.6/xfs_discard.c
@@ -152,6 +152,8 @@ xfs_ioc_trim(
152 152
153 if (!capable(CAP_SYS_ADMIN)) 153 if (!capable(CAP_SYS_ADMIN))
154 return -XFS_ERROR(EPERM); 154 return -XFS_ERROR(EPERM);
155 if (!blk_queue_discard(q))
156 return -XFS_ERROR(EOPNOTSUPP);
155 if (copy_from_user(&range, urange, sizeof(range))) 157 if (copy_from_user(&range, urange, sizeof(range)))
156 return -XFS_ERROR(EFAULT); 158 return -XFS_ERROR(EFAULT);
157 159
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index f5e2a19e0f8e..0ca0e3c024d7 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -695,14 +695,19 @@ xfs_ioc_fsgeometry_v1(
695 xfs_mount_t *mp, 695 xfs_mount_t *mp,
696 void __user *arg) 696 void __user *arg)
697{ 697{
698 xfs_fsop_geom_v1_t fsgeo; 698 xfs_fsop_geom_t fsgeo;
699 int error; 699 int error;
700 700
701 error = xfs_fs_geometry(mp, (xfs_fsop_geom_t *)&fsgeo, 3); 701 error = xfs_fs_geometry(mp, &fsgeo, 3);
702 if (error) 702 if (error)
703 return -error; 703 return -error;
704 704
705 if (copy_to_user(arg, &fsgeo, sizeof(fsgeo))) 705 /*
706 * Caller should have passed an argument of type
707 * xfs_fsop_geom_v1_t. This is a proper subset of the
708 * xfs_fsop_geom_t that xfs_fs_geometry() fills in.
709 */
710 if (copy_to_user(arg, &fsgeo, sizeof(xfs_fsop_geom_v1_t)))
706 return -XFS_ERROR(EFAULT); 711 return -XFS_ERROR(EFAULT);
707 return 0; 712 return 0;
708} 713}
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index cec89dd5d7d2..85668efb3e3e 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -53,6 +53,9 @@ xfs_fs_geometry(
53 xfs_fsop_geom_t *geo, 53 xfs_fsop_geom_t *geo,
54 int new_version) 54 int new_version)
55{ 55{
56
57 memset(geo, 0, sizeof(*geo));
58
56 geo->blocksize = mp->m_sb.sb_blocksize; 59 geo->blocksize = mp->m_sb.sb_blocksize;
57 geo->rtextsize = mp->m_sb.sb_rextsize; 60 geo->rtextsize = mp->m_sb.sb_rextsize;
58 geo->agblocks = mp->m_sb.sb_agblocks; 61 geo->agblocks = mp->m_sb.sb_agblocks;