aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/aio.c13
-rw-r--r--fs/autofs4/inode.c2
-rw-r--r--fs/btrfs/compression.c2
-rw-r--r--fs/btrfs/ctree.h13
-rw-r--r--fs/btrfs/dev-replace.c5
-rw-r--r--fs/btrfs/disk-io.c5
-rw-r--r--fs/btrfs/extent-tree.c148
-rw-r--r--fs/btrfs/extent_io.h1
-rw-r--r--fs/btrfs/extent_map.c2
-rw-r--r--fs/btrfs/extent_map.h1
-rw-r--r--fs/btrfs/free-space-cache.c192
-rw-r--r--fs/btrfs/inode.c41
-rw-r--r--fs/btrfs/ioctl.c37
-rw-r--r--fs/btrfs/locking.c80
-rw-r--r--fs/btrfs/print-tree.c9
-rw-r--r--fs/btrfs/raid56.c5
-rw-r--r--fs/btrfs/scrub.c19
-rw-r--r--fs/btrfs/super.c7
-rw-r--r--fs/btrfs/sysfs.c32
-rw-r--r--fs/btrfs/sysfs.h4
-rw-r--r--fs/btrfs/transaction.c12
-rw-r--r--fs/btrfs/volumes.c66
-rw-r--r--fs/btrfs/volumes.h3
-rw-r--r--fs/btrfs/zlib.c2
-rw-r--r--fs/cifs/cifs_unicode.c7
-rw-r--r--fs/cifs/cifsfs.c17
-rw-r--r--fs/cifs/link.c2
-rw-r--r--fs/eventpoll.c4
-rw-r--r--fs/ext4/balloc.c16
-rw-r--r--fs/ext4/extents_status.c4
-rw-r--r--fs/ext4/ialloc.c37
-rw-r--r--fs/ext4/indirect.c24
-rw-r--r--fs/ext4/mballoc.c12
-rw-r--r--fs/ext4/super.c60
-rw-r--r--fs/f2fs/data.c23
-rw-r--r--fs/f2fs/dir.c2
-rw-r--r--fs/f2fs/f2fs.h6
-rw-r--r--fs/f2fs/file.c12
-rw-r--r--fs/f2fs/inode.c1
-rw-r--r--fs/f2fs/namei.c13
-rw-r--r--fs/f2fs/node.c2
-rw-r--r--fs/f2fs/segment.c5
-rw-r--r--fs/f2fs/super.c4
-rw-r--r--fs/fuse/dev.c51
-rw-r--r--fs/fuse/dir.c41
-rw-r--r--fs/fuse/file.c8
-rw-r--r--fs/fuse/inode.c22
-rw-r--r--fs/jbd2/transaction.c5
-rw-r--r--fs/kernfs/file.c69
-rw-r--r--fs/kernfs/mount.c30
-rw-r--r--fs/locks.c2
-rw-r--r--fs/mbcache.c3
-rw-r--r--fs/nfs/inode.c76
-rw-r--r--fs/nfs/nfs4_fs.h2
-rw-r--r--fs/nfs/nfs4namespace.c102
-rw-r--r--fs/nfs/nfs4proc.c2
-rw-r--r--fs/nfs/write.c4
-rw-r--r--fs/nfsd/nfs4proc.c9
-rw-r--r--fs/nfsd/nfs4state.c78
-rw-r--r--fs/nfsd/nfs4xdr.c18
-rw-r--r--fs/ocfs2/dlm/dlmcommon.h4
-rw-r--r--fs/ocfs2/dlm/dlmmaster.c57
-rw-r--r--fs/ocfs2/dlm/dlmrecovery.c3
-rw-r--r--fs/ocfs2/dlm/dlmthread.c13
-rw-r--r--fs/ocfs2/dlm/dlmunlock.c18
-rw-r--r--fs/ocfs2/namei.c145
-rw-r--r--fs/ocfs2/ocfs2_trace.h2
-rw-r--r--fs/ocfs2/refcounttree.c8
-rw-r--r--fs/ocfs2/super.c8
-rw-r--r--fs/proc/stat.c22
-rw-r--r--fs/quota/dquot.c2
-rw-r--r--fs/seq_file.c30
72 files changed, 1251 insertions, 535 deletions
diff --git a/fs/aio.c b/fs/aio.c
index 4f078c054b41..1c9c5f0a9e2b 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -830,16 +830,20 @@ void exit_aio(struct mm_struct *mm)
830static void put_reqs_available(struct kioctx *ctx, unsigned nr) 830static void put_reqs_available(struct kioctx *ctx, unsigned nr)
831{ 831{
832 struct kioctx_cpu *kcpu; 832 struct kioctx_cpu *kcpu;
833 unsigned long flags;
833 834
834 preempt_disable(); 835 preempt_disable();
835 kcpu = this_cpu_ptr(ctx->cpu); 836 kcpu = this_cpu_ptr(ctx->cpu);
836 837
838 local_irq_save(flags);
837 kcpu->reqs_available += nr; 839 kcpu->reqs_available += nr;
840
838 while (kcpu->reqs_available >= ctx->req_batch * 2) { 841 while (kcpu->reqs_available >= ctx->req_batch * 2) {
839 kcpu->reqs_available -= ctx->req_batch; 842 kcpu->reqs_available -= ctx->req_batch;
840 atomic_add(ctx->req_batch, &ctx->reqs_available); 843 atomic_add(ctx->req_batch, &ctx->reqs_available);
841 } 844 }
842 845
846 local_irq_restore(flags);
843 preempt_enable(); 847 preempt_enable();
844} 848}
845 849
@@ -847,10 +851,12 @@ static bool get_reqs_available(struct kioctx *ctx)
847{ 851{
848 struct kioctx_cpu *kcpu; 852 struct kioctx_cpu *kcpu;
849 bool ret = false; 853 bool ret = false;
854 unsigned long flags;
850 855
851 preempt_disable(); 856 preempt_disable();
852 kcpu = this_cpu_ptr(ctx->cpu); 857 kcpu = this_cpu_ptr(ctx->cpu);
853 858
859 local_irq_save(flags);
854 if (!kcpu->reqs_available) { 860 if (!kcpu->reqs_available) {
855 int old, avail = atomic_read(&ctx->reqs_available); 861 int old, avail = atomic_read(&ctx->reqs_available);
856 862
@@ -869,6 +875,7 @@ static bool get_reqs_available(struct kioctx *ctx)
869 ret = true; 875 ret = true;
870 kcpu->reqs_available--; 876 kcpu->reqs_available--;
871out: 877out:
878 local_irq_restore(flags);
872 preempt_enable(); 879 preempt_enable();
873 return ret; 880 return ret;
874} 881}
@@ -1021,6 +1028,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
1021 1028
1022 /* everything turned out well, dispose of the aiocb. */ 1029 /* everything turned out well, dispose of the aiocb. */
1023 kiocb_free(iocb); 1030 kiocb_free(iocb);
1031 put_reqs_available(ctx, 1);
1024 1032
1025 /* 1033 /*
1026 * We have to order our ring_info tail store above and test 1034 * We have to order our ring_info tail store above and test
@@ -1062,6 +1070,9 @@ static long aio_read_events_ring(struct kioctx *ctx,
1062 if (head == tail) 1070 if (head == tail)
1063 goto out; 1071 goto out;
1064 1072
1073 head %= ctx->nr_events;
1074 tail %= ctx->nr_events;
1075
1065 while (ret < nr) { 1076 while (ret < nr) {
1066 long avail; 1077 long avail;
1067 struct io_event *ev; 1078 struct io_event *ev;
@@ -1100,8 +1111,6 @@ static long aio_read_events_ring(struct kioctx *ctx,
1100 flush_dcache_page(ctx->ring_pages[0]); 1111 flush_dcache_page(ctx->ring_pages[0]);
1101 1112
1102 pr_debug("%li h%u t%u\n", ret, head, tail); 1113 pr_debug("%li h%u t%u\n", ret, head, tail);
1103
1104 put_reqs_available(ctx, ret);
1105out: 1114out:
1106 mutex_unlock(&ctx->ring_lock); 1115 mutex_unlock(&ctx->ring_lock);
1107 1116
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index d7bd395ab586..1c55388ae633 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -210,7 +210,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
210 int pipefd; 210 int pipefd;
211 struct autofs_sb_info *sbi; 211 struct autofs_sb_info *sbi;
212 struct autofs_info *ino; 212 struct autofs_info *ino;
213 int pgrp; 213 int pgrp = 0;
214 bool pgrp_set = false; 214 bool pgrp_set = false;
215 int ret = -EINVAL; 215 int ret = -EINVAL;
216 216
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 92371c414228..1daea0b47187 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -821,7 +821,7 @@ static void free_workspace(int type, struct list_head *workspace)
821 821
822 spin_lock(workspace_lock); 822 spin_lock(workspace_lock);
823 if (*num_workspace < num_online_cpus()) { 823 if (*num_workspace < num_online_cpus()) {
824 list_add_tail(workspace, idle_workspace); 824 list_add(workspace, idle_workspace);
825 (*num_workspace)++; 825 (*num_workspace)++;
826 spin_unlock(workspace_lock); 826 spin_unlock(workspace_lock);
827 goto wake; 827 goto wake;
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index b7e2c1c1ef36..be91397f4e92 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1259,11 +1259,19 @@ struct btrfs_block_group_cache {
1259 spinlock_t lock; 1259 spinlock_t lock;
1260 u64 pinned; 1260 u64 pinned;
1261 u64 reserved; 1261 u64 reserved;
1262 u64 delalloc_bytes;
1262 u64 bytes_super; 1263 u64 bytes_super;
1263 u64 flags; 1264 u64 flags;
1264 u64 sectorsize; 1265 u64 sectorsize;
1265 u64 cache_generation; 1266 u64 cache_generation;
1266 1267
1268 /*
1269 * It is just used for the delayed data space allocation because
1270 * only the data space allocation and the relative metadata update
1271 * can be done cross the transaction.
1272 */
1273 struct rw_semaphore data_rwsem;
1274
1267 /* for raid56, this is a full stripe, without parity */ 1275 /* for raid56, this is a full stripe, without parity */
1268 unsigned long full_stripe_len; 1276 unsigned long full_stripe_len;
1269 1277
@@ -3316,7 +3324,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
3316 struct btrfs_key *ins); 3324 struct btrfs_key *ins);
3317int btrfs_reserve_extent(struct btrfs_root *root, u64 num_bytes, 3325int btrfs_reserve_extent(struct btrfs_root *root, u64 num_bytes,
3318 u64 min_alloc_size, u64 empty_size, u64 hint_byte, 3326 u64 min_alloc_size, u64 empty_size, u64 hint_byte,
3319 struct btrfs_key *ins, int is_data); 3327 struct btrfs_key *ins, int is_data, int delalloc);
3320int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 3328int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3321 struct extent_buffer *buf, int full_backref, int no_quota); 3329 struct extent_buffer *buf, int full_backref, int no_quota);
3322int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 3330int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
@@ -3330,7 +3338,8 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans,
3330 u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid, 3338 u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
3331 u64 owner, u64 offset, int no_quota); 3339 u64 owner, u64 offset, int no_quota);
3332 3340
3333int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len); 3341int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len,
3342 int delalloc);
3334int btrfs_free_and_pin_reserved_extent(struct btrfs_root *root, 3343int btrfs_free_and_pin_reserved_extent(struct btrfs_root *root,
3335 u64 start, u64 len); 3344 u64 start, u64 len);
3336void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, 3345void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index 2af6e66fe788..eea26e1b2fda 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -36,6 +36,7 @@
36#include "check-integrity.h" 36#include "check-integrity.h"
37#include "rcu-string.h" 37#include "rcu-string.h"
38#include "dev-replace.h" 38#include "dev-replace.h"
39#include "sysfs.h"
39 40
40static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, 41static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
41 int scrub_ret); 42 int scrub_ret);
@@ -562,6 +563,10 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
562 fs_info->fs_devices->latest_bdev = tgt_device->bdev; 563 fs_info->fs_devices->latest_bdev = tgt_device->bdev;
563 list_add(&tgt_device->dev_alloc_list, &fs_info->fs_devices->alloc_list); 564 list_add(&tgt_device->dev_alloc_list, &fs_info->fs_devices->alloc_list);
564 565
566 /* replace the sysfs entry */
567 btrfs_kobj_rm_device(fs_info, src_device);
568 btrfs_kobj_add_device(fs_info, tgt_device);
569
565 btrfs_rm_dev_replace_blocked(fs_info); 570 btrfs_rm_dev_replace_blocked(fs_info);
566 571
567 btrfs_rm_dev_replace_srcdev(fs_info, src_device); 572 btrfs_rm_dev_replace_srcdev(fs_info, src_device);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 8bb4aa19898f..08e65e9cf2aa 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -369,7 +369,8 @@ static int verify_parent_transid(struct extent_io_tree *io_tree,
369out: 369out:
370 unlock_extent_cached(io_tree, eb->start, eb->start + eb->len - 1, 370 unlock_extent_cached(io_tree, eb->start, eb->start + eb->len - 1,
371 &cached_state, GFP_NOFS); 371 &cached_state, GFP_NOFS);
372 btrfs_tree_read_unlock_blocking(eb); 372 if (need_lock)
373 btrfs_tree_read_unlock_blocking(eb);
373 return ret; 374 return ret;
374} 375}
375 376
@@ -2904,7 +2905,9 @@ retry_root_backup:
2904 if (ret) 2905 if (ret)
2905 goto fail_qgroup; 2906 goto fail_qgroup;
2906 2907
2908 mutex_lock(&fs_info->cleaner_mutex);
2907 ret = btrfs_recover_relocation(tree_root); 2909 ret = btrfs_recover_relocation(tree_root);
2910 mutex_unlock(&fs_info->cleaner_mutex);
2908 if (ret < 0) { 2911 if (ret < 0) {
2909 printk(KERN_WARNING 2912 printk(KERN_WARNING
2910 "BTRFS: failed to recover relocation\n"); 2913 "BTRFS: failed to recover relocation\n");
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index fafb3e53ecde..813537f362f9 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -105,7 +105,8 @@ static int find_next_key(struct btrfs_path *path, int level,
105static void dump_space_info(struct btrfs_space_info *info, u64 bytes, 105static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
106 int dump_block_groups); 106 int dump_block_groups);
107static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache, 107static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
108 u64 num_bytes, int reserve); 108 u64 num_bytes, int reserve,
109 int delalloc);
109static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv, 110static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv,
110 u64 num_bytes); 111 u64 num_bytes);
111int btrfs_pin_extent(struct btrfs_root *root, 112int btrfs_pin_extent(struct btrfs_root *root,
@@ -3260,7 +3261,8 @@ again:
3260 3261
3261 spin_lock(&block_group->lock); 3262 spin_lock(&block_group->lock);
3262 if (block_group->cached != BTRFS_CACHE_FINISHED || 3263 if (block_group->cached != BTRFS_CACHE_FINISHED ||
3263 !btrfs_test_opt(root, SPACE_CACHE)) { 3264 !btrfs_test_opt(root, SPACE_CACHE) ||
3265 block_group->delalloc_bytes) {
3264 /* 3266 /*
3265 * don't bother trying to write stuff out _if_ 3267 * don't bother trying to write stuff out _if_
3266 * a) we're not cached, 3268 * a) we're not cached,
@@ -5613,6 +5615,7 @@ int btrfs_exclude_logged_extents(struct btrfs_root *log,
5613 * @cache: The cache we are manipulating 5615 * @cache: The cache we are manipulating
5614 * @num_bytes: The number of bytes in question 5616 * @num_bytes: The number of bytes in question
5615 * @reserve: One of the reservation enums 5617 * @reserve: One of the reservation enums
5618 * @delalloc: The blocks are allocated for the delalloc write
5616 * 5619 *
5617 * This is called by the allocator when it reserves space, or by somebody who is 5620 * This is called by the allocator when it reserves space, or by somebody who is
5618 * freeing space that was never actually used on disk. For example if you 5621 * freeing space that was never actually used on disk. For example if you
@@ -5631,7 +5634,7 @@ int btrfs_exclude_logged_extents(struct btrfs_root *log,
5631 * succeeds. 5634 * succeeds.
5632 */ 5635 */
5633static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache, 5636static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
5634 u64 num_bytes, int reserve) 5637 u64 num_bytes, int reserve, int delalloc)
5635{ 5638{
5636 struct btrfs_space_info *space_info = cache->space_info; 5639 struct btrfs_space_info *space_info = cache->space_info;
5637 int ret = 0; 5640 int ret = 0;
@@ -5650,12 +5653,18 @@ static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
5650 num_bytes, 0); 5653 num_bytes, 0);
5651 space_info->bytes_may_use -= num_bytes; 5654 space_info->bytes_may_use -= num_bytes;
5652 } 5655 }
5656
5657 if (delalloc)
5658 cache->delalloc_bytes += num_bytes;
5653 } 5659 }
5654 } else { 5660 } else {
5655 if (cache->ro) 5661 if (cache->ro)
5656 space_info->bytes_readonly += num_bytes; 5662 space_info->bytes_readonly += num_bytes;
5657 cache->reserved -= num_bytes; 5663 cache->reserved -= num_bytes;
5658 space_info->bytes_reserved -= num_bytes; 5664 space_info->bytes_reserved -= num_bytes;
5665
5666 if (delalloc)
5667 cache->delalloc_bytes -= num_bytes;
5659 } 5668 }
5660 spin_unlock(&cache->lock); 5669 spin_unlock(&cache->lock);
5661 spin_unlock(&space_info->lock); 5670 spin_unlock(&space_info->lock);
@@ -5669,7 +5678,6 @@ void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
5669 struct btrfs_caching_control *next; 5678 struct btrfs_caching_control *next;
5670 struct btrfs_caching_control *caching_ctl; 5679 struct btrfs_caching_control *caching_ctl;
5671 struct btrfs_block_group_cache *cache; 5680 struct btrfs_block_group_cache *cache;
5672 struct btrfs_space_info *space_info;
5673 5681
5674 down_write(&fs_info->commit_root_sem); 5682 down_write(&fs_info->commit_root_sem);
5675 5683
@@ -5692,9 +5700,6 @@ void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
5692 5700
5693 up_write(&fs_info->commit_root_sem); 5701 up_write(&fs_info->commit_root_sem);
5694 5702
5695 list_for_each_entry_rcu(space_info, &fs_info->space_info, list)
5696 percpu_counter_set(&space_info->total_bytes_pinned, 0);
5697
5698 update_global_block_rsv(fs_info); 5703 update_global_block_rsv(fs_info);
5699} 5704}
5700 5705
@@ -5732,6 +5737,7 @@ static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
5732 spin_lock(&cache->lock); 5737 spin_lock(&cache->lock);
5733 cache->pinned -= len; 5738 cache->pinned -= len;
5734 space_info->bytes_pinned -= len; 5739 space_info->bytes_pinned -= len;
5740 percpu_counter_add(&space_info->total_bytes_pinned, -len);
5735 if (cache->ro) { 5741 if (cache->ro) {
5736 space_info->bytes_readonly += len; 5742 space_info->bytes_readonly += len;
5737 readonly = true; 5743 readonly = true;
@@ -6206,7 +6212,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
6206 WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)); 6212 WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags));
6207 6213
6208 btrfs_add_free_space(cache, buf->start, buf->len); 6214 btrfs_add_free_space(cache, buf->start, buf->len);
6209 btrfs_update_reserved_bytes(cache, buf->len, RESERVE_FREE); 6215 btrfs_update_reserved_bytes(cache, buf->len, RESERVE_FREE, 0);
6210 trace_btrfs_reserved_extent_free(root, buf->start, buf->len); 6216 trace_btrfs_reserved_extent_free(root, buf->start, buf->len);
6211 pin = 0; 6217 pin = 0;
6212 } 6218 }
@@ -6365,6 +6371,70 @@ enum btrfs_loop_type {
6365 LOOP_NO_EMPTY_SIZE = 3, 6371 LOOP_NO_EMPTY_SIZE = 3,
6366}; 6372};
6367 6373
6374static inline void
6375btrfs_lock_block_group(struct btrfs_block_group_cache *cache,
6376 int delalloc)
6377{
6378 if (delalloc)
6379 down_read(&cache->data_rwsem);
6380}
6381
6382static inline void
6383btrfs_grab_block_group(struct btrfs_block_group_cache *cache,
6384 int delalloc)
6385{
6386 btrfs_get_block_group(cache);
6387 if (delalloc)
6388 down_read(&cache->data_rwsem);
6389}
6390
6391static struct btrfs_block_group_cache *
6392btrfs_lock_cluster(struct btrfs_block_group_cache *block_group,
6393 struct btrfs_free_cluster *cluster,
6394 int delalloc)
6395{
6396 struct btrfs_block_group_cache *used_bg;
6397 bool locked = false;
6398again:
6399 spin_lock(&cluster->refill_lock);
6400 if (locked) {
6401 if (used_bg == cluster->block_group)
6402 return used_bg;
6403
6404 up_read(&used_bg->data_rwsem);
6405 btrfs_put_block_group(used_bg);
6406 }
6407
6408 used_bg = cluster->block_group;
6409 if (!used_bg)
6410 return NULL;
6411
6412 if (used_bg == block_group)
6413 return used_bg;
6414
6415 btrfs_get_block_group(used_bg);
6416
6417 if (!delalloc)
6418 return used_bg;
6419
6420 if (down_read_trylock(&used_bg->data_rwsem))
6421 return used_bg;
6422
6423 spin_unlock(&cluster->refill_lock);
6424 down_read(&used_bg->data_rwsem);
6425 locked = true;
6426 goto again;
6427}
6428
6429static inline void
6430btrfs_release_block_group(struct btrfs_block_group_cache *cache,
6431 int delalloc)
6432{
6433 if (delalloc)
6434 up_read(&cache->data_rwsem);
6435 btrfs_put_block_group(cache);
6436}
6437
6368/* 6438/*
6369 * walks the btree of allocated extents and find a hole of a given size. 6439 * walks the btree of allocated extents and find a hole of a given size.
6370 * The key ins is changed to record the hole: 6440 * The key ins is changed to record the hole:
@@ -6379,7 +6449,7 @@ enum btrfs_loop_type {
6379static noinline int find_free_extent(struct btrfs_root *orig_root, 6449static noinline int find_free_extent(struct btrfs_root *orig_root,
6380 u64 num_bytes, u64 empty_size, 6450 u64 num_bytes, u64 empty_size,
6381 u64 hint_byte, struct btrfs_key *ins, 6451 u64 hint_byte, struct btrfs_key *ins,
6382 u64 flags) 6452 u64 flags, int delalloc)
6383{ 6453{
6384 int ret = 0; 6454 int ret = 0;
6385 struct btrfs_root *root = orig_root->fs_info->extent_root; 6455 struct btrfs_root *root = orig_root->fs_info->extent_root;
@@ -6467,6 +6537,7 @@ static noinline int find_free_extent(struct btrfs_root *orig_root,
6467 up_read(&space_info->groups_sem); 6537 up_read(&space_info->groups_sem);
6468 } else { 6538 } else {
6469 index = get_block_group_index(block_group); 6539 index = get_block_group_index(block_group);
6540 btrfs_lock_block_group(block_group, delalloc);
6470 goto have_block_group; 6541 goto have_block_group;
6471 } 6542 }
6472 } else if (block_group) { 6543 } else if (block_group) {
@@ -6481,7 +6552,7 @@ search:
6481 u64 offset; 6552 u64 offset;
6482 int cached; 6553 int cached;
6483 6554
6484 btrfs_get_block_group(block_group); 6555 btrfs_grab_block_group(block_group, delalloc);
6485 search_start = block_group->key.objectid; 6556 search_start = block_group->key.objectid;
6486 6557
6487 /* 6558 /*
@@ -6529,16 +6600,16 @@ have_block_group:
6529 * the refill lock keeps out other 6600 * the refill lock keeps out other
6530 * people trying to start a new cluster 6601 * people trying to start a new cluster
6531 */ 6602 */
6532 spin_lock(&last_ptr->refill_lock); 6603 used_block_group = btrfs_lock_cluster(block_group,
6533 used_block_group = last_ptr->block_group; 6604 last_ptr,
6534 if (used_block_group != block_group && 6605 delalloc);
6535 (!used_block_group || 6606 if (!used_block_group)
6536 used_block_group->ro ||
6537 !block_group_bits(used_block_group, flags)))
6538 goto refill_cluster; 6607 goto refill_cluster;
6539 6608
6540 if (used_block_group != block_group) 6609 if (used_block_group != block_group &&
6541 btrfs_get_block_group(used_block_group); 6610 (used_block_group->ro ||
6611 !block_group_bits(used_block_group, flags)))
6612 goto release_cluster;
6542 6613
6543 offset = btrfs_alloc_from_cluster(used_block_group, 6614 offset = btrfs_alloc_from_cluster(used_block_group,
6544 last_ptr, 6615 last_ptr,
@@ -6552,16 +6623,15 @@ have_block_group:
6552 used_block_group, 6623 used_block_group,
6553 search_start, num_bytes); 6624 search_start, num_bytes);
6554 if (used_block_group != block_group) { 6625 if (used_block_group != block_group) {
6555 btrfs_put_block_group(block_group); 6626 btrfs_release_block_group(block_group,
6627 delalloc);
6556 block_group = used_block_group; 6628 block_group = used_block_group;
6557 } 6629 }
6558 goto checks; 6630 goto checks;
6559 } 6631 }
6560 6632
6561 WARN_ON(last_ptr->block_group != used_block_group); 6633 WARN_ON(last_ptr->block_group != used_block_group);
6562 if (used_block_group != block_group) 6634release_cluster:
6563 btrfs_put_block_group(used_block_group);
6564refill_cluster:
6565 /* If we are on LOOP_NO_EMPTY_SIZE, we can't 6635 /* If we are on LOOP_NO_EMPTY_SIZE, we can't
6566 * set up a new clusters, so lets just skip it 6636 * set up a new clusters, so lets just skip it
6567 * and let the allocator find whatever block 6637 * and let the allocator find whatever block
@@ -6578,8 +6648,10 @@ refill_cluster:
6578 * succeeding in the unclustered 6648 * succeeding in the unclustered
6579 * allocation. */ 6649 * allocation. */
6580 if (loop >= LOOP_NO_EMPTY_SIZE && 6650 if (loop >= LOOP_NO_EMPTY_SIZE &&
6581 last_ptr->block_group != block_group) { 6651 used_block_group != block_group) {
6582 spin_unlock(&last_ptr->refill_lock); 6652 spin_unlock(&last_ptr->refill_lock);
6653 btrfs_release_block_group(used_block_group,
6654 delalloc);
6583 goto unclustered_alloc; 6655 goto unclustered_alloc;
6584 } 6656 }
6585 6657
@@ -6589,6 +6661,10 @@ refill_cluster:
6589 */ 6661 */
6590 btrfs_return_cluster_to_free_space(NULL, last_ptr); 6662 btrfs_return_cluster_to_free_space(NULL, last_ptr);
6591 6663
6664 if (used_block_group != block_group)
6665 btrfs_release_block_group(used_block_group,
6666 delalloc);
6667refill_cluster:
6592 if (loop >= LOOP_NO_EMPTY_SIZE) { 6668 if (loop >= LOOP_NO_EMPTY_SIZE) {
6593 spin_unlock(&last_ptr->refill_lock); 6669 spin_unlock(&last_ptr->refill_lock);
6594 goto unclustered_alloc; 6670 goto unclustered_alloc;
@@ -6696,7 +6772,7 @@ checks:
6696 BUG_ON(offset > search_start); 6772 BUG_ON(offset > search_start);
6697 6773
6698 ret = btrfs_update_reserved_bytes(block_group, num_bytes, 6774 ret = btrfs_update_reserved_bytes(block_group, num_bytes,
6699 alloc_type); 6775 alloc_type, delalloc);
6700 if (ret == -EAGAIN) { 6776 if (ret == -EAGAIN) {
6701 btrfs_add_free_space(block_group, offset, num_bytes); 6777 btrfs_add_free_space(block_group, offset, num_bytes);
6702 goto loop; 6778 goto loop;
@@ -6708,13 +6784,13 @@ checks:
6708 6784
6709 trace_btrfs_reserve_extent(orig_root, block_group, 6785 trace_btrfs_reserve_extent(orig_root, block_group,
6710 search_start, num_bytes); 6786 search_start, num_bytes);
6711 btrfs_put_block_group(block_group); 6787 btrfs_release_block_group(block_group, delalloc);
6712 break; 6788 break;
6713loop: 6789loop:
6714 failed_cluster_refill = false; 6790 failed_cluster_refill = false;
6715 failed_alloc = false; 6791 failed_alloc = false;
6716 BUG_ON(index != get_block_group_index(block_group)); 6792 BUG_ON(index != get_block_group_index(block_group));
6717 btrfs_put_block_group(block_group); 6793 btrfs_release_block_group(block_group, delalloc);
6718 } 6794 }
6719 up_read(&space_info->groups_sem); 6795 up_read(&space_info->groups_sem);
6720 6796
@@ -6827,7 +6903,7 @@ again:
6827int btrfs_reserve_extent(struct btrfs_root *root, 6903int btrfs_reserve_extent(struct btrfs_root *root,
6828 u64 num_bytes, u64 min_alloc_size, 6904 u64 num_bytes, u64 min_alloc_size,
6829 u64 empty_size, u64 hint_byte, 6905 u64 empty_size, u64 hint_byte,
6830 struct btrfs_key *ins, int is_data) 6906 struct btrfs_key *ins, int is_data, int delalloc)
6831{ 6907{
6832 bool final_tried = false; 6908 bool final_tried = false;
6833 u64 flags; 6909 u64 flags;
@@ -6837,7 +6913,7 @@ int btrfs_reserve_extent(struct btrfs_root *root,
6837again: 6913again:
6838 WARN_ON(num_bytes < root->sectorsize); 6914 WARN_ON(num_bytes < root->sectorsize);
6839 ret = find_free_extent(root, num_bytes, empty_size, hint_byte, ins, 6915 ret = find_free_extent(root, num_bytes, empty_size, hint_byte, ins,
6840 flags); 6916 flags, delalloc);
6841 6917
6842 if (ret == -ENOSPC) { 6918 if (ret == -ENOSPC) {
6843 if (!final_tried && ins->offset) { 6919 if (!final_tried && ins->offset) {
@@ -6862,7 +6938,8 @@ again:
6862} 6938}
6863 6939
6864static int __btrfs_free_reserved_extent(struct btrfs_root *root, 6940static int __btrfs_free_reserved_extent(struct btrfs_root *root,
6865 u64 start, u64 len, int pin) 6941 u64 start, u64 len,
6942 int pin, int delalloc)
6866{ 6943{
6867 struct btrfs_block_group_cache *cache; 6944 struct btrfs_block_group_cache *cache;
6868 int ret = 0; 6945 int ret = 0;
@@ -6881,7 +6958,7 @@ static int __btrfs_free_reserved_extent(struct btrfs_root *root,
6881 pin_down_extent(root, cache, start, len, 1); 6958 pin_down_extent(root, cache, start, len, 1);
6882 else { 6959 else {
6883 btrfs_add_free_space(cache, start, len); 6960 btrfs_add_free_space(cache, start, len);
6884 btrfs_update_reserved_bytes(cache, len, RESERVE_FREE); 6961 btrfs_update_reserved_bytes(cache, len, RESERVE_FREE, delalloc);
6885 } 6962 }
6886 btrfs_put_block_group(cache); 6963 btrfs_put_block_group(cache);
6887 6964
@@ -6891,15 +6968,15 @@ static int __btrfs_free_reserved_extent(struct btrfs_root *root,
6891} 6968}
6892 6969
6893int btrfs_free_reserved_extent(struct btrfs_root *root, 6970int btrfs_free_reserved_extent(struct btrfs_root *root,
6894 u64 start, u64 len) 6971 u64 start, u64 len, int delalloc)
6895{ 6972{
6896 return __btrfs_free_reserved_extent(root, start, len, 0); 6973 return __btrfs_free_reserved_extent(root, start, len, 0, delalloc);
6897} 6974}
6898 6975
6899int btrfs_free_and_pin_reserved_extent(struct btrfs_root *root, 6976int btrfs_free_and_pin_reserved_extent(struct btrfs_root *root,
6900 u64 start, u64 len) 6977 u64 start, u64 len)
6901{ 6978{
6902 return __btrfs_free_reserved_extent(root, start, len, 1); 6979 return __btrfs_free_reserved_extent(root, start, len, 1, 0);
6903} 6980}
6904 6981
6905static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans, 6982static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
@@ -7114,7 +7191,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
7114 return -EINVAL; 7191 return -EINVAL;
7115 7192
7116 ret = btrfs_update_reserved_bytes(block_group, ins->offset, 7193 ret = btrfs_update_reserved_bytes(block_group, ins->offset,
7117 RESERVE_ALLOC_NO_ACCOUNT); 7194 RESERVE_ALLOC_NO_ACCOUNT, 0);
7118 BUG_ON(ret); /* logic error */ 7195 BUG_ON(ret); /* logic error */
7119 ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, 7196 ret = alloc_reserved_file_extent(trans, root, 0, root_objectid,
7120 0, owner, offset, ins, 1); 7197 0, owner, offset, ins, 1);
@@ -7256,7 +7333,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
7256 return ERR_CAST(block_rsv); 7333 return ERR_CAST(block_rsv);
7257 7334
7258 ret = btrfs_reserve_extent(root, blocksize, blocksize, 7335 ret = btrfs_reserve_extent(root, blocksize, blocksize,
7259 empty_size, hint, &ins, 0); 7336 empty_size, hint, &ins, 0, 0);
7260 if (ret) { 7337 if (ret) {
7261 unuse_block_rsv(root->fs_info, block_rsv, blocksize); 7338 unuse_block_rsv(root->fs_info, block_rsv, blocksize);
7262 return ERR_PTR(ret); 7339 return ERR_PTR(ret);
@@ -8659,6 +8736,7 @@ btrfs_create_block_group_cache(struct btrfs_root *root, u64 start, u64 size)
8659 start); 8736 start);
8660 atomic_set(&cache->count, 1); 8737 atomic_set(&cache->count, 1);
8661 spin_lock_init(&cache->lock); 8738 spin_lock_init(&cache->lock);
8739 init_rwsem(&cache->data_rwsem);
8662 INIT_LIST_HEAD(&cache->list); 8740 INIT_LIST_HEAD(&cache->list);
8663 INIT_LIST_HEAD(&cache->cluster_list); 8741 INIT_LIST_HEAD(&cache->cluster_list);
8664 INIT_LIST_HEAD(&cache->new_bg_list); 8742 INIT_LIST_HEAD(&cache->new_bg_list);
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 15ce5f2a2b62..ccc264e7bde1 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -158,7 +158,6 @@ struct extent_buffer {
158 * to unlock 158 * to unlock
159 */ 159 */
160 wait_queue_head_t read_lock_wq; 160 wait_queue_head_t read_lock_wq;
161 wait_queue_head_t lock_wq;
162 struct page *pages[INLINE_EXTENT_BUFFER_PAGES]; 161 struct page *pages[INLINE_EXTENT_BUFFER_PAGES];
163#ifdef CONFIG_BTRFS_DEBUG 162#ifdef CONFIG_BTRFS_DEBUG
164 struct list_head leak_list; 163 struct list_head leak_list;
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index 1874aee69c86..225302b39afb 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -75,6 +75,8 @@ void free_extent_map(struct extent_map *em)
75 if (atomic_dec_and_test(&em->refs)) { 75 if (atomic_dec_and_test(&em->refs)) {
76 WARN_ON(extent_map_in_tree(em)); 76 WARN_ON(extent_map_in_tree(em));
77 WARN_ON(!list_empty(&em->list)); 77 WARN_ON(!list_empty(&em->list));
78 if (test_bit(EXTENT_FLAG_FS_MAPPING, &em->flags))
79 kfree(em->bdev);
78 kmem_cache_free(extent_map_cache, em); 80 kmem_cache_free(extent_map_cache, em);
79 } 81 }
80} 82}
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h
index e7fd8a56a140..b2991fd8583e 100644
--- a/fs/btrfs/extent_map.h
+++ b/fs/btrfs/extent_map.h
@@ -15,6 +15,7 @@
15#define EXTENT_FLAG_PREALLOC 3 /* pre-allocated extent */ 15#define EXTENT_FLAG_PREALLOC 3 /* pre-allocated extent */
16#define EXTENT_FLAG_LOGGING 4 /* Logging this extent */ 16#define EXTENT_FLAG_LOGGING 4 /* Logging this extent */
17#define EXTENT_FLAG_FILLING 5 /* Filling in a preallocated extent */ 17#define EXTENT_FLAG_FILLING 5 /* Filling in a preallocated extent */
18#define EXTENT_FLAG_FS_MAPPING 6 /* filesystem extent mapping type */
18 19
19struct extent_map { 20struct extent_map {
20 struct rb_node rb_node; 21 struct rb_node rb_node;
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 372b05ff1943..2b0a627cb5f9 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -274,18 +274,32 @@ struct io_ctl {
274}; 274};
275 275
276static int io_ctl_init(struct io_ctl *io_ctl, struct inode *inode, 276static int io_ctl_init(struct io_ctl *io_ctl, struct inode *inode,
277 struct btrfs_root *root) 277 struct btrfs_root *root, int write)
278{ 278{
279 int num_pages;
280 int check_crcs = 0;
281
282 num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
283 PAGE_CACHE_SHIFT;
284
285 if (btrfs_ino(inode) != BTRFS_FREE_INO_OBJECTID)
286 check_crcs = 1;
287
288 /* Make sure we can fit our crcs into the first page */
289 if (write && check_crcs &&
290 (num_pages * sizeof(u32)) >= PAGE_CACHE_SIZE)
291 return -ENOSPC;
292
279 memset(io_ctl, 0, sizeof(struct io_ctl)); 293 memset(io_ctl, 0, sizeof(struct io_ctl));
280 io_ctl->num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> 294
281 PAGE_CACHE_SHIFT; 295 io_ctl->pages = kzalloc(sizeof(struct page *) * num_pages, GFP_NOFS);
282 io_ctl->pages = kzalloc(sizeof(struct page *) * io_ctl->num_pages,
283 GFP_NOFS);
284 if (!io_ctl->pages) 296 if (!io_ctl->pages)
285 return -ENOMEM; 297 return -ENOMEM;
298
299 io_ctl->num_pages = num_pages;
286 io_ctl->root = root; 300 io_ctl->root = root;
287 if (btrfs_ino(inode) != BTRFS_FREE_INO_OBJECTID) 301 io_ctl->check_crcs = check_crcs;
288 io_ctl->check_crcs = 1; 302
289 return 0; 303 return 0;
290} 304}
291 305
@@ -666,6 +680,13 @@ static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
666 generation = btrfs_free_space_generation(leaf, header); 680 generation = btrfs_free_space_generation(leaf, header);
667 btrfs_release_path(path); 681 btrfs_release_path(path);
668 682
683 if (!BTRFS_I(inode)->generation) {
684 btrfs_info(root->fs_info,
685 "The free space cache file (%llu) is invalid. skip it\n",
686 offset);
687 return 0;
688 }
689
669 if (BTRFS_I(inode)->generation != generation) { 690 if (BTRFS_I(inode)->generation != generation) {
670 btrfs_err(root->fs_info, 691 btrfs_err(root->fs_info,
671 "free space inode generation (%llu) " 692 "free space inode generation (%llu) "
@@ -677,7 +698,7 @@ static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
677 if (!num_entries) 698 if (!num_entries)
678 return 0; 699 return 0;
679 700
680 ret = io_ctl_init(&io_ctl, inode, root); 701 ret = io_ctl_init(&io_ctl, inode, root, 0);
681 if (ret) 702 if (ret)
682 return ret; 703 return ret;
683 704
@@ -957,19 +978,18 @@ fail:
957} 978}
958 979
959static noinline_for_stack int 980static noinline_for_stack int
960add_ioctl_entries(struct btrfs_root *root, 981write_pinned_extent_entries(struct btrfs_root *root,
961 struct inode *inode, 982 struct btrfs_block_group_cache *block_group,
962 struct btrfs_block_group_cache *block_group, 983 struct io_ctl *io_ctl,
963 struct io_ctl *io_ctl, 984 int *entries)
964 struct extent_state **cached_state,
965 struct list_head *bitmap_list,
966 int *entries)
967{ 985{
968 u64 start, extent_start, extent_end, len; 986 u64 start, extent_start, extent_end, len;
969 struct list_head *pos, *n;
970 struct extent_io_tree *unpin = NULL; 987 struct extent_io_tree *unpin = NULL;
971 int ret; 988 int ret;
972 989
990 if (!block_group)
991 return 0;
992
973 /* 993 /*
974 * We want to add any pinned extents to our free space cache 994 * We want to add any pinned extents to our free space cache
975 * so we don't leak the space 995 * so we don't leak the space
@@ -979,23 +999,19 @@ add_ioctl_entries(struct btrfs_root *root,
979 */ 999 */
980 unpin = root->fs_info->pinned_extents; 1000 unpin = root->fs_info->pinned_extents;
981 1001
982 if (block_group) 1002 start = block_group->key.objectid;
983 start = block_group->key.objectid;
984 1003
985 while (block_group && (start < block_group->key.objectid + 1004 while (start < block_group->key.objectid + block_group->key.offset) {
986 block_group->key.offset)) {
987 ret = find_first_extent_bit(unpin, start, 1005 ret = find_first_extent_bit(unpin, start,
988 &extent_start, &extent_end, 1006 &extent_start, &extent_end,
989 EXTENT_DIRTY, NULL); 1007 EXTENT_DIRTY, NULL);
990 if (ret) { 1008 if (ret)
991 ret = 0; 1009 return 0;
992 break;
993 }
994 1010
995 /* This pinned extent is out of our range */ 1011 /* This pinned extent is out of our range */
996 if (extent_start >= block_group->key.objectid + 1012 if (extent_start >= block_group->key.objectid +
997 block_group->key.offset) 1013 block_group->key.offset)
998 break; 1014 return 0;
999 1015
1000 extent_start = max(extent_start, start); 1016 extent_start = max(extent_start, start);
1001 extent_end = min(block_group->key.objectid + 1017 extent_end = min(block_group->key.objectid +
@@ -1005,11 +1021,20 @@ add_ioctl_entries(struct btrfs_root *root,
1005 *entries += 1; 1021 *entries += 1;
1006 ret = io_ctl_add_entry(io_ctl, extent_start, len, NULL); 1022 ret = io_ctl_add_entry(io_ctl, extent_start, len, NULL);
1007 if (ret) 1023 if (ret)
1008 goto out_nospc; 1024 return -ENOSPC;
1009 1025
1010 start = extent_end; 1026 start = extent_end;
1011 } 1027 }
1012 1028
1029 return 0;
1030}
1031
1032static noinline_for_stack int
1033write_bitmap_entries(struct io_ctl *io_ctl, struct list_head *bitmap_list)
1034{
1035 struct list_head *pos, *n;
1036 int ret;
1037
1013 /* Write out the bitmaps */ 1038 /* Write out the bitmaps */
1014 list_for_each_safe(pos, n, bitmap_list) { 1039 list_for_each_safe(pos, n, bitmap_list) {
1015 struct btrfs_free_space *entry = 1040 struct btrfs_free_space *entry =
@@ -1017,36 +1042,24 @@ add_ioctl_entries(struct btrfs_root *root,
1017 1042
1018 ret = io_ctl_add_bitmap(io_ctl, entry->bitmap); 1043 ret = io_ctl_add_bitmap(io_ctl, entry->bitmap);
1019 if (ret) 1044 if (ret)
1020 goto out_nospc; 1045 return -ENOSPC;
1021 list_del_init(&entry->list); 1046 list_del_init(&entry->list);
1022 } 1047 }
1023 1048
1024 /* Zero out the rest of the pages just to make sure */ 1049 return 0;
1025 io_ctl_zero_remaining_pages(io_ctl); 1050}
1026
1027 ret = btrfs_dirty_pages(root, inode, io_ctl->pages, io_ctl->num_pages,
1028 0, i_size_read(inode), cached_state);
1029 io_ctl_drop_pages(io_ctl);
1030 unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
1031 i_size_read(inode) - 1, cached_state, GFP_NOFS);
1032 1051
1033 if (ret) 1052static int flush_dirty_cache(struct inode *inode)
1034 goto fail; 1053{
1054 int ret;
1035 1055
1036 ret = btrfs_wait_ordered_range(inode, 0, (u64)-1); 1056 ret = btrfs_wait_ordered_range(inode, 0, (u64)-1);
1037 if (ret) { 1057 if (ret)
1038 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1, 1058 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1,
1039 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL, 1059 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL,
1040 GFP_NOFS); 1060 GFP_NOFS);
1041 goto fail;
1042 }
1043 return 0;
1044 1061
1045fail: 1062 return ret;
1046 return -1;
1047
1048out_nospc:
1049 return -ENOSPC;
1050} 1063}
1051 1064
1052static void noinline_for_stack 1065static void noinline_for_stack
@@ -1056,6 +1069,7 @@ cleanup_write_cache_enospc(struct inode *inode,
1056 struct list_head *bitmap_list) 1069 struct list_head *bitmap_list)
1057{ 1070{
1058 struct list_head *pos, *n; 1071 struct list_head *pos, *n;
1072
1059 list_for_each_safe(pos, n, bitmap_list) { 1073 list_for_each_safe(pos, n, bitmap_list) {
1060 struct btrfs_free_space *entry = 1074 struct btrfs_free_space *entry =
1061 list_entry(pos, struct btrfs_free_space, list); 1075 list_entry(pos, struct btrfs_free_space, list);
@@ -1088,64 +1102,104 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
1088{ 1102{
1089 struct extent_state *cached_state = NULL; 1103 struct extent_state *cached_state = NULL;
1090 struct io_ctl io_ctl; 1104 struct io_ctl io_ctl;
1091 struct list_head bitmap_list; 1105 LIST_HEAD(bitmap_list);
1092 int entries = 0; 1106 int entries = 0;
1093 int bitmaps = 0; 1107 int bitmaps = 0;
1094 int ret; 1108 int ret;
1095 int err = -1;
1096
1097 INIT_LIST_HEAD(&bitmap_list);
1098 1109
1099 if (!i_size_read(inode)) 1110 if (!i_size_read(inode))
1100 return -1; 1111 return -1;
1101 1112
1102 ret = io_ctl_init(&io_ctl, inode, root); 1113 ret = io_ctl_init(&io_ctl, inode, root, 1);
1103 if (ret) 1114 if (ret)
1104 return -1; 1115 return -1;
1105 1116
1117 if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA)) {
1118 down_write(&block_group->data_rwsem);
1119 spin_lock(&block_group->lock);
1120 if (block_group->delalloc_bytes) {
1121 block_group->disk_cache_state = BTRFS_DC_WRITTEN;
1122 spin_unlock(&block_group->lock);
1123 up_write(&block_group->data_rwsem);
1124 BTRFS_I(inode)->generation = 0;
1125 ret = 0;
1126 goto out;
1127 }
1128 spin_unlock(&block_group->lock);
1129 }
1130
1106 /* Lock all pages first so we can lock the extent safely. */ 1131 /* Lock all pages first so we can lock the extent safely. */
1107 io_ctl_prepare_pages(&io_ctl, inode, 0); 1132 io_ctl_prepare_pages(&io_ctl, inode, 0);
1108 1133
1109 lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1, 1134 lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
1110 0, &cached_state); 1135 0, &cached_state);
1111 1136
1112
1113 /* Make sure we can fit our crcs into the first page */
1114 if (io_ctl.check_crcs &&
1115 (io_ctl.num_pages * sizeof(u32)) >= PAGE_CACHE_SIZE)
1116 goto out_nospc;
1117
1118 io_ctl_set_generation(&io_ctl, trans->transid); 1137 io_ctl_set_generation(&io_ctl, trans->transid);
1119 1138
1139 /* Write out the extent entries in the free space cache */
1120 ret = write_cache_extent_entries(&io_ctl, ctl, 1140 ret = write_cache_extent_entries(&io_ctl, ctl,
1121 block_group, &entries, &bitmaps, 1141 block_group, &entries, &bitmaps,
1122 &bitmap_list); 1142 &bitmap_list);
1123 if (ret) 1143 if (ret)
1124 goto out_nospc; 1144 goto out_nospc;
1125 1145
1126 ret = add_ioctl_entries(root, inode, block_group, &io_ctl, 1146 /*
1127 &cached_state, &bitmap_list, &entries); 1147 * Some spaces that are freed in the current transaction are pinned,
1148 * they will be added into free space cache after the transaction is
1149 * committed, we shouldn't lose them.
1150 */
1151 ret = write_pinned_extent_entries(root, block_group, &io_ctl, &entries);
1152 if (ret)
1153 goto out_nospc;
1128 1154
1129 if (ret == -ENOSPC) 1155 /* At last, we write out all the bitmaps. */
1156 ret = write_bitmap_entries(&io_ctl, &bitmap_list);
1157 if (ret)
1130 goto out_nospc; 1158 goto out_nospc;
1131 else if (ret) 1159
1160 /* Zero out the rest of the pages just to make sure */
1161 io_ctl_zero_remaining_pages(&io_ctl);
1162
1163 /* Everything is written out, now we dirty the pages in the file. */
1164 ret = btrfs_dirty_pages(root, inode, io_ctl.pages, io_ctl.num_pages,
1165 0, i_size_read(inode), &cached_state);
1166 if (ret)
1167 goto out_nospc;
1168
1169 if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA))
1170 up_write(&block_group->data_rwsem);
1171 /*
1172 * Release the pages and unlock the extent, we will flush
1173 * them out later
1174 */
1175 io_ctl_drop_pages(&io_ctl);
1176
1177 unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
1178 i_size_read(inode) - 1, &cached_state, GFP_NOFS);
1179
1180 /* Flush the dirty pages in the cache file. */
1181 ret = flush_dirty_cache(inode);
1182 if (ret)
1132 goto out; 1183 goto out;
1133 1184
1134 err = update_cache_item(trans, root, inode, path, offset, 1185 /* Update the cache item to tell everyone this cache file is valid. */
1186 ret = update_cache_item(trans, root, inode, path, offset,
1135 entries, bitmaps); 1187 entries, bitmaps);
1136
1137out: 1188out:
1138 io_ctl_free(&io_ctl); 1189 io_ctl_free(&io_ctl);
1139 if (err) { 1190 if (ret) {
1140 invalidate_inode_pages2(inode->i_mapping); 1191 invalidate_inode_pages2(inode->i_mapping);
1141 BTRFS_I(inode)->generation = 0; 1192 BTRFS_I(inode)->generation = 0;
1142 } 1193 }
1143 btrfs_update_inode(trans, root, inode); 1194 btrfs_update_inode(trans, root, inode);
1144 return err; 1195 return ret;
1145 1196
1146out_nospc: 1197out_nospc:
1147
1148 cleanup_write_cache_enospc(inode, &io_ctl, &cached_state, &bitmap_list); 1198 cleanup_write_cache_enospc(inode, &io_ctl, &cached_state, &bitmap_list);
1199
1200 if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA))
1201 up_write(&block_group->data_rwsem);
1202
1149 goto out; 1203 goto out;
1150} 1204}
1151 1205
@@ -1165,6 +1219,12 @@ int btrfs_write_out_cache(struct btrfs_root *root,
1165 spin_unlock(&block_group->lock); 1219 spin_unlock(&block_group->lock);
1166 return 0; 1220 return 0;
1167 } 1221 }
1222
1223 if (block_group->delalloc_bytes) {
1224 block_group->disk_cache_state = BTRFS_DC_WRITTEN;
1225 spin_unlock(&block_group->lock);
1226 return 0;
1227 }
1168 spin_unlock(&block_group->lock); 1228 spin_unlock(&block_group->lock);
1169 1229
1170 inode = lookup_free_space_inode(root, block_group, path); 1230 inode = lookup_free_space_inode(root, block_group, path);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 8925f66a1411..3668048e16f8 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -693,7 +693,7 @@ retry:
693 ret = btrfs_reserve_extent(root, 693 ret = btrfs_reserve_extent(root,
694 async_extent->compressed_size, 694 async_extent->compressed_size,
695 async_extent->compressed_size, 695 async_extent->compressed_size,
696 0, alloc_hint, &ins, 1); 696 0, alloc_hint, &ins, 1, 1);
697 if (ret) { 697 if (ret) {
698 int i; 698 int i;
699 699
@@ -794,7 +794,7 @@ retry:
794out: 794out:
795 return ret; 795 return ret;
796out_free_reserve: 796out_free_reserve:
797 btrfs_free_reserved_extent(root, ins.objectid, ins.offset); 797 btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
798out_free: 798out_free:
799 extent_clear_unlock_delalloc(inode, async_extent->start, 799 extent_clear_unlock_delalloc(inode, async_extent->start,
800 async_extent->start + 800 async_extent->start +
@@ -917,7 +917,7 @@ static noinline int cow_file_range(struct inode *inode,
917 cur_alloc_size = disk_num_bytes; 917 cur_alloc_size = disk_num_bytes;
918 ret = btrfs_reserve_extent(root, cur_alloc_size, 918 ret = btrfs_reserve_extent(root, cur_alloc_size,
919 root->sectorsize, 0, alloc_hint, 919 root->sectorsize, 0, alloc_hint,
920 &ins, 1); 920 &ins, 1, 1);
921 if (ret < 0) 921 if (ret < 0)
922 goto out_unlock; 922 goto out_unlock;
923 923
@@ -995,7 +995,7 @@ out:
995 return ret; 995 return ret;
996 996
997out_reserve: 997out_reserve:
998 btrfs_free_reserved_extent(root, ins.objectid, ins.offset); 998 btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
999out_unlock: 999out_unlock:
1000 extent_clear_unlock_delalloc(inode, start, end, locked_page, 1000 extent_clear_unlock_delalloc(inode, start, end, locked_page,
1001 EXTENT_LOCKED | EXTENT_DO_ACCOUNTING | 1001 EXTENT_LOCKED | EXTENT_DO_ACCOUNTING |
@@ -2599,6 +2599,21 @@ out_kfree:
2599 return NULL; 2599 return NULL;
2600} 2600}
2601 2601
2602static void btrfs_release_delalloc_bytes(struct btrfs_root *root,
2603 u64 start, u64 len)
2604{
2605 struct btrfs_block_group_cache *cache;
2606
2607 cache = btrfs_lookup_block_group(root->fs_info, start);
2608 ASSERT(cache);
2609
2610 spin_lock(&cache->lock);
2611 cache->delalloc_bytes -= len;
2612 spin_unlock(&cache->lock);
2613
2614 btrfs_put_block_group(cache);
2615}
2616
2602/* as ordered data IO finishes, this gets called so we can finish 2617/* as ordered data IO finishes, this gets called so we can finish
2603 * an ordered extent if the range of bytes in the file it covers are 2618 * an ordered extent if the range of bytes in the file it covers are
2604 * fully written. 2619 * fully written.
@@ -2698,6 +2713,10 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
2698 logical_len, logical_len, 2713 logical_len, logical_len,
2699 compress_type, 0, 0, 2714 compress_type, 0, 0,
2700 BTRFS_FILE_EXTENT_REG); 2715 BTRFS_FILE_EXTENT_REG);
2716 if (!ret)
2717 btrfs_release_delalloc_bytes(root,
2718 ordered_extent->start,
2719 ordered_extent->disk_len);
2701 } 2720 }
2702 unpin_extent_cache(&BTRFS_I(inode)->extent_tree, 2721 unpin_extent_cache(&BTRFS_I(inode)->extent_tree,
2703 ordered_extent->file_offset, ordered_extent->len, 2722 ordered_extent->file_offset, ordered_extent->len,
@@ -2750,7 +2769,7 @@ out:
2750 !test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags) && 2769 !test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags) &&
2751 !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) 2770 !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags))
2752 btrfs_free_reserved_extent(root, ordered_extent->start, 2771 btrfs_free_reserved_extent(root, ordered_extent->start,
2753 ordered_extent->disk_len); 2772 ordered_extent->disk_len, 1);
2754 } 2773 }
2755 2774
2756 2775
@@ -6535,21 +6554,21 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
6535 6554
6536 alloc_hint = get_extent_allocation_hint(inode, start, len); 6555 alloc_hint = get_extent_allocation_hint(inode, start, len);
6537 ret = btrfs_reserve_extent(root, len, root->sectorsize, 0, 6556 ret = btrfs_reserve_extent(root, len, root->sectorsize, 0,
6538 alloc_hint, &ins, 1); 6557 alloc_hint, &ins, 1, 1);
6539 if (ret) 6558 if (ret)
6540 return ERR_PTR(ret); 6559 return ERR_PTR(ret);
6541 6560
6542 em = create_pinned_em(inode, start, ins.offset, start, ins.objectid, 6561 em = create_pinned_em(inode, start, ins.offset, start, ins.objectid,
6543 ins.offset, ins.offset, ins.offset, 0); 6562 ins.offset, ins.offset, ins.offset, 0);
6544 if (IS_ERR(em)) { 6563 if (IS_ERR(em)) {
6545 btrfs_free_reserved_extent(root, ins.objectid, ins.offset); 6564 btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
6546 return em; 6565 return em;
6547 } 6566 }
6548 6567
6549 ret = btrfs_add_ordered_extent_dio(inode, start, ins.objectid, 6568 ret = btrfs_add_ordered_extent_dio(inode, start, ins.objectid,
6550 ins.offset, ins.offset, 0); 6569 ins.offset, ins.offset, 0);
6551 if (ret) { 6570 if (ret) {
6552 btrfs_free_reserved_extent(root, ins.objectid, ins.offset); 6571 btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
6553 free_extent_map(em); 6572 free_extent_map(em);
6554 return ERR_PTR(ret); 6573 return ERR_PTR(ret);
6555 } 6574 }
@@ -7437,7 +7456,7 @@ free_ordered:
7437 if (!test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags) && 7456 if (!test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags) &&
7438 !test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags)) 7457 !test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags))
7439 btrfs_free_reserved_extent(root, ordered->start, 7458 btrfs_free_reserved_extent(root, ordered->start,
7440 ordered->disk_len); 7459 ordered->disk_len, 1);
7441 btrfs_put_ordered_extent(ordered); 7460 btrfs_put_ordered_extent(ordered);
7442 btrfs_put_ordered_extent(ordered); 7461 btrfs_put_ordered_extent(ordered);
7443 } 7462 }
@@ -8808,7 +8827,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
8808 cur_bytes = min(num_bytes, 256ULL * 1024 * 1024); 8827 cur_bytes = min(num_bytes, 256ULL * 1024 * 1024);
8809 cur_bytes = max(cur_bytes, min_size); 8828 cur_bytes = max(cur_bytes, min_size);
8810 ret = btrfs_reserve_extent(root, cur_bytes, min_size, 0, 8829 ret = btrfs_reserve_extent(root, cur_bytes, min_size, 0,
8811 *alloc_hint, &ins, 1); 8830 *alloc_hint, &ins, 1, 0);
8812 if (ret) { 8831 if (ret) {
8813 if (own_trans) 8832 if (own_trans)
8814 btrfs_end_transaction(trans, root); 8833 btrfs_end_transaction(trans, root);
@@ -8822,7 +8841,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
8822 BTRFS_FILE_EXTENT_PREALLOC); 8841 BTRFS_FILE_EXTENT_PREALLOC);
8823 if (ret) { 8842 if (ret) {
8824 btrfs_free_reserved_extent(root, ins.objectid, 8843 btrfs_free_reserved_extent(root, ins.objectid,
8825 ins.offset); 8844 ins.offset, 0);
8826 btrfs_abort_transaction(trans, root, ret); 8845 btrfs_abort_transaction(trans, root, ret);
8827 if (own_trans) 8846 if (own_trans)
8828 btrfs_end_transaction(trans, root); 8847 btrfs_end_transaction(trans, root);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 0d321c23069a..47aceb494d1d 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -136,19 +136,22 @@ static unsigned int btrfs_flags_to_ioctl(unsigned int flags)
136void btrfs_update_iflags(struct inode *inode) 136void btrfs_update_iflags(struct inode *inode)
137{ 137{
138 struct btrfs_inode *ip = BTRFS_I(inode); 138 struct btrfs_inode *ip = BTRFS_I(inode);
139 139 unsigned int new_fl = 0;
140 inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC);
141 140
142 if (ip->flags & BTRFS_INODE_SYNC) 141 if (ip->flags & BTRFS_INODE_SYNC)
143 inode->i_flags |= S_SYNC; 142 new_fl |= S_SYNC;
144 if (ip->flags & BTRFS_INODE_IMMUTABLE) 143 if (ip->flags & BTRFS_INODE_IMMUTABLE)
145 inode->i_flags |= S_IMMUTABLE; 144 new_fl |= S_IMMUTABLE;
146 if (ip->flags & BTRFS_INODE_APPEND) 145 if (ip->flags & BTRFS_INODE_APPEND)
147 inode->i_flags |= S_APPEND; 146 new_fl |= S_APPEND;
148 if (ip->flags & BTRFS_INODE_NOATIME) 147 if (ip->flags & BTRFS_INODE_NOATIME)
149 inode->i_flags |= S_NOATIME; 148 new_fl |= S_NOATIME;
150 if (ip->flags & BTRFS_INODE_DIRSYNC) 149 if (ip->flags & BTRFS_INODE_DIRSYNC)
151 inode->i_flags |= S_DIRSYNC; 150 new_fl |= S_DIRSYNC;
151
152 set_mask_bits(&inode->i_flags,
153 S_SYNC | S_APPEND | S_IMMUTABLE | S_NOATIME | S_DIRSYNC,
154 new_fl);
152} 155}
153 156
154/* 157/*
@@ -3139,7 +3142,6 @@ out:
3139static void clone_update_extent_map(struct inode *inode, 3142static void clone_update_extent_map(struct inode *inode,
3140 const struct btrfs_trans_handle *trans, 3143 const struct btrfs_trans_handle *trans,
3141 const struct btrfs_path *path, 3144 const struct btrfs_path *path,
3142 struct btrfs_file_extent_item *fi,
3143 const u64 hole_offset, 3145 const u64 hole_offset,
3144 const u64 hole_len) 3146 const u64 hole_len)
3145{ 3147{
@@ -3154,7 +3156,11 @@ static void clone_update_extent_map(struct inode *inode,
3154 return; 3156 return;
3155 } 3157 }
3156 3158
3157 if (fi) { 3159 if (path) {
3160 struct btrfs_file_extent_item *fi;
3161
3162 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
3163 struct btrfs_file_extent_item);
3158 btrfs_extent_item_to_extent_map(inode, path, fi, false, em); 3164 btrfs_extent_item_to_extent_map(inode, path, fi, false, em);
3159 em->generation = -1; 3165 em->generation = -1;
3160 if (btrfs_file_extent_type(path->nodes[0], fi) == 3166 if (btrfs_file_extent_type(path->nodes[0], fi) ==
@@ -3508,18 +3514,15 @@ process_slot:
3508 btrfs_item_ptr_offset(leaf, slot), 3514 btrfs_item_ptr_offset(leaf, slot),
3509 size); 3515 size);
3510 inode_add_bytes(inode, datal); 3516 inode_add_bytes(inode, datal);
3511 extent = btrfs_item_ptr(leaf, slot,
3512 struct btrfs_file_extent_item);
3513 } 3517 }
3514 3518
3515 /* If we have an implicit hole (NO_HOLES feature). */ 3519 /* If we have an implicit hole (NO_HOLES feature). */
3516 if (drop_start < new_key.offset) 3520 if (drop_start < new_key.offset)
3517 clone_update_extent_map(inode, trans, 3521 clone_update_extent_map(inode, trans,
3518 path, NULL, drop_start, 3522 NULL, drop_start,
3519 new_key.offset - drop_start); 3523 new_key.offset - drop_start);
3520 3524
3521 clone_update_extent_map(inode, trans, path, 3525 clone_update_extent_map(inode, trans, path, 0, 0);
3522 extent, 0, 0);
3523 3526
3524 btrfs_mark_buffer_dirty(leaf); 3527 btrfs_mark_buffer_dirty(leaf);
3525 btrfs_release_path(path); 3528 btrfs_release_path(path);
@@ -3562,12 +3565,10 @@ process_slot:
3562 btrfs_end_transaction(trans, root); 3565 btrfs_end_transaction(trans, root);
3563 goto out; 3566 goto out;
3564 } 3567 }
3568 clone_update_extent_map(inode, trans, NULL, last_dest_end,
3569 destoff + len - last_dest_end);
3565 ret = clone_finish_inode_update(trans, inode, destoff + len, 3570 ret = clone_finish_inode_update(trans, inode, destoff + len,
3566 destoff, olen); 3571 destoff, olen);
3567 if (ret)
3568 goto out;
3569 clone_update_extent_map(inode, trans, path, NULL, last_dest_end,
3570 destoff + len - last_dest_end);
3571 } 3572 }
3572 3573
3573out: 3574out:
diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c
index 01277b8f2373..5665d2149249 100644
--- a/fs/btrfs/locking.c
+++ b/fs/btrfs/locking.c
@@ -33,14 +33,14 @@ static void btrfs_assert_tree_read_locked(struct extent_buffer *eb);
33 */ 33 */
34void btrfs_set_lock_blocking_rw(struct extent_buffer *eb, int rw) 34void btrfs_set_lock_blocking_rw(struct extent_buffer *eb, int rw)
35{ 35{
36 if (eb->lock_nested) { 36 /*
37 read_lock(&eb->lock); 37 * no lock is required. The lock owner may change if
38 if (eb->lock_nested && current->pid == eb->lock_owner) { 38 * we have a read lock, but it won't change to or away
39 read_unlock(&eb->lock); 39 * from us. If we have the write lock, we are the owner
40 return; 40 * and it'll never change.
41 } 41 */
42 read_unlock(&eb->lock); 42 if (eb->lock_nested && current->pid == eb->lock_owner)
43 } 43 return;
44 if (rw == BTRFS_WRITE_LOCK) { 44 if (rw == BTRFS_WRITE_LOCK) {
45 if (atomic_read(&eb->blocking_writers) == 0) { 45 if (atomic_read(&eb->blocking_writers) == 0) {
46 WARN_ON(atomic_read(&eb->spinning_writers) != 1); 46 WARN_ON(atomic_read(&eb->spinning_writers) != 1);
@@ -65,14 +65,15 @@ void btrfs_set_lock_blocking_rw(struct extent_buffer *eb, int rw)
65 */ 65 */
66void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw) 66void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw)
67{ 67{
68 if (eb->lock_nested) { 68 /*
69 read_lock(&eb->lock); 69 * no lock is required. The lock owner may change if
70 if (eb->lock_nested && current->pid == eb->lock_owner) { 70 * we have a read lock, but it won't change to or away
71 read_unlock(&eb->lock); 71 * from us. If we have the write lock, we are the owner
72 return; 72 * and it'll never change.
73 } 73 */
74 read_unlock(&eb->lock); 74 if (eb->lock_nested && current->pid == eb->lock_owner)
75 } 75 return;
76
76 if (rw == BTRFS_WRITE_LOCK_BLOCKING) { 77 if (rw == BTRFS_WRITE_LOCK_BLOCKING) {
77 BUG_ON(atomic_read(&eb->blocking_writers) != 1); 78 BUG_ON(atomic_read(&eb->blocking_writers) != 1);
78 write_lock(&eb->lock); 79 write_lock(&eb->lock);
@@ -99,6 +100,9 @@ void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw)
99void btrfs_tree_read_lock(struct extent_buffer *eb) 100void btrfs_tree_read_lock(struct extent_buffer *eb)
100{ 101{
101again: 102again:
103 BUG_ON(!atomic_read(&eb->blocking_writers) &&
104 current->pid == eb->lock_owner);
105
102 read_lock(&eb->lock); 106 read_lock(&eb->lock);
103 if (atomic_read(&eb->blocking_writers) && 107 if (atomic_read(&eb->blocking_writers) &&
104 current->pid == eb->lock_owner) { 108 current->pid == eb->lock_owner) {
@@ -132,7 +136,9 @@ int btrfs_try_tree_read_lock(struct extent_buffer *eb)
132 if (atomic_read(&eb->blocking_writers)) 136 if (atomic_read(&eb->blocking_writers))
133 return 0; 137 return 0;
134 138
135 read_lock(&eb->lock); 139 if (!read_trylock(&eb->lock))
140 return 0;
141
136 if (atomic_read(&eb->blocking_writers)) { 142 if (atomic_read(&eb->blocking_writers)) {
137 read_unlock(&eb->lock); 143 read_unlock(&eb->lock);
138 return 0; 144 return 0;
@@ -151,7 +157,10 @@ int btrfs_try_tree_write_lock(struct extent_buffer *eb)
151 if (atomic_read(&eb->blocking_writers) || 157 if (atomic_read(&eb->blocking_writers) ||
152 atomic_read(&eb->blocking_readers)) 158 atomic_read(&eb->blocking_readers))
153 return 0; 159 return 0;
154 write_lock(&eb->lock); 160
161 if (!write_trylock(&eb->lock))
162 return 0;
163
155 if (atomic_read(&eb->blocking_writers) || 164 if (atomic_read(&eb->blocking_writers) ||
156 atomic_read(&eb->blocking_readers)) { 165 atomic_read(&eb->blocking_readers)) {
157 write_unlock(&eb->lock); 166 write_unlock(&eb->lock);
@@ -168,14 +177,15 @@ int btrfs_try_tree_write_lock(struct extent_buffer *eb)
168 */ 177 */
169void btrfs_tree_read_unlock(struct extent_buffer *eb) 178void btrfs_tree_read_unlock(struct extent_buffer *eb)
170{ 179{
171 if (eb->lock_nested) { 180 /*
172 read_lock(&eb->lock); 181 * if we're nested, we have the write lock. No new locking
173 if (eb->lock_nested && current->pid == eb->lock_owner) { 182 * is needed as long as we are the lock owner.
174 eb->lock_nested = 0; 183 * The write unlock will do a barrier for us, and the lock_nested
175 read_unlock(&eb->lock); 184 * field only matters to the lock owner.
176 return; 185 */
177 } 186 if (eb->lock_nested && current->pid == eb->lock_owner) {
178 read_unlock(&eb->lock); 187 eb->lock_nested = 0;
188 return;
179 } 189 }
180 btrfs_assert_tree_read_locked(eb); 190 btrfs_assert_tree_read_locked(eb);
181 WARN_ON(atomic_read(&eb->spinning_readers) == 0); 191 WARN_ON(atomic_read(&eb->spinning_readers) == 0);
@@ -189,14 +199,15 @@ void btrfs_tree_read_unlock(struct extent_buffer *eb)
189 */ 199 */
190void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb) 200void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb)
191{ 201{
192 if (eb->lock_nested) { 202 /*
193 read_lock(&eb->lock); 203 * if we're nested, we have the write lock. No new locking
194 if (eb->lock_nested && current->pid == eb->lock_owner) { 204 * is needed as long as we are the lock owner.
195 eb->lock_nested = 0; 205 * The write unlock will do a barrier for us, and the lock_nested
196 read_unlock(&eb->lock); 206 * field only matters to the lock owner.
197 return; 207 */
198 } 208 if (eb->lock_nested && current->pid == eb->lock_owner) {
199 read_unlock(&eb->lock); 209 eb->lock_nested = 0;
210 return;
200 } 211 }
201 btrfs_assert_tree_read_locked(eb); 212 btrfs_assert_tree_read_locked(eb);
202 WARN_ON(atomic_read(&eb->blocking_readers) == 0); 213 WARN_ON(atomic_read(&eb->blocking_readers) == 0);
@@ -244,6 +255,7 @@ void btrfs_tree_unlock(struct extent_buffer *eb)
244 BUG_ON(blockers > 1); 255 BUG_ON(blockers > 1);
245 256
246 btrfs_assert_tree_locked(eb); 257 btrfs_assert_tree_locked(eb);
258 eb->lock_owner = 0;
247 atomic_dec(&eb->write_locks); 259 atomic_dec(&eb->write_locks);
248 260
249 if (blockers) { 261 if (blockers) {
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c
index 6efd70d3b64f..9626b4ad3b9a 100644
--- a/fs/btrfs/print-tree.c
+++ b/fs/btrfs/print-tree.c
@@ -54,7 +54,7 @@ static void print_extent_data_ref(struct extent_buffer *eb,
54 btrfs_extent_data_ref_count(eb, ref)); 54 btrfs_extent_data_ref_count(eb, ref));
55} 55}
56 56
57static void print_extent_item(struct extent_buffer *eb, int slot) 57static void print_extent_item(struct extent_buffer *eb, int slot, int type)
58{ 58{
59 struct btrfs_extent_item *ei; 59 struct btrfs_extent_item *ei;
60 struct btrfs_extent_inline_ref *iref; 60 struct btrfs_extent_inline_ref *iref;
@@ -63,7 +63,6 @@ static void print_extent_item(struct extent_buffer *eb, int slot)
63 struct btrfs_disk_key key; 63 struct btrfs_disk_key key;
64 unsigned long end; 64 unsigned long end;
65 unsigned long ptr; 65 unsigned long ptr;
66 int type;
67 u32 item_size = btrfs_item_size_nr(eb, slot); 66 u32 item_size = btrfs_item_size_nr(eb, slot);
68 u64 flags; 67 u64 flags;
69 u64 offset; 68 u64 offset;
@@ -88,7 +87,8 @@ static void print_extent_item(struct extent_buffer *eb, int slot)
88 btrfs_extent_refs(eb, ei), btrfs_extent_generation(eb, ei), 87 btrfs_extent_refs(eb, ei), btrfs_extent_generation(eb, ei),
89 flags); 88 flags);
90 89
91 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { 90 if ((type == BTRFS_EXTENT_ITEM_KEY) &&
91 flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
92 struct btrfs_tree_block_info *info; 92 struct btrfs_tree_block_info *info;
93 info = (struct btrfs_tree_block_info *)(ei + 1); 93 info = (struct btrfs_tree_block_info *)(ei + 1);
94 btrfs_tree_block_key(eb, info, &key); 94 btrfs_tree_block_key(eb, info, &key);
@@ -223,7 +223,8 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l)
223 btrfs_disk_root_refs(l, ri)); 223 btrfs_disk_root_refs(l, ri));
224 break; 224 break;
225 case BTRFS_EXTENT_ITEM_KEY: 225 case BTRFS_EXTENT_ITEM_KEY:
226 print_extent_item(l, i); 226 case BTRFS_METADATA_ITEM_KEY:
227 print_extent_item(l, i, type);
227 break; 228 break;
228 case BTRFS_TREE_BLOCK_REF_KEY: 229 case BTRFS_TREE_BLOCK_REF_KEY:
229 printk(KERN_INFO "\t\ttree block backref\n"); 230 printk(KERN_INFO "\t\ttree block backref\n");
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index 4055291a523e..4a88f073fdd7 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -1956,9 +1956,10 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio)
1956 * pages are going to be uptodate. 1956 * pages are going to be uptodate.
1957 */ 1957 */
1958 for (stripe = 0; stripe < bbio->num_stripes; stripe++) { 1958 for (stripe = 0; stripe < bbio->num_stripes; stripe++) {
1959 if (rbio->faila == stripe || 1959 if (rbio->faila == stripe || rbio->failb == stripe) {
1960 rbio->failb == stripe) 1960 atomic_inc(&rbio->bbio->error);
1961 continue; 1961 continue;
1962 }
1962 1963
1963 for (pagenr = 0; pagenr < nr_pages; pagenr++) { 1964 for (pagenr = 0; pagenr < nr_pages; pagenr++) {
1964 struct page *p; 1965 struct page *p;
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index ac80188eec88..b6d198f5181e 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -2725,11 +2725,8 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
2725 dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent); 2725 dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
2726 length = btrfs_dev_extent_length(l, dev_extent); 2726 length = btrfs_dev_extent_length(l, dev_extent);
2727 2727
2728 if (found_key.offset + length <= start) { 2728 if (found_key.offset + length <= start)
2729 key.offset = found_key.offset + length; 2729 goto skip;
2730 btrfs_release_path(path);
2731 continue;
2732 }
2733 2730
2734 chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent); 2731 chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent);
2735 chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent); 2732 chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent);
@@ -2740,10 +2737,12 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
2740 * the chunk from going away while we scrub it 2737 * the chunk from going away while we scrub it
2741 */ 2738 */
2742 cache = btrfs_lookup_block_group(fs_info, chunk_offset); 2739 cache = btrfs_lookup_block_group(fs_info, chunk_offset);
2743 if (!cache) { 2740
2744 ret = -ENOENT; 2741 /* some chunks are removed but not committed to disk yet,
2745 break; 2742 * continue scrubbing */
2746 } 2743 if (!cache)
2744 goto skip;
2745
2747 dev_replace->cursor_right = found_key.offset + length; 2746 dev_replace->cursor_right = found_key.offset + length;
2748 dev_replace->cursor_left = found_key.offset; 2747 dev_replace->cursor_left = found_key.offset;
2749 dev_replace->item_needs_writeback = 1; 2748 dev_replace->item_needs_writeback = 1;
@@ -2802,7 +2801,7 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
2802 2801
2803 dev_replace->cursor_left = dev_replace->cursor_right; 2802 dev_replace->cursor_left = dev_replace->cursor_right;
2804 dev_replace->item_needs_writeback = 1; 2803 dev_replace->item_needs_writeback = 1;
2805 2804skip:
2806 key.offset = found_key.offset + length; 2805 key.offset = found_key.offset + length;
2807 btrfs_release_path(path); 2806 btrfs_release_path(path);
2808 } 2807 }
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 4662d92a4b73..8e16bca69c56 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -522,9 +522,10 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
522 case Opt_ssd_spread: 522 case Opt_ssd_spread:
523 btrfs_set_and_info(root, SSD_SPREAD, 523 btrfs_set_and_info(root, SSD_SPREAD,
524 "use spread ssd allocation scheme"); 524 "use spread ssd allocation scheme");
525 btrfs_set_opt(info->mount_opt, SSD);
525 break; 526 break;
526 case Opt_nossd: 527 case Opt_nossd:
527 btrfs_clear_and_info(root, NOSSD, 528 btrfs_set_and_info(root, NOSSD,
528 "not using ssd allocation scheme"); 529 "not using ssd allocation scheme");
529 btrfs_clear_opt(info->mount_opt, SSD); 530 btrfs_clear_opt(info->mount_opt, SSD);
530 break; 531 break;
@@ -1467,7 +1468,9 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
1467 goto restore; 1468 goto restore;
1468 1469
1469 /* recover relocation */ 1470 /* recover relocation */
1471 mutex_lock(&fs_info->cleaner_mutex);
1470 ret = btrfs_recover_relocation(root); 1472 ret = btrfs_recover_relocation(root);
1473 mutex_unlock(&fs_info->cleaner_mutex);
1471 if (ret) 1474 if (ret)
1472 goto restore; 1475 goto restore;
1473 1476
@@ -1808,6 +1811,8 @@ static int btrfs_show_devname(struct seq_file *m, struct dentry *root)
1808 list_for_each_entry(dev, head, dev_list) { 1811 list_for_each_entry(dev, head, dev_list) {
1809 if (dev->missing) 1812 if (dev->missing)
1810 continue; 1813 continue;
1814 if (!dev->name)
1815 continue;
1811 if (!first_dev || dev->devid < first_dev->devid) 1816 if (!first_dev || dev->devid < first_dev->devid)
1812 first_dev = dev; 1817 first_dev = dev;
1813 } 1818 }
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index df39458f1487..78699364f537 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -605,14 +605,37 @@ static void init_feature_attrs(void)
605 } 605 }
606} 606}
607 607
608static int add_device_membership(struct btrfs_fs_info *fs_info) 608int btrfs_kobj_rm_device(struct btrfs_fs_info *fs_info,
609 struct btrfs_device *one_device)
610{
611 struct hd_struct *disk;
612 struct kobject *disk_kobj;
613
614 if (!fs_info->device_dir_kobj)
615 return -EINVAL;
616
617 if (one_device) {
618 disk = one_device->bdev->bd_part;
619 disk_kobj = &part_to_dev(disk)->kobj;
620
621 sysfs_remove_link(fs_info->device_dir_kobj,
622 disk_kobj->name);
623 }
624
625 return 0;
626}
627
628int btrfs_kobj_add_device(struct btrfs_fs_info *fs_info,
629 struct btrfs_device *one_device)
609{ 630{
610 int error = 0; 631 int error = 0;
611 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; 632 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
612 struct btrfs_device *dev; 633 struct btrfs_device *dev;
613 634
614 fs_info->device_dir_kobj = kobject_create_and_add("devices", 635 if (!fs_info->device_dir_kobj)
636 fs_info->device_dir_kobj = kobject_create_and_add("devices",
615 &fs_info->super_kobj); 637 &fs_info->super_kobj);
638
616 if (!fs_info->device_dir_kobj) 639 if (!fs_info->device_dir_kobj)
617 return -ENOMEM; 640 return -ENOMEM;
618 641
@@ -623,6 +646,9 @@ static int add_device_membership(struct btrfs_fs_info *fs_info)
623 if (!dev->bdev) 646 if (!dev->bdev)
624 continue; 647 continue;
625 648
649 if (one_device && one_device != dev)
650 continue;
651
626 disk = dev->bdev->bd_part; 652 disk = dev->bdev->bd_part;
627 disk_kobj = &part_to_dev(disk)->kobj; 653 disk_kobj = &part_to_dev(disk)->kobj;
628 654
@@ -666,7 +692,7 @@ int btrfs_sysfs_add_one(struct btrfs_fs_info *fs_info)
666 if (error) 692 if (error)
667 goto failure; 693 goto failure;
668 694
669 error = add_device_membership(fs_info); 695 error = btrfs_kobj_add_device(fs_info, NULL);
670 if (error) 696 if (error)
671 goto failure; 697 goto failure;
672 698
diff --git a/fs/btrfs/sysfs.h b/fs/btrfs/sysfs.h
index 9ab576318a84..ac46df37504c 100644
--- a/fs/btrfs/sysfs.h
+++ b/fs/btrfs/sysfs.h
@@ -66,4 +66,8 @@ char *btrfs_printable_features(enum btrfs_feature_set set, u64 flags);
66extern const char * const btrfs_feature_set_names[3]; 66extern const char * const btrfs_feature_set_names[3];
67extern struct kobj_type space_info_ktype; 67extern struct kobj_type space_info_ktype;
68extern struct kobj_type btrfs_raid_ktype; 68extern struct kobj_type btrfs_raid_ktype;
69int btrfs_kobj_add_device(struct btrfs_fs_info *fs_info,
70 struct btrfs_device *one_device);
71int btrfs_kobj_rm_device(struct btrfs_fs_info *fs_info,
72 struct btrfs_device *one_device);
69#endif /* _BTRFS_SYSFS_H_ */ 73#endif /* _BTRFS_SYSFS_H_ */
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 511839c04f11..5f379affdf23 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -386,11 +386,13 @@ start_transaction(struct btrfs_root *root, u64 num_items, unsigned int type,
386 bool reloc_reserved = false; 386 bool reloc_reserved = false;
387 int ret; 387 int ret;
388 388
389 /* Send isn't supposed to start transactions. */
390 ASSERT(current->journal_info != (void *)BTRFS_SEND_TRANS_STUB);
391
389 if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) 392 if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state))
390 return ERR_PTR(-EROFS); 393 return ERR_PTR(-EROFS);
391 394
392 if (current->journal_info && 395 if (current->journal_info) {
393 current->journal_info != (void *)BTRFS_SEND_TRANS_STUB) {
394 WARN_ON(type & TRANS_EXTWRITERS); 396 WARN_ON(type & TRANS_EXTWRITERS);
395 h = current->journal_info; 397 h = current->journal_info;
396 h->use_count++; 398 h->use_count++;
@@ -491,6 +493,7 @@ again:
491 smp_mb(); 493 smp_mb();
492 if (cur_trans->state >= TRANS_STATE_BLOCKED && 494 if (cur_trans->state >= TRANS_STATE_BLOCKED &&
493 may_wait_transaction(root, type)) { 495 may_wait_transaction(root, type)) {
496 current->journal_info = h;
494 btrfs_commit_transaction(h, root); 497 btrfs_commit_transaction(h, root);
495 goto again; 498 goto again;
496 } 499 }
@@ -1615,11 +1618,6 @@ static int btrfs_flush_all_pending_stuffs(struct btrfs_trans_handle *trans,
1615 int ret; 1618 int ret;
1616 1619
1617 ret = btrfs_run_delayed_items(trans, root); 1620 ret = btrfs_run_delayed_items(trans, root);
1618 /*
1619 * running the delayed items may have added new refs. account
1620 * them now so that they hinder processing of more delayed refs
1621 * as little as possible.
1622 */
1623 if (ret) 1621 if (ret)
1624 return ret; 1622 return ret;
1625 1623
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index ffeed6d6326f..6104676857f5 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -40,6 +40,7 @@
40#include "rcu-string.h" 40#include "rcu-string.h"
41#include "math.h" 41#include "math.h"
42#include "dev-replace.h" 42#include "dev-replace.h"
43#include "sysfs.h"
43 44
44static int init_first_rw_device(struct btrfs_trans_handle *trans, 45static int init_first_rw_device(struct btrfs_trans_handle *trans,
45 struct btrfs_root *root, 46 struct btrfs_root *root,
@@ -554,12 +555,14 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
554 * This is ok to do without rcu read locked because we hold the 555 * This is ok to do without rcu read locked because we hold the
555 * uuid mutex so nothing we touch in here is going to disappear. 556 * uuid mutex so nothing we touch in here is going to disappear.
556 */ 557 */
557 name = rcu_string_strdup(orig_dev->name->str, GFP_NOFS); 558 if (orig_dev->name) {
558 if (!name) { 559 name = rcu_string_strdup(orig_dev->name->str, GFP_NOFS);
559 kfree(device); 560 if (!name) {
560 goto error; 561 kfree(device);
562 goto error;
563 }
564 rcu_assign_pointer(device->name, name);
561 } 565 }
562 rcu_assign_pointer(device->name, name);
563 566
564 list_add(&device->dev_list, &fs_devices->devices); 567 list_add(&device->dev_list, &fs_devices->devices);
565 device->fs_devices = fs_devices; 568 device->fs_devices = fs_devices;
@@ -1680,6 +1683,9 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
1680 if (device->bdev) 1683 if (device->bdev)
1681 device->fs_devices->open_devices--; 1684 device->fs_devices->open_devices--;
1682 1685
1686 /* remove sysfs entry */
1687 btrfs_kobj_rm_device(root->fs_info, device);
1688
1683 call_rcu(&device->rcu, free_device); 1689 call_rcu(&device->rcu, free_device);
1684 1690
1685 num_devices = btrfs_super_num_devices(root->fs_info->super_copy) - 1; 1691 num_devices = btrfs_super_num_devices(root->fs_info->super_copy) - 1;
@@ -2143,9 +2149,14 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
2143 total_bytes = btrfs_super_num_devices(root->fs_info->super_copy); 2149 total_bytes = btrfs_super_num_devices(root->fs_info->super_copy);
2144 btrfs_set_super_num_devices(root->fs_info->super_copy, 2150 btrfs_set_super_num_devices(root->fs_info->super_copy,
2145 total_bytes + 1); 2151 total_bytes + 1);
2152
2153 /* add sysfs device entry */
2154 btrfs_kobj_add_device(root->fs_info, device);
2155
2146 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); 2156 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
2147 2157
2148 if (seeding_dev) { 2158 if (seeding_dev) {
2159 char fsid_buf[BTRFS_UUID_UNPARSED_SIZE];
2149 ret = init_first_rw_device(trans, root, device); 2160 ret = init_first_rw_device(trans, root, device);
2150 if (ret) { 2161 if (ret) {
2151 btrfs_abort_transaction(trans, root, ret); 2162 btrfs_abort_transaction(trans, root, ret);
@@ -2156,6 +2167,14 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
2156 btrfs_abort_transaction(trans, root, ret); 2167 btrfs_abort_transaction(trans, root, ret);
2157 goto error_trans; 2168 goto error_trans;
2158 } 2169 }
2170
2171 /* Sprouting would change fsid of the mounted root,
2172 * so rename the fsid on the sysfs
2173 */
2174 snprintf(fsid_buf, BTRFS_UUID_UNPARSED_SIZE, "%pU",
2175 root->fs_info->fsid);
2176 if (kobject_rename(&root->fs_info->super_kobj, fsid_buf))
2177 goto error_trans;
2159 } else { 2178 } else {
2160 ret = btrfs_add_device(trans, root, device); 2179 ret = btrfs_add_device(trans, root, device);
2161 if (ret) { 2180 if (ret) {
@@ -2205,6 +2224,7 @@ error_trans:
2205 unlock_chunks(root); 2224 unlock_chunks(root);
2206 btrfs_end_transaction(trans, root); 2225 btrfs_end_transaction(trans, root);
2207 rcu_string_free(device->name); 2226 rcu_string_free(device->name);
2227 btrfs_kobj_rm_device(root->fs_info, device);
2208 kfree(device); 2228 kfree(device);
2209error: 2229error:
2210 blkdev_put(bdev, FMODE_EXCL); 2230 blkdev_put(bdev, FMODE_EXCL);
@@ -2543,9 +2563,6 @@ static int btrfs_relocate_chunk(struct btrfs_root *root,
2543 remove_extent_mapping(em_tree, em); 2563 remove_extent_mapping(em_tree, em);
2544 write_unlock(&em_tree->lock); 2564 write_unlock(&em_tree->lock);
2545 2565
2546 kfree(map);
2547 em->bdev = NULL;
2548
2549 /* once for the tree */ 2566 /* once for the tree */
2550 free_extent_map(em); 2567 free_extent_map(em);
2551 /* once for us */ 2568 /* once for us */
@@ -4301,9 +4318,11 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
4301 4318
4302 em = alloc_extent_map(); 4319 em = alloc_extent_map();
4303 if (!em) { 4320 if (!em) {
4321 kfree(map);
4304 ret = -ENOMEM; 4322 ret = -ENOMEM;
4305 goto error; 4323 goto error;
4306 } 4324 }
4325 set_bit(EXTENT_FLAG_FS_MAPPING, &em->flags);
4307 em->bdev = (struct block_device *)map; 4326 em->bdev = (struct block_device *)map;
4308 em->start = start; 4327 em->start = start;
4309 em->len = num_bytes; 4328 em->len = num_bytes;
@@ -4346,7 +4365,6 @@ error_del_extent:
4346 /* One for the tree reference */ 4365 /* One for the tree reference */
4347 free_extent_map(em); 4366 free_extent_map(em);
4348error: 4367error:
4349 kfree(map);
4350 kfree(devices_info); 4368 kfree(devices_info);
4351 return ret; 4369 return ret;
4352} 4370}
@@ -4558,7 +4576,6 @@ void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree)
4558 write_unlock(&tree->map_tree.lock); 4576 write_unlock(&tree->map_tree.lock);
4559 if (!em) 4577 if (!em)
4560 break; 4578 break;
4561 kfree(em->bdev);
4562 /* once for us */ 4579 /* once for us */
4563 free_extent_map(em); 4580 free_extent_map(em);
4564 /* once for the tree */ 4581 /* once for the tree */
@@ -5362,6 +5379,15 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
5362 return 0; 5379 return 0;
5363} 5380}
5364 5381
5382static inline void btrfs_end_bbio(struct btrfs_bio *bbio, struct bio *bio, int err)
5383{
5384 if (likely(bbio->flags & BTRFS_BIO_ORIG_BIO_SUBMITTED))
5385 bio_endio_nodec(bio, err);
5386 else
5387 bio_endio(bio, err);
5388 kfree(bbio);
5389}
5390
5365static void btrfs_end_bio(struct bio *bio, int err) 5391static void btrfs_end_bio(struct bio *bio, int err)
5366{ 5392{
5367 struct btrfs_bio *bbio = bio->bi_private; 5393 struct btrfs_bio *bbio = bio->bi_private;
@@ -5402,12 +5428,6 @@ static void btrfs_end_bio(struct bio *bio, int err)
5402 bio = bbio->orig_bio; 5428 bio = bbio->orig_bio;
5403 } 5429 }
5404 5430
5405 /*
5406 * We have original bio now. So increment bi_remaining to
5407 * account for it in endio
5408 */
5409 atomic_inc(&bio->bi_remaining);
5410
5411 bio->bi_private = bbio->private; 5431 bio->bi_private = bbio->private;
5412 bio->bi_end_io = bbio->end_io; 5432 bio->bi_end_io = bbio->end_io;
5413 btrfs_io_bio(bio)->mirror_num = bbio->mirror_num; 5433 btrfs_io_bio(bio)->mirror_num = bbio->mirror_num;
@@ -5424,9 +5444,8 @@ static void btrfs_end_bio(struct bio *bio, int err)
5424 set_bit(BIO_UPTODATE, &bio->bi_flags); 5444 set_bit(BIO_UPTODATE, &bio->bi_flags);
5425 err = 0; 5445 err = 0;
5426 } 5446 }
5427 kfree(bbio);
5428 5447
5429 bio_endio(bio, err); 5448 btrfs_end_bbio(bbio, bio, err);
5430 } else if (!is_orig_bio) { 5449 } else if (!is_orig_bio) {
5431 bio_put(bio); 5450 bio_put(bio);
5432 } 5451 }
@@ -5589,12 +5608,15 @@ static void bbio_error(struct btrfs_bio *bbio, struct bio *bio, u64 logical)
5589{ 5608{
5590 atomic_inc(&bbio->error); 5609 atomic_inc(&bbio->error);
5591 if (atomic_dec_and_test(&bbio->stripes_pending)) { 5610 if (atomic_dec_and_test(&bbio->stripes_pending)) {
5611 /* Shoud be the original bio. */
5612 WARN_ON(bio != bbio->orig_bio);
5613
5592 bio->bi_private = bbio->private; 5614 bio->bi_private = bbio->private;
5593 bio->bi_end_io = bbio->end_io; 5615 bio->bi_end_io = bbio->end_io;
5594 btrfs_io_bio(bio)->mirror_num = bbio->mirror_num; 5616 btrfs_io_bio(bio)->mirror_num = bbio->mirror_num;
5595 bio->bi_iter.bi_sector = logical >> 9; 5617 bio->bi_iter.bi_sector = logical >> 9;
5596 kfree(bbio); 5618
5597 bio_endio(bio, -EIO); 5619 btrfs_end_bbio(bbio, bio, -EIO);
5598 } 5620 }
5599} 5621}
5600 5622
@@ -5681,6 +5703,7 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
5681 BUG_ON(!bio); /* -ENOMEM */ 5703 BUG_ON(!bio); /* -ENOMEM */
5682 } else { 5704 } else {
5683 bio = first_bio; 5705 bio = first_bio;
5706 bbio->flags |= BTRFS_BIO_ORIG_BIO_SUBMITTED;
5684 } 5707 }
5685 5708
5686 submit_stripe_bio(root, bbio, bio, 5709 submit_stripe_bio(root, bbio, bio,
@@ -5822,6 +5845,7 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
5822 return -ENOMEM; 5845 return -ENOMEM;
5823 } 5846 }
5824 5847
5848 set_bit(EXTENT_FLAG_FS_MAPPING, &em->flags);
5825 em->bdev = (struct block_device *)map; 5849 em->bdev = (struct block_device *)map;
5826 em->start = logical; 5850 em->start = logical;
5827 em->len = length; 5851 em->len = length;
@@ -5846,7 +5870,6 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
5846 map->stripes[i].dev = btrfs_find_device(root->fs_info, devid, 5870 map->stripes[i].dev = btrfs_find_device(root->fs_info, devid,
5847 uuid, NULL); 5871 uuid, NULL);
5848 if (!map->stripes[i].dev && !btrfs_test_opt(root, DEGRADED)) { 5872 if (!map->stripes[i].dev && !btrfs_test_opt(root, DEGRADED)) {
5849 kfree(map);
5850 free_extent_map(em); 5873 free_extent_map(em);
5851 return -EIO; 5874 return -EIO;
5852 } 5875 }
@@ -5854,7 +5877,6 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
5854 map->stripes[i].dev = 5877 map->stripes[i].dev =
5855 add_missing_dev(root, devid, uuid); 5878 add_missing_dev(root, devid, uuid);
5856 if (!map->stripes[i].dev) { 5879 if (!map->stripes[i].dev) {
5857 kfree(map);
5858 free_extent_map(em); 5880 free_extent_map(em);
5859 return -EIO; 5881 return -EIO;
5860 } 5882 }
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 1a15bbeb65e2..2aaa00c47816 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -190,11 +190,14 @@ struct btrfs_bio_stripe {
190struct btrfs_bio; 190struct btrfs_bio;
191typedef void (btrfs_bio_end_io_t) (struct btrfs_bio *bio, int err); 191typedef void (btrfs_bio_end_io_t) (struct btrfs_bio *bio, int err);
192 192
193#define BTRFS_BIO_ORIG_BIO_SUBMITTED 0x1
194
193struct btrfs_bio { 195struct btrfs_bio {
194 atomic_t stripes_pending; 196 atomic_t stripes_pending;
195 struct btrfs_fs_info *fs_info; 197 struct btrfs_fs_info *fs_info;
196 bio_end_io_t *end_io; 198 bio_end_io_t *end_io;
197 struct bio *orig_bio; 199 struct bio *orig_bio;
200 unsigned long flags;
198 void *private; 201 void *private;
199 atomic_t error; 202 atomic_t error;
200 int max_errors; 203 int max_errors;
diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c
index 4f196314c0c1..b67d8fc81277 100644
--- a/fs/btrfs/zlib.c
+++ b/fs/btrfs/zlib.c
@@ -136,7 +136,7 @@ static int zlib_compress_pages(struct list_head *ws,
136 if (workspace->def_strm.total_in > 8192 && 136 if (workspace->def_strm.total_in > 8192 &&
137 workspace->def_strm.total_in < 137 workspace->def_strm.total_in <
138 workspace->def_strm.total_out) { 138 workspace->def_strm.total_out) {
139 ret = -EIO; 139 ret = -E2BIG;
140 goto out; 140 goto out;
141 } 141 }
142 /* we need another page for writing out. Test this 142 /* we need another page for writing out. Test this
diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c
index 0227b45ef00a..15e9505aa35f 100644
--- a/fs/cifs/cifs_unicode.c
+++ b/fs/cifs/cifs_unicode.c
@@ -290,7 +290,8 @@ int
290cifsConvertToUTF16(__le16 *target, const char *source, int srclen, 290cifsConvertToUTF16(__le16 *target, const char *source, int srclen,
291 const struct nls_table *cp, int mapChars) 291 const struct nls_table *cp, int mapChars)
292{ 292{
293 int i, j, charlen; 293 int i, charlen;
294 int j = 0;
294 char src_char; 295 char src_char;
295 __le16 dst_char; 296 __le16 dst_char;
296 wchar_t tmp; 297 wchar_t tmp;
@@ -298,12 +299,11 @@ cifsConvertToUTF16(__le16 *target, const char *source, int srclen,
298 if (!mapChars) 299 if (!mapChars)
299 return cifs_strtoUTF16(target, source, PATH_MAX, cp); 300 return cifs_strtoUTF16(target, source, PATH_MAX, cp);
300 301
301 for (i = 0, j = 0; i < srclen; j++) { 302 for (i = 0; i < srclen; j++) {
302 src_char = source[i]; 303 src_char = source[i];
303 charlen = 1; 304 charlen = 1;
304 switch (src_char) { 305 switch (src_char) {
305 case 0: 306 case 0:
306 put_unaligned(0, &target[j]);
307 goto ctoUTF16_out; 307 goto ctoUTF16_out;
308 case ':': 308 case ':':
309 dst_char = cpu_to_le16(UNI_COLON); 309 dst_char = cpu_to_le16(UNI_COLON);
@@ -350,6 +350,7 @@ cifsConvertToUTF16(__le16 *target, const char *source, int srclen,
350 } 350 }
351 351
352ctoUTF16_out: 352ctoUTF16_out:
353 put_unaligned(0, &target[j]); /* Null terminate target unicode string */
353 return j; 354 return j;
354} 355}
355 356
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 2c90d07c0b3a..888398067420 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -725,6 +725,19 @@ out_nls:
725 goto out; 725 goto out;
726} 726}
727 727
728static ssize_t
729cifs_loose_read_iter(struct kiocb *iocb, struct iov_iter *iter)
730{
731 ssize_t rc;
732 struct inode *inode = file_inode(iocb->ki_filp);
733
734 rc = cifs_revalidate_mapping(inode);
735 if (rc)
736 return rc;
737
738 return generic_file_read_iter(iocb, iter);
739}
740
728static ssize_t cifs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) 741static ssize_t cifs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
729{ 742{
730 struct inode *inode = file_inode(iocb->ki_filp); 743 struct inode *inode = file_inode(iocb->ki_filp);
@@ -881,7 +894,7 @@ const struct inode_operations cifs_symlink_inode_ops = {
881const struct file_operations cifs_file_ops = { 894const struct file_operations cifs_file_ops = {
882 .read = new_sync_read, 895 .read = new_sync_read,
883 .write = new_sync_write, 896 .write = new_sync_write,
884 .read_iter = generic_file_read_iter, 897 .read_iter = cifs_loose_read_iter,
885 .write_iter = cifs_file_write_iter, 898 .write_iter = cifs_file_write_iter,
886 .open = cifs_open, 899 .open = cifs_open,
887 .release = cifs_close, 900 .release = cifs_close,
@@ -939,7 +952,7 @@ const struct file_operations cifs_file_direct_ops = {
939const struct file_operations cifs_file_nobrl_ops = { 952const struct file_operations cifs_file_nobrl_ops = {
940 .read = new_sync_read, 953 .read = new_sync_read,
941 .write = new_sync_write, 954 .write = new_sync_write,
942 .read_iter = generic_file_read_iter, 955 .read_iter = cifs_loose_read_iter,
943 .write_iter = cifs_file_write_iter, 956 .write_iter = cifs_file_write_iter,
944 .open = cifs_open, 957 .open = cifs_open,
945 .release = cifs_close, 958 .release = cifs_close,
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index 264ece71bdb2..68559fd557fb 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -374,7 +374,7 @@ cifs_create_mf_symlink(unsigned int xid, struct cifs_tcon *tcon,
374 oparms.cifs_sb = cifs_sb; 374 oparms.cifs_sb = cifs_sb;
375 oparms.desired_access = GENERIC_WRITE; 375 oparms.desired_access = GENERIC_WRITE;
376 oparms.create_options = create_options; 376 oparms.create_options = create_options;
377 oparms.disposition = FILE_OPEN; 377 oparms.disposition = FILE_CREATE;
378 oparms.path = path; 378 oparms.path = path;
379 oparms.fid = &fid; 379 oparms.fid = &fid;
380 oparms.reconnect = false; 380 oparms.reconnect = false;
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index b73e0621ce9e..b10b48c2a7af 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -910,7 +910,7 @@ static const struct file_operations eventpoll_fops = {
910void eventpoll_release_file(struct file *file) 910void eventpoll_release_file(struct file *file)
911{ 911{
912 struct eventpoll *ep; 912 struct eventpoll *ep;
913 struct epitem *epi; 913 struct epitem *epi, *next;
914 914
915 /* 915 /*
916 * We don't want to get "file->f_lock" because it is not 916 * We don't want to get "file->f_lock" because it is not
@@ -926,7 +926,7 @@ void eventpoll_release_file(struct file *file)
926 * Besides, ep_remove() acquires the lock, so we can't hold it here. 926 * Besides, ep_remove() acquires the lock, so we can't hold it here.
927 */ 927 */
928 mutex_lock(&epmutex); 928 mutex_lock(&epmutex);
929 list_for_each_entry_rcu(epi, &file->f_ep_links, fllink) { 929 list_for_each_entry_safe(epi, next, &file->f_ep_links, fllink) {
930 ep = epi->ep; 930 ep = epi->ep;
931 mutex_lock_nested(&ep->mtx, 0); 931 mutex_lock_nested(&ep->mtx, 0);
932 ep_remove(ep, epi); 932 ep_remove(ep, epi);
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 0762d143e252..fca382037ddd 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -194,7 +194,16 @@ static void ext4_init_block_bitmap(struct super_block *sb,
194 if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) { 194 if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) {
195 ext4_error(sb, "Checksum bad for group %u", block_group); 195 ext4_error(sb, "Checksum bad for group %u", block_group);
196 grp = ext4_get_group_info(sb, block_group); 196 grp = ext4_get_group_info(sb, block_group);
197 if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
198 percpu_counter_sub(&sbi->s_freeclusters_counter,
199 grp->bb_free);
197 set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state); 200 set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state);
201 if (!EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) {
202 int count;
203 count = ext4_free_inodes_count(sb, gdp);
204 percpu_counter_sub(&sbi->s_freeinodes_counter,
205 count);
206 }
198 set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state); 207 set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state);
199 return; 208 return;
200 } 209 }
@@ -359,6 +368,7 @@ static void ext4_validate_block_bitmap(struct super_block *sb,
359{ 368{
360 ext4_fsblk_t blk; 369 ext4_fsblk_t blk;
361 struct ext4_group_info *grp = ext4_get_group_info(sb, block_group); 370 struct ext4_group_info *grp = ext4_get_group_info(sb, block_group);
371 struct ext4_sb_info *sbi = EXT4_SB(sb);
362 372
363 if (buffer_verified(bh)) 373 if (buffer_verified(bh))
364 return; 374 return;
@@ -369,6 +379,9 @@ static void ext4_validate_block_bitmap(struct super_block *sb,
369 ext4_unlock_group(sb, block_group); 379 ext4_unlock_group(sb, block_group);
370 ext4_error(sb, "bg %u: block %llu: invalid block bitmap", 380 ext4_error(sb, "bg %u: block %llu: invalid block bitmap",
371 block_group, blk); 381 block_group, blk);
382 if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
383 percpu_counter_sub(&sbi->s_freeclusters_counter,
384 grp->bb_free);
372 set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state); 385 set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state);
373 return; 386 return;
374 } 387 }
@@ -376,6 +389,9 @@ static void ext4_validate_block_bitmap(struct super_block *sb,
376 desc, bh))) { 389 desc, bh))) {
377 ext4_unlock_group(sb, block_group); 390 ext4_unlock_group(sb, block_group);
378 ext4_error(sb, "bg %u: bad block bitmap checksum", block_group); 391 ext4_error(sb, "bg %u: bad block bitmap checksum", block_group);
392 if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
393 percpu_counter_sub(&sbi->s_freeclusters_counter,
394 grp->bb_free);
379 set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state); 395 set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state);
380 return; 396 return;
381 } 397 }
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index 3f5c188953a4..0b7e28e7eaa4 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -966,10 +966,10 @@ retry:
966 continue; 966 continue;
967 } 967 }
968 968
969 if (ei->i_es_lru_nr == 0 || ei == locked_ei) 969 if (ei->i_es_lru_nr == 0 || ei == locked_ei ||
970 !write_trylock(&ei->i_es_lock))
970 continue; 971 continue;
971 972
972 write_lock(&ei->i_es_lock);
973 shrunk = __es_try_to_reclaim_extents(ei, nr_to_scan); 973 shrunk = __es_try_to_reclaim_extents(ei, nr_to_scan);
974 if (ei->i_es_lru_nr == 0) 974 if (ei->i_es_lru_nr == 0)
975 list_del_init(&ei->i_es_lru); 975 list_del_init(&ei->i_es_lru);
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 0ee59a6644e2..5b87fc36aab8 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -71,6 +71,7 @@ static unsigned ext4_init_inode_bitmap(struct super_block *sb,
71 struct ext4_group_desc *gdp) 71 struct ext4_group_desc *gdp)
72{ 72{
73 struct ext4_group_info *grp; 73 struct ext4_group_info *grp;
74 struct ext4_sb_info *sbi = EXT4_SB(sb);
74 J_ASSERT_BH(bh, buffer_locked(bh)); 75 J_ASSERT_BH(bh, buffer_locked(bh));
75 76
76 /* If checksum is bad mark all blocks and inodes use to prevent 77 /* If checksum is bad mark all blocks and inodes use to prevent
@@ -78,7 +79,16 @@ static unsigned ext4_init_inode_bitmap(struct super_block *sb,
78 if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) { 79 if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) {
79 ext4_error(sb, "Checksum bad for group %u", block_group); 80 ext4_error(sb, "Checksum bad for group %u", block_group);
80 grp = ext4_get_group_info(sb, block_group); 81 grp = ext4_get_group_info(sb, block_group);
82 if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
83 percpu_counter_sub(&sbi->s_freeclusters_counter,
84 grp->bb_free);
81 set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state); 85 set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state);
86 if (!EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) {
87 int count;
88 count = ext4_free_inodes_count(sb, gdp);
89 percpu_counter_sub(&sbi->s_freeinodes_counter,
90 count);
91 }
82 set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state); 92 set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state);
83 return 0; 93 return 0;
84 } 94 }
@@ -116,6 +126,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
116 struct buffer_head *bh = NULL; 126 struct buffer_head *bh = NULL;
117 ext4_fsblk_t bitmap_blk; 127 ext4_fsblk_t bitmap_blk;
118 struct ext4_group_info *grp; 128 struct ext4_group_info *grp;
129 struct ext4_sb_info *sbi = EXT4_SB(sb);
119 130
120 desc = ext4_get_group_desc(sb, block_group, NULL); 131 desc = ext4_get_group_desc(sb, block_group, NULL);
121 if (!desc) 132 if (!desc)
@@ -185,6 +196,12 @@ verify:
185 ext4_error(sb, "Corrupt inode bitmap - block_group = %u, " 196 ext4_error(sb, "Corrupt inode bitmap - block_group = %u, "
186 "inode_bitmap = %llu", block_group, bitmap_blk); 197 "inode_bitmap = %llu", block_group, bitmap_blk);
187 grp = ext4_get_group_info(sb, block_group); 198 grp = ext4_get_group_info(sb, block_group);
199 if (!EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) {
200 int count;
201 count = ext4_free_inodes_count(sb, desc);
202 percpu_counter_sub(&sbi->s_freeinodes_counter,
203 count);
204 }
188 set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state); 205 set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state);
189 return NULL; 206 return NULL;
190 } 207 }
@@ -321,6 +338,12 @@ out:
321 fatal = err; 338 fatal = err;
322 } else { 339 } else {
323 ext4_error(sb, "bit already cleared for inode %lu", ino); 340 ext4_error(sb, "bit already cleared for inode %lu", ino);
341 if (gdp && !EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) {
342 int count;
343 count = ext4_free_inodes_count(sb, gdp);
344 percpu_counter_sub(&sbi->s_freeinodes_counter,
345 count);
346 }
324 set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state); 347 set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state);
325 } 348 }
326 349
@@ -851,6 +874,13 @@ got:
851 goto out; 874 goto out;
852 } 875 }
853 876
877 BUFFER_TRACE(group_desc_bh, "get_write_access");
878 err = ext4_journal_get_write_access(handle, group_desc_bh);
879 if (err) {
880 ext4_std_error(sb, err);
881 goto out;
882 }
883
854 /* We may have to initialize the block bitmap if it isn't already */ 884 /* We may have to initialize the block bitmap if it isn't already */
855 if (ext4_has_group_desc_csum(sb) && 885 if (ext4_has_group_desc_csum(sb) &&
856 gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { 886 gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
@@ -887,13 +917,6 @@ got:
887 } 917 }
888 } 918 }
889 919
890 BUFFER_TRACE(group_desc_bh, "get_write_access");
891 err = ext4_journal_get_write_access(handle, group_desc_bh);
892 if (err) {
893 ext4_std_error(sb, err);
894 goto out;
895 }
896
897 /* Update the relevant bg descriptor fields */ 920 /* Update the relevant bg descriptor fields */
898 if (ext4_has_group_desc_csum(sb)) { 921 if (ext4_has_group_desc_csum(sb)) {
899 int free; 922 int free;
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
index 8a57e9fcd1b9..fd69da194826 100644
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -389,7 +389,13 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
389 return 0; 389 return 0;
390failed: 390failed:
391 for (; i >= 0; i--) { 391 for (; i >= 0; i--) {
392 if (i != indirect_blks && branch[i].bh) 392 /*
393 * We want to ext4_forget() only freshly allocated indirect
394 * blocks. Buffer for new_blocks[i-1] is at branch[i].bh and
395 * buffer at branch[0].bh is indirect block / inode already
396 * existing before ext4_alloc_branch() was called.
397 */
398 if (i > 0 && i != indirect_blks && branch[i].bh)
393 ext4_forget(handle, 1, inode, branch[i].bh, 399 ext4_forget(handle, 1, inode, branch[i].bh,
394 branch[i].bh->b_blocknr); 400 branch[i].bh->b_blocknr);
395 ext4_free_blocks(handle, inode, NULL, new_blocks[i], 401 ext4_free_blocks(handle, inode, NULL, new_blocks[i],
@@ -1310,16 +1316,24 @@ static int free_hole_blocks(handle_t *handle, struct inode *inode,
1310 blk = *i_data; 1316 blk = *i_data;
1311 if (level > 0) { 1317 if (level > 0) {
1312 ext4_lblk_t first2; 1318 ext4_lblk_t first2;
1319 ext4_lblk_t count2;
1320
1313 bh = sb_bread(inode->i_sb, le32_to_cpu(blk)); 1321 bh = sb_bread(inode->i_sb, le32_to_cpu(blk));
1314 if (!bh) { 1322 if (!bh) {
1315 EXT4_ERROR_INODE_BLOCK(inode, le32_to_cpu(blk), 1323 EXT4_ERROR_INODE_BLOCK(inode, le32_to_cpu(blk),
1316 "Read failure"); 1324 "Read failure");
1317 return -EIO; 1325 return -EIO;
1318 } 1326 }
1319 first2 = (first > offset) ? first - offset : 0; 1327 if (first > offset) {
1328 first2 = first - offset;
1329 count2 = count;
1330 } else {
1331 first2 = 0;
1332 count2 = count - (offset - first);
1333 }
1320 ret = free_hole_blocks(handle, inode, bh, 1334 ret = free_hole_blocks(handle, inode, bh,
1321 (__le32 *)bh->b_data, level - 1, 1335 (__le32 *)bh->b_data, level - 1,
1322 first2, count - offset, 1336 first2, count2,
1323 inode->i_sb->s_blocksize >> 2); 1337 inode->i_sb->s_blocksize >> 2);
1324 if (ret) { 1338 if (ret) {
1325 brelse(bh); 1339 brelse(bh);
@@ -1329,8 +1343,8 @@ static int free_hole_blocks(handle_t *handle, struct inode *inode,
1329 if (level == 0 || 1343 if (level == 0 ||
1330 (bh && all_zeroes((__le32 *)bh->b_data, 1344 (bh && all_zeroes((__le32 *)bh->b_data,
1331 (__le32 *)bh->b_data + addr_per_block))) { 1345 (__le32 *)bh->b_data + addr_per_block))) {
1332 ext4_free_data(handle, inode, parent_bh, &blk, &blk+1); 1346 ext4_free_data(handle, inode, parent_bh,
1333 *i_data = 0; 1347 i_data, i_data + 1);
1334 } 1348 }
1335 brelse(bh); 1349 brelse(bh);
1336 bh = NULL; 1350 bh = NULL;
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 59e31622cc6e..2dcb936be90e 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -722,6 +722,7 @@ void ext4_mb_generate_buddy(struct super_block *sb,
722 void *buddy, void *bitmap, ext4_group_t group) 722 void *buddy, void *bitmap, ext4_group_t group)
723{ 723{
724 struct ext4_group_info *grp = ext4_get_group_info(sb, group); 724 struct ext4_group_info *grp = ext4_get_group_info(sb, group);
725 struct ext4_sb_info *sbi = EXT4_SB(sb);
725 ext4_grpblk_t max = EXT4_CLUSTERS_PER_GROUP(sb); 726 ext4_grpblk_t max = EXT4_CLUSTERS_PER_GROUP(sb);
726 ext4_grpblk_t i = 0; 727 ext4_grpblk_t i = 0;
727 ext4_grpblk_t first; 728 ext4_grpblk_t first;
@@ -751,14 +752,17 @@ void ext4_mb_generate_buddy(struct super_block *sb,
751 752
752 if (free != grp->bb_free) { 753 if (free != grp->bb_free) {
753 ext4_grp_locked_error(sb, group, 0, 0, 754 ext4_grp_locked_error(sb, group, 0, 0,
754 "%u clusters in bitmap, %u in gd; " 755 "block bitmap and bg descriptor "
755 "block bitmap corrupt.", 756 "inconsistent: %u vs %u free clusters",
756 free, grp->bb_free); 757 free, grp->bb_free);
757 /* 758 /*
758 * If we intend to continue, we consider group descriptor 759 * If we intend to continue, we consider group descriptor
759 * corrupt and update bb_free using bitmap value 760 * corrupt and update bb_free using bitmap value
760 */ 761 */
761 grp->bb_free = free; 762 grp->bb_free = free;
763 if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
764 percpu_counter_sub(&sbi->s_freeclusters_counter,
765 grp->bb_free);
762 set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state); 766 set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state);
763 } 767 }
764 mb_set_largest_free_order(sb, grp); 768 mb_set_largest_free_order(sb, grp);
@@ -1431,6 +1435,7 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
1431 right_is_free = !mb_test_bit(last + 1, e4b->bd_bitmap); 1435 right_is_free = !mb_test_bit(last + 1, e4b->bd_bitmap);
1432 1436
1433 if (unlikely(block != -1)) { 1437 if (unlikely(block != -1)) {
1438 struct ext4_sb_info *sbi = EXT4_SB(sb);
1434 ext4_fsblk_t blocknr; 1439 ext4_fsblk_t blocknr;
1435 1440
1436 blocknr = ext4_group_first_block_no(sb, e4b->bd_group); 1441 blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
@@ -1441,6 +1446,9 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
1441 "freeing already freed block " 1446 "freeing already freed block "
1442 "(bit %u); block bitmap corrupt.", 1447 "(bit %u); block bitmap corrupt.",
1443 block); 1448 block);
1449 if (!EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info))
1450 percpu_counter_sub(&sbi->s_freeclusters_counter,
1451 e4b->bd_info->bb_free);
1444 /* Mark the block group as corrupt. */ 1452 /* Mark the block group as corrupt. */
1445 set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, 1453 set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT,
1446 &e4b->bd_info->bb_state); 1454 &e4b->bd_info->bb_state);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index b9b9aabfb4d2..6df7bc611dbd 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1525,8 +1525,6 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
1525 arg = JBD2_DEFAULT_MAX_COMMIT_AGE; 1525 arg = JBD2_DEFAULT_MAX_COMMIT_AGE;
1526 sbi->s_commit_interval = HZ * arg; 1526 sbi->s_commit_interval = HZ * arg;
1527 } else if (token == Opt_max_batch_time) { 1527 } else if (token == Opt_max_batch_time) {
1528 if (arg == 0)
1529 arg = EXT4_DEF_MAX_BATCH_TIME;
1530 sbi->s_max_batch_time = arg; 1528 sbi->s_max_batch_time = arg;
1531 } else if (token == Opt_min_batch_time) { 1529 } else if (token == Opt_min_batch_time) {
1532 sbi->s_min_batch_time = arg; 1530 sbi->s_min_batch_time = arg;
@@ -2809,10 +2807,11 @@ static void print_daily_error_info(unsigned long arg)
2809 es = sbi->s_es; 2807 es = sbi->s_es;
2810 2808
2811 if (es->s_error_count) 2809 if (es->s_error_count)
2812 ext4_msg(sb, KERN_NOTICE, "error count: %u", 2810 /* fsck newer than v1.41.13 is needed to clean this condition. */
2811 ext4_msg(sb, KERN_NOTICE, "error count since last fsck: %u",
2813 le32_to_cpu(es->s_error_count)); 2812 le32_to_cpu(es->s_error_count));
2814 if (es->s_first_error_time) { 2813 if (es->s_first_error_time) {
2815 printk(KERN_NOTICE "EXT4-fs (%s): initial error at %u: %.*s:%d", 2814 printk(KERN_NOTICE "EXT4-fs (%s): initial error at time %u: %.*s:%d",
2816 sb->s_id, le32_to_cpu(es->s_first_error_time), 2815 sb->s_id, le32_to_cpu(es->s_first_error_time),
2817 (int) sizeof(es->s_first_error_func), 2816 (int) sizeof(es->s_first_error_func),
2818 es->s_first_error_func, 2817 es->s_first_error_func,
@@ -2826,7 +2825,7 @@ static void print_daily_error_info(unsigned long arg)
2826 printk("\n"); 2825 printk("\n");
2827 } 2826 }
2828 if (es->s_last_error_time) { 2827 if (es->s_last_error_time) {
2829 printk(KERN_NOTICE "EXT4-fs (%s): last error at %u: %.*s:%d", 2828 printk(KERN_NOTICE "EXT4-fs (%s): last error at time %u: %.*s:%d",
2830 sb->s_id, le32_to_cpu(es->s_last_error_time), 2829 sb->s_id, le32_to_cpu(es->s_last_error_time),
2831 (int) sizeof(es->s_last_error_func), 2830 (int) sizeof(es->s_last_error_func),
2832 es->s_last_error_func, 2831 es->s_last_error_func,
@@ -3880,38 +3879,19 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3880 goto failed_mount2; 3879 goto failed_mount2;
3881 } 3880 }
3882 } 3881 }
3883
3884 /*
3885 * set up enough so that it can read an inode,
3886 * and create new inode for buddy allocator
3887 */
3888 sbi->s_gdb_count = db_count;
3889 if (!test_opt(sb, NOLOAD) &&
3890 EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL))
3891 sb->s_op = &ext4_sops;
3892 else
3893 sb->s_op = &ext4_nojournal_sops;
3894
3895 ext4_ext_init(sb);
3896 err = ext4_mb_init(sb);
3897 if (err) {
3898 ext4_msg(sb, KERN_ERR, "failed to initialize mballoc (%d)",
3899 err);
3900 goto failed_mount2;
3901 }
3902
3903 if (!ext4_check_descriptors(sb, &first_not_zeroed)) { 3882 if (!ext4_check_descriptors(sb, &first_not_zeroed)) {
3904 ext4_msg(sb, KERN_ERR, "group descriptors corrupted!"); 3883 ext4_msg(sb, KERN_ERR, "group descriptors corrupted!");
3905 goto failed_mount2a; 3884 goto failed_mount2;
3906 } 3885 }
3907 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) 3886 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
3908 if (!ext4_fill_flex_info(sb)) { 3887 if (!ext4_fill_flex_info(sb)) {
3909 ext4_msg(sb, KERN_ERR, 3888 ext4_msg(sb, KERN_ERR,
3910 "unable to initialize " 3889 "unable to initialize "
3911 "flex_bg meta info!"); 3890 "flex_bg meta info!");
3912 goto failed_mount2a; 3891 goto failed_mount2;
3913 } 3892 }
3914 3893
3894 sbi->s_gdb_count = db_count;
3915 get_random_bytes(&sbi->s_next_generation, sizeof(u32)); 3895 get_random_bytes(&sbi->s_next_generation, sizeof(u32));
3916 spin_lock_init(&sbi->s_next_gen_lock); 3896 spin_lock_init(&sbi->s_next_gen_lock);
3917 3897
@@ -3946,6 +3926,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3946 sbi->s_stripe = ext4_get_stripe_size(sbi); 3926 sbi->s_stripe = ext4_get_stripe_size(sbi);
3947 sbi->s_extent_max_zeroout_kb = 32; 3927 sbi->s_extent_max_zeroout_kb = 32;
3948 3928
3929 /*
3930 * set up enough so that it can read an inode
3931 */
3932 if (!test_opt(sb, NOLOAD) &&
3933 EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL))
3934 sb->s_op = &ext4_sops;
3935 else
3936 sb->s_op = &ext4_nojournal_sops;
3949 sb->s_export_op = &ext4_export_ops; 3937 sb->s_export_op = &ext4_export_ops;
3950 sb->s_xattr = ext4_xattr_handlers; 3938 sb->s_xattr = ext4_xattr_handlers;
3951#ifdef CONFIG_QUOTA 3939#ifdef CONFIG_QUOTA
@@ -4135,13 +4123,21 @@ no_journal:
4135 if (err) { 4123 if (err) {
4136 ext4_msg(sb, KERN_ERR, "failed to reserve %llu clusters for " 4124 ext4_msg(sb, KERN_ERR, "failed to reserve %llu clusters for "
4137 "reserved pool", ext4_calculate_resv_clusters(sb)); 4125 "reserved pool", ext4_calculate_resv_clusters(sb));
4138 goto failed_mount5; 4126 goto failed_mount4a;
4139 } 4127 }
4140 4128
4141 err = ext4_setup_system_zone(sb); 4129 err = ext4_setup_system_zone(sb);
4142 if (err) { 4130 if (err) {
4143 ext4_msg(sb, KERN_ERR, "failed to initialize system " 4131 ext4_msg(sb, KERN_ERR, "failed to initialize system "
4144 "zone (%d)", err); 4132 "zone (%d)", err);
4133 goto failed_mount4a;
4134 }
4135
4136 ext4_ext_init(sb);
4137 err = ext4_mb_init(sb);
4138 if (err) {
4139 ext4_msg(sb, KERN_ERR, "failed to initialize mballoc (%d)",
4140 err);
4145 goto failed_mount5; 4141 goto failed_mount5;
4146 } 4142 }
4147 4143
@@ -4218,8 +4214,11 @@ failed_mount8:
4218failed_mount7: 4214failed_mount7:
4219 ext4_unregister_li_request(sb); 4215 ext4_unregister_li_request(sb);
4220failed_mount6: 4216failed_mount6:
4221 ext4_release_system_zone(sb); 4217 ext4_mb_release(sb);
4222failed_mount5: 4218failed_mount5:
4219 ext4_ext_release(sb);
4220 ext4_release_system_zone(sb);
4221failed_mount4a:
4223 dput(sb->s_root); 4222 dput(sb->s_root);
4224 sb->s_root = NULL; 4223 sb->s_root = NULL;
4225failed_mount4: 4224failed_mount4:
@@ -4243,14 +4242,11 @@ failed_mount3:
4243 percpu_counter_destroy(&sbi->s_extent_cache_cnt); 4242 percpu_counter_destroy(&sbi->s_extent_cache_cnt);
4244 if (sbi->s_mmp_tsk) 4243 if (sbi->s_mmp_tsk)
4245 kthread_stop(sbi->s_mmp_tsk); 4244 kthread_stop(sbi->s_mmp_tsk);
4246failed_mount2a:
4247 ext4_mb_release(sb);
4248failed_mount2: 4245failed_mount2:
4249 for (i = 0; i < db_count; i++) 4246 for (i = 0; i < db_count; i++)
4250 brelse(sbi->s_group_desc[i]); 4247 brelse(sbi->s_group_desc[i]);
4251 ext4_kvfree(sbi->s_group_desc); 4248 ext4_kvfree(sbi->s_group_desc);
4252failed_mount: 4249failed_mount:
4253 ext4_ext_release(sb);
4254 if (sbi->s_chksum_driver) 4250 if (sbi->s_chksum_driver)
4255 crypto_free_shash(sbi->s_chksum_driver); 4251 crypto_free_shash(sbi->s_chksum_driver);
4256 if (sbi->s_proc) { 4252 if (sbi->s_proc) {
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 0924521306b4..f8cf619edb5f 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -608,8 +608,8 @@ static int __allocate_data_block(struct dnode_of_data *dn)
608 * b. do not use extent cache for better performance 608 * b. do not use extent cache for better performance
609 * c. give the block addresses to blockdev 609 * c. give the block addresses to blockdev
610 */ 610 */
611static int get_data_block(struct inode *inode, sector_t iblock, 611static int __get_data_block(struct inode *inode, sector_t iblock,
612 struct buffer_head *bh_result, int create) 612 struct buffer_head *bh_result, int create, bool fiemap)
613{ 613{
614 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 614 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
615 unsigned int blkbits = inode->i_sb->s_blocksize_bits; 615 unsigned int blkbits = inode->i_sb->s_blocksize_bits;
@@ -637,7 +637,7 @@ static int get_data_block(struct inode *inode, sector_t iblock,
637 err = 0; 637 err = 0;
638 goto unlock_out; 638 goto unlock_out;
639 } 639 }
640 if (dn.data_blkaddr == NEW_ADDR) 640 if (dn.data_blkaddr == NEW_ADDR && !fiemap)
641 goto put_out; 641 goto put_out;
642 642
643 if (dn.data_blkaddr != NULL_ADDR) { 643 if (dn.data_blkaddr != NULL_ADDR) {
@@ -671,7 +671,7 @@ get_next:
671 err = 0; 671 err = 0;
672 goto unlock_out; 672 goto unlock_out;
673 } 673 }
674 if (dn.data_blkaddr == NEW_ADDR) 674 if (dn.data_blkaddr == NEW_ADDR && !fiemap)
675 goto put_out; 675 goto put_out;
676 676
677 end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode)); 677 end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
@@ -708,10 +708,23 @@ out:
708 return err; 708 return err;
709} 709}
710 710
711static int get_data_block(struct inode *inode, sector_t iblock,
712 struct buffer_head *bh_result, int create)
713{
714 return __get_data_block(inode, iblock, bh_result, create, false);
715}
716
717static int get_data_block_fiemap(struct inode *inode, sector_t iblock,
718 struct buffer_head *bh_result, int create)
719{
720 return __get_data_block(inode, iblock, bh_result, create, true);
721}
722
711int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 723int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
712 u64 start, u64 len) 724 u64 start, u64 len)
713{ 725{
714 return generic_block_fiemap(inode, fieinfo, start, len, get_data_block); 726 return generic_block_fiemap(inode, fieinfo,
727 start, len, get_data_block_fiemap);
715} 728}
716 729
717static int f2fs_read_data_page(struct file *file, struct page *page) 730static int f2fs_read_data_page(struct file *file, struct page *page)
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 966acb039e3b..a4addd72ebbd 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -376,11 +376,11 @@ static struct page *init_inode_metadata(struct inode *inode,
376 376
377put_error: 377put_error:
378 f2fs_put_page(page, 1); 378 f2fs_put_page(page, 1);
379error:
379 /* once the failed inode becomes a bad inode, i_mode is S_IFREG */ 380 /* once the failed inode becomes a bad inode, i_mode is S_IFREG */
380 truncate_inode_pages(&inode->i_data, 0); 381 truncate_inode_pages(&inode->i_data, 0);
381 truncate_blocks(inode, 0); 382 truncate_blocks(inode, 0);
382 remove_dirty_dir_inode(inode); 383 remove_dirty_dir_inode(inode);
383error:
384 remove_inode_page(inode); 384 remove_inode_page(inode);
385 return ERR_PTR(err); 385 return ERR_PTR(err);
386} 386}
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index e51c732b0dd9..58df97e174d0 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -342,9 +342,6 @@ struct f2fs_sm_info {
342 struct dirty_seglist_info *dirty_info; /* dirty segment information */ 342 struct dirty_seglist_info *dirty_info; /* dirty segment information */
343 struct curseg_info *curseg_array; /* active segment information */ 343 struct curseg_info *curseg_array; /* active segment information */
344 344
345 struct list_head wblist_head; /* list of under-writeback pages */
346 spinlock_t wblist_lock; /* lock for checkpoint */
347
348 block_t seg0_blkaddr; /* block address of 0'th segment */ 345 block_t seg0_blkaddr; /* block address of 0'th segment */
349 block_t main_blkaddr; /* start block address of main area */ 346 block_t main_blkaddr; /* start block address of main area */
350 block_t ssa_blkaddr; /* start block address of SSA area */ 347 block_t ssa_blkaddr; /* start block address of SSA area */
@@ -644,7 +641,8 @@ static inline void f2fs_unlock_all(struct f2fs_sb_info *sbi)
644 */ 641 */
645static inline int check_nid_range(struct f2fs_sb_info *sbi, nid_t nid) 642static inline int check_nid_range(struct f2fs_sb_info *sbi, nid_t nid)
646{ 643{
647 WARN_ON((nid >= NM_I(sbi)->max_nid)); 644 if (unlikely(nid < F2FS_ROOT_INO(sbi)))
645 return -EINVAL;
648 if (unlikely(nid >= NM_I(sbi)->max_nid)) 646 if (unlikely(nid >= NM_I(sbi)->max_nid))
649 return -EINVAL; 647 return -EINVAL;
650 return 0; 648 return 0;
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index c58e33075719..7d8b96275092 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -659,16 +659,19 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
659 off_start = offset & (PAGE_CACHE_SIZE - 1); 659 off_start = offset & (PAGE_CACHE_SIZE - 1);
660 off_end = (offset + len) & (PAGE_CACHE_SIZE - 1); 660 off_end = (offset + len) & (PAGE_CACHE_SIZE - 1);
661 661
662 f2fs_lock_op(sbi);
663
662 for (index = pg_start; index <= pg_end; index++) { 664 for (index = pg_start; index <= pg_end; index++) {
663 struct dnode_of_data dn; 665 struct dnode_of_data dn;
664 666
665 f2fs_lock_op(sbi); 667 if (index == pg_end && !off_end)
668 goto noalloc;
669
666 set_new_dnode(&dn, inode, NULL, NULL, 0); 670 set_new_dnode(&dn, inode, NULL, NULL, 0);
667 ret = f2fs_reserve_block(&dn, index); 671 ret = f2fs_reserve_block(&dn, index);
668 f2fs_unlock_op(sbi);
669 if (ret) 672 if (ret)
670 break; 673 break;
671 674noalloc:
672 if (pg_start == pg_end) 675 if (pg_start == pg_end)
673 new_size = offset + len; 676 new_size = offset + len;
674 else if (index == pg_start && off_start) 677 else if (index == pg_start && off_start)
@@ -683,8 +686,9 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
683 i_size_read(inode) < new_size) { 686 i_size_read(inode) < new_size) {
684 i_size_write(inode, new_size); 687 i_size_write(inode, new_size);
685 mark_inode_dirty(inode); 688 mark_inode_dirty(inode);
686 f2fs_write_inode(inode, NULL); 689 update_inode_page(inode);
687 } 690 }
691 f2fs_unlock_op(sbi);
688 692
689 return ret; 693 return ret;
690} 694}
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index adc622c6bdce..2cf6962f6cc8 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -78,6 +78,7 @@ static int do_read_inode(struct inode *inode)
78 if (check_nid_range(sbi, inode->i_ino)) { 78 if (check_nid_range(sbi, inode->i_ino)) {
79 f2fs_msg(inode->i_sb, KERN_ERR, "bad inode number: %lu", 79 f2fs_msg(inode->i_sb, KERN_ERR, "bad inode number: %lu",
80 (unsigned long) inode->i_ino); 80 (unsigned long) inode->i_ino);
81 WARN_ON(1);
81 return -EINVAL; 82 return -EINVAL;
82 } 83 }
83 84
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 9138c32aa698..a6bdddc33ce2 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -417,9 +417,6 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
417 } 417 }
418 418
419 f2fs_set_link(new_dir, new_entry, new_page, old_inode); 419 f2fs_set_link(new_dir, new_entry, new_page, old_inode);
420 down_write(&F2FS_I(old_inode)->i_sem);
421 F2FS_I(old_inode)->i_pino = new_dir->i_ino;
422 up_write(&F2FS_I(old_inode)->i_sem);
423 420
424 new_inode->i_ctime = CURRENT_TIME; 421 new_inode->i_ctime = CURRENT_TIME;
425 down_write(&F2FS_I(new_inode)->i_sem); 422 down_write(&F2FS_I(new_inode)->i_sem);
@@ -448,6 +445,10 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
448 } 445 }
449 } 446 }
450 447
448 down_write(&F2FS_I(old_inode)->i_sem);
449 file_lost_pino(old_inode);
450 up_write(&F2FS_I(old_inode)->i_sem);
451
451 old_inode->i_ctime = CURRENT_TIME; 452 old_inode->i_ctime = CURRENT_TIME;
452 mark_inode_dirty(old_inode); 453 mark_inode_dirty(old_inode);
453 454
@@ -457,9 +458,6 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
457 if (old_dir != new_dir) { 458 if (old_dir != new_dir) {
458 f2fs_set_link(old_inode, old_dir_entry, 459 f2fs_set_link(old_inode, old_dir_entry,
459 old_dir_page, new_dir); 460 old_dir_page, new_dir);
460 down_write(&F2FS_I(old_inode)->i_sem);
461 F2FS_I(old_inode)->i_pino = new_dir->i_ino;
462 up_write(&F2FS_I(old_inode)->i_sem);
463 update_inode_page(old_inode); 461 update_inode_page(old_inode);
464 } else { 462 } else {
465 kunmap(old_dir_page); 463 kunmap(old_dir_page);
@@ -474,7 +472,8 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
474 return 0; 472 return 0;
475 473
476put_out_dir: 474put_out_dir:
477 f2fs_put_page(new_page, 1); 475 kunmap(new_page);
476 f2fs_put_page(new_page, 0);
478out_dir: 477out_dir:
479 if (old_dir_entry) { 478 if (old_dir_entry) {
480 kunmap(old_dir_page); 479 kunmap(old_dir_page);
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 9dfb9a042fd2..4b697ccc9b0c 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -42,6 +42,8 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type)
42 mem_size = (nm_i->nat_cnt * sizeof(struct nat_entry)) >> 12; 42 mem_size = (nm_i->nat_cnt * sizeof(struct nat_entry)) >> 12;
43 res = mem_size < ((val.totalram * nm_i->ram_thresh / 100) >> 2); 43 res = mem_size < ((val.totalram * nm_i->ram_thresh / 100) >> 2);
44 } else if (type == DIRTY_DENTS) { 44 } else if (type == DIRTY_DENTS) {
45 if (sbi->sb->s_bdi->dirty_exceeded)
46 return false;
45 mem_size = get_pages(sbi, F2FS_DIRTY_DENTS); 47 mem_size = get_pages(sbi, F2FS_DIRTY_DENTS);
46 res = mem_size < ((val.totalram * nm_i->ram_thresh / 100) >> 1); 48 res = mem_size < ((val.totalram * nm_i->ram_thresh / 100) >> 1);
47 } 49 }
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index f25f0e07e26f..d04613df710a 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -272,14 +272,15 @@ int create_flush_cmd_control(struct f2fs_sb_info *sbi)
272 return -ENOMEM; 272 return -ENOMEM;
273 spin_lock_init(&fcc->issue_lock); 273 spin_lock_init(&fcc->issue_lock);
274 init_waitqueue_head(&fcc->flush_wait_queue); 274 init_waitqueue_head(&fcc->flush_wait_queue);
275 sbi->sm_info->cmd_control_info = fcc;
275 fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi, 276 fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi,
276 "f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev)); 277 "f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
277 if (IS_ERR(fcc->f2fs_issue_flush)) { 278 if (IS_ERR(fcc->f2fs_issue_flush)) {
278 err = PTR_ERR(fcc->f2fs_issue_flush); 279 err = PTR_ERR(fcc->f2fs_issue_flush);
279 kfree(fcc); 280 kfree(fcc);
281 sbi->sm_info->cmd_control_info = NULL;
280 return err; 282 return err;
281 } 283 }
282 sbi->sm_info->cmd_control_info = fcc;
283 284
284 return err; 285 return err;
285} 286}
@@ -1885,8 +1886,6 @@ int build_segment_manager(struct f2fs_sb_info *sbi)
1885 1886
1886 /* init sm info */ 1887 /* init sm info */
1887 sbi->sm_info = sm_info; 1888 sbi->sm_info = sm_info;
1888 INIT_LIST_HEAD(&sm_info->wblist_head);
1889 spin_lock_init(&sm_info->wblist_lock);
1890 sm_info->seg0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr); 1889 sm_info->seg0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr);
1891 sm_info->main_blkaddr = le32_to_cpu(raw_super->main_blkaddr); 1890 sm_info->main_blkaddr = le32_to_cpu(raw_super->main_blkaddr);
1892 sm_info->segment_count = le32_to_cpu(raw_super->segment_count); 1891 sm_info->segment_count = le32_to_cpu(raw_super->segment_count);
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index b2b18637cb9e..8f96d9372ade 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -689,9 +689,7 @@ static struct inode *f2fs_nfs_get_inode(struct super_block *sb,
689 struct f2fs_sb_info *sbi = F2FS_SB(sb); 689 struct f2fs_sb_info *sbi = F2FS_SB(sb);
690 struct inode *inode; 690 struct inode *inode;
691 691
692 if (unlikely(ino < F2FS_ROOT_INO(sbi))) 692 if (check_nid_range(sbi, ino))
693 return ERR_PTR(-ESTALE);
694 if (unlikely(ino >= NM_I(sbi)->max_nid))
695 return ERR_PTR(-ESTALE); 693 return ERR_PTR(-ESTALE);
696 694
697 /* 695 /*
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 098f97bdcf1b..ca887314aba9 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -643,9 +643,8 @@ struct fuse_copy_state {
643 unsigned long seglen; 643 unsigned long seglen;
644 unsigned long addr; 644 unsigned long addr;
645 struct page *pg; 645 struct page *pg;
646 void *mapaddr;
647 void *buf;
648 unsigned len; 646 unsigned len;
647 unsigned offset;
649 unsigned move_pages:1; 648 unsigned move_pages:1;
650}; 649};
651 650
@@ -666,23 +665,17 @@ static void fuse_copy_finish(struct fuse_copy_state *cs)
666 if (cs->currbuf) { 665 if (cs->currbuf) {
667 struct pipe_buffer *buf = cs->currbuf; 666 struct pipe_buffer *buf = cs->currbuf;
668 667
669 if (!cs->write) { 668 if (cs->write)
670 kunmap_atomic(cs->mapaddr);
671 } else {
672 kunmap_atomic(cs->mapaddr);
673 buf->len = PAGE_SIZE - cs->len; 669 buf->len = PAGE_SIZE - cs->len;
674 }
675 cs->currbuf = NULL; 670 cs->currbuf = NULL;
676 cs->mapaddr = NULL; 671 } else if (cs->pg) {
677 } else if (cs->mapaddr) {
678 kunmap_atomic(cs->mapaddr);
679 if (cs->write) { 672 if (cs->write) {
680 flush_dcache_page(cs->pg); 673 flush_dcache_page(cs->pg);
681 set_page_dirty_lock(cs->pg); 674 set_page_dirty_lock(cs->pg);
682 } 675 }
683 put_page(cs->pg); 676 put_page(cs->pg);
684 cs->mapaddr = NULL;
685 } 677 }
678 cs->pg = NULL;
686} 679}
687 680
688/* 681/*
@@ -691,7 +684,7 @@ static void fuse_copy_finish(struct fuse_copy_state *cs)
691 */ 684 */
692static int fuse_copy_fill(struct fuse_copy_state *cs) 685static int fuse_copy_fill(struct fuse_copy_state *cs)
693{ 686{
694 unsigned long offset; 687 struct page *page;
695 int err; 688 int err;
696 689
697 unlock_request(cs->fc, cs->req); 690 unlock_request(cs->fc, cs->req);
@@ -706,14 +699,12 @@ static int fuse_copy_fill(struct fuse_copy_state *cs)
706 699
707 BUG_ON(!cs->nr_segs); 700 BUG_ON(!cs->nr_segs);
708 cs->currbuf = buf; 701 cs->currbuf = buf;
709 cs->mapaddr = kmap_atomic(buf->page); 702 cs->pg = buf->page;
703 cs->offset = buf->offset;
710 cs->len = buf->len; 704 cs->len = buf->len;
711 cs->buf = cs->mapaddr + buf->offset;
712 cs->pipebufs++; 705 cs->pipebufs++;
713 cs->nr_segs--; 706 cs->nr_segs--;
714 } else { 707 } else {
715 struct page *page;
716
717 if (cs->nr_segs == cs->pipe->buffers) 708 if (cs->nr_segs == cs->pipe->buffers)
718 return -EIO; 709 return -EIO;
719 710
@@ -726,8 +717,8 @@ static int fuse_copy_fill(struct fuse_copy_state *cs)
726 buf->len = 0; 717 buf->len = 0;
727 718
728 cs->currbuf = buf; 719 cs->currbuf = buf;
729 cs->mapaddr = kmap_atomic(page); 720 cs->pg = page;
730 cs->buf = cs->mapaddr; 721 cs->offset = 0;
731 cs->len = PAGE_SIZE; 722 cs->len = PAGE_SIZE;
732 cs->pipebufs++; 723 cs->pipebufs++;
733 cs->nr_segs++; 724 cs->nr_segs++;
@@ -740,14 +731,13 @@ static int fuse_copy_fill(struct fuse_copy_state *cs)
740 cs->iov++; 731 cs->iov++;
741 cs->nr_segs--; 732 cs->nr_segs--;
742 } 733 }
743 err = get_user_pages_fast(cs->addr, 1, cs->write, &cs->pg); 734 err = get_user_pages_fast(cs->addr, 1, cs->write, &page);
744 if (err < 0) 735 if (err < 0)
745 return err; 736 return err;
746 BUG_ON(err != 1); 737 BUG_ON(err != 1);
747 offset = cs->addr % PAGE_SIZE; 738 cs->pg = page;
748 cs->mapaddr = kmap_atomic(cs->pg); 739 cs->offset = cs->addr % PAGE_SIZE;
749 cs->buf = cs->mapaddr + offset; 740 cs->len = min(PAGE_SIZE - cs->offset, cs->seglen);
750 cs->len = min(PAGE_SIZE - offset, cs->seglen);
751 cs->seglen -= cs->len; 741 cs->seglen -= cs->len;
752 cs->addr += cs->len; 742 cs->addr += cs->len;
753 } 743 }
@@ -760,15 +750,20 @@ static int fuse_copy_do(struct fuse_copy_state *cs, void **val, unsigned *size)
760{ 750{
761 unsigned ncpy = min(*size, cs->len); 751 unsigned ncpy = min(*size, cs->len);
762 if (val) { 752 if (val) {
753 void *pgaddr = kmap_atomic(cs->pg);
754 void *buf = pgaddr + cs->offset;
755
763 if (cs->write) 756 if (cs->write)
764 memcpy(cs->buf, *val, ncpy); 757 memcpy(buf, *val, ncpy);
765 else 758 else
766 memcpy(*val, cs->buf, ncpy); 759 memcpy(*val, buf, ncpy);
760
761 kunmap_atomic(pgaddr);
767 *val += ncpy; 762 *val += ncpy;
768 } 763 }
769 *size -= ncpy; 764 *size -= ncpy;
770 cs->len -= ncpy; 765 cs->len -= ncpy;
771 cs->buf += ncpy; 766 cs->offset += ncpy;
772 return ncpy; 767 return ncpy;
773} 768}
774 769
@@ -874,8 +869,8 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
874out_fallback_unlock: 869out_fallback_unlock:
875 unlock_page(newpage); 870 unlock_page(newpage);
876out_fallback: 871out_fallback:
877 cs->mapaddr = kmap_atomic(buf->page); 872 cs->pg = buf->page;
878 cs->buf = cs->mapaddr + buf->offset; 873 cs->offset = buf->offset;
879 874
880 err = lock_request(cs->fc, cs->req); 875 err = lock_request(cs->fc, cs->req);
881 if (err) 876 if (err)
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 42198359fa1b..0c6048247a34 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -198,7 +198,8 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
198 inode = ACCESS_ONCE(entry->d_inode); 198 inode = ACCESS_ONCE(entry->d_inode);
199 if (inode && is_bad_inode(inode)) 199 if (inode && is_bad_inode(inode))
200 goto invalid; 200 goto invalid;
201 else if (fuse_dentry_time(entry) < get_jiffies_64()) { 201 else if (time_before64(fuse_dentry_time(entry), get_jiffies_64()) ||
202 (flags & LOOKUP_REVAL)) {
202 int err; 203 int err;
203 struct fuse_entry_out outarg; 204 struct fuse_entry_out outarg;
204 struct fuse_req *req; 205 struct fuse_req *req;
@@ -814,13 +815,6 @@ static int fuse_rename_common(struct inode *olddir, struct dentry *oldent,
814 return err; 815 return err;
815} 816}
816 817
817static int fuse_rename(struct inode *olddir, struct dentry *oldent,
818 struct inode *newdir, struct dentry *newent)
819{
820 return fuse_rename_common(olddir, oldent, newdir, newent, 0,
821 FUSE_RENAME, sizeof(struct fuse_rename_in));
822}
823
824static int fuse_rename2(struct inode *olddir, struct dentry *oldent, 818static int fuse_rename2(struct inode *olddir, struct dentry *oldent,
825 struct inode *newdir, struct dentry *newent, 819 struct inode *newdir, struct dentry *newent,
826 unsigned int flags) 820 unsigned int flags)
@@ -831,17 +825,30 @@ static int fuse_rename2(struct inode *olddir, struct dentry *oldent,
831 if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE)) 825 if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE))
832 return -EINVAL; 826 return -EINVAL;
833 827
834 if (fc->no_rename2 || fc->minor < 23) 828 if (flags) {
835 return -EINVAL; 829 if (fc->no_rename2 || fc->minor < 23)
830 return -EINVAL;
836 831
837 err = fuse_rename_common(olddir, oldent, newdir, newent, flags, 832 err = fuse_rename_common(olddir, oldent, newdir, newent, flags,
838 FUSE_RENAME2, sizeof(struct fuse_rename2_in)); 833 FUSE_RENAME2,
839 if (err == -ENOSYS) { 834 sizeof(struct fuse_rename2_in));
840 fc->no_rename2 = 1; 835 if (err == -ENOSYS) {
841 err = -EINVAL; 836 fc->no_rename2 = 1;
837 err = -EINVAL;
838 }
839 } else {
840 err = fuse_rename_common(olddir, oldent, newdir, newent, 0,
841 FUSE_RENAME,
842 sizeof(struct fuse_rename_in));
842 } 843 }
844
843 return err; 845 return err;
846}
844 847
848static int fuse_rename(struct inode *olddir, struct dentry *oldent,
849 struct inode *newdir, struct dentry *newent)
850{
851 return fuse_rename2(olddir, oldent, newdir, newent, 0);
845} 852}
846 853
847static int fuse_link(struct dentry *entry, struct inode *newdir, 854static int fuse_link(struct dentry *entry, struct inode *newdir,
@@ -985,7 +992,7 @@ int fuse_update_attributes(struct inode *inode, struct kstat *stat,
985 int err; 992 int err;
986 bool r; 993 bool r;
987 994
988 if (fi->i_time < get_jiffies_64()) { 995 if (time_before64(fi->i_time, get_jiffies_64())) {
989 r = true; 996 r = true;
990 err = fuse_do_getattr(inode, stat, file); 997 err = fuse_do_getattr(inode, stat, file);
991 } else { 998 } else {
@@ -1171,7 +1178,7 @@ static int fuse_permission(struct inode *inode, int mask)
1171 ((mask & MAY_EXEC) && S_ISREG(inode->i_mode))) { 1178 ((mask & MAY_EXEC) && S_ISREG(inode->i_mode))) {
1172 struct fuse_inode *fi = get_fuse_inode(inode); 1179 struct fuse_inode *fi = get_fuse_inode(inode);
1173 1180
1174 if (fi->i_time < get_jiffies_64()) { 1181 if (time_before64(fi->i_time, get_jiffies_64())) {
1175 refreshed = true; 1182 refreshed = true;
1176 1183
1177 err = fuse_perm_getattr(inode, mask); 1184 err = fuse_perm_getattr(inode, mask);
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 6e16dad13e9b..40ac2628ddcf 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1687,7 +1687,7 @@ static int fuse_writepage_locked(struct page *page)
1687 error = -EIO; 1687 error = -EIO;
1688 req->ff = fuse_write_file_get(fc, fi); 1688 req->ff = fuse_write_file_get(fc, fi);
1689 if (!req->ff) 1689 if (!req->ff)
1690 goto err_free; 1690 goto err_nofile;
1691 1691
1692 fuse_write_fill(req, req->ff, page_offset(page), 0); 1692 fuse_write_fill(req, req->ff, page_offset(page), 0);
1693 1693
@@ -1715,6 +1715,8 @@ static int fuse_writepage_locked(struct page *page)
1715 1715
1716 return 0; 1716 return 0;
1717 1717
1718err_nofile:
1719 __free_page(tmp_page);
1718err_free: 1720err_free:
1719 fuse_request_free(req); 1721 fuse_request_free(req);
1720err: 1722err:
@@ -1955,8 +1957,8 @@ static int fuse_writepages(struct address_space *mapping,
1955 data.ff = NULL; 1957 data.ff = NULL;
1956 1958
1957 err = -ENOMEM; 1959 err = -ENOMEM;
1958 data.orig_pages = kzalloc(sizeof(struct page *) * 1960 data.orig_pages = kcalloc(FUSE_MAX_PAGES_PER_REQ,
1959 FUSE_MAX_PAGES_PER_REQ, 1961 sizeof(struct page *),
1960 GFP_NOFS); 1962 GFP_NOFS);
1961 if (!data.orig_pages) 1963 if (!data.orig_pages)
1962 goto out; 1964 goto out;
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 754dcf23de8a..8474028d7848 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -478,6 +478,17 @@ static const match_table_t tokens = {
478 {OPT_ERR, NULL} 478 {OPT_ERR, NULL}
479}; 479};
480 480
481static int fuse_match_uint(substring_t *s, unsigned int *res)
482{
483 int err = -ENOMEM;
484 char *buf = match_strdup(s);
485 if (buf) {
486 err = kstrtouint(buf, 10, res);
487 kfree(buf);
488 }
489 return err;
490}
491
481static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev) 492static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev)
482{ 493{
483 char *p; 494 char *p;
@@ -488,6 +499,7 @@ static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev)
488 while ((p = strsep(&opt, ",")) != NULL) { 499 while ((p = strsep(&opt, ",")) != NULL) {
489 int token; 500 int token;
490 int value; 501 int value;
502 unsigned uv;
491 substring_t args[MAX_OPT_ARGS]; 503 substring_t args[MAX_OPT_ARGS];
492 if (!*p) 504 if (!*p)
493 continue; 505 continue;
@@ -511,18 +523,18 @@ static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev)
511 break; 523 break;
512 524
513 case OPT_USER_ID: 525 case OPT_USER_ID:
514 if (match_int(&args[0], &value)) 526 if (fuse_match_uint(&args[0], &uv))
515 return 0; 527 return 0;
516 d->user_id = make_kuid(current_user_ns(), value); 528 d->user_id = make_kuid(current_user_ns(), uv);
517 if (!uid_valid(d->user_id)) 529 if (!uid_valid(d->user_id))
518 return 0; 530 return 0;
519 d->user_id_present = 1; 531 d->user_id_present = 1;
520 break; 532 break;
521 533
522 case OPT_GROUP_ID: 534 case OPT_GROUP_ID:
523 if (match_int(&args[0], &value)) 535 if (fuse_match_uint(&args[0], &uv))
524 return 0; 536 return 0;
525 d->group_id = make_kgid(current_user_ns(), value); 537 d->group_id = make_kgid(current_user_ns(), uv);
526 if (!gid_valid(d->group_id)) 538 if (!gid_valid(d->group_id))
527 return 0; 539 return 0;
528 d->group_id_present = 1; 540 d->group_id_present = 1;
@@ -1006,7 +1018,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
1006 1018
1007 sb->s_flags &= ~(MS_NOSEC | MS_I_VERSION); 1019 sb->s_flags &= ~(MS_NOSEC | MS_I_VERSION);
1008 1020
1009 if (!parse_fuse_opt((char *) data, &d, is_bdev)) 1021 if (!parse_fuse_opt(data, &d, is_bdev))
1010 goto err; 1022 goto err;
1011 1023
1012 if (is_bdev) { 1024 if (is_bdev) {
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 38cfcf5f6fce..6f0f590cc5a3 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -1588,9 +1588,12 @@ int jbd2_journal_stop(handle_t *handle)
1588 * to perform a synchronous write. We do this to detect the 1588 * to perform a synchronous write. We do this to detect the
1589 * case where a single process is doing a stream of sync 1589 * case where a single process is doing a stream of sync
1590 * writes. No point in waiting for joiners in that case. 1590 * writes. No point in waiting for joiners in that case.
1591 *
1592 * Setting max_batch_time to 0 disables this completely.
1591 */ 1593 */
1592 pid = current->pid; 1594 pid = current->pid;
1593 if (handle->h_sync && journal->j_last_sync_writer != pid) { 1595 if (handle->h_sync && journal->j_last_sync_writer != pid &&
1596 journal->j_max_batch_time) {
1594 u64 commit_time, trans_time; 1597 u64 commit_time, trans_time;
1595 1598
1596 journal->j_last_sync_writer = pid; 1599 journal->j_last_sync_writer = pid;
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index e3d37f607f97..d895b4b7b661 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -39,6 +39,19 @@ struct kernfs_open_node {
39 struct list_head files; /* goes through kernfs_open_file.list */ 39 struct list_head files; /* goes through kernfs_open_file.list */
40}; 40};
41 41
42/*
43 * kernfs_notify() may be called from any context and bounces notifications
44 * through a work item. To minimize space overhead in kernfs_node, the
45 * pending queue is implemented as a singly linked list of kernfs_nodes.
46 * The list is terminated with the self pointer so that whether a
47 * kernfs_node is on the list or not can be determined by testing the next
48 * pointer for NULL.
49 */
50#define KERNFS_NOTIFY_EOL ((void *)&kernfs_notify_list)
51
52static DEFINE_SPINLOCK(kernfs_notify_lock);
53static struct kernfs_node *kernfs_notify_list = KERNFS_NOTIFY_EOL;
54
42static struct kernfs_open_file *kernfs_of(struct file *file) 55static struct kernfs_open_file *kernfs_of(struct file *file)
43{ 56{
44 return ((struct seq_file *)file->private_data)->private; 57 return ((struct seq_file *)file->private_data)->private;
@@ -783,24 +796,25 @@ static unsigned int kernfs_fop_poll(struct file *filp, poll_table *wait)
783 return DEFAULT_POLLMASK|POLLERR|POLLPRI; 796 return DEFAULT_POLLMASK|POLLERR|POLLPRI;
784} 797}
785 798
786/** 799static void kernfs_notify_workfn(struct work_struct *work)
787 * kernfs_notify - notify a kernfs file
788 * @kn: file to notify
789 *
790 * Notify @kn such that poll(2) on @kn wakes up.
791 */
792void kernfs_notify(struct kernfs_node *kn)
793{ 800{
794 struct kernfs_root *root = kernfs_root(kn); 801 struct kernfs_node *kn;
795 struct kernfs_open_node *on; 802 struct kernfs_open_node *on;
796 struct kernfs_super_info *info; 803 struct kernfs_super_info *info;
797 unsigned long flags; 804repeat:
798 805 /* pop one off the notify_list */
799 if (WARN_ON(kernfs_type(kn) != KERNFS_FILE)) 806 spin_lock_irq(&kernfs_notify_lock);
807 kn = kernfs_notify_list;
808 if (kn == KERNFS_NOTIFY_EOL) {
809 spin_unlock_irq(&kernfs_notify_lock);
800 return; 810 return;
811 }
812 kernfs_notify_list = kn->attr.notify_next;
813 kn->attr.notify_next = NULL;
814 spin_unlock_irq(&kernfs_notify_lock);
801 815
802 /* kick poll */ 816 /* kick poll */
803 spin_lock_irqsave(&kernfs_open_node_lock, flags); 817 spin_lock_irq(&kernfs_open_node_lock);
804 818
805 on = kn->attr.open; 819 on = kn->attr.open;
806 if (on) { 820 if (on) {
@@ -808,12 +822,12 @@ void kernfs_notify(struct kernfs_node *kn)
808 wake_up_interruptible(&on->poll); 822 wake_up_interruptible(&on->poll);
809 } 823 }
810 824
811 spin_unlock_irqrestore(&kernfs_open_node_lock, flags); 825 spin_unlock_irq(&kernfs_open_node_lock);
812 826
813 /* kick fsnotify */ 827 /* kick fsnotify */
814 mutex_lock(&kernfs_mutex); 828 mutex_lock(&kernfs_mutex);
815 829
816 list_for_each_entry(info, &root->supers, node) { 830 list_for_each_entry(info, &kernfs_root(kn)->supers, node) {
817 struct inode *inode; 831 struct inode *inode;
818 struct dentry *dentry; 832 struct dentry *dentry;
819 833
@@ -833,6 +847,33 @@ void kernfs_notify(struct kernfs_node *kn)
833 } 847 }
834 848
835 mutex_unlock(&kernfs_mutex); 849 mutex_unlock(&kernfs_mutex);
850 kernfs_put(kn);
851 goto repeat;
852}
853
854/**
855 * kernfs_notify - notify a kernfs file
856 * @kn: file to notify
857 *
858 * Notify @kn such that poll(2) on @kn wakes up. Maybe be called from any
859 * context.
860 */
861void kernfs_notify(struct kernfs_node *kn)
862{
863 static DECLARE_WORK(kernfs_notify_work, kernfs_notify_workfn);
864 unsigned long flags;
865
866 if (WARN_ON(kernfs_type(kn) != KERNFS_FILE))
867 return;
868
869 spin_lock_irqsave(&kernfs_notify_lock, flags);
870 if (!kn->attr.notify_next) {
871 kernfs_get(kn);
872 kn->attr.notify_next = kernfs_notify_list;
873 kernfs_notify_list = kn;
874 schedule_work(&kernfs_notify_work);
875 }
876 spin_unlock_irqrestore(&kernfs_notify_lock, flags);
836} 877}
837EXPORT_SYMBOL_GPL(kernfs_notify); 878EXPORT_SYMBOL_GPL(kernfs_notify);
838 879
diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c
index d171b98a6cdd..f973ae9b05f1 100644
--- a/fs/kernfs/mount.c
+++ b/fs/kernfs/mount.c
@@ -211,6 +211,36 @@ void kernfs_kill_sb(struct super_block *sb)
211 kernfs_put(root_kn); 211 kernfs_put(root_kn);
212} 212}
213 213
214/**
215 * kernfs_pin_sb: try to pin the superblock associated with a kernfs_root
216 * @kernfs_root: the kernfs_root in question
217 * @ns: the namespace tag
218 *
219 * Pin the superblock so the superblock won't be destroyed in subsequent
220 * operations. This can be used to block ->kill_sb() which may be useful
221 * for kernfs users which dynamically manage superblocks.
222 *
223 * Returns NULL if there's no superblock associated to this kernfs_root, or
224 * -EINVAL if the superblock is being freed.
225 */
226struct super_block *kernfs_pin_sb(struct kernfs_root *root, const void *ns)
227{
228 struct kernfs_super_info *info;
229 struct super_block *sb = NULL;
230
231 mutex_lock(&kernfs_mutex);
232 list_for_each_entry(info, &root->supers, node) {
233 if (info->ns == ns) {
234 sb = info->sb;
235 if (!atomic_inc_not_zero(&info->sb->s_active))
236 sb = ERR_PTR(-EINVAL);
237 break;
238 }
239 }
240 mutex_unlock(&kernfs_mutex);
241 return sb;
242}
243
214void __init kernfs_init(void) 244void __init kernfs_init(void)
215{ 245{
216 kernfs_node_cache = kmem_cache_create("kernfs_node_cache", 246 kernfs_node_cache = kmem_cache_create("kernfs_node_cache",
diff --git a/fs/locks.c b/fs/locks.c
index da57c9b7e844..717fbc404e6b 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -431,7 +431,7 @@ static int lease_init(struct file *filp, long type, struct file_lock *fl)
431 if (assign_type(fl, type) != 0) 431 if (assign_type(fl, type) != 0)
432 return -EINVAL; 432 return -EINVAL;
433 433
434 fl->fl_owner = (fl_owner_t)filp; 434 fl->fl_owner = (fl_owner_t)current->files;
435 fl->fl_pid = current->tgid; 435 fl->fl_pid = current->tgid;
436 436
437 fl->fl_file = filp; 437 fl->fl_file = filp;
diff --git a/fs/mbcache.c b/fs/mbcache.c
index bf166e388f0d..187477ded6b3 100644
--- a/fs/mbcache.c
+++ b/fs/mbcache.c
@@ -73,6 +73,7 @@
73#include <linux/mbcache.h> 73#include <linux/mbcache.h>
74#include <linux/init.h> 74#include <linux/init.h>
75#include <linux/blockgroup_lock.h> 75#include <linux/blockgroup_lock.h>
76#include <linux/log2.h>
76 77
77#ifdef MB_CACHE_DEBUG 78#ifdef MB_CACHE_DEBUG
78# define mb_debug(f...) do { \ 79# define mb_debug(f...) do { \
@@ -93,7 +94,7 @@
93 94
94#define MB_CACHE_WRITER ((unsigned short)~0U >> 1) 95#define MB_CACHE_WRITER ((unsigned short)~0U >> 1)
95 96
96#define MB_CACHE_ENTRY_LOCK_BITS __builtin_log2(NR_BG_LOCKS) 97#define MB_CACHE_ENTRY_LOCK_BITS ilog2(NR_BG_LOCKS)
97#define MB_CACHE_ENTRY_LOCK_INDEX(ce) \ 98#define MB_CACHE_ENTRY_LOCK_INDEX(ce) \
98 (hash_long((unsigned long)ce, MB_CACHE_ENTRY_LOCK_BITS)) 99 (hash_long((unsigned long)ce, MB_CACHE_ENTRY_LOCK_BITS))
99 100
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index c496f8a74639..9927913c97c2 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -147,6 +147,17 @@ int nfs_sync_mapping(struct address_space *mapping)
147 return ret; 147 return ret;
148} 148}
149 149
150static void nfs_set_cache_invalid(struct inode *inode, unsigned long flags)
151{
152 struct nfs_inode *nfsi = NFS_I(inode);
153
154 if (inode->i_mapping->nrpages == 0)
155 flags &= ~NFS_INO_INVALID_DATA;
156 nfsi->cache_validity |= flags;
157 if (flags & NFS_INO_INVALID_DATA)
158 nfs_fscache_invalidate(inode);
159}
160
150/* 161/*
151 * Invalidate the local caches 162 * Invalidate the local caches
152 */ 163 */
@@ -162,17 +173,16 @@ static void nfs_zap_caches_locked(struct inode *inode)
162 173
163 memset(NFS_I(inode)->cookieverf, 0, sizeof(NFS_I(inode)->cookieverf)); 174 memset(NFS_I(inode)->cookieverf, 0, sizeof(NFS_I(inode)->cookieverf));
164 if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) { 175 if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) {
165 nfs_fscache_invalidate(inode); 176 nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR
166 nfsi->cache_validity |= NFS_INO_INVALID_ATTR
167 | NFS_INO_INVALID_DATA 177 | NFS_INO_INVALID_DATA
168 | NFS_INO_INVALID_ACCESS 178 | NFS_INO_INVALID_ACCESS
169 | NFS_INO_INVALID_ACL 179 | NFS_INO_INVALID_ACL
170 | NFS_INO_REVAL_PAGECACHE; 180 | NFS_INO_REVAL_PAGECACHE);
171 } else 181 } else
172 nfsi->cache_validity |= NFS_INO_INVALID_ATTR 182 nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR
173 | NFS_INO_INVALID_ACCESS 183 | NFS_INO_INVALID_ACCESS
174 | NFS_INO_INVALID_ACL 184 | NFS_INO_INVALID_ACL
175 | NFS_INO_REVAL_PAGECACHE; 185 | NFS_INO_REVAL_PAGECACHE);
176 nfs_zap_label_cache_locked(nfsi); 186 nfs_zap_label_cache_locked(nfsi);
177} 187}
178 188
@@ -187,8 +197,7 @@ void nfs_zap_mapping(struct inode *inode, struct address_space *mapping)
187{ 197{
188 if (mapping->nrpages != 0) { 198 if (mapping->nrpages != 0) {
189 spin_lock(&inode->i_lock); 199 spin_lock(&inode->i_lock);
190 NFS_I(inode)->cache_validity |= NFS_INO_INVALID_DATA; 200 nfs_set_cache_invalid(inode, NFS_INO_INVALID_DATA);
191 nfs_fscache_invalidate(inode);
192 spin_unlock(&inode->i_lock); 201 spin_unlock(&inode->i_lock);
193 } 202 }
194} 203}
@@ -209,7 +218,7 @@ EXPORT_SYMBOL_GPL(nfs_zap_acl_cache);
209void nfs_invalidate_atime(struct inode *inode) 218void nfs_invalidate_atime(struct inode *inode)
210{ 219{
211 spin_lock(&inode->i_lock); 220 spin_lock(&inode->i_lock);
212 NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME; 221 nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATIME);
213 spin_unlock(&inode->i_lock); 222 spin_unlock(&inode->i_lock);
214} 223}
215EXPORT_SYMBOL_GPL(nfs_invalidate_atime); 224EXPORT_SYMBOL_GPL(nfs_invalidate_atime);
@@ -369,7 +378,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st
369 inode->i_mode = fattr->mode; 378 inode->i_mode = fattr->mode;
370 if ((fattr->valid & NFS_ATTR_FATTR_MODE) == 0 379 if ((fattr->valid & NFS_ATTR_FATTR_MODE) == 0
371 && nfs_server_capable(inode, NFS_CAP_MODE)) 380 && nfs_server_capable(inode, NFS_CAP_MODE))
372 nfsi->cache_validity |= NFS_INO_INVALID_ATTR; 381 nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR);
373 /* Why so? Because we want revalidate for devices/FIFOs, and 382 /* Why so? Because we want revalidate for devices/FIFOs, and
374 * that's precisely what we have in nfs_file_inode_operations. 383 * that's precisely what we have in nfs_file_inode_operations.
375 */ 384 */
@@ -415,36 +424,36 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st
415 if (fattr->valid & NFS_ATTR_FATTR_ATIME) 424 if (fattr->valid & NFS_ATTR_FATTR_ATIME)
416 inode->i_atime = fattr->atime; 425 inode->i_atime = fattr->atime;
417 else if (nfs_server_capable(inode, NFS_CAP_ATIME)) 426 else if (nfs_server_capable(inode, NFS_CAP_ATIME))
418 nfsi->cache_validity |= NFS_INO_INVALID_ATTR; 427 nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR);
419 if (fattr->valid & NFS_ATTR_FATTR_MTIME) 428 if (fattr->valid & NFS_ATTR_FATTR_MTIME)
420 inode->i_mtime = fattr->mtime; 429 inode->i_mtime = fattr->mtime;
421 else if (nfs_server_capable(inode, NFS_CAP_MTIME)) 430 else if (nfs_server_capable(inode, NFS_CAP_MTIME))
422 nfsi->cache_validity |= NFS_INO_INVALID_ATTR; 431 nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR);
423 if (fattr->valid & NFS_ATTR_FATTR_CTIME) 432 if (fattr->valid & NFS_ATTR_FATTR_CTIME)
424 inode->i_ctime = fattr->ctime; 433 inode->i_ctime = fattr->ctime;
425 else if (nfs_server_capable(inode, NFS_CAP_CTIME)) 434 else if (nfs_server_capable(inode, NFS_CAP_CTIME))
426 nfsi->cache_validity |= NFS_INO_INVALID_ATTR; 435 nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR);
427 if (fattr->valid & NFS_ATTR_FATTR_CHANGE) 436 if (fattr->valid & NFS_ATTR_FATTR_CHANGE)
428 inode->i_version = fattr->change_attr; 437 inode->i_version = fattr->change_attr;
429 else if (nfs_server_capable(inode, NFS_CAP_CHANGE_ATTR)) 438 else if (nfs_server_capable(inode, NFS_CAP_CHANGE_ATTR))
430 nfsi->cache_validity |= NFS_INO_INVALID_ATTR; 439 nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR);
431 if (fattr->valid & NFS_ATTR_FATTR_SIZE) 440 if (fattr->valid & NFS_ATTR_FATTR_SIZE)
432 inode->i_size = nfs_size_to_loff_t(fattr->size); 441 inode->i_size = nfs_size_to_loff_t(fattr->size);
433 else 442 else
434 nfsi->cache_validity |= NFS_INO_INVALID_ATTR 443 nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR
435 | NFS_INO_REVAL_PAGECACHE; 444 | NFS_INO_REVAL_PAGECACHE);
436 if (fattr->valid & NFS_ATTR_FATTR_NLINK) 445 if (fattr->valid & NFS_ATTR_FATTR_NLINK)
437 set_nlink(inode, fattr->nlink); 446 set_nlink(inode, fattr->nlink);
438 else if (nfs_server_capable(inode, NFS_CAP_NLINK)) 447 else if (nfs_server_capable(inode, NFS_CAP_NLINK))
439 nfsi->cache_validity |= NFS_INO_INVALID_ATTR; 448 nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR);
440 if (fattr->valid & NFS_ATTR_FATTR_OWNER) 449 if (fattr->valid & NFS_ATTR_FATTR_OWNER)
441 inode->i_uid = fattr->uid; 450 inode->i_uid = fattr->uid;
442 else if (nfs_server_capable(inode, NFS_CAP_OWNER)) 451 else if (nfs_server_capable(inode, NFS_CAP_OWNER))
443 nfsi->cache_validity |= NFS_INO_INVALID_ATTR; 452 nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR);
444 if (fattr->valid & NFS_ATTR_FATTR_GROUP) 453 if (fattr->valid & NFS_ATTR_FATTR_GROUP)
445 inode->i_gid = fattr->gid; 454 inode->i_gid = fattr->gid;
446 else if (nfs_server_capable(inode, NFS_CAP_OWNER_GROUP)) 455 else if (nfs_server_capable(inode, NFS_CAP_OWNER_GROUP))
447 nfsi->cache_validity |= NFS_INO_INVALID_ATTR; 456 nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR);
448 if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED) 457 if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED)
449 inode->i_blocks = fattr->du.nfs2.blocks; 458 inode->i_blocks = fattr->du.nfs2.blocks;
450 if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) { 459 if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) {
@@ -550,6 +559,9 @@ static int nfs_vmtruncate(struct inode * inode, loff_t offset)
550 559
551 spin_lock(&inode->i_lock); 560 spin_lock(&inode->i_lock);
552 i_size_write(inode, offset); 561 i_size_write(inode, offset);
562 /* Optimisation */
563 if (offset == 0)
564 NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_DATA;
553 spin_unlock(&inode->i_lock); 565 spin_unlock(&inode->i_lock);
554 566
555 truncate_pagecache(inode, offset); 567 truncate_pagecache(inode, offset);
@@ -578,7 +590,8 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr)
578 inode->i_uid = attr->ia_uid; 590 inode->i_uid = attr->ia_uid;
579 if ((attr->ia_valid & ATTR_GID) != 0) 591 if ((attr->ia_valid & ATTR_GID) != 0)
580 inode->i_gid = attr->ia_gid; 592 inode->i_gid = attr->ia_gid;
581 NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; 593 nfs_set_cache_invalid(inode, NFS_INO_INVALID_ACCESS
594 | NFS_INO_INVALID_ACL);
582 spin_unlock(&inode->i_lock); 595 spin_unlock(&inode->i_lock);
583 } 596 }
584 if ((attr->ia_valid & ATTR_SIZE) != 0) { 597 if ((attr->ia_valid & ATTR_SIZE) != 0) {
@@ -1101,7 +1114,7 @@ static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr
1101 && inode->i_version == fattr->pre_change_attr) { 1114 && inode->i_version == fattr->pre_change_attr) {
1102 inode->i_version = fattr->change_attr; 1115 inode->i_version = fattr->change_attr;
1103 if (S_ISDIR(inode->i_mode)) 1116 if (S_ISDIR(inode->i_mode))
1104 nfsi->cache_validity |= NFS_INO_INVALID_DATA; 1117 nfs_set_cache_invalid(inode, NFS_INO_INVALID_DATA);
1105 ret |= NFS_INO_INVALID_ATTR; 1118 ret |= NFS_INO_INVALID_ATTR;
1106 } 1119 }
1107 /* If we have atomic WCC data, we may update some attributes */ 1120 /* If we have atomic WCC data, we may update some attributes */
@@ -1117,7 +1130,7 @@ static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr
1117 && timespec_equal(&inode->i_mtime, &fattr->pre_mtime)) { 1130 && timespec_equal(&inode->i_mtime, &fattr->pre_mtime)) {
1118 memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); 1131 memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime));
1119 if (S_ISDIR(inode->i_mode)) 1132 if (S_ISDIR(inode->i_mode))
1120 nfsi->cache_validity |= NFS_INO_INVALID_DATA; 1133 nfs_set_cache_invalid(inode, NFS_INO_INVALID_DATA);
1121 ret |= NFS_INO_INVALID_ATTR; 1134 ret |= NFS_INO_INVALID_ATTR;
1122 } 1135 }
1123 if ((fattr->valid & NFS_ATTR_FATTR_PRESIZE) 1136 if ((fattr->valid & NFS_ATTR_FATTR_PRESIZE)
@@ -1128,9 +1141,6 @@ static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr
1128 ret |= NFS_INO_INVALID_ATTR; 1141 ret |= NFS_INO_INVALID_ATTR;
1129 } 1142 }
1130 1143
1131 if (nfsi->cache_validity & NFS_INO_INVALID_DATA)
1132 nfs_fscache_invalidate(inode);
1133
1134 return ret; 1144 return ret;
1135} 1145}
1136 1146
@@ -1189,7 +1199,7 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
1189 invalid |= NFS_INO_INVALID_ATIME; 1199 invalid |= NFS_INO_INVALID_ATIME;
1190 1200
1191 if (invalid != 0) 1201 if (invalid != 0)
1192 nfsi->cache_validity |= invalid; 1202 nfs_set_cache_invalid(inode, invalid);
1193 1203
1194 nfsi->read_cache_jiffies = fattr->time_start; 1204 nfsi->read_cache_jiffies = fattr->time_start;
1195 return 0; 1205 return 0;
@@ -1402,13 +1412,11 @@ EXPORT_SYMBOL_GPL(nfs_refresh_inode);
1402 1412
1403static int nfs_post_op_update_inode_locked(struct inode *inode, struct nfs_fattr *fattr) 1413static int nfs_post_op_update_inode_locked(struct inode *inode, struct nfs_fattr *fattr)
1404{ 1414{
1405 struct nfs_inode *nfsi = NFS_I(inode); 1415 unsigned long invalid = NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
1406 1416
1407 nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; 1417 if (S_ISDIR(inode->i_mode))
1408 if (S_ISDIR(inode->i_mode)) { 1418 invalid |= NFS_INO_INVALID_DATA;
1409 nfsi->cache_validity |= NFS_INO_INVALID_DATA; 1419 nfs_set_cache_invalid(inode, invalid);
1410 nfs_fscache_invalidate(inode);
1411 }
1412 if ((fattr->valid & NFS_ATTR_FATTR) == 0) 1420 if ((fattr->valid & NFS_ATTR_FATTR) == 0)
1413 return 0; 1421 return 0;
1414 return nfs_refresh_inode_locked(inode, fattr); 1422 return nfs_refresh_inode_locked(inode, fattr);
@@ -1601,6 +1609,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1601 if ((nfsi->npages == 0) || new_isize > cur_isize) { 1609 if ((nfsi->npages == 0) || new_isize > cur_isize) {
1602 i_size_write(inode, new_isize); 1610 i_size_write(inode, new_isize);
1603 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; 1611 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
1612 invalid &= ~NFS_INO_REVAL_PAGECACHE;
1604 } 1613 }
1605 dprintk("NFS: isize change on server for file %s/%ld " 1614 dprintk("NFS: isize change on server for file %s/%ld "
1606 "(%Ld to %Ld)\n", 1615 "(%Ld to %Ld)\n",
@@ -1702,10 +1711,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1702 invalid &= ~NFS_INO_INVALID_DATA; 1711 invalid &= ~NFS_INO_INVALID_DATA;
1703 if (!NFS_PROTO(inode)->have_delegation(inode, FMODE_READ) || 1712 if (!NFS_PROTO(inode)->have_delegation(inode, FMODE_READ) ||
1704 (save_cache_validity & NFS_INO_REVAL_FORCED)) 1713 (save_cache_validity & NFS_INO_REVAL_FORCED))
1705 nfsi->cache_validity |= invalid; 1714 nfs_set_cache_invalid(inode, invalid);
1706
1707 if (invalid & NFS_INO_INVALID_DATA)
1708 nfs_fscache_invalidate(inode);
1709 1715
1710 return 0; 1716 return 0;
1711 out_err: 1717 out_err:
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index f63cb87cd730..ba2affa51941 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -230,7 +230,7 @@ int nfs_atomic_open(struct inode *, struct dentry *, struct file *,
230extern struct file_system_type nfs4_fs_type; 230extern struct file_system_type nfs4_fs_type;
231 231
232/* nfs4namespace.c */ 232/* nfs4namespace.c */
233struct rpc_clnt *nfs4_create_sec_client(struct rpc_clnt *, struct inode *, struct qstr *); 233struct rpc_clnt *nfs4_negotiate_security(struct rpc_clnt *, struct inode *, struct qstr *);
234struct vfsmount *nfs4_submount(struct nfs_server *, struct dentry *, 234struct vfsmount *nfs4_submount(struct nfs_server *, struct dentry *,
235 struct nfs_fh *, struct nfs_fattr *); 235 struct nfs_fh *, struct nfs_fattr *);
236int nfs4_replace_transport(struct nfs_server *server, 236int nfs4_replace_transport(struct nfs_server *server,
diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c
index 3d5dbf80d46a..3d83cb1fdc70 100644
--- a/fs/nfs/nfs4namespace.c
+++ b/fs/nfs/nfs4namespace.c
@@ -139,16 +139,22 @@ static size_t nfs_parse_server_name(char *string, size_t len,
139 * @server: NFS server struct 139 * @server: NFS server struct
140 * @flavors: List of security tuples returned by SECINFO procedure 140 * @flavors: List of security tuples returned by SECINFO procedure
141 * 141 *
142 * Return the pseudoflavor of the first security mechanism in 142 * Return an rpc client that uses the first security mechanism in
143 * "flavors" that is locally supported. Return RPC_AUTH_UNIX if 143 * "flavors" that is locally supported. The "flavors" array
144 * no matching flavor is found in the array. The "flavors" array
145 * is searched in the order returned from the server, per RFC 3530 144 * is searched in the order returned from the server, per RFC 3530
146 * recommendation. 145 * recommendation and each flavor is checked for membership in the
146 * sec= mount option list if it exists.
147 *
148 * Return -EPERM if no matching flavor is found in the array.
149 *
150 * Please call rpc_shutdown_client() when you are done with this rpc client.
151 *
147 */ 152 */
148static rpc_authflavor_t nfs_find_best_sec(struct nfs_server *server, 153static struct rpc_clnt *nfs_find_best_sec(struct rpc_clnt *clnt,
154 struct nfs_server *server,
149 struct nfs4_secinfo_flavors *flavors) 155 struct nfs4_secinfo_flavors *flavors)
150{ 156{
151 rpc_authflavor_t pseudoflavor; 157 rpc_authflavor_t pflavor;
152 struct nfs4_secinfo4 *secinfo; 158 struct nfs4_secinfo4 *secinfo;
153 unsigned int i; 159 unsigned int i;
154 160
@@ -159,62 +165,73 @@ static rpc_authflavor_t nfs_find_best_sec(struct nfs_server *server,
159 case RPC_AUTH_NULL: 165 case RPC_AUTH_NULL:
160 case RPC_AUTH_UNIX: 166 case RPC_AUTH_UNIX:
161 case RPC_AUTH_GSS: 167 case RPC_AUTH_GSS:
162 pseudoflavor = rpcauth_get_pseudoflavor(secinfo->flavor, 168 pflavor = rpcauth_get_pseudoflavor(secinfo->flavor,
163 &secinfo->flavor_info); 169 &secinfo->flavor_info);
164 /* make sure pseudoflavor matches sec= mount opt */ 170 /* does the pseudoflavor match a sec= mount opt? */
165 if (pseudoflavor != RPC_AUTH_MAXFLAVOR && 171 if (pflavor != RPC_AUTH_MAXFLAVOR &&
166 nfs_auth_info_match(&server->auth_info, 172 nfs_auth_info_match(&server->auth_info, pflavor)) {
167 pseudoflavor)) 173 struct rpc_clnt *new;
168 return pseudoflavor; 174 struct rpc_cred *cred;
169 break; 175
176 /* Cloning creates an rpc_auth for the flavor */
177 new = rpc_clone_client_set_auth(clnt, pflavor);
178 if (IS_ERR(new))
179 continue;
180 /**
181 * Check that the user actually can use the
182 * flavor. This is mostly for RPC_AUTH_GSS
183 * where cr_init obtains a gss context
184 */
185 cred = rpcauth_lookupcred(new->cl_auth, 0);
186 if (IS_ERR(cred)) {
187 rpc_shutdown_client(new);
188 continue;
189 }
190 put_rpccred(cred);
191 return new;
192 }
170 } 193 }
171 } 194 }
172 195 return ERR_PTR(-EPERM);
173 /* if there were any sec= options then nothing matched */
174 if (server->auth_info.flavor_len > 0)
175 return -EPERM;
176
177 return RPC_AUTH_UNIX;
178} 196}
179 197
180static rpc_authflavor_t nfs4_negotiate_security(struct inode *inode, struct qstr *name) 198/**
199 * nfs4_negotiate_security - in response to an NFS4ERR_WRONGSEC on lookup,
200 * return an rpc_clnt that uses the best available security flavor with
201 * respect to the secinfo flavor list and the sec= mount options.
202 *
203 * @clnt: RPC client to clone
204 * @inode: directory inode
205 * @name: lookup name
206 *
207 * Please call rpc_shutdown_client() when you are done with this rpc client.
208 */
209struct rpc_clnt *
210nfs4_negotiate_security(struct rpc_clnt *clnt, struct inode *inode,
211 struct qstr *name)
181{ 212{
182 struct page *page; 213 struct page *page;
183 struct nfs4_secinfo_flavors *flavors; 214 struct nfs4_secinfo_flavors *flavors;
184 rpc_authflavor_t flavor; 215 struct rpc_clnt *new;
185 int err; 216 int err;
186 217
187 page = alloc_page(GFP_KERNEL); 218 page = alloc_page(GFP_KERNEL);
188 if (!page) 219 if (!page)
189 return -ENOMEM; 220 return ERR_PTR(-ENOMEM);
221
190 flavors = page_address(page); 222 flavors = page_address(page);
191 223
192 err = nfs4_proc_secinfo(inode, name, flavors); 224 err = nfs4_proc_secinfo(inode, name, flavors);
193 if (err < 0) { 225 if (err < 0) {
194 flavor = err; 226 new = ERR_PTR(err);
195 goto out; 227 goto out;
196 } 228 }
197 229
198 flavor = nfs_find_best_sec(NFS_SERVER(inode), flavors); 230 new = nfs_find_best_sec(clnt, NFS_SERVER(inode), flavors);
199 231
200out: 232out:
201 put_page(page); 233 put_page(page);
202 return flavor; 234 return new;
203}
204
205/*
206 * Please call rpc_shutdown_client() when you are done with this client.
207 */
208struct rpc_clnt *nfs4_create_sec_client(struct rpc_clnt *clnt, struct inode *inode,
209 struct qstr *name)
210{
211 rpc_authflavor_t flavor;
212
213 flavor = nfs4_negotiate_security(inode, name);
214 if ((int)flavor < 0)
215 return ERR_PTR((int)flavor);
216
217 return rpc_clone_client_set_auth(clnt, flavor);
218} 235}
219 236
220static struct vfsmount *try_location(struct nfs_clone_mount *mountdata, 237static struct vfsmount *try_location(struct nfs_clone_mount *mountdata,
@@ -397,11 +414,6 @@ struct vfsmount *nfs4_submount(struct nfs_server *server, struct dentry *dentry,
397 414
398 if (client->cl_auth->au_flavor != flavor) 415 if (client->cl_auth->au_flavor != flavor)
399 flavor = client->cl_auth->au_flavor; 416 flavor = client->cl_auth->au_flavor;
400 else {
401 rpc_authflavor_t new = nfs4_negotiate_security(dir, name);
402 if ((int)new >= 0)
403 flavor = new;
404 }
405 mnt = nfs_do_submount(dentry, fh, fattr, flavor); 417 mnt = nfs_do_submount(dentry, fh, fattr, flavor);
406out: 418out:
407 rpc_shutdown_client(client); 419 rpc_shutdown_client(client);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 285ad5334018..4bf3d97cc5a0 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -3247,7 +3247,7 @@ static int nfs4_proc_lookup_common(struct rpc_clnt **clnt, struct inode *dir,
3247 err = -EPERM; 3247 err = -EPERM;
3248 if (client != *clnt) 3248 if (client != *clnt)
3249 goto out; 3249 goto out;
3250 client = nfs4_create_sec_client(client, dir, name); 3250 client = nfs4_negotiate_security(client, dir, name);
3251 if (IS_ERR(client)) 3251 if (IS_ERR(client))
3252 return PTR_ERR(client); 3252 return PTR_ERR(client);
3253 3253
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 3ee5af4e738e..98ff061ccaf3 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -934,12 +934,14 @@ static bool nfs_write_pageuptodate(struct page *page, struct inode *inode)
934 934
935 if (nfs_have_delegated_attributes(inode)) 935 if (nfs_have_delegated_attributes(inode))
936 goto out; 936 goto out;
937 if (nfsi->cache_validity & (NFS_INO_INVALID_DATA|NFS_INO_REVAL_PAGECACHE)) 937 if (nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE)
938 return false; 938 return false;
939 smp_rmb(); 939 smp_rmb();
940 if (test_bit(NFS_INO_INVALIDATING, &nfsi->flags)) 940 if (test_bit(NFS_INO_INVALIDATING, &nfsi->flags))
941 return false; 941 return false;
942out: 942out:
943 if (nfsi->cache_validity & NFS_INO_INVALID_DATA)
944 return false;
943 return PageUptodate(page) != 0; 945 return PageUptodate(page) != 0;
944} 946}
945 947
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 6851b003f2a4..8f029db5d271 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -617,15 +617,6 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
617 617
618 switch (create->cr_type) { 618 switch (create->cr_type) {
619 case NF4LNK: 619 case NF4LNK:
620 /* ugh! we have to null-terminate the linktext, or
621 * vfs_symlink() will choke. it is always safe to
622 * null-terminate by brute force, since at worst we
623 * will overwrite the first byte of the create namelen
624 * in the XDR buffer, which has already been extracted
625 * during XDR decode.
626 */
627 create->cr_linkname[create->cr_linklen] = 0;
628
629 status = nfsd_symlink(rqstp, &cstate->current_fh, 620 status = nfsd_symlink(rqstp, &cstate->current_fh,
630 create->cr_name, create->cr_namelen, 621 create->cr_name, create->cr_namelen,
631 create->cr_linkname, create->cr_linklen, 622 create->cr_linkname, create->cr_linklen,
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index c0d45cec9958..2204e1fe5725 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -41,6 +41,7 @@
41#include <linux/ratelimit.h> 41#include <linux/ratelimit.h>
42#include <linux/sunrpc/svcauth_gss.h> 42#include <linux/sunrpc/svcauth_gss.h>
43#include <linux/sunrpc/addr.h> 43#include <linux/sunrpc/addr.h>
44#include <linux/hash.h>
44#include "xdr4.h" 45#include "xdr4.h"
45#include "xdr4cb.h" 46#include "xdr4cb.h"
46#include "vfs.h" 47#include "vfs.h"
@@ -364,6 +365,79 @@ static struct nfs4_ol_stateid * nfs4_alloc_stateid(struct nfs4_client *clp)
364 return openlockstateid(nfs4_alloc_stid(clp, stateid_slab)); 365 return openlockstateid(nfs4_alloc_stid(clp, stateid_slab));
365} 366}
366 367
368/*
369 * When we recall a delegation, we should be careful not to hand it
370 * out again straight away.
371 * To ensure this we keep a pair of bloom filters ('new' and 'old')
372 * in which the filehandles of recalled delegations are "stored".
373 * If a filehandle appear in either filter, a delegation is blocked.
374 * When a delegation is recalled, the filehandle is stored in the "new"
375 * filter.
376 * Every 30 seconds we swap the filters and clear the "new" one,
377 * unless both are empty of course.
378 *
379 * Each filter is 256 bits. We hash the filehandle to 32bit and use the
380 * low 3 bytes as hash-table indices.
381 *
382 * 'state_lock', which is always held when block_delegations() is called,
383 * is used to manage concurrent access. Testing does not need the lock
384 * except when swapping the two filters.
385 */
386static struct bloom_pair {
387 int entries, old_entries;
388 time_t swap_time;
389 int new; /* index into 'set' */
390 DECLARE_BITMAP(set[2], 256);
391} blocked_delegations;
392
393static int delegation_blocked(struct knfsd_fh *fh)
394{
395 u32 hash;
396 struct bloom_pair *bd = &blocked_delegations;
397
398 if (bd->entries == 0)
399 return 0;
400 if (seconds_since_boot() - bd->swap_time > 30) {
401 spin_lock(&state_lock);
402 if (seconds_since_boot() - bd->swap_time > 30) {
403 bd->entries -= bd->old_entries;
404 bd->old_entries = bd->entries;
405 memset(bd->set[bd->new], 0,
406 sizeof(bd->set[0]));
407 bd->new = 1-bd->new;
408 bd->swap_time = seconds_since_boot();
409 }
410 spin_unlock(&state_lock);
411 }
412 hash = arch_fast_hash(&fh->fh_base, fh->fh_size, 0);
413 if (test_bit(hash&255, bd->set[0]) &&
414 test_bit((hash>>8)&255, bd->set[0]) &&
415 test_bit((hash>>16)&255, bd->set[0]))
416 return 1;
417
418 if (test_bit(hash&255, bd->set[1]) &&
419 test_bit((hash>>8)&255, bd->set[1]) &&
420 test_bit((hash>>16)&255, bd->set[1]))
421 return 1;
422
423 return 0;
424}
425
426static void block_delegations(struct knfsd_fh *fh)
427{
428 u32 hash;
429 struct bloom_pair *bd = &blocked_delegations;
430
431 hash = arch_fast_hash(&fh->fh_base, fh->fh_size, 0);
432
433 __set_bit(hash&255, bd->set[bd->new]);
434 __set_bit((hash>>8)&255, bd->set[bd->new]);
435 __set_bit((hash>>16)&255, bd->set[bd->new]);
436 if (bd->entries == 0)
437 bd->swap_time = seconds_since_boot();
438 bd->entries += 1;
439}
440
367static struct nfs4_delegation * 441static struct nfs4_delegation *
368alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct svc_fh *current_fh) 442alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct svc_fh *current_fh)
369{ 443{
@@ -372,6 +446,8 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct sv
372 dprintk("NFSD alloc_init_deleg\n"); 446 dprintk("NFSD alloc_init_deleg\n");
373 if (num_delegations > max_delegations) 447 if (num_delegations > max_delegations)
374 return NULL; 448 return NULL;
449 if (delegation_blocked(&current_fh->fh_handle))
450 return NULL;
375 dp = delegstateid(nfs4_alloc_stid(clp, deleg_slab)); 451 dp = delegstateid(nfs4_alloc_stid(clp, deleg_slab));
376 if (dp == NULL) 452 if (dp == NULL)
377 return dp; 453 return dp;
@@ -2770,6 +2846,8 @@ static void nfsd_break_one_deleg(struct nfs4_delegation *dp)
2770 /* Only place dl_time is set; protected by i_lock: */ 2846 /* Only place dl_time is set; protected by i_lock: */
2771 dp->dl_time = get_seconds(); 2847 dp->dl_time = get_seconds();
2772 2848
2849 block_delegations(&dp->dl_fh);
2850
2773 nfsd4_cb_recall(dp); 2851 nfsd4_cb_recall(dp);
2774} 2852}
2775 2853
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 2d305a121f37..b56b1cc02718 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -600,7 +600,18 @@ nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create
600 READ_BUF(4); 600 READ_BUF(4);
601 create->cr_linklen = be32_to_cpup(p++); 601 create->cr_linklen = be32_to_cpup(p++);
602 READ_BUF(create->cr_linklen); 602 READ_BUF(create->cr_linklen);
603 SAVEMEM(create->cr_linkname, create->cr_linklen); 603 /*
604 * The VFS will want a null-terminated string, and
605 * null-terminating in place isn't safe since this might
606 * end on a page boundary:
607 */
608 create->cr_linkname =
609 kmalloc(create->cr_linklen + 1, GFP_KERNEL);
610 if (!create->cr_linkname)
611 return nfserr_jukebox;
612 memcpy(create->cr_linkname, p, create->cr_linklen);
613 create->cr_linkname[create->cr_linklen] = '\0';
614 defer_free(argp, kfree, create->cr_linkname);
604 break; 615 break;
605 case NF4BLK: 616 case NF4BLK:
606 case NF4CHR: 617 case NF4CHR:
@@ -2630,7 +2641,7 @@ nfsd4_encode_rdattr_error(struct xdr_stream *xdr, __be32 nfserr)
2630{ 2641{
2631 __be32 *p; 2642 __be32 *p;
2632 2643
2633 p = xdr_reserve_space(xdr, 6); 2644 p = xdr_reserve_space(xdr, 20);
2634 if (!p) 2645 if (!p)
2635 return NULL; 2646 return NULL;
2636 *p++ = htonl(2); 2647 *p++ = htonl(2);
@@ -2687,6 +2698,7 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen,
2687 nfserr = nfserr_toosmall; 2698 nfserr = nfserr_toosmall;
2688 goto fail; 2699 goto fail;
2689 case nfserr_noent: 2700 case nfserr_noent:
2701 xdr_truncate_encode(xdr, start_offset);
2690 goto skip_entry; 2702 goto skip_entry;
2691 default: 2703 default:
2692 /* 2704 /*
@@ -3266,7 +3278,7 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd
3266 3278
3267 wire_count = htonl(maxcount); 3279 wire_count = htonl(maxcount);
3268 write_bytes_to_xdr_buf(xdr->buf, length_offset, &wire_count, 4); 3280 write_bytes_to_xdr_buf(xdr->buf, length_offset, &wire_count, 4);
3269 xdr_truncate_encode(xdr, length_offset + 4 + maxcount); 3281 xdr_truncate_encode(xdr, length_offset + 4 + ALIGN(maxcount, 4));
3270 if (maxcount & 3) 3282 if (maxcount & 3)
3271 write_bytes_to_xdr_buf(xdr->buf, length_offset + 4 + maxcount, 3283 write_bytes_to_xdr_buf(xdr->buf, length_offset + 4 + maxcount,
3272 &zero, 4 - (maxcount&3)); 3284 &zero, 4 - (maxcount&3));
diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h
index a106b3f2b22a..fae17c640df3 100644
--- a/fs/ocfs2/dlm/dlmcommon.h
+++ b/fs/ocfs2/dlm/dlmcommon.h
@@ -331,6 +331,7 @@ struct dlm_lock_resource
331 u16 state; 331 u16 state;
332 char lvb[DLM_LVB_LEN]; 332 char lvb[DLM_LVB_LEN];
333 unsigned int inflight_locks; 333 unsigned int inflight_locks;
334 unsigned int inflight_assert_workers;
334 unsigned long refmap[BITS_TO_LONGS(O2NM_MAX_NODES)]; 335 unsigned long refmap[BITS_TO_LONGS(O2NM_MAX_NODES)];
335}; 336};
336 337
@@ -910,6 +911,9 @@ void dlm_lockres_drop_inflight_ref(struct dlm_ctxt *dlm,
910void dlm_lockres_grab_inflight_ref(struct dlm_ctxt *dlm, 911void dlm_lockres_grab_inflight_ref(struct dlm_ctxt *dlm,
911 struct dlm_lock_resource *res); 912 struct dlm_lock_resource *res);
912 913
914void __dlm_lockres_grab_inflight_worker(struct dlm_ctxt *dlm,
915 struct dlm_lock_resource *res);
916
913void dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock); 917void dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock);
914void dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock); 918void dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock);
915void __dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock); 919void __dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock);
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 3087a21d32f9..82abf0cc9a12 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -581,6 +581,7 @@ static void dlm_init_lockres(struct dlm_ctxt *dlm,
581 atomic_set(&res->asts_reserved, 0); 581 atomic_set(&res->asts_reserved, 0);
582 res->migration_pending = 0; 582 res->migration_pending = 0;
583 res->inflight_locks = 0; 583 res->inflight_locks = 0;
584 res->inflight_assert_workers = 0;
584 585
585 res->dlm = dlm; 586 res->dlm = dlm;
586 587
@@ -683,6 +684,43 @@ void dlm_lockres_drop_inflight_ref(struct dlm_ctxt *dlm,
683 wake_up(&res->wq); 684 wake_up(&res->wq);
684} 685}
685 686
687void __dlm_lockres_grab_inflight_worker(struct dlm_ctxt *dlm,
688 struct dlm_lock_resource *res)
689{
690 assert_spin_locked(&res->spinlock);
691 res->inflight_assert_workers++;
692 mlog(0, "%s:%.*s: inflight assert worker++: now %u\n",
693 dlm->name, res->lockname.len, res->lockname.name,
694 res->inflight_assert_workers);
695}
696
697static void dlm_lockres_grab_inflight_worker(struct dlm_ctxt *dlm,
698 struct dlm_lock_resource *res)
699{
700 spin_lock(&res->spinlock);
701 __dlm_lockres_grab_inflight_worker(dlm, res);
702 spin_unlock(&res->spinlock);
703}
704
705static void __dlm_lockres_drop_inflight_worker(struct dlm_ctxt *dlm,
706 struct dlm_lock_resource *res)
707{
708 assert_spin_locked(&res->spinlock);
709 BUG_ON(res->inflight_assert_workers == 0);
710 res->inflight_assert_workers--;
711 mlog(0, "%s:%.*s: inflight assert worker--: now %u\n",
712 dlm->name, res->lockname.len, res->lockname.name,
713 res->inflight_assert_workers);
714}
715
716static void dlm_lockres_drop_inflight_worker(struct dlm_ctxt *dlm,
717 struct dlm_lock_resource *res)
718{
719 spin_lock(&res->spinlock);
720 __dlm_lockres_drop_inflight_worker(dlm, res);
721 spin_unlock(&res->spinlock);
722}
723
686/* 724/*
687 * lookup a lock resource by name. 725 * lookup a lock resource by name.
688 * may already exist in the hashtable. 726 * may already exist in the hashtable.
@@ -1603,7 +1641,8 @@ send_response:
1603 mlog(ML_ERROR, "failed to dispatch assert master work\n"); 1641 mlog(ML_ERROR, "failed to dispatch assert master work\n");
1604 response = DLM_MASTER_RESP_ERROR; 1642 response = DLM_MASTER_RESP_ERROR;
1605 dlm_lockres_put(res); 1643 dlm_lockres_put(res);
1606 } 1644 } else
1645 dlm_lockres_grab_inflight_worker(dlm, res);
1607 } else { 1646 } else {
1608 if (res) 1647 if (res)
1609 dlm_lockres_put(res); 1648 dlm_lockres_put(res);
@@ -2118,6 +2157,8 @@ static void dlm_assert_master_worker(struct dlm_work_item *item, void *data)
2118 dlm_lockres_release_ast(dlm, res); 2157 dlm_lockres_release_ast(dlm, res);
2119 2158
2120put: 2159put:
2160 dlm_lockres_drop_inflight_worker(dlm, res);
2161
2121 dlm_lockres_put(res); 2162 dlm_lockres_put(res);
2122 2163
2123 mlog(0, "finished with dlm_assert_master_worker\n"); 2164 mlog(0, "finished with dlm_assert_master_worker\n");
@@ -3088,11 +3129,15 @@ static int dlm_add_migration_mle(struct dlm_ctxt *dlm,
3088 /* remove it so that only one mle will be found */ 3129 /* remove it so that only one mle will be found */
3089 __dlm_unlink_mle(dlm, tmp); 3130 __dlm_unlink_mle(dlm, tmp);
3090 __dlm_mle_detach_hb_events(dlm, tmp); 3131 __dlm_mle_detach_hb_events(dlm, tmp);
3091 ret = DLM_MIGRATE_RESPONSE_MASTERY_REF; 3132 if (tmp->type == DLM_MLE_MASTER) {
3092 mlog(0, "%s:%.*s: master=%u, newmaster=%u, " 3133 ret = DLM_MIGRATE_RESPONSE_MASTERY_REF;
3093 "telling master to get ref for cleared out mle " 3134 mlog(0, "%s:%.*s: master=%u, newmaster=%u, "
3094 "during migration\n", dlm->name, namelen, name, 3135 "telling master to get ref "
3095 master, new_master); 3136 "for cleared out mle during "
3137 "migration\n", dlm->name,
3138 namelen, name, master,
3139 new_master);
3140 }
3096 } 3141 }
3097 spin_unlock(&tmp->spinlock); 3142 spin_unlock(&tmp->spinlock);
3098 } 3143 }
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index 5de019437ea5..45067faf5695 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -1708,7 +1708,8 @@ int dlm_master_requery_handler(struct o2net_msg *msg, u32 len, void *data,
1708 mlog_errno(-ENOMEM); 1708 mlog_errno(-ENOMEM);
1709 /* retry!? */ 1709 /* retry!? */
1710 BUG(); 1710 BUG();
1711 } 1711 } else
1712 __dlm_lockres_grab_inflight_worker(dlm, res);
1712 } else /* put.. incase we are not the master */ 1713 } else /* put.. incase we are not the master */
1713 dlm_lockres_put(res); 1714 dlm_lockres_put(res);
1714 spin_unlock(&res->spinlock); 1715 spin_unlock(&res->spinlock);
diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c
index 9db869de829d..69aac6f088ad 100644
--- a/fs/ocfs2/dlm/dlmthread.c
+++ b/fs/ocfs2/dlm/dlmthread.c
@@ -259,12 +259,15 @@ static void dlm_run_purge_list(struct dlm_ctxt *dlm,
259 * refs on it. */ 259 * refs on it. */
260 unused = __dlm_lockres_unused(lockres); 260 unused = __dlm_lockres_unused(lockres);
261 if (!unused || 261 if (!unused ||
262 (lockres->state & DLM_LOCK_RES_MIGRATING)) { 262 (lockres->state & DLM_LOCK_RES_MIGRATING) ||
263 (lockres->inflight_assert_workers != 0)) {
263 mlog(0, "%s: res %.*s is in use or being remastered, " 264 mlog(0, "%s: res %.*s is in use or being remastered, "
264 "used %d, state %d\n", dlm->name, 265 "used %d, state %d, assert master workers %u\n",
265 lockres->lockname.len, lockres->lockname.name, 266 dlm->name, lockres->lockname.len,
266 !unused, lockres->state); 267 lockres->lockname.name,
267 list_move_tail(&dlm->purge_list, &lockres->purge); 268 !unused, lockres->state,
269 lockres->inflight_assert_workers);
270 list_move_tail(&lockres->purge, &dlm->purge_list);
268 spin_unlock(&lockres->spinlock); 271 spin_unlock(&lockres->spinlock);
269 continue; 272 continue;
270 } 273 }
diff --git a/fs/ocfs2/dlm/dlmunlock.c b/fs/ocfs2/dlm/dlmunlock.c
index 5698b52cf5c9..2e3c9dbab68c 100644
--- a/fs/ocfs2/dlm/dlmunlock.c
+++ b/fs/ocfs2/dlm/dlmunlock.c
@@ -191,7 +191,9 @@ static enum dlm_status dlmunlock_common(struct dlm_ctxt *dlm,
191 DLM_UNLOCK_CLEAR_CONVERT_TYPE); 191 DLM_UNLOCK_CLEAR_CONVERT_TYPE);
192 } else if (status == DLM_RECOVERING || 192 } else if (status == DLM_RECOVERING ||
193 status == DLM_MIGRATING || 193 status == DLM_MIGRATING ||
194 status == DLM_FORWARD) { 194 status == DLM_FORWARD ||
195 status == DLM_NOLOCKMGR
196 ) {
195 /* must clear the actions because this unlock 197 /* must clear the actions because this unlock
196 * is about to be retried. cannot free or do 198 * is about to be retried. cannot free or do
197 * any list manipulation. */ 199 * any list manipulation. */
@@ -200,7 +202,8 @@ static enum dlm_status dlmunlock_common(struct dlm_ctxt *dlm,
200 res->lockname.name, 202 res->lockname.name,
201 status==DLM_RECOVERING?"recovering": 203 status==DLM_RECOVERING?"recovering":
202 (status==DLM_MIGRATING?"migrating": 204 (status==DLM_MIGRATING?"migrating":
203 "forward")); 205 (status == DLM_FORWARD ? "forward" :
206 "nolockmanager")));
204 actions = 0; 207 actions = 0;
205 } 208 }
206 if (flags & LKM_CANCEL) 209 if (flags & LKM_CANCEL)
@@ -364,7 +367,10 @@ static enum dlm_status dlm_send_remote_unlock_request(struct dlm_ctxt *dlm,
364 * updated state to the recovery master. this thread 367 * updated state to the recovery master. this thread
365 * just needs to finish out the operation and call 368 * just needs to finish out the operation and call
366 * the unlockast. */ 369 * the unlockast. */
367 ret = DLM_NORMAL; 370 if (dlm_is_node_dead(dlm, owner))
371 ret = DLM_NORMAL;
372 else
373 ret = DLM_NOLOCKMGR;
368 } else { 374 } else {
369 /* something bad. this will BUG in ocfs2 */ 375 /* something bad. this will BUG in ocfs2 */
370 ret = dlm_err_to_dlm_status(tmpret); 376 ret = dlm_err_to_dlm_status(tmpret);
@@ -638,7 +644,9 @@ retry:
638 644
639 if (status == DLM_RECOVERING || 645 if (status == DLM_RECOVERING ||
640 status == DLM_MIGRATING || 646 status == DLM_MIGRATING ||
641 status == DLM_FORWARD) { 647 status == DLM_FORWARD ||
648 status == DLM_NOLOCKMGR) {
649
642 /* We want to go away for a tiny bit to allow recovery 650 /* We want to go away for a tiny bit to allow recovery
643 * / migration to complete on this resource. I don't 651 * / migration to complete on this resource. I don't
644 * know of any wait queue we could sleep on as this 652 * know of any wait queue we could sleep on as this
@@ -650,7 +658,7 @@ retry:
650 msleep(50); 658 msleep(50);
651 659
652 mlog(0, "retrying unlock due to pending recovery/" 660 mlog(0, "retrying unlock due to pending recovery/"
653 "migration/in-progress\n"); 661 "migration/in-progress/reconnect\n");
654 goto retry; 662 goto retry;
655 } 663 }
656 664
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 2060fc398445..8add6f1030d7 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -205,6 +205,21 @@ static struct inode *ocfs2_get_init_inode(struct inode *dir, umode_t mode)
205 return inode; 205 return inode;
206} 206}
207 207
208static void ocfs2_cleanup_add_entry_failure(struct ocfs2_super *osb,
209 struct dentry *dentry, struct inode *inode)
210{
211 struct ocfs2_dentry_lock *dl = dentry->d_fsdata;
212
213 ocfs2_simple_drop_lockres(osb, &dl->dl_lockres);
214 ocfs2_lock_res_free(&dl->dl_lockres);
215 BUG_ON(dl->dl_count != 1);
216 spin_lock(&dentry_attach_lock);
217 dentry->d_fsdata = NULL;
218 spin_unlock(&dentry_attach_lock);
219 kfree(dl);
220 iput(inode);
221}
222
208static int ocfs2_mknod(struct inode *dir, 223static int ocfs2_mknod(struct inode *dir,
209 struct dentry *dentry, 224 struct dentry *dentry,
210 umode_t mode, 225 umode_t mode,
@@ -231,6 +246,7 @@ static int ocfs2_mknod(struct inode *dir,
231 sigset_t oldset; 246 sigset_t oldset;
232 int did_block_signals = 0; 247 int did_block_signals = 0;
233 struct posix_acl *default_acl = NULL, *acl = NULL; 248 struct posix_acl *default_acl = NULL, *acl = NULL;
249 struct ocfs2_dentry_lock *dl = NULL;
234 250
235 trace_ocfs2_mknod(dir, dentry, dentry->d_name.len, dentry->d_name.name, 251 trace_ocfs2_mknod(dir, dentry, dentry->d_name.len, dentry->d_name.name,
236 (unsigned long long)OCFS2_I(dir)->ip_blkno, 252 (unsigned long long)OCFS2_I(dir)->ip_blkno,
@@ -423,6 +439,8 @@ static int ocfs2_mknod(struct inode *dir,
423 goto leave; 439 goto leave;
424 } 440 }
425 441
442 dl = dentry->d_fsdata;
443
426 status = ocfs2_add_entry(handle, dentry, inode, 444 status = ocfs2_add_entry(handle, dentry, inode,
427 OCFS2_I(inode)->ip_blkno, parent_fe_bh, 445 OCFS2_I(inode)->ip_blkno, parent_fe_bh,
428 &lookup); 446 &lookup);
@@ -469,6 +487,9 @@ leave:
469 * ocfs2_delete_inode will mutex_lock again. 487 * ocfs2_delete_inode will mutex_lock again.
470 */ 488 */
471 if ((status < 0) && inode) { 489 if ((status < 0) && inode) {
490 if (dl)
491 ocfs2_cleanup_add_entry_failure(osb, dentry, inode);
492
472 OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SKIP_ORPHAN_DIR; 493 OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SKIP_ORPHAN_DIR;
473 clear_nlink(inode); 494 clear_nlink(inode);
474 iput(inode); 495 iput(inode);
@@ -991,6 +1012,65 @@ leave:
991 return status; 1012 return status;
992} 1013}
993 1014
1015static int ocfs2_check_if_ancestor(struct ocfs2_super *osb,
1016 u64 src_inode_no, u64 dest_inode_no)
1017{
1018 int ret = 0, i = 0;
1019 u64 parent_inode_no = 0;
1020 u64 child_inode_no = src_inode_no;
1021 struct inode *child_inode;
1022
1023#define MAX_LOOKUP_TIMES 32
1024 while (1) {
1025 child_inode = ocfs2_iget(osb, child_inode_no, 0, 0);
1026 if (IS_ERR(child_inode)) {
1027 ret = PTR_ERR(child_inode);
1028 break;
1029 }
1030
1031 ret = ocfs2_inode_lock(child_inode, NULL, 0);
1032 if (ret < 0) {
1033 iput(child_inode);
1034 if (ret != -ENOENT)
1035 mlog_errno(ret);
1036 break;
1037 }
1038
1039 ret = ocfs2_lookup_ino_from_name(child_inode, "..", 2,
1040 &parent_inode_no);
1041 ocfs2_inode_unlock(child_inode, 0);
1042 iput(child_inode);
1043 if (ret < 0) {
1044 ret = -ENOENT;
1045 break;
1046 }
1047
1048 if (parent_inode_no == dest_inode_no) {
1049 ret = 1;
1050 break;
1051 }
1052
1053 if (parent_inode_no == osb->root_inode->i_ino) {
1054 ret = 0;
1055 break;
1056 }
1057
1058 child_inode_no = parent_inode_no;
1059
1060 if (++i >= MAX_LOOKUP_TIMES) {
1061 mlog(ML_NOTICE, "max lookup times reached, filesystem "
1062 "may have nested directories, "
1063 "src inode: %llu, dest inode: %llu.\n",
1064 (unsigned long long)src_inode_no,
1065 (unsigned long long)dest_inode_no);
1066 ret = 0;
1067 break;
1068 }
1069 }
1070
1071 return ret;
1072}
1073
994/* 1074/*
995 * The only place this should be used is rename! 1075 * The only place this should be used is rename!
996 * if they have the same id, then the 1st one is the only one locked. 1076 * if they have the same id, then the 1st one is the only one locked.
@@ -1002,6 +1082,7 @@ static int ocfs2_double_lock(struct ocfs2_super *osb,
1002 struct inode *inode2) 1082 struct inode *inode2)
1003{ 1083{
1004 int status; 1084 int status;
1085 int inode1_is_ancestor, inode2_is_ancestor;
1005 struct ocfs2_inode_info *oi1 = OCFS2_I(inode1); 1086 struct ocfs2_inode_info *oi1 = OCFS2_I(inode1);
1006 struct ocfs2_inode_info *oi2 = OCFS2_I(inode2); 1087 struct ocfs2_inode_info *oi2 = OCFS2_I(inode2);
1007 struct buffer_head **tmpbh; 1088 struct buffer_head **tmpbh;
@@ -1015,9 +1096,26 @@ static int ocfs2_double_lock(struct ocfs2_super *osb,
1015 if (*bh2) 1096 if (*bh2)
1016 *bh2 = NULL; 1097 *bh2 = NULL;
1017 1098
1018 /* we always want to lock the one with the lower lockid first. */ 1099 /* we always want to lock the one with the lower lockid first.
1100 * and if they are nested, we lock ancestor first */
1019 if (oi1->ip_blkno != oi2->ip_blkno) { 1101 if (oi1->ip_blkno != oi2->ip_blkno) {
1020 if (oi1->ip_blkno < oi2->ip_blkno) { 1102 inode1_is_ancestor = ocfs2_check_if_ancestor(osb, oi2->ip_blkno,
1103 oi1->ip_blkno);
1104 if (inode1_is_ancestor < 0) {
1105 status = inode1_is_ancestor;
1106 goto bail;
1107 }
1108
1109 inode2_is_ancestor = ocfs2_check_if_ancestor(osb, oi1->ip_blkno,
1110 oi2->ip_blkno);
1111 if (inode2_is_ancestor < 0) {
1112 status = inode2_is_ancestor;
1113 goto bail;
1114 }
1115
1116 if ((inode1_is_ancestor == 1) ||
1117 (oi1->ip_blkno < oi2->ip_blkno &&
1118 inode2_is_ancestor == 0)) {
1021 /* switch id1 and id2 around */ 1119 /* switch id1 and id2 around */
1022 tmpbh = bh2; 1120 tmpbh = bh2;
1023 bh2 = bh1; 1121 bh2 = bh1;
@@ -1098,6 +1196,7 @@ static int ocfs2_rename(struct inode *old_dir,
1098 struct ocfs2_dir_lookup_result old_entry_lookup = { NULL, }; 1196 struct ocfs2_dir_lookup_result old_entry_lookup = { NULL, };
1099 struct ocfs2_dir_lookup_result orphan_insert = { NULL, }; 1197 struct ocfs2_dir_lookup_result orphan_insert = { NULL, };
1100 struct ocfs2_dir_lookup_result target_insert = { NULL, }; 1198 struct ocfs2_dir_lookup_result target_insert = { NULL, };
1199 bool should_add_orphan = false;
1101 1200
1102 /* At some point it might be nice to break this function up a 1201 /* At some point it might be nice to break this function up a
1103 * bit. */ 1202 * bit. */
@@ -1134,6 +1233,21 @@ static int ocfs2_rename(struct inode *old_dir,
1134 goto bail; 1233 goto bail;
1135 } 1234 }
1136 rename_lock = 1; 1235 rename_lock = 1;
1236
1237 /* here we cannot guarantee the inodes haven't just been
1238 * changed, so check if they are nested again */
1239 status = ocfs2_check_if_ancestor(osb, new_dir->i_ino,
1240 old_inode->i_ino);
1241 if (status < 0) {
1242 mlog_errno(status);
1243 goto bail;
1244 } else if (status == 1) {
1245 status = -EPERM;
1246 trace_ocfs2_rename_not_permitted(
1247 (unsigned long long)old_inode->i_ino,
1248 (unsigned long long)new_dir->i_ino);
1249 goto bail;
1250 }
1137 } 1251 }
1138 1252
1139 /* if old and new are the same, this'll just do one lock. */ 1253 /* if old and new are the same, this'll just do one lock. */
@@ -1304,6 +1418,7 @@ static int ocfs2_rename(struct inode *old_dir,
1304 mlog_errno(status); 1418 mlog_errno(status);
1305 goto bail; 1419 goto bail;
1306 } 1420 }
1421 should_add_orphan = true;
1307 } 1422 }
1308 } else { 1423 } else {
1309 BUG_ON(new_dentry->d_parent->d_inode != new_dir); 1424 BUG_ON(new_dentry->d_parent->d_inode != new_dir);
@@ -1348,17 +1463,6 @@ static int ocfs2_rename(struct inode *old_dir,
1348 goto bail; 1463 goto bail;
1349 } 1464 }
1350 1465
1351 if (S_ISDIR(new_inode->i_mode) ||
1352 (ocfs2_read_links_count(newfe) == 1)) {
1353 status = ocfs2_orphan_add(osb, handle, new_inode,
1354 newfe_bh, orphan_name,
1355 &orphan_insert, orphan_dir);
1356 if (status < 0) {
1357 mlog_errno(status);
1358 goto bail;
1359 }
1360 }
1361
1362 /* change the dirent to point to the correct inode */ 1466 /* change the dirent to point to the correct inode */
1363 status = ocfs2_update_entry(new_dir, handle, &target_lookup_res, 1467 status = ocfs2_update_entry(new_dir, handle, &target_lookup_res,
1364 old_inode); 1468 old_inode);
@@ -1373,6 +1477,15 @@ static int ocfs2_rename(struct inode *old_dir,
1373 else 1477 else
1374 ocfs2_add_links_count(newfe, -1); 1478 ocfs2_add_links_count(newfe, -1);
1375 ocfs2_journal_dirty(handle, newfe_bh); 1479 ocfs2_journal_dirty(handle, newfe_bh);
1480 if (should_add_orphan) {
1481 status = ocfs2_orphan_add(osb, handle, new_inode,
1482 newfe_bh, orphan_name,
1483 &orphan_insert, orphan_dir);
1484 if (status < 0) {
1485 mlog_errno(status);
1486 goto bail;
1487 }
1488 }
1376 } else { 1489 } else {
1377 /* if the name was not found in new_dir, add it now */ 1490 /* if the name was not found in new_dir, add it now */
1378 status = ocfs2_add_entry(handle, new_dentry, old_inode, 1491 status = ocfs2_add_entry(handle, new_dentry, old_inode,
@@ -1642,6 +1755,7 @@ static int ocfs2_symlink(struct inode *dir,
1642 struct ocfs2_dir_lookup_result lookup = { NULL, }; 1755 struct ocfs2_dir_lookup_result lookup = { NULL, };
1643 sigset_t oldset; 1756 sigset_t oldset;
1644 int did_block_signals = 0; 1757 int did_block_signals = 0;
1758 struct ocfs2_dentry_lock *dl = NULL;
1645 1759
1646 trace_ocfs2_symlink_begin(dir, dentry, symname, 1760 trace_ocfs2_symlink_begin(dir, dentry, symname,
1647 dentry->d_name.len, dentry->d_name.name); 1761 dentry->d_name.len, dentry->d_name.name);
@@ -1830,6 +1944,8 @@ static int ocfs2_symlink(struct inode *dir,
1830 goto bail; 1944 goto bail;
1831 } 1945 }
1832 1946
1947 dl = dentry->d_fsdata;
1948
1833 status = ocfs2_add_entry(handle, dentry, inode, 1949 status = ocfs2_add_entry(handle, dentry, inode,
1834 le64_to_cpu(fe->i_blkno), parent_fe_bh, 1950 le64_to_cpu(fe->i_blkno), parent_fe_bh,
1835 &lookup); 1951 &lookup);
@@ -1864,6 +1980,9 @@ bail:
1864 if (xattr_ac) 1980 if (xattr_ac)
1865 ocfs2_free_alloc_context(xattr_ac); 1981 ocfs2_free_alloc_context(xattr_ac);
1866 if ((status < 0) && inode) { 1982 if ((status < 0) && inode) {
1983 if (dl)
1984 ocfs2_cleanup_add_entry_failure(osb, dentry, inode);
1985
1867 OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SKIP_ORPHAN_DIR; 1986 OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SKIP_ORPHAN_DIR;
1868 clear_nlink(inode); 1987 clear_nlink(inode);
1869 iput(inode); 1988 iput(inode);
diff --git a/fs/ocfs2/ocfs2_trace.h b/fs/ocfs2/ocfs2_trace.h
index 1b60c62aa9d6..6cb019b7c6a8 100644
--- a/fs/ocfs2/ocfs2_trace.h
+++ b/fs/ocfs2/ocfs2_trace.h
@@ -2292,6 +2292,8 @@ TRACE_EVENT(ocfs2_rename,
2292 __entry->new_len, __get_str(new_name)) 2292 __entry->new_len, __get_str(new_name))
2293); 2293);
2294 2294
2295DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_rename_not_permitted);
2296
2295TRACE_EVENT(ocfs2_rename_target_exists, 2297TRACE_EVENT(ocfs2_rename_target_exists,
2296 TP_PROTO(int new_len, const char *new_name), 2298 TP_PROTO(int new_len, const char *new_name),
2297 TP_ARGS(new_len, new_name), 2299 TP_ARGS(new_len, new_name),
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 714e53b9cc66..636aab69ead5 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -4288,9 +4288,16 @@ static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir,
4288 goto out; 4288 goto out;
4289 } 4289 }
4290 4290
4291 error = ocfs2_rw_lock(inode, 1);
4292 if (error) {
4293 mlog_errno(error);
4294 goto out;
4295 }
4296
4291 error = ocfs2_inode_lock(inode, &old_bh, 1); 4297 error = ocfs2_inode_lock(inode, &old_bh, 1);
4292 if (error) { 4298 if (error) {
4293 mlog_errno(error); 4299 mlog_errno(error);
4300 ocfs2_rw_unlock(inode, 1);
4294 goto out; 4301 goto out;
4295 } 4302 }
4296 4303
@@ -4302,6 +4309,7 @@ static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir,
4302 up_write(&OCFS2_I(inode)->ip_xattr_sem); 4309 up_write(&OCFS2_I(inode)->ip_xattr_sem);
4303 4310
4304 ocfs2_inode_unlock(inode, 1); 4311 ocfs2_inode_unlock(inode, 1);
4312 ocfs2_rw_unlock(inode, 1);
4305 brelse(old_bh); 4313 brelse(old_bh);
4306 4314
4307 if (error) { 4315 if (error) {
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index c7a89cea5c5d..ddb662b32447 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1925,15 +1925,11 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
1925 1925
1926 ocfs2_shutdown_local_alloc(osb); 1926 ocfs2_shutdown_local_alloc(osb);
1927 1927
1928 ocfs2_truncate_log_shutdown(osb);
1929
1928 /* This will disable recovery and flush any recovery work. */ 1930 /* This will disable recovery and flush any recovery work. */
1929 ocfs2_recovery_exit(osb); 1931 ocfs2_recovery_exit(osb);
1930 1932
1931 /*
1932 * During dismount, when it recovers another node it will call
1933 * ocfs2_recover_orphans and queue delayed work osb_truncate_log_wq.
1934 */
1935 ocfs2_truncate_log_shutdown(osb);
1936
1937 ocfs2_journal_shutdown(osb); 1933 ocfs2_journal_shutdown(osb);
1938 1934
1939 ocfs2_sync_blockdev(sb); 1935 ocfs2_sync_blockdev(sb);
diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index 9d231e9e5f0e..bf2d03f8fd3e 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -184,29 +184,11 @@ static int show_stat(struct seq_file *p, void *v)
184 184
185static int stat_open(struct inode *inode, struct file *file) 185static int stat_open(struct inode *inode, struct file *file)
186{ 186{
187 size_t size = 1024 + 128 * num_possible_cpus(); 187 size_t size = 1024 + 128 * num_online_cpus();
188 char *buf;
189 struct seq_file *m;
190 int res;
191 188
192 /* minimum size to display an interrupt count : 2 bytes */ 189 /* minimum size to display an interrupt count : 2 bytes */
193 size += 2 * nr_irqs; 190 size += 2 * nr_irqs;
194 191 return single_open_size(file, show_stat, NULL, size);
195 /* don't ask for more than the kmalloc() max size */
196 if (size > KMALLOC_MAX_SIZE)
197 size = KMALLOC_MAX_SIZE;
198 buf = kmalloc(size, GFP_KERNEL);
199 if (!buf)
200 return -ENOMEM;
201
202 res = single_open(file, show_stat, NULL);
203 if (!res) {
204 m = file->private_data;
205 m->buf = buf;
206 m->size = ksize(buf);
207 } else
208 kfree(buf);
209 return res;
210} 192}
211 193
212static const struct file_operations proc_stat_operations = { 194static const struct file_operations proc_stat_operations = {
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 9cd5f63715c0..7f30bdc57d13 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -702,6 +702,7 @@ dqcache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
702 struct dquot *dquot; 702 struct dquot *dquot;
703 unsigned long freed = 0; 703 unsigned long freed = 0;
704 704
705 spin_lock(&dq_list_lock);
705 head = free_dquots.prev; 706 head = free_dquots.prev;
706 while (head != &free_dquots && sc->nr_to_scan) { 707 while (head != &free_dquots && sc->nr_to_scan) {
707 dquot = list_entry(head, struct dquot, dq_free); 708 dquot = list_entry(head, struct dquot, dq_free);
@@ -713,6 +714,7 @@ dqcache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
713 freed++; 714 freed++;
714 head = free_dquots.prev; 715 head = free_dquots.prev;
715 } 716 }
717 spin_unlock(&dq_list_lock);
716 return freed; 718 return freed;
717} 719}
718 720
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 1d641bb108d2..3857b720cb1b 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -8,8 +8,10 @@
8#include <linux/fs.h> 8#include <linux/fs.h>
9#include <linux/export.h> 9#include <linux/export.h>
10#include <linux/seq_file.h> 10#include <linux/seq_file.h>
11#include <linux/vmalloc.h>
11#include <linux/slab.h> 12#include <linux/slab.h>
12#include <linux/cred.h> 13#include <linux/cred.h>
14#include <linux/mm.h>
13 15
14#include <asm/uaccess.h> 16#include <asm/uaccess.h>
15#include <asm/page.h> 17#include <asm/page.h>
@@ -30,6 +32,16 @@ static void seq_set_overflow(struct seq_file *m)
30 m->count = m->size; 32 m->count = m->size;
31} 33}
32 34
35static void *seq_buf_alloc(unsigned long size)
36{
37 void *buf;
38
39 buf = kmalloc(size, GFP_KERNEL | __GFP_NOWARN);
40 if (!buf && size > PAGE_SIZE)
41 buf = vmalloc(size);
42 return buf;
43}
44
33/** 45/**
34 * seq_open - initialize sequential file 46 * seq_open - initialize sequential file
35 * @file: file we initialize 47 * @file: file we initialize
@@ -96,7 +108,7 @@ static int traverse(struct seq_file *m, loff_t offset)
96 return 0; 108 return 0;
97 } 109 }
98 if (!m->buf) { 110 if (!m->buf) {
99 m->buf = kmalloc(m->size = PAGE_SIZE, GFP_KERNEL); 111 m->buf = seq_buf_alloc(m->size = PAGE_SIZE);
100 if (!m->buf) 112 if (!m->buf)
101 return -ENOMEM; 113 return -ENOMEM;
102 } 114 }
@@ -135,9 +147,9 @@ static int traverse(struct seq_file *m, loff_t offset)
135 147
136Eoverflow: 148Eoverflow:
137 m->op->stop(m, p); 149 m->op->stop(m, p);
138 kfree(m->buf); 150 kvfree(m->buf);
139 m->count = 0; 151 m->count = 0;
140 m->buf = kmalloc(m->size <<= 1, GFP_KERNEL); 152 m->buf = seq_buf_alloc(m->size <<= 1);
141 return !m->buf ? -ENOMEM : -EAGAIN; 153 return !m->buf ? -ENOMEM : -EAGAIN;
142} 154}
143 155
@@ -192,7 +204,7 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
192 204
193 /* grab buffer if we didn't have one */ 205 /* grab buffer if we didn't have one */
194 if (!m->buf) { 206 if (!m->buf) {
195 m->buf = kmalloc(m->size = PAGE_SIZE, GFP_KERNEL); 207 m->buf = seq_buf_alloc(m->size = PAGE_SIZE);
196 if (!m->buf) 208 if (!m->buf)
197 goto Enomem; 209 goto Enomem;
198 } 210 }
@@ -232,9 +244,9 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
232 if (m->count < m->size) 244 if (m->count < m->size)
233 goto Fill; 245 goto Fill;
234 m->op->stop(m, p); 246 m->op->stop(m, p);
235 kfree(m->buf); 247 kvfree(m->buf);
236 m->count = 0; 248 m->count = 0;
237 m->buf = kmalloc(m->size <<= 1, GFP_KERNEL); 249 m->buf = seq_buf_alloc(m->size <<= 1);
238 if (!m->buf) 250 if (!m->buf)
239 goto Enomem; 251 goto Enomem;
240 m->version = 0; 252 m->version = 0;
@@ -350,7 +362,7 @@ EXPORT_SYMBOL(seq_lseek);
350int seq_release(struct inode *inode, struct file *file) 362int seq_release(struct inode *inode, struct file *file)
351{ 363{
352 struct seq_file *m = file->private_data; 364 struct seq_file *m = file->private_data;
353 kfree(m->buf); 365 kvfree(m->buf);
354 kfree(m); 366 kfree(m);
355 return 0; 367 return 0;
356} 368}
@@ -605,13 +617,13 @@ EXPORT_SYMBOL(single_open);
605int single_open_size(struct file *file, int (*show)(struct seq_file *, void *), 617int single_open_size(struct file *file, int (*show)(struct seq_file *, void *),
606 void *data, size_t size) 618 void *data, size_t size)
607{ 619{
608 char *buf = kmalloc(size, GFP_KERNEL); 620 char *buf = seq_buf_alloc(size);
609 int ret; 621 int ret;
610 if (!buf) 622 if (!buf)
611 return -ENOMEM; 623 return -ENOMEM;
612 ret = single_open(file, show, data); 624 ret = single_open(file, show, data);
613 if (ret) { 625 if (ret) {
614 kfree(buf); 626 kvfree(buf);
615 return ret; 627 return ret;
616 } 628 }
617 ((struct seq_file *)file->private_data)->buf = buf; 629 ((struct seq_file *)file->private_data)->buf = buf;