aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/aio.c13
-rw-r--r--fs/btrfs/async-thread.c44
-rw-r--r--fs/btrfs/async-thread.h28
-rw-r--r--fs/btrfs/delayed-inode.c4
-rw-r--r--fs/btrfs/disk-io.c56
-rw-r--r--fs/btrfs/extent-tree.c23
-rw-r--r--fs/btrfs/extent_io.c5
-rw-r--r--fs/btrfs/file.c17
-rw-r--r--fs/btrfs/inode.c109
-rw-r--r--fs/btrfs/ioctl.c36
-rw-r--r--fs/btrfs/ordered-data.c1
-rw-r--r--fs/btrfs/qgroup.c3
-rw-r--r--fs/btrfs/raid56.c9
-rw-r--r--fs/btrfs/reada.c3
-rw-r--r--fs/btrfs/scrub.c25
-rw-r--r--fs/btrfs/sysfs.c2
-rw-r--r--fs/btrfs/tree-log.c17
-rw-r--r--fs/btrfs/volumes.c65
-rw-r--r--fs/ext4/ext4.h18
-rw-r--r--fs/ext4/extents.c88
-rw-r--r--fs/ext4/inode.c44
-rw-r--r--fs/ext4/mballoc.c5
-rw-r--r--fs/ext4/namei.c56
-rw-r--r--fs/ext4/super.c5
-rw-r--r--fs/f2fs/Kconfig4
-rw-r--r--fs/f2fs/checkpoint.c80
-rw-r--r--fs/f2fs/data.c19
-rw-r--r--fs/f2fs/debug.c4
-rw-r--r--fs/f2fs/dir.c6
-rw-r--r--fs/f2fs/f2fs.h26
-rw-r--r--fs/f2fs/file.c60
-rw-r--r--fs/f2fs/gc.c8
-rw-r--r--fs/f2fs/gc.h2
-rw-r--r--fs/f2fs/hash.c7
-rw-r--r--fs/f2fs/inline.c38
-rw-r--r--fs/f2fs/namei.c23
-rw-r--r--fs/f2fs/node.c80
-rw-r--r--fs/f2fs/recovery.c30
-rw-r--r--fs/f2fs/segment.c53
-rw-r--r--fs/f2fs/segment.h2
-rw-r--r--fs/f2fs/super.c32
-rw-r--r--fs/f2fs/xattr.c2
-rw-r--r--fs/jbd2/commit.c21
-rw-r--r--fs/jbd2/journal.c56
-rw-r--r--fs/jbd2/recovery.c33
-rw-r--r--fs/jbd2/revoke.c6
-rw-r--r--fs/locks.c2
-rw-r--r--fs/nfs/nfs3acl.c5
-rw-r--r--fs/nfs/nfs4proc.c26
-rw-r--r--fs/ocfs2/cluster/quorum.c13
-rw-r--r--fs/ocfs2/cluster/tcp.c45
-rw-r--r--fs/ocfs2/cluster/tcp.h1
-rw-r--r--fs/ocfs2/ioctl.c129
-rw-r--r--fs/sync.c2
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c18
-rw-r--r--fs/xfs/xfs_aops.c61
-rw-r--r--fs/xfs/xfs_bmap_util.c20
-rw-r--r--fs/xfs/xfs_file.c27
58 files changed, 1011 insertions, 606 deletions
diff --git a/fs/aio.c b/fs/aio.c
index 97bc62cbe2da..733750096b71 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -793,6 +793,8 @@ void exit_aio(struct mm_struct *mm)
793 793
794 for (i = 0; i < table->nr; ++i) { 794 for (i = 0; i < table->nr; ++i) {
795 struct kioctx *ctx = table->table[i]; 795 struct kioctx *ctx = table->table[i];
796 struct completion requests_done =
797 COMPLETION_INITIALIZER_ONSTACK(requests_done);
796 798
797 if (!ctx) 799 if (!ctx)
798 continue; 800 continue;
@@ -804,7 +806,10 @@ void exit_aio(struct mm_struct *mm)
804 * that it needs to unmap the area, just set it to 0. 806 * that it needs to unmap the area, just set it to 0.
805 */ 807 */
806 ctx->mmap_size = 0; 808 ctx->mmap_size = 0;
807 kill_ioctx(mm, ctx, NULL); 809 kill_ioctx(mm, ctx, &requests_done);
810
811 /* Wait until all IO for the context are done. */
812 wait_for_completion(&requests_done);
808 } 813 }
809 814
810 RCU_INIT_POINTER(mm->ioctx_table, NULL); 815 RCU_INIT_POINTER(mm->ioctx_table, NULL);
@@ -1111,6 +1116,12 @@ static long aio_read_events_ring(struct kioctx *ctx,
1111 tail = ring->tail; 1116 tail = ring->tail;
1112 kunmap_atomic(ring); 1117 kunmap_atomic(ring);
1113 1118
1119 /*
1120 * Ensure that once we've read the current tail pointer, that
1121 * we also see the events that were stored up to the tail.
1122 */
1123 smp_rmb();
1124
1114 pr_debug("h%u t%u m%u\n", head, tail, ctx->nr_events); 1125 pr_debug("h%u t%u m%u\n", head, tail, ctx->nr_events);
1115 1126
1116 if (head == tail) 1127 if (head == tail)
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index 5a201d81049c..fbd76ded9a34 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -22,7 +22,6 @@
22#include <linux/list.h> 22#include <linux/list.h>
23#include <linux/spinlock.h> 23#include <linux/spinlock.h>
24#include <linux/freezer.h> 24#include <linux/freezer.h>
25#include <linux/workqueue.h>
26#include "async-thread.h" 25#include "async-thread.h"
27#include "ctree.h" 26#include "ctree.h"
28 27
@@ -55,8 +54,39 @@ struct btrfs_workqueue {
55 struct __btrfs_workqueue *high; 54 struct __btrfs_workqueue *high;
56}; 55};
57 56
58static inline struct __btrfs_workqueue 57static void normal_work_helper(struct btrfs_work *work);
59*__btrfs_alloc_workqueue(const char *name, int flags, int max_active, 58
59#define BTRFS_WORK_HELPER(name) \
60void btrfs_##name(struct work_struct *arg) \
61{ \
62 struct btrfs_work *work = container_of(arg, struct btrfs_work, \
63 normal_work); \
64 normal_work_helper(work); \
65}
66
67BTRFS_WORK_HELPER(worker_helper);
68BTRFS_WORK_HELPER(delalloc_helper);
69BTRFS_WORK_HELPER(flush_delalloc_helper);
70BTRFS_WORK_HELPER(cache_helper);
71BTRFS_WORK_HELPER(submit_helper);
72BTRFS_WORK_HELPER(fixup_helper);
73BTRFS_WORK_HELPER(endio_helper);
74BTRFS_WORK_HELPER(endio_meta_helper);
75BTRFS_WORK_HELPER(endio_meta_write_helper);
76BTRFS_WORK_HELPER(endio_raid56_helper);
77BTRFS_WORK_HELPER(rmw_helper);
78BTRFS_WORK_HELPER(endio_write_helper);
79BTRFS_WORK_HELPER(freespace_write_helper);
80BTRFS_WORK_HELPER(delayed_meta_helper);
81BTRFS_WORK_HELPER(readahead_helper);
82BTRFS_WORK_HELPER(qgroup_rescan_helper);
83BTRFS_WORK_HELPER(extent_refs_helper);
84BTRFS_WORK_HELPER(scrub_helper);
85BTRFS_WORK_HELPER(scrubwrc_helper);
86BTRFS_WORK_HELPER(scrubnc_helper);
87
88static struct __btrfs_workqueue *
89__btrfs_alloc_workqueue(const char *name, int flags, int max_active,
60 int thresh) 90 int thresh)
61{ 91{
62 struct __btrfs_workqueue *ret = kzalloc(sizeof(*ret), GFP_NOFS); 92 struct __btrfs_workqueue *ret = kzalloc(sizeof(*ret), GFP_NOFS);
@@ -232,13 +262,11 @@ static void run_ordered_work(struct __btrfs_workqueue *wq)
232 spin_unlock_irqrestore(lock, flags); 262 spin_unlock_irqrestore(lock, flags);
233} 263}
234 264
235static void normal_work_helper(struct work_struct *arg) 265static void normal_work_helper(struct btrfs_work *work)
236{ 266{
237 struct btrfs_work *work;
238 struct __btrfs_workqueue *wq; 267 struct __btrfs_workqueue *wq;
239 int need_order = 0; 268 int need_order = 0;
240 269
241 work = container_of(arg, struct btrfs_work, normal_work);
242 /* 270 /*
243 * We should not touch things inside work in the following cases: 271 * We should not touch things inside work in the following cases:
244 * 1) after work->func() if it has no ordered_free 272 * 1) after work->func() if it has no ordered_free
@@ -262,7 +290,7 @@ static void normal_work_helper(struct work_struct *arg)
262 trace_btrfs_all_work_done(work); 290 trace_btrfs_all_work_done(work);
263} 291}
264 292
265void btrfs_init_work(struct btrfs_work *work, 293void btrfs_init_work(struct btrfs_work *work, btrfs_work_func_t uniq_func,
266 btrfs_func_t func, 294 btrfs_func_t func,
267 btrfs_func_t ordered_func, 295 btrfs_func_t ordered_func,
268 btrfs_func_t ordered_free) 296 btrfs_func_t ordered_free)
@@ -270,7 +298,7 @@ void btrfs_init_work(struct btrfs_work *work,
270 work->func = func; 298 work->func = func;
271 work->ordered_func = ordered_func; 299 work->ordered_func = ordered_func;
272 work->ordered_free = ordered_free; 300 work->ordered_free = ordered_free;
273 INIT_WORK(&work->normal_work, normal_work_helper); 301 INIT_WORK(&work->normal_work, uniq_func);
274 INIT_LIST_HEAD(&work->ordered_list); 302 INIT_LIST_HEAD(&work->ordered_list);
275 work->flags = 0; 303 work->flags = 0;
276} 304}
diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h
index 9c6b66d15fb0..e9e31c94758f 100644
--- a/fs/btrfs/async-thread.h
+++ b/fs/btrfs/async-thread.h
@@ -19,12 +19,14 @@
19 19
20#ifndef __BTRFS_ASYNC_THREAD_ 20#ifndef __BTRFS_ASYNC_THREAD_
21#define __BTRFS_ASYNC_THREAD_ 21#define __BTRFS_ASYNC_THREAD_
22#include <linux/workqueue.h>
22 23
23struct btrfs_workqueue; 24struct btrfs_workqueue;
24/* Internal use only */ 25/* Internal use only */
25struct __btrfs_workqueue; 26struct __btrfs_workqueue;
26struct btrfs_work; 27struct btrfs_work;
27typedef void (*btrfs_func_t)(struct btrfs_work *arg); 28typedef void (*btrfs_func_t)(struct btrfs_work *arg);
29typedef void (*btrfs_work_func_t)(struct work_struct *arg);
28 30
29struct btrfs_work { 31struct btrfs_work {
30 btrfs_func_t func; 32 btrfs_func_t func;
@@ -38,11 +40,35 @@ struct btrfs_work {
38 unsigned long flags; 40 unsigned long flags;
39}; 41};
40 42
43#define BTRFS_WORK_HELPER_PROTO(name) \
44void btrfs_##name(struct work_struct *arg)
45
46BTRFS_WORK_HELPER_PROTO(worker_helper);
47BTRFS_WORK_HELPER_PROTO(delalloc_helper);
48BTRFS_WORK_HELPER_PROTO(flush_delalloc_helper);
49BTRFS_WORK_HELPER_PROTO(cache_helper);
50BTRFS_WORK_HELPER_PROTO(submit_helper);
51BTRFS_WORK_HELPER_PROTO(fixup_helper);
52BTRFS_WORK_HELPER_PROTO(endio_helper);
53BTRFS_WORK_HELPER_PROTO(endio_meta_helper);
54BTRFS_WORK_HELPER_PROTO(endio_meta_write_helper);
55BTRFS_WORK_HELPER_PROTO(endio_raid56_helper);
56BTRFS_WORK_HELPER_PROTO(rmw_helper);
57BTRFS_WORK_HELPER_PROTO(endio_write_helper);
58BTRFS_WORK_HELPER_PROTO(freespace_write_helper);
59BTRFS_WORK_HELPER_PROTO(delayed_meta_helper);
60BTRFS_WORK_HELPER_PROTO(readahead_helper);
61BTRFS_WORK_HELPER_PROTO(qgroup_rescan_helper);
62BTRFS_WORK_HELPER_PROTO(extent_refs_helper);
63BTRFS_WORK_HELPER_PROTO(scrub_helper);
64BTRFS_WORK_HELPER_PROTO(scrubwrc_helper);
65BTRFS_WORK_HELPER_PROTO(scrubnc_helper);
66
41struct btrfs_workqueue *btrfs_alloc_workqueue(const char *name, 67struct btrfs_workqueue *btrfs_alloc_workqueue(const char *name,
42 int flags, 68 int flags,
43 int max_active, 69 int max_active,
44 int thresh); 70 int thresh);
45void btrfs_init_work(struct btrfs_work *work, 71void btrfs_init_work(struct btrfs_work *work, btrfs_work_func_t helper,
46 btrfs_func_t func, 72 btrfs_func_t func,
47 btrfs_func_t ordered_func, 73 btrfs_func_t ordered_func,
48 btrfs_func_t ordered_free); 74 btrfs_func_t ordered_free);
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index da775bfdebc9..a2e90f855d7d 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -1395,8 +1395,8 @@ static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root,
1395 return -ENOMEM; 1395 return -ENOMEM;
1396 1396
1397 async_work->delayed_root = delayed_root; 1397 async_work->delayed_root = delayed_root;
1398 btrfs_init_work(&async_work->work, btrfs_async_run_delayed_root, 1398 btrfs_init_work(&async_work->work, btrfs_delayed_meta_helper,
1399 NULL, NULL); 1399 btrfs_async_run_delayed_root, NULL, NULL);
1400 async_work->nr = nr; 1400 async_work->nr = nr;
1401 1401
1402 btrfs_queue_work(root->fs_info->delayed_workers, &async_work->work); 1402 btrfs_queue_work(root->fs_info->delayed_workers, &async_work->work);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index d0ed9e664f7d..a1d36e62179c 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -39,7 +39,6 @@
39#include "btrfs_inode.h" 39#include "btrfs_inode.h"
40#include "volumes.h" 40#include "volumes.h"
41#include "print-tree.h" 41#include "print-tree.h"
42#include "async-thread.h"
43#include "locking.h" 42#include "locking.h"
44#include "tree-log.h" 43#include "tree-log.h"
45#include "free-space-cache.h" 44#include "free-space-cache.h"
@@ -693,35 +692,41 @@ static void end_workqueue_bio(struct bio *bio, int err)
693{ 692{
694 struct end_io_wq *end_io_wq = bio->bi_private; 693 struct end_io_wq *end_io_wq = bio->bi_private;
695 struct btrfs_fs_info *fs_info; 694 struct btrfs_fs_info *fs_info;
695 struct btrfs_workqueue *wq;
696 btrfs_work_func_t func;
696 697
697 fs_info = end_io_wq->info; 698 fs_info = end_io_wq->info;
698 end_io_wq->error = err; 699 end_io_wq->error = err;
699 btrfs_init_work(&end_io_wq->work, end_workqueue_fn, NULL, NULL);
700 700
701 if (bio->bi_rw & REQ_WRITE) { 701 if (bio->bi_rw & REQ_WRITE) {
702 if (end_io_wq->metadata == BTRFS_WQ_ENDIO_METADATA) 702 if (end_io_wq->metadata == BTRFS_WQ_ENDIO_METADATA) {
703 btrfs_queue_work(fs_info->endio_meta_write_workers, 703 wq = fs_info->endio_meta_write_workers;
704 &end_io_wq->work); 704 func = btrfs_endio_meta_write_helper;
705 else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_FREE_SPACE) 705 } else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_FREE_SPACE) {
706 btrfs_queue_work(fs_info->endio_freespace_worker, 706 wq = fs_info->endio_freespace_worker;
707 &end_io_wq->work); 707 func = btrfs_freespace_write_helper;
708 else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56) 708 } else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56) {
709 btrfs_queue_work(fs_info->endio_raid56_workers, 709 wq = fs_info->endio_raid56_workers;
710 &end_io_wq->work); 710 func = btrfs_endio_raid56_helper;
711 else 711 } else {
712 btrfs_queue_work(fs_info->endio_write_workers, 712 wq = fs_info->endio_write_workers;
713 &end_io_wq->work); 713 func = btrfs_endio_write_helper;
714 }
714 } else { 715 } else {
715 if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56) 716 if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56) {
716 btrfs_queue_work(fs_info->endio_raid56_workers, 717 wq = fs_info->endio_raid56_workers;
717 &end_io_wq->work); 718 func = btrfs_endio_raid56_helper;
718 else if (end_io_wq->metadata) 719 } else if (end_io_wq->metadata) {
719 btrfs_queue_work(fs_info->endio_meta_workers, 720 wq = fs_info->endio_meta_workers;
720 &end_io_wq->work); 721 func = btrfs_endio_meta_helper;
721 else 722 } else {
722 btrfs_queue_work(fs_info->endio_workers, 723 wq = fs_info->endio_workers;
723 &end_io_wq->work); 724 func = btrfs_endio_helper;
725 }
724 } 726 }
727
728 btrfs_init_work(&end_io_wq->work, func, end_workqueue_fn, NULL, NULL);
729 btrfs_queue_work(wq, &end_io_wq->work);
725} 730}
726 731
727/* 732/*
@@ -828,7 +833,7 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
828 async->submit_bio_start = submit_bio_start; 833 async->submit_bio_start = submit_bio_start;
829 async->submit_bio_done = submit_bio_done; 834 async->submit_bio_done = submit_bio_done;
830 835
831 btrfs_init_work(&async->work, run_one_async_start, 836 btrfs_init_work(&async->work, btrfs_worker_helper, run_one_async_start,
832 run_one_async_done, run_one_async_free); 837 run_one_async_done, run_one_async_free);
833 838
834 async->bio_flags = bio_flags; 839 async->bio_flags = bio_flags;
@@ -3450,7 +3455,8 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors)
3450 btrfs_set_stack_device_generation(dev_item, 0); 3455 btrfs_set_stack_device_generation(dev_item, 0);
3451 btrfs_set_stack_device_type(dev_item, dev->type); 3456 btrfs_set_stack_device_type(dev_item, dev->type);
3452 btrfs_set_stack_device_id(dev_item, dev->devid); 3457 btrfs_set_stack_device_id(dev_item, dev->devid);
3453 btrfs_set_stack_device_total_bytes(dev_item, dev->total_bytes); 3458 btrfs_set_stack_device_total_bytes(dev_item,
3459 dev->disk_total_bytes);
3454 btrfs_set_stack_device_bytes_used(dev_item, dev->bytes_used); 3460 btrfs_set_stack_device_bytes_used(dev_item, dev->bytes_used);
3455 btrfs_set_stack_device_io_align(dev_item, dev->io_align); 3461 btrfs_set_stack_device_io_align(dev_item, dev->io_align);
3456 btrfs_set_stack_device_io_width(dev_item, dev->io_width); 3462 btrfs_set_stack_device_io_width(dev_item, dev->io_width);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 102ed3143976..3efe1c3877bf 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -552,7 +552,8 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
552 caching_ctl->block_group = cache; 552 caching_ctl->block_group = cache;
553 caching_ctl->progress = cache->key.objectid; 553 caching_ctl->progress = cache->key.objectid;
554 atomic_set(&caching_ctl->count, 1); 554 atomic_set(&caching_ctl->count, 1);
555 btrfs_init_work(&caching_ctl->work, caching_thread, NULL, NULL); 555 btrfs_init_work(&caching_ctl->work, btrfs_cache_helper,
556 caching_thread, NULL, NULL);
556 557
557 spin_lock(&cache->lock); 558 spin_lock(&cache->lock);
558 /* 559 /*
@@ -2749,8 +2750,8 @@ int btrfs_async_run_delayed_refs(struct btrfs_root *root,
2749 async->sync = 0; 2750 async->sync = 0;
2750 init_completion(&async->wait); 2751 init_completion(&async->wait);
2751 2752
2752 btrfs_init_work(&async->work, delayed_ref_async_start, 2753 btrfs_init_work(&async->work, btrfs_extent_refs_helper,
2753 NULL, NULL); 2754 delayed_ref_async_start, NULL, NULL);
2754 2755
2755 btrfs_queue_work(root->fs_info->extent_workers, &async->work); 2756 btrfs_queue_work(root->fs_info->extent_workers, &async->work);
2756 2757
@@ -3586,13 +3587,7 @@ static u64 get_restripe_target(struct btrfs_fs_info *fs_info, u64 flags)
3586 */ 3587 */
3587static u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags) 3588static u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
3588{ 3589{
3589 /* 3590 u64 num_devices = root->fs_info->fs_devices->rw_devices;
3590 * we add in the count of missing devices because we want
3591 * to make sure that any RAID levels on a degraded FS
3592 * continue to be honored.
3593 */
3594 u64 num_devices = root->fs_info->fs_devices->rw_devices +
3595 root->fs_info->fs_devices->missing_devices;
3596 u64 target; 3591 u64 target;
3597 u64 tmp; 3592 u64 tmp;
3598 3593
@@ -8440,13 +8435,7 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
8440 if (stripped) 8435 if (stripped)
8441 return extended_to_chunk(stripped); 8436 return extended_to_chunk(stripped);
8442 8437
8443 /* 8438 num_devices = root->fs_info->fs_devices->rw_devices;
8444 * we add in the count of missing devices because we want
8445 * to make sure that any RAID levels on a degraded FS
8446 * continue to be honored.
8447 */
8448 num_devices = root->fs_info->fs_devices->rw_devices +
8449 root->fs_info->fs_devices->missing_devices;
8450 8439
8451 stripped = BTRFS_BLOCK_GROUP_RAID0 | 8440 stripped = BTRFS_BLOCK_GROUP_RAID0 |
8452 BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6 | 8441 BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6 |
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 3e11aab9f391..af0359dcf337 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2532,6 +2532,7 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
2532 test_bit(BIO_UPTODATE, &bio->bi_flags); 2532 test_bit(BIO_UPTODATE, &bio->bi_flags);
2533 if (err) 2533 if (err)
2534 uptodate = 0; 2534 uptodate = 0;
2535 offset += len;
2535 continue; 2536 continue;
2536 } 2537 }
2537 } 2538 }
@@ -4207,8 +4208,8 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
4207 return -ENOMEM; 4208 return -ENOMEM;
4208 path->leave_spinning = 1; 4209 path->leave_spinning = 1;
4209 4210
4210 start = ALIGN(start, BTRFS_I(inode)->root->sectorsize); 4211 start = round_down(start, BTRFS_I(inode)->root->sectorsize);
4211 len = ALIGN(len, BTRFS_I(inode)->root->sectorsize); 4212 len = round_up(max, BTRFS_I(inode)->root->sectorsize) - start;
4212 4213
4213 /* 4214 /*
4214 * lookup the last file extent. We're not using i_size here 4215 * lookup the last file extent. We're not using i_size here
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index d3afac292d67..36861b7a6757 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1840,7 +1840,15 @@ int btrfs_release_file(struct inode *inode, struct file *filp)
1840{ 1840{
1841 if (filp->private_data) 1841 if (filp->private_data)
1842 btrfs_ioctl_trans_end(filp); 1842 btrfs_ioctl_trans_end(filp);
1843 filemap_flush(inode->i_mapping); 1843 /*
1844 * ordered_data_close is set by settattr when we are about to truncate
1845 * a file from a non-zero size to a zero size. This tries to
1846 * flush down new bytes that may have been written if the
1847 * application were using truncate to replace a file in place.
1848 */
1849 if (test_and_clear_bit(BTRFS_INODE_ORDERED_DATA_CLOSE,
1850 &BTRFS_I(inode)->runtime_flags))
1851 filemap_flush(inode->i_mapping);
1844 return 0; 1852 return 0;
1845} 1853}
1846 1854
@@ -2088,10 +2096,9 @@ static int fill_holes(struct btrfs_trans_handle *trans, struct inode *inode,
2088 goto out; 2096 goto out;
2089 } 2097 }
2090 2098
2091 if (hole_mergeable(inode, leaf, path->slots[0]+1, offset, end)) { 2099 if (hole_mergeable(inode, leaf, path->slots[0], offset, end)) {
2092 u64 num_bytes; 2100 u64 num_bytes;
2093 2101
2094 path->slots[0]++;
2095 key.offset = offset; 2102 key.offset = offset;
2096 btrfs_set_item_key_safe(root, path, &key); 2103 btrfs_set_item_key_safe(root, path, &key);
2097 fi = btrfs_item_ptr(leaf, path->slots[0], 2104 fi = btrfs_item_ptr(leaf, path->slots[0],
@@ -2216,7 +2223,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
2216 goto out_only_mutex; 2223 goto out_only_mutex;
2217 } 2224 }
2218 2225
2219 lockstart = round_up(offset , BTRFS_I(inode)->root->sectorsize); 2226 lockstart = round_up(offset, BTRFS_I(inode)->root->sectorsize);
2220 lockend = round_down(offset + len, 2227 lockend = round_down(offset + len,
2221 BTRFS_I(inode)->root->sectorsize) - 1; 2228 BTRFS_I(inode)->root->sectorsize) - 1;
2222 same_page = ((offset >> PAGE_CACHE_SHIFT) == 2229 same_page = ((offset >> PAGE_CACHE_SHIFT) ==
@@ -2277,7 +2284,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
2277 tail_start + tail_len, 0, 1); 2284 tail_start + tail_len, 0, 1);
2278 if (ret) 2285 if (ret)
2279 goto out_only_mutex; 2286 goto out_only_mutex;
2280 } 2287 }
2281 } 2288 }
2282 } 2289 }
2283 2290
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 03708ef3deef..9c194bd74d6e 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1096,8 +1096,10 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page,
1096 async_cow->end = cur_end; 1096 async_cow->end = cur_end;
1097 INIT_LIST_HEAD(&async_cow->extents); 1097 INIT_LIST_HEAD(&async_cow->extents);
1098 1098
1099 btrfs_init_work(&async_cow->work, async_cow_start, 1099 btrfs_init_work(&async_cow->work,
1100 async_cow_submit, async_cow_free); 1100 btrfs_delalloc_helper,
1101 async_cow_start, async_cow_submit,
1102 async_cow_free);
1101 1103
1102 nr_pages = (cur_end - start + PAGE_CACHE_SIZE) >> 1104 nr_pages = (cur_end - start + PAGE_CACHE_SIZE) >>
1103 PAGE_CACHE_SHIFT; 1105 PAGE_CACHE_SHIFT;
@@ -1881,7 +1883,8 @@ static int btrfs_writepage_start_hook(struct page *page, u64 start, u64 end)
1881 1883
1882 SetPageChecked(page); 1884 SetPageChecked(page);
1883 page_cache_get(page); 1885 page_cache_get(page);
1884 btrfs_init_work(&fixup->work, btrfs_writepage_fixup_worker, NULL, NULL); 1886 btrfs_init_work(&fixup->work, btrfs_fixup_helper,
1887 btrfs_writepage_fixup_worker, NULL, NULL);
1885 fixup->page = page; 1888 fixup->page = page;
1886 btrfs_queue_work(root->fs_info->fixup_workers, &fixup->work); 1889 btrfs_queue_work(root->fs_info->fixup_workers, &fixup->work);
1887 return -EBUSY; 1890 return -EBUSY;
@@ -2822,7 +2825,8 @@ static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end,
2822 struct inode *inode = page->mapping->host; 2825 struct inode *inode = page->mapping->host;
2823 struct btrfs_root *root = BTRFS_I(inode)->root; 2826 struct btrfs_root *root = BTRFS_I(inode)->root;
2824 struct btrfs_ordered_extent *ordered_extent = NULL; 2827 struct btrfs_ordered_extent *ordered_extent = NULL;
2825 struct btrfs_workqueue *workers; 2828 struct btrfs_workqueue *wq;
2829 btrfs_work_func_t func;
2826 2830
2827 trace_btrfs_writepage_end_io_hook(page, start, end, uptodate); 2831 trace_btrfs_writepage_end_io_hook(page, start, end, uptodate);
2828 2832
@@ -2831,13 +2835,17 @@ static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end,
2831 end - start + 1, uptodate)) 2835 end - start + 1, uptodate))
2832 return 0; 2836 return 0;
2833 2837
2834 btrfs_init_work(&ordered_extent->work, finish_ordered_fn, NULL, NULL); 2838 if (btrfs_is_free_space_inode(inode)) {
2839 wq = root->fs_info->endio_freespace_worker;
2840 func = btrfs_freespace_write_helper;
2841 } else {
2842 wq = root->fs_info->endio_write_workers;
2843 func = btrfs_endio_write_helper;
2844 }
2835 2845
2836 if (btrfs_is_free_space_inode(inode)) 2846 btrfs_init_work(&ordered_extent->work, func, finish_ordered_fn, NULL,
2837 workers = root->fs_info->endio_freespace_worker; 2847 NULL);
2838 else 2848 btrfs_queue_work(wq, &ordered_extent->work);
2839 workers = root->fs_info->endio_write_workers;
2840 btrfs_queue_work(workers, &ordered_extent->work);
2841 2849
2842 return 0; 2850 return 0;
2843} 2851}
@@ -4674,6 +4682,11 @@ static void evict_inode_truncate_pages(struct inode *inode)
4674 clear_bit(EXTENT_FLAG_LOGGING, &em->flags); 4682 clear_bit(EXTENT_FLAG_LOGGING, &em->flags);
4675 remove_extent_mapping(map_tree, em); 4683 remove_extent_mapping(map_tree, em);
4676 free_extent_map(em); 4684 free_extent_map(em);
4685 if (need_resched()) {
4686 write_unlock(&map_tree->lock);
4687 cond_resched();
4688 write_lock(&map_tree->lock);
4689 }
4677 } 4690 }
4678 write_unlock(&map_tree->lock); 4691 write_unlock(&map_tree->lock);
4679 4692
@@ -4696,6 +4709,7 @@ static void evict_inode_truncate_pages(struct inode *inode)
4696 &cached_state, GFP_NOFS); 4709 &cached_state, GFP_NOFS);
4697 free_extent_state(state); 4710 free_extent_state(state);
4698 4711
4712 cond_resched();
4699 spin_lock(&io_tree->lock); 4713 spin_lock(&io_tree->lock);
4700 } 4714 }
4701 spin_unlock(&io_tree->lock); 4715 spin_unlock(&io_tree->lock);
@@ -5181,6 +5195,42 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
5181 iput(inode); 5195 iput(inode);
5182 inode = ERR_PTR(ret); 5196 inode = ERR_PTR(ret);
5183 } 5197 }
5198 /*
5199 * If orphan cleanup did remove any orphans, it means the tree
5200 * was modified and therefore the commit root is not the same as
5201 * the current root anymore. This is a problem, because send
5202 * uses the commit root and therefore can see inode items that
5203 * don't exist in the current root anymore, and for example make
5204 * calls to btrfs_iget, which will do tree lookups based on the
5205 * current root and not on the commit root. Those lookups will
5206 * fail, returning a -ESTALE error, and making send fail with
5207 * that error. So make sure a send does not see any orphans we
5208 * have just removed, and that it will see the same inodes
5209 * regardless of whether a transaction commit happened before
5210 * it started (meaning that the commit root will be the same as
5211 * the current root) or not.
5212 */
5213 if (sub_root->node != sub_root->commit_root) {
5214 u64 sub_flags = btrfs_root_flags(&sub_root->root_item);
5215
5216 if (sub_flags & BTRFS_ROOT_SUBVOL_RDONLY) {
5217 struct extent_buffer *eb;
5218
5219 /*
5220 * Assert we can't have races between dentry
5221 * lookup called through the snapshot creation
5222 * ioctl and the VFS.
5223 */
5224 ASSERT(mutex_is_locked(&dir->i_mutex));
5225
5226 down_write(&root->fs_info->commit_root_sem);
5227 eb = sub_root->commit_root;
5228 sub_root->commit_root =
5229 btrfs_root_node(sub_root);
5230 up_write(&root->fs_info->commit_root_sem);
5231 free_extent_buffer(eb);
5232 }
5233 }
5184 } 5234 }
5185 5235
5186 return inode; 5236 return inode;
@@ -5606,6 +5656,13 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
5606 } 5656 }
5607 5657
5608 /* 5658 /*
5659 * O_TMPFILE, set link count to 0, so that after this point,
5660 * we fill in an inode item with the correct link count.
5661 */
5662 if (!name)
5663 set_nlink(inode, 0);
5664
5665 /*
5609 * we have to initialize this early, so we can reclaim the inode 5666 * we have to initialize this early, so we can reclaim the inode
5610 * number if we fail afterwards in this function. 5667 * number if we fail afterwards in this function.
5611 */ 5668 */
@@ -6097,14 +6154,14 @@ out_fail:
6097static int merge_extent_mapping(struct extent_map_tree *em_tree, 6154static int merge_extent_mapping(struct extent_map_tree *em_tree,
6098 struct extent_map *existing, 6155 struct extent_map *existing,
6099 struct extent_map *em, 6156 struct extent_map *em,
6100 u64 map_start, u64 map_len) 6157 u64 map_start)
6101{ 6158{
6102 u64 start_diff; 6159 u64 start_diff;
6103 6160
6104 BUG_ON(map_start < em->start || map_start >= extent_map_end(em)); 6161 BUG_ON(map_start < em->start || map_start >= extent_map_end(em));
6105 start_diff = map_start - em->start; 6162 start_diff = map_start - em->start;
6106 em->start = map_start; 6163 em->start = map_start;
6107 em->len = map_len; 6164 em->len = existing->start - em->start;
6108 if (em->block_start < EXTENT_MAP_LAST_BYTE && 6165 if (em->block_start < EXTENT_MAP_LAST_BYTE &&
6109 !test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) { 6166 !test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
6110 em->block_start += start_diff; 6167 em->block_start += start_diff;
@@ -6275,6 +6332,8 @@ next:
6275 goto not_found; 6332 goto not_found;
6276 if (start + len <= found_key.offset) 6333 if (start + len <= found_key.offset)
6277 goto not_found; 6334 goto not_found;
6335 if (start > found_key.offset)
6336 goto next;
6278 em->start = start; 6337 em->start = start;
6279 em->orig_start = start; 6338 em->orig_start = start;
6280 em->len = found_key.offset - start; 6339 em->len = found_key.offset - start;
@@ -6390,8 +6449,7 @@ insert:
6390 em->len); 6449 em->len);
6391 if (existing) { 6450 if (existing) {
6392 err = merge_extent_mapping(em_tree, existing, 6451 err = merge_extent_mapping(em_tree, existing,
6393 em, start, 6452 em, start);
6394 root->sectorsize);
6395 free_extent_map(existing); 6453 free_extent_map(existing);
6396 if (err) { 6454 if (err) {
6397 free_extent_map(em); 6455 free_extent_map(em);
@@ -7158,7 +7216,8 @@ again:
7158 if (!ret) 7216 if (!ret)
7159 goto out_test; 7217 goto out_test;
7160 7218
7161 btrfs_init_work(&ordered->work, finish_ordered_fn, NULL, NULL); 7219 btrfs_init_work(&ordered->work, btrfs_endio_write_helper,
7220 finish_ordered_fn, NULL, NULL);
7162 btrfs_queue_work(root->fs_info->endio_write_workers, 7221 btrfs_queue_work(root->fs_info->endio_write_workers,
7163 &ordered->work); 7222 &ordered->work);
7164out_test: 7223out_test:
@@ -7306,10 +7365,8 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
7306 map_length = orig_bio->bi_iter.bi_size; 7365 map_length = orig_bio->bi_iter.bi_size;
7307 ret = btrfs_map_block(root->fs_info, rw, start_sector << 9, 7366 ret = btrfs_map_block(root->fs_info, rw, start_sector << 9,
7308 &map_length, NULL, 0); 7367 &map_length, NULL, 0);
7309 if (ret) { 7368 if (ret)
7310 bio_put(orig_bio);
7311 return -EIO; 7369 return -EIO;
7312 }
7313 7370
7314 if (map_length >= orig_bio->bi_iter.bi_size) { 7371 if (map_length >= orig_bio->bi_iter.bi_size) {
7315 bio = orig_bio; 7372 bio = orig_bio;
@@ -7326,6 +7383,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
7326 bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, start_sector, GFP_NOFS); 7383 bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, start_sector, GFP_NOFS);
7327 if (!bio) 7384 if (!bio)
7328 return -ENOMEM; 7385 return -ENOMEM;
7386
7329 bio->bi_private = dip; 7387 bio->bi_private = dip;
7330 bio->bi_end_io = btrfs_end_dio_bio; 7388 bio->bi_end_io = btrfs_end_dio_bio;
7331 atomic_inc(&dip->pending_bios); 7389 atomic_inc(&dip->pending_bios);
@@ -7534,7 +7592,8 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
7534 count = iov_iter_count(iter); 7592 count = iov_iter_count(iter);
7535 if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, 7593 if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
7536 &BTRFS_I(inode)->runtime_flags)) 7594 &BTRFS_I(inode)->runtime_flags))
7537 filemap_fdatawrite_range(inode->i_mapping, offset, count); 7595 filemap_fdatawrite_range(inode->i_mapping, offset,
7596 offset + count - 1);
7538 7597
7539 if (rw & WRITE) { 7598 if (rw & WRITE) {
7540 /* 7599 /*
@@ -8495,7 +8554,9 @@ struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode,
8495 work->inode = inode; 8554 work->inode = inode;
8496 work->wait = wait; 8555 work->wait = wait;
8497 work->delay_iput = delay_iput; 8556 work->delay_iput = delay_iput;
8498 btrfs_init_work(&work->work, btrfs_run_delalloc_work, NULL, NULL); 8557 WARN_ON_ONCE(!inode);
8558 btrfs_init_work(&work->work, btrfs_flush_delalloc_helper,
8559 btrfs_run_delalloc_work, NULL, NULL);
8499 8560
8500 return work; 8561 return work;
8501} 8562}
@@ -8979,6 +9040,14 @@ static int btrfs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
8979 if (ret) 9040 if (ret)
8980 goto out; 9041 goto out;
8981 9042
9043 /*
9044 * We set number of links to 0 in btrfs_new_inode(), and here we set
9045 * it to 1 because d_tmpfile() will issue a warning if the count is 0,
9046 * through:
9047 *
9048 * d_tmpfile() -> inode_dec_link_count() -> drop_nlink()
9049 */
9050 set_nlink(inode, 1);
8982 d_tmpfile(dentry, inode); 9051 d_tmpfile(dentry, inode);
8983 mark_inode_dirty(inode); 9052 mark_inode_dirty(inode);
8984 9053
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 47aceb494d1d..fce6fd0e3f50 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -711,39 +711,6 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
711 if (ret) 711 if (ret)
712 goto fail; 712 goto fail;
713 713
714 ret = btrfs_orphan_cleanup(pending_snapshot->snap);
715 if (ret)
716 goto fail;
717
718 /*
719 * If orphan cleanup did remove any orphans, it means the tree was
720 * modified and therefore the commit root is not the same as the
721 * current root anymore. This is a problem, because send uses the
722 * commit root and therefore can see inode items that don't exist
723 * in the current root anymore, and for example make calls to
724 * btrfs_iget, which will do tree lookups based on the current root
725 * and not on the commit root. Those lookups will fail, returning a
726 * -ESTALE error, and making send fail with that error. So make sure
727 * a send does not see any orphans we have just removed, and that it
728 * will see the same inodes regardless of whether a transaction
729 * commit happened before it started (meaning that the commit root
730 * will be the same as the current root) or not.
731 */
732 if (readonly && pending_snapshot->snap->node !=
733 pending_snapshot->snap->commit_root) {
734 trans = btrfs_join_transaction(pending_snapshot->snap);
735 if (IS_ERR(trans) && PTR_ERR(trans) != -ENOENT) {
736 ret = PTR_ERR(trans);
737 goto fail;
738 }
739 if (!IS_ERR(trans)) {
740 ret = btrfs_commit_transaction(trans,
741 pending_snapshot->snap);
742 if (ret)
743 goto fail;
744 }
745 }
746
747 inode = btrfs_lookup_dentry(dentry->d_parent->d_inode, dentry); 714 inode = btrfs_lookup_dentry(dentry->d_parent->d_inode, dentry);
748 if (IS_ERR(inode)) { 715 if (IS_ERR(inode)) {
749 ret = PTR_ERR(inode); 716 ret = PTR_ERR(inode);
@@ -3527,7 +3494,8 @@ process_slot:
3527 btrfs_mark_buffer_dirty(leaf); 3494 btrfs_mark_buffer_dirty(leaf);
3528 btrfs_release_path(path); 3495 btrfs_release_path(path);
3529 3496
3530 last_dest_end = new_key.offset + datal; 3497 last_dest_end = ALIGN(new_key.offset + datal,
3498 root->sectorsize);
3531 ret = clone_finish_inode_update(trans, inode, 3499 ret = clone_finish_inode_update(trans, inode,
3532 last_dest_end, 3500 last_dest_end,
3533 destoff, olen); 3501 destoff, olen);
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 963895c1f801..ac734ec4cc20 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -615,6 +615,7 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr)
615 spin_unlock(&root->ordered_extent_lock); 615 spin_unlock(&root->ordered_extent_lock);
616 616
617 btrfs_init_work(&ordered->flush_work, 617 btrfs_init_work(&ordered->flush_work,
618 btrfs_flush_delalloc_helper,
618 btrfs_run_ordered_extent_work, NULL, NULL); 619 btrfs_run_ordered_extent_work, NULL, NULL);
619 list_add_tail(&ordered->work_list, &works); 620 list_add_tail(&ordered->work_list, &works);
620 btrfs_queue_work(root->fs_info->flush_workers, 621 btrfs_queue_work(root->fs_info->flush_workers,
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index b497498484be..ded5c601d916 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1973,7 +1973,7 @@ static int qgroup_subtree_accounting(struct btrfs_trans_handle *trans,
1973 elem.seq, &roots); 1973 elem.seq, &roots);
1974 btrfs_put_tree_mod_seq(fs_info, &elem); 1974 btrfs_put_tree_mod_seq(fs_info, &elem);
1975 if (ret < 0) 1975 if (ret < 0)
1976 return ret; 1976 goto out;
1977 1977
1978 if (roots->nnodes != 1) 1978 if (roots->nnodes != 1)
1979 goto out; 1979 goto out;
@@ -2720,6 +2720,7 @@ qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
2720 memset(&fs_info->qgroup_rescan_work, 0, 2720 memset(&fs_info->qgroup_rescan_work, 0,
2721 sizeof(fs_info->qgroup_rescan_work)); 2721 sizeof(fs_info->qgroup_rescan_work));
2722 btrfs_init_work(&fs_info->qgroup_rescan_work, 2722 btrfs_init_work(&fs_info->qgroup_rescan_work,
2723 btrfs_qgroup_rescan_helper,
2723 btrfs_qgroup_rescan_worker, NULL, NULL); 2724 btrfs_qgroup_rescan_worker, NULL, NULL);
2724 2725
2725 if (ret) { 2726 if (ret) {
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index 4a88f073fdd7..0a6b6e4bcbb9 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -1416,7 +1416,8 @@ cleanup:
1416 1416
1417static void async_rmw_stripe(struct btrfs_raid_bio *rbio) 1417static void async_rmw_stripe(struct btrfs_raid_bio *rbio)
1418{ 1418{
1419 btrfs_init_work(&rbio->work, rmw_work, NULL, NULL); 1419 btrfs_init_work(&rbio->work, btrfs_rmw_helper,
1420 rmw_work, NULL, NULL);
1420 1421
1421 btrfs_queue_work(rbio->fs_info->rmw_workers, 1422 btrfs_queue_work(rbio->fs_info->rmw_workers,
1422 &rbio->work); 1423 &rbio->work);
@@ -1424,7 +1425,8 @@ static void async_rmw_stripe(struct btrfs_raid_bio *rbio)
1424 1425
1425static void async_read_rebuild(struct btrfs_raid_bio *rbio) 1426static void async_read_rebuild(struct btrfs_raid_bio *rbio)
1426{ 1427{
1427 btrfs_init_work(&rbio->work, read_rebuild_work, NULL, NULL); 1428 btrfs_init_work(&rbio->work, btrfs_rmw_helper,
1429 read_rebuild_work, NULL, NULL);
1428 1430
1429 btrfs_queue_work(rbio->fs_info->rmw_workers, 1431 btrfs_queue_work(rbio->fs_info->rmw_workers,
1430 &rbio->work); 1432 &rbio->work);
@@ -1665,7 +1667,8 @@ static void btrfs_raid_unplug(struct blk_plug_cb *cb, bool from_schedule)
1665 plug = container_of(cb, struct btrfs_plug_cb, cb); 1667 plug = container_of(cb, struct btrfs_plug_cb, cb);
1666 1668
1667 if (from_schedule) { 1669 if (from_schedule) {
1668 btrfs_init_work(&plug->work, unplug_work, NULL, NULL); 1670 btrfs_init_work(&plug->work, btrfs_rmw_helper,
1671 unplug_work, NULL, NULL);
1669 btrfs_queue_work(plug->info->rmw_workers, 1672 btrfs_queue_work(plug->info->rmw_workers,
1670 &plug->work); 1673 &plug->work);
1671 return; 1674 return;
diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c
index 09230cf3a244..20408c6b665a 100644
--- a/fs/btrfs/reada.c
+++ b/fs/btrfs/reada.c
@@ -798,7 +798,8 @@ static void reada_start_machine(struct btrfs_fs_info *fs_info)
798 /* FIXME we cannot handle this properly right now */ 798 /* FIXME we cannot handle this properly right now */
799 BUG(); 799 BUG();
800 } 800 }
801 btrfs_init_work(&rmw->work, reada_start_machine_worker, NULL, NULL); 801 btrfs_init_work(&rmw->work, btrfs_readahead_helper,
802 reada_start_machine_worker, NULL, NULL);
802 rmw->fs_info = fs_info; 803 rmw->fs_info = fs_info;
803 804
804 btrfs_queue_work(fs_info->readahead_workers, &rmw->work); 805 btrfs_queue_work(fs_info->readahead_workers, &rmw->work);
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index b6d198f5181e..f4a41f37be22 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -428,8 +428,8 @@ struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace)
428 sbio->index = i; 428 sbio->index = i;
429 sbio->sctx = sctx; 429 sbio->sctx = sctx;
430 sbio->page_count = 0; 430 sbio->page_count = 0;
431 btrfs_init_work(&sbio->work, scrub_bio_end_io_worker, 431 btrfs_init_work(&sbio->work, btrfs_scrub_helper,
432 NULL, NULL); 432 scrub_bio_end_io_worker, NULL, NULL);
433 433
434 if (i != SCRUB_BIOS_PER_SCTX - 1) 434 if (i != SCRUB_BIOS_PER_SCTX - 1)
435 sctx->bios[i]->next_free = i + 1; 435 sctx->bios[i]->next_free = i + 1;
@@ -999,8 +999,8 @@ nodatasum_case:
999 fixup_nodatasum->root = fs_info->extent_root; 999 fixup_nodatasum->root = fs_info->extent_root;
1000 fixup_nodatasum->mirror_num = failed_mirror_index + 1; 1000 fixup_nodatasum->mirror_num = failed_mirror_index + 1;
1001 scrub_pending_trans_workers_inc(sctx); 1001 scrub_pending_trans_workers_inc(sctx);
1002 btrfs_init_work(&fixup_nodatasum->work, scrub_fixup_nodatasum, 1002 btrfs_init_work(&fixup_nodatasum->work, btrfs_scrub_helper,
1003 NULL, NULL); 1003 scrub_fixup_nodatasum, NULL, NULL);
1004 btrfs_queue_work(fs_info->scrub_workers, 1004 btrfs_queue_work(fs_info->scrub_workers,
1005 &fixup_nodatasum->work); 1005 &fixup_nodatasum->work);
1006 goto out; 1006 goto out;
@@ -1616,7 +1616,8 @@ static void scrub_wr_bio_end_io(struct bio *bio, int err)
1616 sbio->err = err; 1616 sbio->err = err;
1617 sbio->bio = bio; 1617 sbio->bio = bio;
1618 1618
1619 btrfs_init_work(&sbio->work, scrub_wr_bio_end_io_worker, NULL, NULL); 1619 btrfs_init_work(&sbio->work, btrfs_scrubwrc_helper,
1620 scrub_wr_bio_end_io_worker, NULL, NULL);
1620 btrfs_queue_work(fs_info->scrub_wr_completion_workers, &sbio->work); 1621 btrfs_queue_work(fs_info->scrub_wr_completion_workers, &sbio->work);
1621} 1622}
1622 1623
@@ -2904,6 +2905,7 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
2904 struct scrub_ctx *sctx; 2905 struct scrub_ctx *sctx;
2905 int ret; 2906 int ret;
2906 struct btrfs_device *dev; 2907 struct btrfs_device *dev;
2908 struct rcu_string *name;
2907 2909
2908 if (btrfs_fs_closing(fs_info)) 2910 if (btrfs_fs_closing(fs_info))
2909 return -EINVAL; 2911 return -EINVAL;
@@ -2965,6 +2967,16 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
2965 return -ENODEV; 2967 return -ENODEV;
2966 } 2968 }
2967 2969
2970 if (!is_dev_replace && !readonly && !dev->writeable) {
2971 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
2972 rcu_read_lock();
2973 name = rcu_dereference(dev->name);
2974 btrfs_err(fs_info, "scrub: device %s is not writable",
2975 name->str);
2976 rcu_read_unlock();
2977 return -EROFS;
2978 }
2979
2968 mutex_lock(&fs_info->scrub_lock); 2980 mutex_lock(&fs_info->scrub_lock);
2969 if (!dev->in_fs_metadata || dev->is_tgtdev_for_dev_replace) { 2981 if (!dev->in_fs_metadata || dev->is_tgtdev_for_dev_replace) {
2970 mutex_unlock(&fs_info->scrub_lock); 2982 mutex_unlock(&fs_info->scrub_lock);
@@ -3203,7 +3215,8 @@ static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
3203 nocow_ctx->len = len; 3215 nocow_ctx->len = len;
3204 nocow_ctx->mirror_num = mirror_num; 3216 nocow_ctx->mirror_num = mirror_num;
3205 nocow_ctx->physical_for_dev_replace = physical_for_dev_replace; 3217 nocow_ctx->physical_for_dev_replace = physical_for_dev_replace;
3206 btrfs_init_work(&nocow_ctx->work, copy_nocow_pages_worker, NULL, NULL); 3218 btrfs_init_work(&nocow_ctx->work, btrfs_scrubnc_helper,
3219 copy_nocow_pages_worker, NULL, NULL);
3207 INIT_LIST_HEAD(&nocow_ctx->inodes); 3220 INIT_LIST_HEAD(&nocow_ctx->inodes);
3208 btrfs_queue_work(fs_info->scrub_nocow_workers, 3221 btrfs_queue_work(fs_info->scrub_nocow_workers,
3209 &nocow_ctx->work); 3222 &nocow_ctx->work);
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index 78699364f537..12e53556e214 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -614,7 +614,7 @@ int btrfs_kobj_rm_device(struct btrfs_fs_info *fs_info,
614 if (!fs_info->device_dir_kobj) 614 if (!fs_info->device_dir_kobj)
615 return -EINVAL; 615 return -EINVAL;
616 616
617 if (one_device) { 617 if (one_device && one_device->bdev) {
618 disk = one_device->bdev->bd_part; 618 disk = one_device->bdev->bd_part;
619 disk_kobj = &part_to_dev(disk)->kobj; 619 disk_kobj = &part_to_dev(disk)->kobj;
620 620
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 9e1f2cd5e67a..7e0e6e3029dd 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -3298,7 +3298,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
3298 struct list_head ordered_sums; 3298 struct list_head ordered_sums;
3299 int skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; 3299 int skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
3300 bool has_extents = false; 3300 bool has_extents = false;
3301 bool need_find_last_extent = (*last_extent == 0); 3301 bool need_find_last_extent = true;
3302 bool done = false; 3302 bool done = false;
3303 3303
3304 INIT_LIST_HEAD(&ordered_sums); 3304 INIT_LIST_HEAD(&ordered_sums);
@@ -3352,8 +3352,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
3352 */ 3352 */
3353 if (ins_keys[i].type == BTRFS_EXTENT_DATA_KEY) { 3353 if (ins_keys[i].type == BTRFS_EXTENT_DATA_KEY) {
3354 has_extents = true; 3354 has_extents = true;
3355 if (need_find_last_extent && 3355 if (first_key.objectid == (u64)-1)
3356 first_key.objectid == (u64)-1)
3357 first_key = ins_keys[i]; 3356 first_key = ins_keys[i];
3358 } else { 3357 } else {
3359 need_find_last_extent = false; 3358 need_find_last_extent = false;
@@ -3427,6 +3426,16 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
3427 if (!has_extents) 3426 if (!has_extents)
3428 return ret; 3427 return ret;
3429 3428
3429 if (need_find_last_extent && *last_extent == first_key.offset) {
3430 /*
3431 * We don't have any leafs between our current one and the one
3432 * we processed before that can have file extent items for our
3433 * inode (and have a generation number smaller than our current
3434 * transaction id).
3435 */
3436 need_find_last_extent = false;
3437 }
3438
3430 /* 3439 /*
3431 * Because we use btrfs_search_forward we could skip leaves that were 3440 * Because we use btrfs_search_forward we could skip leaves that were
3432 * not modified and then assume *last_extent is valid when it really 3441 * not modified and then assume *last_extent is valid when it really
@@ -3537,7 +3546,7 @@ fill_holes:
3537 0, 0); 3546 0, 0);
3538 if (ret) 3547 if (ret)
3539 break; 3548 break;
3540 *last_extent = offset + len; 3549 *last_extent = extent_end;
3541 } 3550 }
3542 /* 3551 /*
3543 * Need to let the callers know we dropped the path so they should 3552 * Need to let the callers know we dropped the path so they should
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 6cb82f62cb7c..340a92d08e84 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -508,6 +508,44 @@ static noinline int device_list_add(const char *path,
508 ret = 1; 508 ret = 1;
509 device->fs_devices = fs_devices; 509 device->fs_devices = fs_devices;
510 } else if (!device->name || strcmp(device->name->str, path)) { 510 } else if (!device->name || strcmp(device->name->str, path)) {
511 /*
512 * When FS is already mounted.
513 * 1. If you are here and if the device->name is NULL that
514 * means this device was missing at time of FS mount.
515 * 2. If you are here and if the device->name is different
516 * from 'path' that means either
517 * a. The same device disappeared and reappeared with
518 * different name. or
519 * b. The missing-disk-which-was-replaced, has
520 * reappeared now.
521 *
522 * We must allow 1 and 2a above. But 2b would be a spurious
523 * and unintentional.
524 *
525 * Further in case of 1 and 2a above, the disk at 'path'
526 * would have missed some transaction when it was away and
527 * in case of 2a the stale bdev has to be updated as well.
528 * 2b must not be allowed at all time.
529 */
530
531 /*
532 * As of now don't allow update to btrfs_fs_device through
533 * the btrfs dev scan cli, after FS has been mounted.
534 */
535 if (fs_devices->opened) {
536 return -EBUSY;
537 } else {
538 /*
539 * That is if the FS is _not_ mounted and if you
540 * are here, that means there is more than one
541 * disk with same uuid and devid.We keep the one
542 * with larger generation number or the last-in if
543 * generation are equal.
544 */
545 if (found_transid < device->generation)
546 return -EEXIST;
547 }
548
511 name = rcu_string_strdup(path, GFP_NOFS); 549 name = rcu_string_strdup(path, GFP_NOFS);
512 if (!name) 550 if (!name)
513 return -ENOMEM; 551 return -ENOMEM;
@@ -519,6 +557,15 @@ static noinline int device_list_add(const char *path,
519 } 557 }
520 } 558 }
521 559
560 /*
561 * Unmount does not free the btrfs_device struct but would zero
562 * generation along with most of the other members. So just update
563 * it back. We need it to pick the disk with largest generation
564 * (as above).
565 */
566 if (!fs_devices->opened)
567 device->generation = found_transid;
568
522 if (found_transid > fs_devices->latest_trans) { 569 if (found_transid > fs_devices->latest_trans) {
523 fs_devices->latest_devid = devid; 570 fs_devices->latest_devid = devid;
524 fs_devices->latest_trans = found_transid; 571 fs_devices->latest_trans = found_transid;
@@ -1436,7 +1483,7 @@ static int btrfs_add_device(struct btrfs_trans_handle *trans,
1436 btrfs_set_device_io_align(leaf, dev_item, device->io_align); 1483 btrfs_set_device_io_align(leaf, dev_item, device->io_align);
1437 btrfs_set_device_io_width(leaf, dev_item, device->io_width); 1484 btrfs_set_device_io_width(leaf, dev_item, device->io_width);
1438 btrfs_set_device_sector_size(leaf, dev_item, device->sector_size); 1485 btrfs_set_device_sector_size(leaf, dev_item, device->sector_size);
1439 btrfs_set_device_total_bytes(leaf, dev_item, device->total_bytes); 1486 btrfs_set_device_total_bytes(leaf, dev_item, device->disk_total_bytes);
1440 btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used); 1487 btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used);
1441 btrfs_set_device_group(leaf, dev_item, 0); 1488 btrfs_set_device_group(leaf, dev_item, 0);
1442 btrfs_set_device_seek_speed(leaf, dev_item, 0); 1489 btrfs_set_device_seek_speed(leaf, dev_item, 0);
@@ -1671,7 +1718,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
1671 device->fs_devices->total_devices--; 1718 device->fs_devices->total_devices--;
1672 1719
1673 if (device->missing) 1720 if (device->missing)
1674 root->fs_info->fs_devices->missing_devices--; 1721 device->fs_devices->missing_devices--;
1675 1722
1676 next_device = list_entry(root->fs_info->fs_devices->devices.next, 1723 next_device = list_entry(root->fs_info->fs_devices->devices.next,
1677 struct btrfs_device, dev_list); 1724 struct btrfs_device, dev_list);
@@ -1801,8 +1848,12 @@ void btrfs_rm_dev_replace_srcdev(struct btrfs_fs_info *fs_info,
1801 if (srcdev->bdev) { 1848 if (srcdev->bdev) {
1802 fs_info->fs_devices->open_devices--; 1849 fs_info->fs_devices->open_devices--;
1803 1850
1804 /* zero out the old super */ 1851 /*
1805 btrfs_scratch_superblock(srcdev); 1852 * zero out the old super if it is not writable
1853 * (e.g. seed device)
1854 */
1855 if (srcdev->writeable)
1856 btrfs_scratch_superblock(srcdev);
1806 } 1857 }
1807 1858
1808 call_rcu(&srcdev->rcu, free_device); 1859 call_rcu(&srcdev->rcu, free_device);
@@ -1941,6 +1992,9 @@ static int btrfs_prepare_sprout(struct btrfs_root *root)
1941 fs_devices->seeding = 0; 1992 fs_devices->seeding = 0;
1942 fs_devices->num_devices = 0; 1993 fs_devices->num_devices = 0;
1943 fs_devices->open_devices = 0; 1994 fs_devices->open_devices = 0;
1995 fs_devices->missing_devices = 0;
1996 fs_devices->num_can_discard = 0;
1997 fs_devices->rotating = 0;
1944 fs_devices->seed = seed_devices; 1998 fs_devices->seed = seed_devices;
1945 1999
1946 generate_random_uuid(fs_devices->fsid); 2000 generate_random_uuid(fs_devices->fsid);
@@ -5800,7 +5854,8 @@ struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
5800 else 5854 else
5801 generate_random_uuid(dev->uuid); 5855 generate_random_uuid(dev->uuid);
5802 5856
5803 btrfs_init_work(&dev->work, pending_bios_fn, NULL, NULL); 5857 btrfs_init_work(&dev->work, btrfs_submit_helper,
5858 pending_bios_fn, NULL, NULL);
5804 5859
5805 return dev; 5860 return dev;
5806} 5861}
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 5b19760b1de5..b0c225cdb52c 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1825,7 +1825,7 @@ ext4_group_first_block_no(struct super_block *sb, ext4_group_t group_no)
1825/* 1825/*
1826 * Special error return code only used by dx_probe() and its callers. 1826 * Special error return code only used by dx_probe() and its callers.
1827 */ 1827 */
1828#define ERR_BAD_DX_DIR -75000 1828#define ERR_BAD_DX_DIR (-(MAX_ERRNO - 1))
1829 1829
1830/* 1830/*
1831 * Timeout and state flag for lazy initialization inode thread. 1831 * Timeout and state flag for lazy initialization inode thread.
@@ -2454,6 +2454,22 @@ static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize)
2454 up_write(&EXT4_I(inode)->i_data_sem); 2454 up_write(&EXT4_I(inode)->i_data_sem);
2455} 2455}
2456 2456
2457/* Update i_size, i_disksize. Requires i_mutex to avoid races with truncate */
2458static inline int ext4_update_inode_size(struct inode *inode, loff_t newsize)
2459{
2460 int changed = 0;
2461
2462 if (newsize > inode->i_size) {
2463 i_size_write(inode, newsize);
2464 changed = 1;
2465 }
2466 if (newsize > EXT4_I(inode)->i_disksize) {
2467 ext4_update_i_disksize(inode, newsize);
2468 changed |= 2;
2469 }
2470 return changed;
2471}
2472
2457struct ext4_group_info { 2473struct ext4_group_info {
2458 unsigned long bb_state; 2474 unsigned long bb_state;
2459 struct rb_root bb_free_root; 2475 struct rb_root bb_free_root;
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 76c2df382b7d..74292a71b384 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -4665,7 +4665,8 @@ retry:
4665} 4665}
4666 4666
4667static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset, 4667static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
4668 ext4_lblk_t len, int flags, int mode) 4668 ext4_lblk_t len, loff_t new_size,
4669 int flags, int mode)
4669{ 4670{
4670 struct inode *inode = file_inode(file); 4671 struct inode *inode = file_inode(file);
4671 handle_t *handle; 4672 handle_t *handle;
@@ -4674,8 +4675,10 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
4674 int retries = 0; 4675 int retries = 0;
4675 struct ext4_map_blocks map; 4676 struct ext4_map_blocks map;
4676 unsigned int credits; 4677 unsigned int credits;
4678 loff_t epos;
4677 4679
4678 map.m_lblk = offset; 4680 map.m_lblk = offset;
4681 map.m_len = len;
4679 /* 4682 /*
4680 * Don't normalize the request if it can fit in one extent so 4683 * Don't normalize the request if it can fit in one extent so
4681 * that it doesn't get unnecessarily split into multiple 4684 * that it doesn't get unnecessarily split into multiple
@@ -4690,9 +4693,7 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
4690 credits = ext4_chunk_trans_blocks(inode, len); 4693 credits = ext4_chunk_trans_blocks(inode, len);
4691 4694
4692retry: 4695retry:
4693 while (ret >= 0 && ret < len) { 4696 while (ret >= 0 && len) {
4694 map.m_lblk = map.m_lblk + ret;
4695 map.m_len = len = len - ret;
4696 handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, 4697 handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS,
4697 credits); 4698 credits);
4698 if (IS_ERR(handle)) { 4699 if (IS_ERR(handle)) {
@@ -4709,6 +4710,21 @@ retry:
4709 ret2 = ext4_journal_stop(handle); 4710 ret2 = ext4_journal_stop(handle);
4710 break; 4711 break;
4711 } 4712 }
4713 map.m_lblk += ret;
4714 map.m_len = len = len - ret;
4715 epos = (loff_t)map.m_lblk << inode->i_blkbits;
4716 inode->i_ctime = ext4_current_time(inode);
4717 if (new_size) {
4718 if (epos > new_size)
4719 epos = new_size;
4720 if (ext4_update_inode_size(inode, epos) & 0x1)
4721 inode->i_mtime = inode->i_ctime;
4722 } else {
4723 if (epos > inode->i_size)
4724 ext4_set_inode_flag(inode,
4725 EXT4_INODE_EOFBLOCKS);
4726 }
4727 ext4_mark_inode_dirty(handle, inode);
4712 ret2 = ext4_journal_stop(handle); 4728 ret2 = ext4_journal_stop(handle);
4713 if (ret2) 4729 if (ret2)
4714 break; 4730 break;
@@ -4731,7 +4747,8 @@ static long ext4_zero_range(struct file *file, loff_t offset,
4731 loff_t new_size = 0; 4747 loff_t new_size = 0;
4732 int ret = 0; 4748 int ret = 0;
4733 int flags; 4749 int flags;
4734 int partial; 4750 int credits;
4751 int partial_begin, partial_end;
4735 loff_t start, end; 4752 loff_t start, end;
4736 ext4_lblk_t lblk; 4753 ext4_lblk_t lblk;
4737 struct address_space *mapping = inode->i_mapping; 4754 struct address_space *mapping = inode->i_mapping;
@@ -4771,7 +4788,8 @@ static long ext4_zero_range(struct file *file, loff_t offset,
4771 4788
4772 if (start < offset || end > offset + len) 4789 if (start < offset || end > offset + len)
4773 return -EINVAL; 4790 return -EINVAL;
4774 partial = (offset + len) & ((1 << blkbits) - 1); 4791 partial_begin = offset & ((1 << blkbits) - 1);
4792 partial_end = (offset + len) & ((1 << blkbits) - 1);
4775 4793
4776 lblk = start >> blkbits; 4794 lblk = start >> blkbits;
4777 max_blocks = (end >> blkbits); 4795 max_blocks = (end >> blkbits);
@@ -4805,7 +4823,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
4805 * If we have a partial block after EOF we have to allocate 4823 * If we have a partial block after EOF we have to allocate
4806 * the entire block. 4824 * the entire block.
4807 */ 4825 */
4808 if (partial) 4826 if (partial_end)
4809 max_blocks += 1; 4827 max_blocks += 1;
4810 } 4828 }
4811 4829
@@ -4813,6 +4831,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
4813 4831
4814 /* Now release the pages and zero block aligned part of pages*/ 4832 /* Now release the pages and zero block aligned part of pages*/
4815 truncate_pagecache_range(inode, start, end - 1); 4833 truncate_pagecache_range(inode, start, end - 1);
4834 inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
4816 4835
4817 /* Wait all existing dio workers, newcomers will block on i_mutex */ 4836 /* Wait all existing dio workers, newcomers will block on i_mutex */
4818 ext4_inode_block_unlocked_dio(inode); 4837 ext4_inode_block_unlocked_dio(inode);
@@ -4825,13 +4844,22 @@ static long ext4_zero_range(struct file *file, loff_t offset,
4825 if (ret) 4844 if (ret)
4826 goto out_dio; 4845 goto out_dio;
4827 4846
4828 ret = ext4_alloc_file_blocks(file, lblk, max_blocks, flags, 4847 ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
4829 mode); 4848 flags, mode);
4830 if (ret) 4849 if (ret)
4831 goto out_dio; 4850 goto out_dio;
4832 } 4851 }
4852 if (!partial_begin && !partial_end)
4853 goto out_dio;
4833 4854
4834 handle = ext4_journal_start(inode, EXT4_HT_MISC, 4); 4855 /*
4856 * In worst case we have to writeout two nonadjacent unwritten
4857 * blocks and update the inode
4858 */
4859 credits = (2 * ext4_ext_index_trans_blocks(inode, 2)) + 1;
4860 if (ext4_should_journal_data(inode))
4861 credits += 2;
4862 handle = ext4_journal_start(inode, EXT4_HT_MISC, credits);
4835 if (IS_ERR(handle)) { 4863 if (IS_ERR(handle)) {
4836 ret = PTR_ERR(handle); 4864 ret = PTR_ERR(handle);
4837 ext4_std_error(inode->i_sb, ret); 4865 ext4_std_error(inode->i_sb, ret);
@@ -4839,12 +4867,8 @@ static long ext4_zero_range(struct file *file, loff_t offset,
4839 } 4867 }
4840 4868
4841 inode->i_mtime = inode->i_ctime = ext4_current_time(inode); 4869 inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
4842
4843 if (new_size) { 4870 if (new_size) {
4844 if (new_size > i_size_read(inode)) 4871 ext4_update_inode_size(inode, new_size);
4845 i_size_write(inode, new_size);
4846 if (new_size > EXT4_I(inode)->i_disksize)
4847 ext4_update_i_disksize(inode, new_size);
4848 } else { 4872 } else {
4849 /* 4873 /*
4850 * Mark that we allocate beyond EOF so the subsequent truncate 4874 * Mark that we allocate beyond EOF so the subsequent truncate
@@ -4853,7 +4877,6 @@ static long ext4_zero_range(struct file *file, loff_t offset,
4853 if ((offset + len) > i_size_read(inode)) 4877 if ((offset + len) > i_size_read(inode))
4854 ext4_set_inode_flag(inode, EXT4_INODE_EOFBLOCKS); 4878 ext4_set_inode_flag(inode, EXT4_INODE_EOFBLOCKS);
4855 } 4879 }
4856
4857 ext4_mark_inode_dirty(handle, inode); 4880 ext4_mark_inode_dirty(handle, inode);
4858 4881
4859 /* Zero out partial block at the edges of the range */ 4882 /* Zero out partial block at the edges of the range */
@@ -4880,13 +4903,11 @@ out_mutex:
4880long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) 4903long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
4881{ 4904{
4882 struct inode *inode = file_inode(file); 4905 struct inode *inode = file_inode(file);
4883 handle_t *handle;
4884 loff_t new_size = 0; 4906 loff_t new_size = 0;
4885 unsigned int max_blocks; 4907 unsigned int max_blocks;
4886 int ret = 0; 4908 int ret = 0;
4887 int flags; 4909 int flags;
4888 ext4_lblk_t lblk; 4910 ext4_lblk_t lblk;
4889 struct timespec tv;
4890 unsigned int blkbits = inode->i_blkbits; 4911 unsigned int blkbits = inode->i_blkbits;
4891 4912
4892 /* Return error if mode is not supported */ 4913 /* Return error if mode is not supported */
@@ -4937,36 +4958,15 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
4937 goto out; 4958 goto out;
4938 } 4959 }
4939 4960
4940 ret = ext4_alloc_file_blocks(file, lblk, max_blocks, flags, mode); 4961 ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
4962 flags, mode);
4941 if (ret) 4963 if (ret)
4942 goto out; 4964 goto out;
4943 4965
4944 handle = ext4_journal_start(inode, EXT4_HT_INODE, 2); 4966 if (file->f_flags & O_SYNC && EXT4_SB(inode->i_sb)->s_journal) {
4945 if (IS_ERR(handle)) 4967 ret = jbd2_complete_transaction(EXT4_SB(inode->i_sb)->s_journal,
4946 goto out; 4968 EXT4_I(inode)->i_sync_tid);
4947
4948 tv = inode->i_ctime = ext4_current_time(inode);
4949
4950 if (new_size) {
4951 if (new_size > i_size_read(inode)) {
4952 i_size_write(inode, new_size);
4953 inode->i_mtime = tv;
4954 }
4955 if (new_size > EXT4_I(inode)->i_disksize)
4956 ext4_update_i_disksize(inode, new_size);
4957 } else {
4958 /*
4959 * Mark that we allocate beyond EOF so the subsequent truncate
4960 * can proceed even if the new size is the same as i_size.
4961 */
4962 if ((offset + len) > i_size_read(inode))
4963 ext4_set_inode_flag(inode, EXT4_INODE_EOFBLOCKS);
4964 } 4969 }
4965 ext4_mark_inode_dirty(handle, inode);
4966 if (file->f_flags & O_SYNC)
4967 ext4_handle_sync(handle);
4968
4969 ext4_journal_stop(handle);
4970out: 4970out:
4971 mutex_unlock(&inode->i_mutex); 4971 mutex_unlock(&inode->i_mutex);
4972 trace_ext4_fallocate_exit(inode, offset, max_blocks, ret); 4972 trace_ext4_fallocate_exit(inode, offset, max_blocks, ret);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 367a60c07cf0..3aa26e9117c4 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1055,27 +1055,11 @@ static int ext4_write_end(struct file *file,
1055 } else 1055 } else
1056 copied = block_write_end(file, mapping, pos, 1056 copied = block_write_end(file, mapping, pos,
1057 len, copied, page, fsdata); 1057 len, copied, page, fsdata);
1058
1059 /* 1058 /*
1060 * No need to use i_size_read() here, the i_size 1059 * it's important to update i_size while still holding page lock:
1061 * cannot change under us because we hole i_mutex.
1062 *
1063 * But it's important to update i_size while still holding page lock:
1064 * page writeout could otherwise come in and zero beyond i_size. 1060 * page writeout could otherwise come in and zero beyond i_size.
1065 */ 1061 */
1066 if (pos + copied > inode->i_size) { 1062 i_size_changed = ext4_update_inode_size(inode, pos + copied);
1067 i_size_write(inode, pos + copied);
1068 i_size_changed = 1;
1069 }
1070
1071 if (pos + copied > EXT4_I(inode)->i_disksize) {
1072 /* We need to mark inode dirty even if
1073 * new_i_size is less that inode->i_size
1074 * but greater than i_disksize. (hint delalloc)
1075 */
1076 ext4_update_i_disksize(inode, (pos + copied));
1077 i_size_changed = 1;
1078 }
1079 unlock_page(page); 1063 unlock_page(page);
1080 page_cache_release(page); 1064 page_cache_release(page);
1081 1065
@@ -1123,7 +1107,7 @@ static int ext4_journalled_write_end(struct file *file,
1123 int ret = 0, ret2; 1107 int ret = 0, ret2;
1124 int partial = 0; 1108 int partial = 0;
1125 unsigned from, to; 1109 unsigned from, to;
1126 loff_t new_i_size; 1110 int size_changed = 0;
1127 1111
1128 trace_ext4_journalled_write_end(inode, pos, len, copied); 1112 trace_ext4_journalled_write_end(inode, pos, len, copied);
1129 from = pos & (PAGE_CACHE_SIZE - 1); 1113 from = pos & (PAGE_CACHE_SIZE - 1);
@@ -1146,20 +1130,18 @@ static int ext4_journalled_write_end(struct file *file,
1146 if (!partial) 1130 if (!partial)
1147 SetPageUptodate(page); 1131 SetPageUptodate(page);
1148 } 1132 }
1149 new_i_size = pos + copied; 1133 size_changed = ext4_update_inode_size(inode, pos + copied);
1150 if (new_i_size > inode->i_size)
1151 i_size_write(inode, pos+copied);
1152 ext4_set_inode_state(inode, EXT4_STATE_JDATA); 1134 ext4_set_inode_state(inode, EXT4_STATE_JDATA);
1153 EXT4_I(inode)->i_datasync_tid = handle->h_transaction->t_tid; 1135 EXT4_I(inode)->i_datasync_tid = handle->h_transaction->t_tid;
1154 if (new_i_size > EXT4_I(inode)->i_disksize) { 1136 unlock_page(page);
1155 ext4_update_i_disksize(inode, new_i_size); 1137 page_cache_release(page);
1138
1139 if (size_changed) {
1156 ret2 = ext4_mark_inode_dirty(handle, inode); 1140 ret2 = ext4_mark_inode_dirty(handle, inode);
1157 if (!ret) 1141 if (!ret)
1158 ret = ret2; 1142 ret = ret2;
1159 } 1143 }
1160 1144
1161 unlock_page(page);
1162 page_cache_release(page);
1163 if (pos + len > inode->i_size && ext4_can_truncate(inode)) 1145 if (pos + len > inode->i_size && ext4_can_truncate(inode))
1164 /* if we have allocated more blocks and copied 1146 /* if we have allocated more blocks and copied
1165 * less. We will have blocks allocated outside 1147 * less. We will have blocks allocated outside
@@ -2095,6 +2077,7 @@ static int mpage_map_and_submit_extent(handle_t *handle,
2095 struct ext4_map_blocks *map = &mpd->map; 2077 struct ext4_map_blocks *map = &mpd->map;
2096 int err; 2078 int err;
2097 loff_t disksize; 2079 loff_t disksize;
2080 int progress = 0;
2098 2081
2099 mpd->io_submit.io_end->offset = 2082 mpd->io_submit.io_end->offset =
2100 ((loff_t)map->m_lblk) << inode->i_blkbits; 2083 ((loff_t)map->m_lblk) << inode->i_blkbits;
@@ -2111,8 +2094,11 @@ static int mpage_map_and_submit_extent(handle_t *handle,
2111 * is non-zero, a commit should free up blocks. 2094 * is non-zero, a commit should free up blocks.
2112 */ 2095 */
2113 if ((err == -ENOMEM) || 2096 if ((err == -ENOMEM) ||
2114 (err == -ENOSPC && ext4_count_free_clusters(sb))) 2097 (err == -ENOSPC && ext4_count_free_clusters(sb))) {
2098 if (progress)
2099 goto update_disksize;
2115 return err; 2100 return err;
2101 }
2116 ext4_msg(sb, KERN_CRIT, 2102 ext4_msg(sb, KERN_CRIT,
2117 "Delayed block allocation failed for " 2103 "Delayed block allocation failed for "
2118 "inode %lu at logical offset %llu with" 2104 "inode %lu at logical offset %llu with"
@@ -2129,15 +2115,17 @@ static int mpage_map_and_submit_extent(handle_t *handle,
2129 *give_up_on_write = true; 2115 *give_up_on_write = true;
2130 return err; 2116 return err;
2131 } 2117 }
2118 progress = 1;
2132 /* 2119 /*
2133 * Update buffer state, submit mapped pages, and get us new 2120 * Update buffer state, submit mapped pages, and get us new
2134 * extent to map 2121 * extent to map
2135 */ 2122 */
2136 err = mpage_map_and_submit_buffers(mpd); 2123 err = mpage_map_and_submit_buffers(mpd);
2137 if (err < 0) 2124 if (err < 0)
2138 return err; 2125 goto update_disksize;
2139 } while (map->m_len); 2126 } while (map->m_len);
2140 2127
2128update_disksize:
2141 /* 2129 /*
2142 * Update on-disk size after IO is submitted. Races with 2130 * Update on-disk size after IO is submitted. Races with
2143 * truncate are avoided by checking i_size under i_data_sem. 2131 * truncate are avoided by checking i_size under i_data_sem.
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 956027711faf..8b0f9ef517d6 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -1412,6 +1412,8 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
1412 int last = first + count - 1; 1412 int last = first + count - 1;
1413 struct super_block *sb = e4b->bd_sb; 1413 struct super_block *sb = e4b->bd_sb;
1414 1414
1415 if (WARN_ON(count == 0))
1416 return;
1415 BUG_ON(last >= (sb->s_blocksize << 3)); 1417 BUG_ON(last >= (sb->s_blocksize << 3));
1416 assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group)); 1418 assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group));
1417 /* Don't bother if the block group is corrupt. */ 1419 /* Don't bother if the block group is corrupt. */
@@ -3221,6 +3223,8 @@ static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac)
3221 int err; 3223 int err;
3222 3224
3223 if (pa == NULL) { 3225 if (pa == NULL) {
3226 if (ac->ac_f_ex.fe_len == 0)
3227 return;
3224 err = ext4_mb_load_buddy(ac->ac_sb, ac->ac_f_ex.fe_group, &e4b); 3228 err = ext4_mb_load_buddy(ac->ac_sb, ac->ac_f_ex.fe_group, &e4b);
3225 if (err) { 3229 if (err) {
3226 /* 3230 /*
@@ -3235,6 +3239,7 @@ static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac)
3235 mb_free_blocks(ac->ac_inode, &e4b, ac->ac_f_ex.fe_start, 3239 mb_free_blocks(ac->ac_inode, &e4b, ac->ac_f_ex.fe_start,
3236 ac->ac_f_ex.fe_len); 3240 ac->ac_f_ex.fe_len);
3237 ext4_unlock_group(ac->ac_sb, ac->ac_f_ex.fe_group); 3241 ext4_unlock_group(ac->ac_sb, ac->ac_f_ex.fe_group);
3242 ext4_mb_unload_buddy(&e4b);
3238 return; 3243 return;
3239 } 3244 }
3240 if (pa->pa_type == MB_INODE_PA) 3245 if (pa->pa_type == MB_INODE_PA)
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index b147a67baa0d..90a3cdca3f88 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1227,7 +1227,7 @@ static struct buffer_head * ext4_find_entry (struct inode *dir,
1227 buffer */ 1227 buffer */
1228 int num = 0; 1228 int num = 0;
1229 ext4_lblk_t nblocks; 1229 ext4_lblk_t nblocks;
1230 int i, err; 1230 int i, err = 0;
1231 int namelen; 1231 int namelen;
1232 1232
1233 *res_dir = NULL; 1233 *res_dir = NULL;
@@ -1264,7 +1264,11 @@ static struct buffer_head * ext4_find_entry (struct inode *dir,
1264 * return. Otherwise, fall back to doing a search the 1264 * return. Otherwise, fall back to doing a search the
1265 * old fashioned way. 1265 * old fashioned way.
1266 */ 1266 */
1267 if (bh || (err != ERR_BAD_DX_DIR)) 1267 if (err == -ENOENT)
1268 return NULL;
1269 if (err && err != ERR_BAD_DX_DIR)
1270 return ERR_PTR(err);
1271 if (bh)
1268 return bh; 1272 return bh;
1269 dxtrace(printk(KERN_DEBUG "ext4_find_entry: dx failed, " 1273 dxtrace(printk(KERN_DEBUG "ext4_find_entry: dx failed, "
1270 "falling back\n")); 1274 "falling back\n"));
@@ -1295,6 +1299,11 @@ restart:
1295 } 1299 }
1296 num++; 1300 num++;
1297 bh = ext4_getblk(NULL, dir, b++, 0, &err); 1301 bh = ext4_getblk(NULL, dir, b++, 0, &err);
1302 if (unlikely(err)) {
1303 if (ra_max == 0)
1304 return ERR_PTR(err);
1305 break;
1306 }
1298 bh_use[ra_max] = bh; 1307 bh_use[ra_max] = bh;
1299 if (bh) 1308 if (bh)
1300 ll_rw_block(READ | REQ_META | REQ_PRIO, 1309 ll_rw_block(READ | REQ_META | REQ_PRIO,
@@ -1417,6 +1426,8 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi
1417 return ERR_PTR(-ENAMETOOLONG); 1426 return ERR_PTR(-ENAMETOOLONG);
1418 1427
1419 bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL); 1428 bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL);
1429 if (IS_ERR(bh))
1430 return (struct dentry *) bh;
1420 inode = NULL; 1431 inode = NULL;
1421 if (bh) { 1432 if (bh) {
1422 __u32 ino = le32_to_cpu(de->inode); 1433 __u32 ino = le32_to_cpu(de->inode);
@@ -1450,6 +1461,8 @@ struct dentry *ext4_get_parent(struct dentry *child)
1450 struct buffer_head *bh; 1461 struct buffer_head *bh;
1451 1462
1452 bh = ext4_find_entry(child->d_inode, &dotdot, &de, NULL); 1463 bh = ext4_find_entry(child->d_inode, &dotdot, &de, NULL);
1464 if (IS_ERR(bh))
1465 return (struct dentry *) bh;
1453 if (!bh) 1466 if (!bh)
1454 return ERR_PTR(-ENOENT); 1467 return ERR_PTR(-ENOENT);
1455 ino = le32_to_cpu(de->inode); 1468 ino = le32_to_cpu(de->inode);
@@ -2727,6 +2740,8 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry)
2727 2740
2728 retval = -ENOENT; 2741 retval = -ENOENT;
2729 bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL); 2742 bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL);
2743 if (IS_ERR(bh))
2744 return PTR_ERR(bh);
2730 if (!bh) 2745 if (!bh)
2731 goto end_rmdir; 2746 goto end_rmdir;
2732 2747
@@ -2794,6 +2809,8 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
2794 2809
2795 retval = -ENOENT; 2810 retval = -ENOENT;
2796 bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL); 2811 bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL);
2812 if (IS_ERR(bh))
2813 return PTR_ERR(bh);
2797 if (!bh) 2814 if (!bh)
2798 goto end_unlink; 2815 goto end_unlink;
2799 2816
@@ -3121,6 +3138,8 @@ static int ext4_find_delete_entry(handle_t *handle, struct inode *dir,
3121 struct ext4_dir_entry_2 *de; 3138 struct ext4_dir_entry_2 *de;
3122 3139
3123 bh = ext4_find_entry(dir, d_name, &de, NULL); 3140 bh = ext4_find_entry(dir, d_name, &de, NULL);
3141 if (IS_ERR(bh))
3142 return PTR_ERR(bh);
3124 if (bh) { 3143 if (bh) {
3125 retval = ext4_delete_entry(handle, dir, de, bh); 3144 retval = ext4_delete_entry(handle, dir, de, bh);
3126 brelse(bh); 3145 brelse(bh);
@@ -3128,7 +3147,8 @@ static int ext4_find_delete_entry(handle_t *handle, struct inode *dir,
3128 return retval; 3147 return retval;
3129} 3148}
3130 3149
3131static void ext4_rename_delete(handle_t *handle, struct ext4_renament *ent) 3150static void ext4_rename_delete(handle_t *handle, struct ext4_renament *ent,
3151 int force_reread)
3132{ 3152{
3133 int retval; 3153 int retval;
3134 /* 3154 /*
@@ -3140,7 +3160,8 @@ static void ext4_rename_delete(handle_t *handle, struct ext4_renament *ent)
3140 if (le32_to_cpu(ent->de->inode) != ent->inode->i_ino || 3160 if (le32_to_cpu(ent->de->inode) != ent->inode->i_ino ||
3141 ent->de->name_len != ent->dentry->d_name.len || 3161 ent->de->name_len != ent->dentry->d_name.len ||
3142 strncmp(ent->de->name, ent->dentry->d_name.name, 3162 strncmp(ent->de->name, ent->dentry->d_name.name,
3143 ent->de->name_len)) { 3163 ent->de->name_len) ||
3164 force_reread) {
3144 retval = ext4_find_delete_entry(handle, ent->dir, 3165 retval = ext4_find_delete_entry(handle, ent->dir,
3145 &ent->dentry->d_name); 3166 &ent->dentry->d_name);
3146 } else { 3167 } else {
@@ -3191,6 +3212,7 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
3191 .dentry = new_dentry, 3212 .dentry = new_dentry,
3192 .inode = new_dentry->d_inode, 3213 .inode = new_dentry->d_inode,
3193 }; 3214 };
3215 int force_reread;
3194 int retval; 3216 int retval;
3195 3217
3196 dquot_initialize(old.dir); 3218 dquot_initialize(old.dir);
@@ -3202,6 +3224,8 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
3202 dquot_initialize(new.inode); 3224 dquot_initialize(new.inode);
3203 3225
3204 old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de, NULL); 3226 old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de, NULL);
3227 if (IS_ERR(old.bh))
3228 return PTR_ERR(old.bh);
3205 /* 3229 /*
3206 * Check for inode number is _not_ due to possible IO errors. 3230 * Check for inode number is _not_ due to possible IO errors.
3207 * We might rmdir the source, keep it as pwd of some process 3231 * We might rmdir the source, keep it as pwd of some process
@@ -3214,6 +3238,10 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
3214 3238
3215 new.bh = ext4_find_entry(new.dir, &new.dentry->d_name, 3239 new.bh = ext4_find_entry(new.dir, &new.dentry->d_name,
3216 &new.de, &new.inlined); 3240 &new.de, &new.inlined);
3241 if (IS_ERR(new.bh)) {
3242 retval = PTR_ERR(new.bh);
3243 goto end_rename;
3244 }
3217 if (new.bh) { 3245 if (new.bh) {
3218 if (!new.inode) { 3246 if (!new.inode) {
3219 brelse(new.bh); 3247 brelse(new.bh);
@@ -3246,6 +3274,15 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
3246 if (retval) 3274 if (retval)
3247 goto end_rename; 3275 goto end_rename;
3248 } 3276 }
3277 /*
3278 * If we're renaming a file within an inline_data dir and adding or
3279 * setting the new dirent causes a conversion from inline_data to
3280 * extents/blockmap, we need to force the dirent delete code to
3281 * re-read the directory, or else we end up trying to delete a dirent
3282 * from what is now the extent tree root (or a block map).
3283 */
3284 force_reread = (new.dir->i_ino == old.dir->i_ino &&
3285 ext4_test_inode_flag(new.dir, EXT4_INODE_INLINE_DATA));
3249 if (!new.bh) { 3286 if (!new.bh) {
3250 retval = ext4_add_entry(handle, new.dentry, old.inode); 3287 retval = ext4_add_entry(handle, new.dentry, old.inode);
3251 if (retval) 3288 if (retval)
@@ -3256,6 +3293,9 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
3256 if (retval) 3293 if (retval)
3257 goto end_rename; 3294 goto end_rename;
3258 } 3295 }
3296 if (force_reread)
3297 force_reread = !ext4_test_inode_flag(new.dir,
3298 EXT4_INODE_INLINE_DATA);
3259 3299
3260 /* 3300 /*
3261 * Like most other Unix systems, set the ctime for inodes on a 3301 * Like most other Unix systems, set the ctime for inodes on a
@@ -3267,7 +3307,7 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
3267 /* 3307 /*
3268 * ok, that's it 3308 * ok, that's it
3269 */ 3309 */
3270 ext4_rename_delete(handle, &old); 3310 ext4_rename_delete(handle, &old, force_reread);
3271 3311
3272 if (new.inode) { 3312 if (new.inode) {
3273 ext4_dec_count(handle, new.inode); 3313 ext4_dec_count(handle, new.inode);
@@ -3330,6 +3370,8 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
3330 3370
3331 old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, 3371 old.bh = ext4_find_entry(old.dir, &old.dentry->d_name,
3332 &old.de, &old.inlined); 3372 &old.de, &old.inlined);
3373 if (IS_ERR(old.bh))
3374 return PTR_ERR(old.bh);
3333 /* 3375 /*
3334 * Check for inode number is _not_ due to possible IO errors. 3376 * Check for inode number is _not_ due to possible IO errors.
3335 * We might rmdir the source, keep it as pwd of some process 3377 * We might rmdir the source, keep it as pwd of some process
@@ -3342,6 +3384,10 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
3342 3384
3343 new.bh = ext4_find_entry(new.dir, &new.dentry->d_name, 3385 new.bh = ext4_find_entry(new.dir, &new.dentry->d_name,
3344 &new.de, &new.inlined); 3386 &new.de, &new.inlined);
3387 if (IS_ERR(new.bh)) {
3388 retval = PTR_ERR(new.bh);
3389 goto end_rename;
3390 }
3345 3391
3346 /* RENAME_EXCHANGE case: old *and* new must both exist */ 3392 /* RENAME_EXCHANGE case: old *and* new must both exist */
3347 if (!new.bh || le32_to_cpu(new.de->inode) != new.inode->i_ino) 3393 if (!new.bh || le32_to_cpu(new.de->inode) != new.inode->i_ino)
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 32b43ad154b9..0b28b36e7915 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -3181,9 +3181,9 @@ static int set_journal_csum_feature_set(struct super_block *sb)
3181 3181
3182 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, 3182 if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
3183 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) { 3183 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) {
3184 /* journal checksum v2 */ 3184 /* journal checksum v3 */
3185 compat = 0; 3185 compat = 0;
3186 incompat = JBD2_FEATURE_INCOMPAT_CSUM_V2; 3186 incompat = JBD2_FEATURE_INCOMPAT_CSUM_V3;
3187 } else { 3187 } else {
3188 /* journal checksum v1 */ 3188 /* journal checksum v1 */
3189 compat = JBD2_FEATURE_COMPAT_CHECKSUM; 3189 compat = JBD2_FEATURE_COMPAT_CHECKSUM;
@@ -3205,6 +3205,7 @@ static int set_journal_csum_feature_set(struct super_block *sb)
3205 jbd2_journal_clear_features(sbi->s_journal, 3205 jbd2_journal_clear_features(sbi->s_journal,
3206 JBD2_FEATURE_COMPAT_CHECKSUM, 0, 3206 JBD2_FEATURE_COMPAT_CHECKSUM, 0,
3207 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT | 3207 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT |
3208 JBD2_FEATURE_INCOMPAT_CSUM_V3 |
3208 JBD2_FEATURE_INCOMPAT_CSUM_V2); 3209 JBD2_FEATURE_INCOMPAT_CSUM_V2);
3209 } 3210 }
3210 3211
diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig
index 214fe1054fce..736a348509f7 100644
--- a/fs/f2fs/Kconfig
+++ b/fs/f2fs/Kconfig
@@ -23,7 +23,7 @@ config F2FS_STAT_FS
23 mounted as f2fs. Each file shows the whole f2fs information. 23 mounted as f2fs. Each file shows the whole f2fs information.
24 24
25 /sys/kernel/debug/f2fs/status includes: 25 /sys/kernel/debug/f2fs/status includes:
26 - major file system information managed by f2fs currently 26 - major filesystem information managed by f2fs currently
27 - average SIT information about whole segments 27 - average SIT information about whole segments
28 - current memory footprint consumed by f2fs. 28 - current memory footprint consumed by f2fs.
29 29
@@ -68,6 +68,6 @@ config F2FS_CHECK_FS
68 bool "F2FS consistency checking feature" 68 bool "F2FS consistency checking feature"
69 depends on F2FS_FS 69 depends on F2FS_FS
70 help 70 help
71 Enables BUG_ONs which check the file system consistency in runtime. 71 Enables BUG_ONs which check the filesystem consistency in runtime.
72 72
73 If you want to improve the performance, say N. 73 If you want to improve the performance, say N.
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 6aeed5bada52..ec3b7a5381fa 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -160,14 +160,11 @@ static int f2fs_write_meta_page(struct page *page,
160 goto redirty_out; 160 goto redirty_out;
161 if (wbc->for_reclaim) 161 if (wbc->for_reclaim)
162 goto redirty_out; 162 goto redirty_out;
163 163 if (unlikely(f2fs_cp_error(sbi)))
164 /* Should not write any meta pages, if any IO error was occurred */ 164 goto redirty_out;
165 if (unlikely(is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ERROR_FLAG)))
166 goto no_write;
167 165
168 f2fs_wait_on_page_writeback(page, META); 166 f2fs_wait_on_page_writeback(page, META);
169 write_meta_page(sbi, page); 167 write_meta_page(sbi, page);
170no_write:
171 dec_page_count(sbi, F2FS_DIRTY_META); 168 dec_page_count(sbi, F2FS_DIRTY_META);
172 unlock_page(page); 169 unlock_page(page);
173 return 0; 170 return 0;
@@ -348,7 +345,7 @@ bool exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode)
348 return e ? true : false; 345 return e ? true : false;
349} 346}
350 347
351static void release_dirty_inode(struct f2fs_sb_info *sbi) 348void release_dirty_inode(struct f2fs_sb_info *sbi)
352{ 349{
353 struct ino_entry *e, *tmp; 350 struct ino_entry *e, *tmp;
354 int i; 351 int i;
@@ -446,8 +443,8 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
446 struct f2fs_orphan_block *orphan_blk = NULL; 443 struct f2fs_orphan_block *orphan_blk = NULL;
447 unsigned int nentries = 0; 444 unsigned int nentries = 0;
448 unsigned short index; 445 unsigned short index;
449 unsigned short orphan_blocks = (unsigned short)((sbi->n_orphans + 446 unsigned short orphan_blocks =
450 (F2FS_ORPHANS_PER_BLOCK - 1)) / F2FS_ORPHANS_PER_BLOCK); 447 (unsigned short)GET_ORPHAN_BLOCKS(sbi->n_orphans);
451 struct page *page = NULL; 448 struct page *page = NULL;
452 struct ino_entry *orphan = NULL; 449 struct ino_entry *orphan = NULL;
453 450
@@ -737,7 +734,7 @@ retry:
737/* 734/*
738 * Freeze all the FS-operations for checkpoint. 735 * Freeze all the FS-operations for checkpoint.
739 */ 736 */
740static void block_operations(struct f2fs_sb_info *sbi) 737static int block_operations(struct f2fs_sb_info *sbi)
741{ 738{
742 struct writeback_control wbc = { 739 struct writeback_control wbc = {
743 .sync_mode = WB_SYNC_ALL, 740 .sync_mode = WB_SYNC_ALL,
@@ -745,6 +742,7 @@ static void block_operations(struct f2fs_sb_info *sbi)
745 .for_reclaim = 0, 742 .for_reclaim = 0,
746 }; 743 };
747 struct blk_plug plug; 744 struct blk_plug plug;
745 int err = 0;
748 746
749 blk_start_plug(&plug); 747 blk_start_plug(&plug);
750 748
@@ -754,11 +752,15 @@ retry_flush_dents:
754 if (get_pages(sbi, F2FS_DIRTY_DENTS)) { 752 if (get_pages(sbi, F2FS_DIRTY_DENTS)) {
755 f2fs_unlock_all(sbi); 753 f2fs_unlock_all(sbi);
756 sync_dirty_dir_inodes(sbi); 754 sync_dirty_dir_inodes(sbi);
755 if (unlikely(f2fs_cp_error(sbi))) {
756 err = -EIO;
757 goto out;
758 }
757 goto retry_flush_dents; 759 goto retry_flush_dents;
758 } 760 }
759 761
760 /* 762 /*
761 * POR: we should ensure that there is no dirty node pages 763 * POR: we should ensure that there are no dirty node pages
762 * until finishing nat/sit flush. 764 * until finishing nat/sit flush.
763 */ 765 */
764retry_flush_nodes: 766retry_flush_nodes:
@@ -767,9 +769,16 @@ retry_flush_nodes:
767 if (get_pages(sbi, F2FS_DIRTY_NODES)) { 769 if (get_pages(sbi, F2FS_DIRTY_NODES)) {
768 up_write(&sbi->node_write); 770 up_write(&sbi->node_write);
769 sync_node_pages(sbi, 0, &wbc); 771 sync_node_pages(sbi, 0, &wbc);
772 if (unlikely(f2fs_cp_error(sbi))) {
773 f2fs_unlock_all(sbi);
774 err = -EIO;
775 goto out;
776 }
770 goto retry_flush_nodes; 777 goto retry_flush_nodes;
771 } 778 }
779out:
772 blk_finish_plug(&plug); 780 blk_finish_plug(&plug);
781 return err;
773} 782}
774 783
775static void unblock_operations(struct f2fs_sb_info *sbi) 784static void unblock_operations(struct f2fs_sb_info *sbi)
@@ -813,8 +822,11 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
813 discard_next_dnode(sbi, NEXT_FREE_BLKADDR(sbi, curseg)); 822 discard_next_dnode(sbi, NEXT_FREE_BLKADDR(sbi, curseg));
814 823
815 /* Flush all the NAT/SIT pages */ 824 /* Flush all the NAT/SIT pages */
816 while (get_pages(sbi, F2FS_DIRTY_META)) 825 while (get_pages(sbi, F2FS_DIRTY_META)) {
817 sync_meta_pages(sbi, META, LONG_MAX); 826 sync_meta_pages(sbi, META, LONG_MAX);
827 if (unlikely(f2fs_cp_error(sbi)))
828 return;
829 }
818 830
819 next_free_nid(sbi, &last_nid); 831 next_free_nid(sbi, &last_nid);
820 832
@@ -825,7 +837,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
825 ckpt->elapsed_time = cpu_to_le64(get_mtime(sbi)); 837 ckpt->elapsed_time = cpu_to_le64(get_mtime(sbi));
826 ckpt->valid_block_count = cpu_to_le64(valid_user_blocks(sbi)); 838 ckpt->valid_block_count = cpu_to_le64(valid_user_blocks(sbi));
827 ckpt->free_segment_count = cpu_to_le32(free_segments(sbi)); 839 ckpt->free_segment_count = cpu_to_le32(free_segments(sbi));
828 for (i = 0; i < 3; i++) { 840 for (i = 0; i < NR_CURSEG_NODE_TYPE; i++) {
829 ckpt->cur_node_segno[i] = 841 ckpt->cur_node_segno[i] =
830 cpu_to_le32(curseg_segno(sbi, i + CURSEG_HOT_NODE)); 842 cpu_to_le32(curseg_segno(sbi, i + CURSEG_HOT_NODE));
831 ckpt->cur_node_blkoff[i] = 843 ckpt->cur_node_blkoff[i] =
@@ -833,7 +845,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
833 ckpt->alloc_type[i + CURSEG_HOT_NODE] = 845 ckpt->alloc_type[i + CURSEG_HOT_NODE] =
834 curseg_alloc_type(sbi, i + CURSEG_HOT_NODE); 846 curseg_alloc_type(sbi, i + CURSEG_HOT_NODE);
835 } 847 }
836 for (i = 0; i < 3; i++) { 848 for (i = 0; i < NR_CURSEG_DATA_TYPE; i++) {
837 ckpt->cur_data_segno[i] = 849 ckpt->cur_data_segno[i] =
838 cpu_to_le32(curseg_segno(sbi, i + CURSEG_HOT_DATA)); 850 cpu_to_le32(curseg_segno(sbi, i + CURSEG_HOT_DATA));
839 ckpt->cur_data_blkoff[i] = 851 ckpt->cur_data_blkoff[i] =
@@ -848,24 +860,23 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
848 860
849 /* 2 cp + n data seg summary + orphan inode blocks */ 861 /* 2 cp + n data seg summary + orphan inode blocks */
850 data_sum_blocks = npages_for_summary_flush(sbi); 862 data_sum_blocks = npages_for_summary_flush(sbi);
851 if (data_sum_blocks < 3) 863 if (data_sum_blocks < NR_CURSEG_DATA_TYPE)
852 set_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG); 864 set_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);
853 else 865 else
854 clear_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG); 866 clear_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);
855 867
856 orphan_blocks = (sbi->n_orphans + F2FS_ORPHANS_PER_BLOCK - 1) 868 orphan_blocks = GET_ORPHAN_BLOCKS(sbi->n_orphans);
857 / F2FS_ORPHANS_PER_BLOCK;
858 ckpt->cp_pack_start_sum = cpu_to_le32(1 + cp_payload_blks + 869 ckpt->cp_pack_start_sum = cpu_to_le32(1 + cp_payload_blks +
859 orphan_blocks); 870 orphan_blocks);
860 871
861 if (is_umount) { 872 if (is_umount) {
862 set_ckpt_flags(ckpt, CP_UMOUNT_FLAG); 873 set_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
863 ckpt->cp_pack_total_block_count = cpu_to_le32(2 + 874 ckpt->cp_pack_total_block_count = cpu_to_le32(F2FS_CP_PACKS+
864 cp_payload_blks + data_sum_blocks + 875 cp_payload_blks + data_sum_blocks +
865 orphan_blocks + NR_CURSEG_NODE_TYPE); 876 orphan_blocks + NR_CURSEG_NODE_TYPE);
866 } else { 877 } else {
867 clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG); 878 clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
868 ckpt->cp_pack_total_block_count = cpu_to_le32(2 + 879 ckpt->cp_pack_total_block_count = cpu_to_le32(F2FS_CP_PACKS +
869 cp_payload_blks + data_sum_blocks + 880 cp_payload_blks + data_sum_blocks +
870 orphan_blocks); 881 orphan_blocks);
871 } 882 }
@@ -924,6 +935,9 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
924 /* wait for previous submitted node/meta pages writeback */ 935 /* wait for previous submitted node/meta pages writeback */
925 wait_on_all_pages_writeback(sbi); 936 wait_on_all_pages_writeback(sbi);
926 937
938 if (unlikely(f2fs_cp_error(sbi)))
939 return;
940
927 filemap_fdatawait_range(NODE_MAPPING(sbi), 0, LONG_MAX); 941 filemap_fdatawait_range(NODE_MAPPING(sbi), 0, LONG_MAX);
928 filemap_fdatawait_range(META_MAPPING(sbi), 0, LONG_MAX); 942 filemap_fdatawait_range(META_MAPPING(sbi), 0, LONG_MAX);
929 943
@@ -934,15 +948,17 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
934 /* Here, we only have one bio having CP pack */ 948 /* Here, we only have one bio having CP pack */
935 sync_meta_pages(sbi, META_FLUSH, LONG_MAX); 949 sync_meta_pages(sbi, META_FLUSH, LONG_MAX);
936 950
937 if (!is_set_ckpt_flags(ckpt, CP_ERROR_FLAG)) { 951 release_dirty_inode(sbi);
938 clear_prefree_segments(sbi); 952
939 release_dirty_inode(sbi); 953 if (unlikely(f2fs_cp_error(sbi)))
940 F2FS_RESET_SB_DIRT(sbi); 954 return;
941 } 955
956 clear_prefree_segments(sbi);
957 F2FS_RESET_SB_DIRT(sbi);
942} 958}
943 959
944/* 960/*
945 * We guarantee that this checkpoint procedure should not fail. 961 * We guarantee that this checkpoint procedure will not fail.
946 */ 962 */
947void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) 963void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
948{ 964{
@@ -952,7 +968,13 @@ void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
952 trace_f2fs_write_checkpoint(sbi->sb, is_umount, "start block_ops"); 968 trace_f2fs_write_checkpoint(sbi->sb, is_umount, "start block_ops");
953 969
954 mutex_lock(&sbi->cp_mutex); 970 mutex_lock(&sbi->cp_mutex);
955 block_operations(sbi); 971
972 if (!sbi->s_dirty)
973 goto out;
974 if (unlikely(f2fs_cp_error(sbi)))
975 goto out;
976 if (block_operations(sbi))
977 goto out;
956 978
957 trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish block_ops"); 979 trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish block_ops");
958 980
@@ -976,9 +998,9 @@ void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
976 do_checkpoint(sbi, is_umount); 998 do_checkpoint(sbi, is_umount);
977 999
978 unblock_operations(sbi); 1000 unblock_operations(sbi);
979 mutex_unlock(&sbi->cp_mutex);
980
981 stat_inc_cp_count(sbi->stat_info); 1001 stat_inc_cp_count(sbi->stat_info);
1002out:
1003 mutex_unlock(&sbi->cp_mutex);
982 trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish checkpoint"); 1004 trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish checkpoint");
983} 1005}
984 1006
@@ -999,8 +1021,8 @@ void init_ino_entry_info(struct f2fs_sb_info *sbi)
999 * for cp pack we can have max 1020*504 orphan entries 1021 * for cp pack we can have max 1020*504 orphan entries
1000 */ 1022 */
1001 sbi->n_orphans = 0; 1023 sbi->n_orphans = 0;
1002 sbi->max_orphans = (sbi->blocks_per_seg - 2 - NR_CURSEG_TYPE) 1024 sbi->max_orphans = (sbi->blocks_per_seg - F2FS_CP_PACKS -
1003 * F2FS_ORPHANS_PER_BLOCK; 1025 NR_CURSEG_TYPE) * F2FS_ORPHANS_PER_BLOCK;
1004} 1026}
1005 1027
1006int __init create_checkpoint_caches(void) 1028int __init create_checkpoint_caches(void)
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 03313099c51c..76de83e25a89 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -53,7 +53,7 @@ static void f2fs_write_end_io(struct bio *bio, int err)
53 struct page *page = bvec->bv_page; 53 struct page *page = bvec->bv_page;
54 54
55 if (unlikely(err)) { 55 if (unlikely(err)) {
56 SetPageError(page); 56 set_page_dirty(page);
57 set_bit(AS_EIO, &page->mapping->flags); 57 set_bit(AS_EIO, &page->mapping->flags);
58 f2fs_stop_checkpoint(sbi); 58 f2fs_stop_checkpoint(sbi);
59 } 59 }
@@ -691,7 +691,7 @@ get_next:
691 allocated = true; 691 allocated = true;
692 blkaddr = dn.data_blkaddr; 692 blkaddr = dn.data_blkaddr;
693 } 693 }
694 /* Give more consecutive addresses for the read ahead */ 694 /* Give more consecutive addresses for the readahead */
695 if (blkaddr == (bh_result->b_blocknr + ofs)) { 695 if (blkaddr == (bh_result->b_blocknr + ofs)) {
696 ofs++; 696 ofs++;
697 dn.ofs_in_node++; 697 dn.ofs_in_node++;
@@ -739,7 +739,7 @@ static int f2fs_read_data_page(struct file *file, struct page *page)
739 739
740 trace_f2fs_readpage(page, DATA); 740 trace_f2fs_readpage(page, DATA);
741 741
742 /* If the file has inline data, try to read it directlly */ 742 /* If the file has inline data, try to read it directly */
743 if (f2fs_has_inline_data(inode)) 743 if (f2fs_has_inline_data(inode))
744 ret = f2fs_read_inline_data(inode, page); 744 ret = f2fs_read_inline_data(inode, page);
745 else 745 else
@@ -836,10 +836,19 @@ write:
836 836
837 /* Dentry blocks are controlled by checkpoint */ 837 /* Dentry blocks are controlled by checkpoint */
838 if (S_ISDIR(inode->i_mode)) { 838 if (S_ISDIR(inode->i_mode)) {
839 if (unlikely(f2fs_cp_error(sbi)))
840 goto redirty_out;
839 err = do_write_data_page(page, &fio); 841 err = do_write_data_page(page, &fio);
840 goto done; 842 goto done;
841 } 843 }
842 844
845 /* we should bypass data pages to proceed the kworkder jobs */
846 if (unlikely(f2fs_cp_error(sbi))) {
847 SetPageError(page);
848 unlock_page(page);
849 return 0;
850 }
851
843 if (!wbc->for_reclaim) 852 if (!wbc->for_reclaim)
844 need_balance_fs = true; 853 need_balance_fs = true;
845 else if (has_not_enough_free_secs(sbi, 0)) 854 else if (has_not_enough_free_secs(sbi, 0))
@@ -927,7 +936,7 @@ static void f2fs_write_failed(struct address_space *mapping, loff_t to)
927 936
928 if (to > inode->i_size) { 937 if (to > inode->i_size) {
929 truncate_pagecache(inode, inode->i_size); 938 truncate_pagecache(inode, inode->i_size);
930 truncate_blocks(inode, inode->i_size); 939 truncate_blocks(inode, inode->i_size, true);
931 } 940 }
932} 941}
933 942
@@ -946,7 +955,7 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
946 955
947 f2fs_balance_fs(sbi); 956 f2fs_balance_fs(sbi);
948repeat: 957repeat:
949 err = f2fs_convert_inline_data(inode, pos + len); 958 err = f2fs_convert_inline_data(inode, pos + len, NULL);
950 if (err) 959 if (err)
951 goto fail; 960 goto fail;
952 961
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index a441ba33be11..fecebdbfd781 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -32,7 +32,7 @@ static void update_general_status(struct f2fs_sb_info *sbi)
32 struct f2fs_stat_info *si = F2FS_STAT(sbi); 32 struct f2fs_stat_info *si = F2FS_STAT(sbi);
33 int i; 33 int i;
34 34
35 /* valid check of the segment numbers */ 35 /* validation check of the segment numbers */
36 si->hit_ext = sbi->read_hit_ext; 36 si->hit_ext = sbi->read_hit_ext;
37 si->total_ext = sbi->total_hit_ext; 37 si->total_ext = sbi->total_hit_ext;
38 si->ndirty_node = get_pages(sbi, F2FS_DIRTY_NODES); 38 si->ndirty_node = get_pages(sbi, F2FS_DIRTY_NODES);
@@ -152,7 +152,7 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
152 si->base_mem += NR_DIRTY_TYPE * f2fs_bitmap_size(TOTAL_SEGS(sbi)); 152 si->base_mem += NR_DIRTY_TYPE * f2fs_bitmap_size(TOTAL_SEGS(sbi));
153 si->base_mem += f2fs_bitmap_size(TOTAL_SECS(sbi)); 153 si->base_mem += f2fs_bitmap_size(TOTAL_SECS(sbi));
154 154
155 /* buld nm */ 155 /* build nm */
156 si->base_mem += sizeof(struct f2fs_nm_info); 156 si->base_mem += sizeof(struct f2fs_nm_info);
157 si->base_mem += __bitmap_size(sbi, NAT_BITMAP); 157 si->base_mem += __bitmap_size(sbi, NAT_BITMAP);
158 158
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index bcf893c3d903..155fb056b7f1 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -124,7 +124,7 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,
124 124
125 /* 125 /*
126 * For the most part, it should be a bug when name_len is zero. 126 * For the most part, it should be a bug when name_len is zero.
127 * We stop here for figuring out where the bugs are occurred. 127 * We stop here for figuring out where the bugs has occurred.
128 */ 128 */
129 f2fs_bug_on(!de->name_len); 129 f2fs_bug_on(!de->name_len);
130 130
@@ -391,7 +391,7 @@ put_error:
391error: 391error:
392 /* once the failed inode becomes a bad inode, i_mode is S_IFREG */ 392 /* once the failed inode becomes a bad inode, i_mode is S_IFREG */
393 truncate_inode_pages(&inode->i_data, 0); 393 truncate_inode_pages(&inode->i_data, 0);
394 truncate_blocks(inode, 0); 394 truncate_blocks(inode, 0, false);
395 remove_dirty_dir_inode(inode); 395 remove_dirty_dir_inode(inode);
396 remove_inode_page(inode); 396 remove_inode_page(inode);
397 return ERR_PTR(err); 397 return ERR_PTR(err);
@@ -563,7 +563,7 @@ fail:
563} 563}
564 564
565/* 565/*
566 * It only removes the dentry from the dentry page,corresponding name 566 * It only removes the dentry from the dentry page, corresponding name
567 * entry in name page does not need to be touched during deletion. 567 * entry in name page does not need to be touched during deletion.
568 */ 568 */
569void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, 569void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 4dab5338a97a..e921242186f6 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -24,7 +24,7 @@
24#define f2fs_bug_on(condition) BUG_ON(condition) 24#define f2fs_bug_on(condition) BUG_ON(condition)
25#define f2fs_down_write(x, y) down_write_nest_lock(x, y) 25#define f2fs_down_write(x, y) down_write_nest_lock(x, y)
26#else 26#else
27#define f2fs_bug_on(condition) 27#define f2fs_bug_on(condition) WARN_ON(condition)
28#define f2fs_down_write(x, y) down_write(x) 28#define f2fs_down_write(x, y) down_write(x)
29#endif 29#endif
30 30
@@ -395,7 +395,7 @@ enum count_type {
395}; 395};
396 396
397/* 397/*
398 * The below are the page types of bios used in submti_bio(). 398 * The below are the page types of bios used in submit_bio().
399 * The available types are: 399 * The available types are:
400 * DATA User data pages. It operates as async mode. 400 * DATA User data pages. It operates as async mode.
401 * NODE Node pages. It operates as async mode. 401 * NODE Node pages. It operates as async mode.
@@ -470,7 +470,7 @@ struct f2fs_sb_info {
470 struct list_head dir_inode_list; /* dir inode list */ 470 struct list_head dir_inode_list; /* dir inode list */
471 spinlock_t dir_inode_lock; /* for dir inode list lock */ 471 spinlock_t dir_inode_lock; /* for dir inode list lock */
472 472
473 /* basic file system units */ 473 /* basic filesystem units */
474 unsigned int log_sectors_per_block; /* log2 sectors per block */ 474 unsigned int log_sectors_per_block; /* log2 sectors per block */
475 unsigned int log_blocksize; /* log2 block size */ 475 unsigned int log_blocksize; /* log2 block size */
476 unsigned int blocksize; /* block size */ 476 unsigned int blocksize; /* block size */
@@ -799,7 +799,7 @@ static inline block_t __start_cp_addr(struct f2fs_sb_info *sbi)
799 799
800 /* 800 /*
801 * odd numbered checkpoint should at cp segment 0 801 * odd numbered checkpoint should at cp segment 0
802 * and even segent must be at cp segment 1 802 * and even segment must be at cp segment 1
803 */ 803 */
804 if (!(ckpt_version & 1)) 804 if (!(ckpt_version & 1))
805 start_addr += sbi->blocks_per_seg; 805 start_addr += sbi->blocks_per_seg;
@@ -1096,6 +1096,11 @@ static inline int f2fs_readonly(struct super_block *sb)
1096 return sb->s_flags & MS_RDONLY; 1096 return sb->s_flags & MS_RDONLY;
1097} 1097}
1098 1098
1099static inline bool f2fs_cp_error(struct f2fs_sb_info *sbi)
1100{
1101 return is_set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG);
1102}
1103
1099static inline void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi) 1104static inline void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi)
1100{ 1105{
1101 set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG); 1106 set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG);
@@ -1117,7 +1122,7 @@ static inline void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi)
1117 */ 1122 */
1118int f2fs_sync_file(struct file *, loff_t, loff_t, int); 1123int f2fs_sync_file(struct file *, loff_t, loff_t, int);
1119void truncate_data_blocks(struct dnode_of_data *); 1124void truncate_data_blocks(struct dnode_of_data *);
1120int truncate_blocks(struct inode *, u64); 1125int truncate_blocks(struct inode *, u64, bool);
1121void f2fs_truncate(struct inode *); 1126void f2fs_truncate(struct inode *);
1122int f2fs_getattr(struct vfsmount *, struct dentry *, struct kstat *); 1127int f2fs_getattr(struct vfsmount *, struct dentry *, struct kstat *);
1123int f2fs_setattr(struct dentry *, struct iattr *); 1128int f2fs_setattr(struct dentry *, struct iattr *);
@@ -1202,10 +1207,8 @@ int sync_node_pages(struct f2fs_sb_info *, nid_t, struct writeback_control *);
1202bool alloc_nid(struct f2fs_sb_info *, nid_t *); 1207bool alloc_nid(struct f2fs_sb_info *, nid_t *);
1203void alloc_nid_done(struct f2fs_sb_info *, nid_t); 1208void alloc_nid_done(struct f2fs_sb_info *, nid_t);
1204void alloc_nid_failed(struct f2fs_sb_info *, nid_t); 1209void alloc_nid_failed(struct f2fs_sb_info *, nid_t);
1205void recover_node_page(struct f2fs_sb_info *, struct page *,
1206 struct f2fs_summary *, struct node_info *, block_t);
1207void recover_inline_xattr(struct inode *, struct page *); 1210void recover_inline_xattr(struct inode *, struct page *);
1208bool recover_xattr_data(struct inode *, struct page *, block_t); 1211void recover_xattr_data(struct inode *, struct page *, block_t);
1209int recover_inode_page(struct f2fs_sb_info *, struct page *); 1212int recover_inode_page(struct f2fs_sb_info *, struct page *);
1210int restore_node_summary(struct f2fs_sb_info *, unsigned int, 1213int restore_node_summary(struct f2fs_sb_info *, unsigned int,
1211 struct f2fs_summary_block *); 1214 struct f2fs_summary_block *);
@@ -1238,8 +1241,6 @@ void write_data_page(struct page *, struct dnode_of_data *, block_t *,
1238void rewrite_data_page(struct page *, block_t, struct f2fs_io_info *); 1241void rewrite_data_page(struct page *, block_t, struct f2fs_io_info *);
1239void recover_data_page(struct f2fs_sb_info *, struct page *, 1242void recover_data_page(struct f2fs_sb_info *, struct page *,
1240 struct f2fs_summary *, block_t, block_t); 1243 struct f2fs_summary *, block_t, block_t);
1241void rewrite_node_page(struct f2fs_sb_info *, struct page *,
1242 struct f2fs_summary *, block_t, block_t);
1243void allocate_data_block(struct f2fs_sb_info *, struct page *, 1244void allocate_data_block(struct f2fs_sb_info *, struct page *,
1244 block_t, block_t *, struct f2fs_summary *, int); 1245 block_t, block_t *, struct f2fs_summary *, int);
1245void f2fs_wait_on_page_writeback(struct page *, enum page_type); 1246void f2fs_wait_on_page_writeback(struct page *, enum page_type);
@@ -1262,6 +1263,7 @@ int ra_meta_pages(struct f2fs_sb_info *, int, int, int);
1262long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long); 1263long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long);
1263void add_dirty_inode(struct f2fs_sb_info *, nid_t, int type); 1264void add_dirty_inode(struct f2fs_sb_info *, nid_t, int type);
1264void remove_dirty_inode(struct f2fs_sb_info *, nid_t, int type); 1265void remove_dirty_inode(struct f2fs_sb_info *, nid_t, int type);
1266void release_dirty_inode(struct f2fs_sb_info *);
1265bool exist_written_data(struct f2fs_sb_info *, nid_t, int); 1267bool exist_written_data(struct f2fs_sb_info *, nid_t, int);
1266int acquire_orphan_inode(struct f2fs_sb_info *); 1268int acquire_orphan_inode(struct f2fs_sb_info *);
1267void release_orphan_inode(struct f2fs_sb_info *); 1269void release_orphan_inode(struct f2fs_sb_info *);
@@ -1439,8 +1441,8 @@ extern const struct inode_operations f2fs_special_inode_operations;
1439 */ 1441 */
1440bool f2fs_may_inline(struct inode *); 1442bool f2fs_may_inline(struct inode *);
1441int f2fs_read_inline_data(struct inode *, struct page *); 1443int f2fs_read_inline_data(struct inode *, struct page *);
1442int f2fs_convert_inline_data(struct inode *, pgoff_t); 1444int f2fs_convert_inline_data(struct inode *, pgoff_t, struct page *);
1443int f2fs_write_inline_data(struct inode *, struct page *, unsigned int); 1445int f2fs_write_inline_data(struct inode *, struct page *, unsigned int);
1444void truncate_inline_data(struct inode *, u64); 1446void truncate_inline_data(struct inode *, u64);
1445int recover_inline_data(struct inode *, struct page *); 1447bool recover_inline_data(struct inode *, struct page *);
1446#endif 1448#endif
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 208f1a9bd569..060aee65aee8 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -41,6 +41,11 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma,
41 41
42 sb_start_pagefault(inode->i_sb); 42 sb_start_pagefault(inode->i_sb);
43 43
44 /* force to convert with normal data indices */
45 err = f2fs_convert_inline_data(inode, MAX_INLINE_DATA + 1, page);
46 if (err)
47 goto out;
48
44 /* block allocation */ 49 /* block allocation */
45 f2fs_lock_op(sbi); 50 f2fs_lock_op(sbi);
46 set_new_dnode(&dn, inode, NULL, NULL, 0); 51 set_new_dnode(&dn, inode, NULL, NULL, 0);
@@ -110,6 +115,25 @@ static int get_parent_ino(struct inode *inode, nid_t *pino)
110 return 1; 115 return 1;
111} 116}
112 117
118static inline bool need_do_checkpoint(struct inode *inode)
119{
120 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
121 bool need_cp = false;
122
123 if (!S_ISREG(inode->i_mode) || inode->i_nlink != 1)
124 need_cp = true;
125 else if (file_wrong_pino(inode))
126 need_cp = true;
127 else if (!space_for_roll_forward(sbi))
128 need_cp = true;
129 else if (!is_checkpointed_node(sbi, F2FS_I(inode)->i_pino))
130 need_cp = true;
131 else if (F2FS_I(inode)->xattr_ver == cur_cp_version(F2FS_CKPT(sbi)))
132 need_cp = true;
133
134 return need_cp;
135}
136
113int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) 137int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
114{ 138{
115 struct inode *inode = file->f_mapping->host; 139 struct inode *inode = file->f_mapping->host;
@@ -154,23 +178,12 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
154 /* guarantee free sections for fsync */ 178 /* guarantee free sections for fsync */
155 f2fs_balance_fs(sbi); 179 f2fs_balance_fs(sbi);
156 180
157 down_read(&fi->i_sem);
158
159 /* 181 /*
160 * Both of fdatasync() and fsync() are able to be recovered from 182 * Both of fdatasync() and fsync() are able to be recovered from
161 * sudden-power-off. 183 * sudden-power-off.
162 */ 184 */
163 if (!S_ISREG(inode->i_mode) || inode->i_nlink != 1) 185 down_read(&fi->i_sem);
164 need_cp = true; 186 need_cp = need_do_checkpoint(inode);
165 else if (file_wrong_pino(inode))
166 need_cp = true;
167 else if (!space_for_roll_forward(sbi))
168 need_cp = true;
169 else if (!is_checkpointed_node(sbi, F2FS_I(inode)->i_pino))
170 need_cp = true;
171 else if (F2FS_I(inode)->xattr_ver == cur_cp_version(F2FS_CKPT(sbi)))
172 need_cp = true;
173
174 up_read(&fi->i_sem); 187 up_read(&fi->i_sem);
175 188
176 if (need_cp) { 189 if (need_cp) {
@@ -288,7 +301,7 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence)
288 if (err && err != -ENOENT) { 301 if (err && err != -ENOENT) {
289 goto fail; 302 goto fail;
290 } else if (err == -ENOENT) { 303 } else if (err == -ENOENT) {
291 /* direct node is not exist */ 304 /* direct node does not exists */
292 if (whence == SEEK_DATA) { 305 if (whence == SEEK_DATA) {
293 pgofs = PGOFS_OF_NEXT_DNODE(pgofs, 306 pgofs = PGOFS_OF_NEXT_DNODE(pgofs,
294 F2FS_I(inode)); 307 F2FS_I(inode));
@@ -417,7 +430,7 @@ out:
417 f2fs_put_page(page, 1); 430 f2fs_put_page(page, 1);
418} 431}
419 432
420int truncate_blocks(struct inode *inode, u64 from) 433int truncate_blocks(struct inode *inode, u64 from, bool lock)
421{ 434{
422 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 435 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
423 unsigned int blocksize = inode->i_sb->s_blocksize; 436 unsigned int blocksize = inode->i_sb->s_blocksize;
@@ -433,14 +446,16 @@ int truncate_blocks(struct inode *inode, u64 from)
433 free_from = (pgoff_t) 446 free_from = (pgoff_t)
434 ((from + blocksize - 1) >> (sbi->log_blocksize)); 447 ((from + blocksize - 1) >> (sbi->log_blocksize));
435 448
436 f2fs_lock_op(sbi); 449 if (lock)
450 f2fs_lock_op(sbi);
437 451
438 set_new_dnode(&dn, inode, NULL, NULL, 0); 452 set_new_dnode(&dn, inode, NULL, NULL, 0);
439 err = get_dnode_of_data(&dn, free_from, LOOKUP_NODE); 453 err = get_dnode_of_data(&dn, free_from, LOOKUP_NODE);
440 if (err) { 454 if (err) {
441 if (err == -ENOENT) 455 if (err == -ENOENT)
442 goto free_next; 456 goto free_next;
443 f2fs_unlock_op(sbi); 457 if (lock)
458 f2fs_unlock_op(sbi);
444 trace_f2fs_truncate_blocks_exit(inode, err); 459 trace_f2fs_truncate_blocks_exit(inode, err);
445 return err; 460 return err;
446 } 461 }
@@ -458,7 +473,8 @@ int truncate_blocks(struct inode *inode, u64 from)
458 f2fs_put_dnode(&dn); 473 f2fs_put_dnode(&dn);
459free_next: 474free_next:
460 err = truncate_inode_blocks(inode, free_from); 475 err = truncate_inode_blocks(inode, free_from);
461 f2fs_unlock_op(sbi); 476 if (lock)
477 f2fs_unlock_op(sbi);
462done: 478done:
463 /* lastly zero out the first data page */ 479 /* lastly zero out the first data page */
464 truncate_partial_data_page(inode, from); 480 truncate_partial_data_page(inode, from);
@@ -475,7 +491,7 @@ void f2fs_truncate(struct inode *inode)
475 491
476 trace_f2fs_truncate(inode); 492 trace_f2fs_truncate(inode);
477 493
478 if (!truncate_blocks(inode, i_size_read(inode))) { 494 if (!truncate_blocks(inode, i_size_read(inode), true)) {
479 inode->i_mtime = inode->i_ctime = CURRENT_TIME; 495 inode->i_mtime = inode->i_ctime = CURRENT_TIME;
480 mark_inode_dirty(inode); 496 mark_inode_dirty(inode);
481 } 497 }
@@ -533,7 +549,7 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
533 549
534 if ((attr->ia_valid & ATTR_SIZE) && 550 if ((attr->ia_valid & ATTR_SIZE) &&
535 attr->ia_size != i_size_read(inode)) { 551 attr->ia_size != i_size_read(inode)) {
536 err = f2fs_convert_inline_data(inode, attr->ia_size); 552 err = f2fs_convert_inline_data(inode, attr->ia_size, NULL);
537 if (err) 553 if (err)
538 return err; 554 return err;
539 555
@@ -622,7 +638,7 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len)
622 loff_t off_start, off_end; 638 loff_t off_start, off_end;
623 int ret = 0; 639 int ret = 0;
624 640
625 ret = f2fs_convert_inline_data(inode, MAX_INLINE_DATA + 1); 641 ret = f2fs_convert_inline_data(inode, MAX_INLINE_DATA + 1, NULL);
626 if (ret) 642 if (ret)
627 return ret; 643 return ret;
628 644
@@ -678,7 +694,7 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
678 if (ret) 694 if (ret)
679 return ret; 695 return ret;
680 696
681 ret = f2fs_convert_inline_data(inode, offset + len); 697 ret = f2fs_convert_inline_data(inode, offset + len, NULL);
682 if (ret) 698 if (ret)
683 return ret; 699 return ret;
684 700
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index d7947d90ccc3..943a31db7cc3 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -58,7 +58,7 @@ static int gc_thread_func(void *data)
58 * 3. IO subsystem is idle by checking the # of requests in 58 * 3. IO subsystem is idle by checking the # of requests in
59 * bdev's request list. 59 * bdev's request list.
60 * 60 *
61 * Note) We have to avoid triggering GCs too much frequently. 61 * Note) We have to avoid triggering GCs frequently.
62 * Because it is possible that some segments can be 62 * Because it is possible that some segments can be
63 * invalidated soon after by user update or deletion. 63 * invalidated soon after by user update or deletion.
64 * So, I'd like to wait some time to collect dirty segments. 64 * So, I'd like to wait some time to collect dirty segments.
@@ -222,7 +222,7 @@ static unsigned int get_cb_cost(struct f2fs_sb_info *sbi, unsigned int segno)
222 222
223 u = (vblocks * 100) >> sbi->log_blocks_per_seg; 223 u = (vblocks * 100) >> sbi->log_blocks_per_seg;
224 224
225 /* Handle if the system time is changed by user */ 225 /* Handle if the system time has changed by the user */
226 if (mtime < sit_i->min_mtime) 226 if (mtime < sit_i->min_mtime)
227 sit_i->min_mtime = mtime; 227 sit_i->min_mtime = mtime;
228 if (mtime > sit_i->max_mtime) 228 if (mtime > sit_i->max_mtime)
@@ -593,7 +593,7 @@ next_step:
593 593
594 if (phase == 2) { 594 if (phase == 2) {
595 inode = f2fs_iget(sb, dni.ino); 595 inode = f2fs_iget(sb, dni.ino);
596 if (IS_ERR(inode)) 596 if (IS_ERR(inode) || is_bad_inode(inode))
597 continue; 597 continue;
598 598
599 start_bidx = start_bidx_of_node(nofs, F2FS_I(inode)); 599 start_bidx = start_bidx_of_node(nofs, F2FS_I(inode));
@@ -693,7 +693,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi)
693gc_more: 693gc_more:
694 if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE))) 694 if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE)))
695 goto stop; 695 goto stop;
696 if (unlikely(is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ERROR_FLAG))) 696 if (unlikely(f2fs_cp_error(sbi)))
697 goto stop; 697 goto stop;
698 698
699 if (gc_type == BG_GC && has_not_enough_free_secs(sbi, nfree)) { 699 if (gc_type == BG_GC && has_not_enough_free_secs(sbi, nfree)) {
diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h
index 5d5eb6047bf4..16f0b2b22999 100644
--- a/fs/f2fs/gc.h
+++ b/fs/f2fs/gc.h
@@ -91,7 +91,7 @@ static inline bool has_enough_invalid_blocks(struct f2fs_sb_info *sbi)
91 block_t invalid_user_blocks = sbi->user_block_count - 91 block_t invalid_user_blocks = sbi->user_block_count -
92 written_block_count(sbi); 92 written_block_count(sbi);
93 /* 93 /*
94 * Background GC is triggered with the following condition. 94 * Background GC is triggered with the following conditions.
95 * 1. There are a number of invalid blocks. 95 * 1. There are a number of invalid blocks.
96 * 2. There is not enough free space. 96 * 2. There is not enough free space.
97 */ 97 */
diff --git a/fs/f2fs/hash.c b/fs/f2fs/hash.c
index 948d17bf7281..a844fcfb9a8d 100644
--- a/fs/f2fs/hash.c
+++ b/fs/f2fs/hash.c
@@ -42,7 +42,8 @@ static void TEA_transform(unsigned int buf[4], unsigned int const in[])
42 buf[1] += b1; 42 buf[1] += b1;
43} 43}
44 44
45static void str2hashbuf(const char *msg, size_t len, unsigned int *buf, int num) 45static void str2hashbuf(const unsigned char *msg, size_t len,
46 unsigned int *buf, int num)
46{ 47{
47 unsigned pad, val; 48 unsigned pad, val;
48 int i; 49 int i;
@@ -73,9 +74,9 @@ f2fs_hash_t f2fs_dentry_hash(const struct qstr *name_info)
73{ 74{
74 __u32 hash; 75 __u32 hash;
75 f2fs_hash_t f2fs_hash; 76 f2fs_hash_t f2fs_hash;
76 const char *p; 77 const unsigned char *p;
77 __u32 in[8], buf[4]; 78 __u32 in[8], buf[4];
78 const char *name = name_info->name; 79 const unsigned char *name = name_info->name;
79 size_t len = name_info->len; 80 size_t len = name_info->len;
80 81
81 if ((len <= 2) && (name[0] == '.') && 82 if ((len <= 2) && (name[0] == '.') &&
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index 5beeccef9ae1..3e8ecdf3742b 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -68,7 +68,7 @@ out:
68 68
69static int __f2fs_convert_inline_data(struct inode *inode, struct page *page) 69static int __f2fs_convert_inline_data(struct inode *inode, struct page *page)
70{ 70{
71 int err; 71 int err = 0;
72 struct page *ipage; 72 struct page *ipage;
73 struct dnode_of_data dn; 73 struct dnode_of_data dn;
74 void *src_addr, *dst_addr; 74 void *src_addr, *dst_addr;
@@ -86,6 +86,10 @@ static int __f2fs_convert_inline_data(struct inode *inode, struct page *page)
86 goto out; 86 goto out;
87 } 87 }
88 88
89 /* someone else converted inline_data already */
90 if (!f2fs_has_inline_data(inode))
91 goto out;
92
89 /* 93 /*
90 * i_addr[0] is not used for inline data, 94 * i_addr[0] is not used for inline data,
91 * so reserving new block will not destroy inline data 95 * so reserving new block will not destroy inline data
@@ -124,9 +128,10 @@ out:
124 return err; 128 return err;
125} 129}
126 130
127int f2fs_convert_inline_data(struct inode *inode, pgoff_t to_size) 131int f2fs_convert_inline_data(struct inode *inode, pgoff_t to_size,
132 struct page *page)
128{ 133{
129 struct page *page; 134 struct page *new_page = page;
130 int err; 135 int err;
131 136
132 if (!f2fs_has_inline_data(inode)) 137 if (!f2fs_has_inline_data(inode))
@@ -134,17 +139,20 @@ int f2fs_convert_inline_data(struct inode *inode, pgoff_t to_size)
134 else if (to_size <= MAX_INLINE_DATA) 139 else if (to_size <= MAX_INLINE_DATA)
135 return 0; 140 return 0;
136 141
137 page = grab_cache_page(inode->i_mapping, 0); 142 if (!page || page->index != 0) {
138 if (!page) 143 new_page = grab_cache_page(inode->i_mapping, 0);
139 return -ENOMEM; 144 if (!new_page)
145 return -ENOMEM;
146 }
140 147
141 err = __f2fs_convert_inline_data(inode, page); 148 err = __f2fs_convert_inline_data(inode, new_page);
142 f2fs_put_page(page, 1); 149 if (!page || page->index != 0)
150 f2fs_put_page(new_page, 1);
143 return err; 151 return err;
144} 152}
145 153
146int f2fs_write_inline_data(struct inode *inode, 154int f2fs_write_inline_data(struct inode *inode,
147 struct page *page, unsigned size) 155 struct page *page, unsigned size)
148{ 156{
149 void *src_addr, *dst_addr; 157 void *src_addr, *dst_addr;
150 struct page *ipage; 158 struct page *ipage;
@@ -199,7 +207,7 @@ void truncate_inline_data(struct inode *inode, u64 from)
199 f2fs_put_page(ipage, 1); 207 f2fs_put_page(ipage, 1);
200} 208}
201 209
202int recover_inline_data(struct inode *inode, struct page *npage) 210bool recover_inline_data(struct inode *inode, struct page *npage)
203{ 211{
204 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 212 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
205 struct f2fs_inode *ri = NULL; 213 struct f2fs_inode *ri = NULL;
@@ -218,7 +226,7 @@ int recover_inline_data(struct inode *inode, struct page *npage)
218 ri = F2FS_INODE(npage); 226 ri = F2FS_INODE(npage);
219 227
220 if (f2fs_has_inline_data(inode) && 228 if (f2fs_has_inline_data(inode) &&
221 ri && ri->i_inline & F2FS_INLINE_DATA) { 229 ri && (ri->i_inline & F2FS_INLINE_DATA)) {
222process_inline: 230process_inline:
223 ipage = get_node_page(sbi, inode->i_ino); 231 ipage = get_node_page(sbi, inode->i_ino);
224 f2fs_bug_on(IS_ERR(ipage)); 232 f2fs_bug_on(IS_ERR(ipage));
@@ -230,7 +238,7 @@ process_inline:
230 memcpy(dst_addr, src_addr, MAX_INLINE_DATA); 238 memcpy(dst_addr, src_addr, MAX_INLINE_DATA);
231 update_inode(inode, ipage); 239 update_inode(inode, ipage);
232 f2fs_put_page(ipage, 1); 240 f2fs_put_page(ipage, 1);
233 return -1; 241 return true;
234 } 242 }
235 243
236 if (f2fs_has_inline_data(inode)) { 244 if (f2fs_has_inline_data(inode)) {
@@ -242,10 +250,10 @@ process_inline:
242 clear_inode_flag(F2FS_I(inode), FI_INLINE_DATA); 250 clear_inode_flag(F2FS_I(inode), FI_INLINE_DATA);
243 update_inode(inode, ipage); 251 update_inode(inode, ipage);
244 f2fs_put_page(ipage, 1); 252 f2fs_put_page(ipage, 1);
245 } else if (ri && ri->i_inline & F2FS_INLINE_DATA) { 253 } else if (ri && (ri->i_inline & F2FS_INLINE_DATA)) {
246 truncate_blocks(inode, 0); 254 truncate_blocks(inode, 0, false);
247 set_inode_flag(F2FS_I(inode), FI_INLINE_DATA); 255 set_inode_flag(F2FS_I(inode), FI_INLINE_DATA);
248 goto process_inline; 256 goto process_inline;
249 } 257 }
250 return 0; 258 return false;
251} 259}
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 27b03776ffd2..ee103fd7283c 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -134,9 +134,7 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
134 return 0; 134 return 0;
135out: 135out:
136 clear_nlink(inode); 136 clear_nlink(inode);
137 unlock_new_inode(inode); 137 iget_failed(inode);
138 make_bad_inode(inode);
139 iput(inode);
140 alloc_nid_failed(sbi, ino); 138 alloc_nid_failed(sbi, ino);
141 return err; 139 return err;
142} 140}
@@ -229,7 +227,7 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry)
229 f2fs_delete_entry(de, page, inode); 227 f2fs_delete_entry(de, page, inode);
230 f2fs_unlock_op(sbi); 228 f2fs_unlock_op(sbi);
231 229
232 /* In order to evict this inode, we set it dirty */ 230 /* In order to evict this inode, we set it dirty */
233 mark_inode_dirty(inode); 231 mark_inode_dirty(inode);
234fail: 232fail:
235 trace_f2fs_unlink_exit(inode, err); 233 trace_f2fs_unlink_exit(inode, err);
@@ -267,9 +265,7 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
267 return err; 265 return err;
268out: 266out:
269 clear_nlink(inode); 267 clear_nlink(inode);
270 unlock_new_inode(inode); 268 iget_failed(inode);
271 make_bad_inode(inode);
272 iput(inode);
273 alloc_nid_failed(sbi, inode->i_ino); 269 alloc_nid_failed(sbi, inode->i_ino);
274 return err; 270 return err;
275} 271}
@@ -308,9 +304,7 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
308out_fail: 304out_fail:
309 clear_inode_flag(F2FS_I(inode), FI_INC_LINK); 305 clear_inode_flag(F2FS_I(inode), FI_INC_LINK);
310 clear_nlink(inode); 306 clear_nlink(inode);
311 unlock_new_inode(inode); 307 iget_failed(inode);
312 make_bad_inode(inode);
313 iput(inode);
314 alloc_nid_failed(sbi, inode->i_ino); 308 alloc_nid_failed(sbi, inode->i_ino);
315 return err; 309 return err;
316} 310}
@@ -354,9 +348,7 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry,
354 return 0; 348 return 0;
355out: 349out:
356 clear_nlink(inode); 350 clear_nlink(inode);
357 unlock_new_inode(inode); 351 iget_failed(inode);
358 make_bad_inode(inode);
359 iput(inode);
360 alloc_nid_failed(sbi, inode->i_ino); 352 alloc_nid_failed(sbi, inode->i_ino);
361 return err; 353 return err;
362} 354}
@@ -688,9 +680,7 @@ release_out:
688out: 680out:
689 f2fs_unlock_op(sbi); 681 f2fs_unlock_op(sbi);
690 clear_nlink(inode); 682 clear_nlink(inode);
691 unlock_new_inode(inode); 683 iget_failed(inode);
692 make_bad_inode(inode);
693 iput(inode);
694 alloc_nid_failed(sbi, inode->i_ino); 684 alloc_nid_failed(sbi, inode->i_ino);
695 return err; 685 return err;
696} 686}
@@ -704,7 +694,6 @@ const struct inode_operations f2fs_dir_inode_operations = {
704 .mkdir = f2fs_mkdir, 694 .mkdir = f2fs_mkdir,
705 .rmdir = f2fs_rmdir, 695 .rmdir = f2fs_rmdir,
706 .mknod = f2fs_mknod, 696 .mknod = f2fs_mknod,
707 .rename = f2fs_rename,
708 .rename2 = f2fs_rename2, 697 .rename2 = f2fs_rename2,
709 .tmpfile = f2fs_tmpfile, 698 .tmpfile = f2fs_tmpfile,
710 .getattr = f2fs_getattr, 699 .getattr = f2fs_getattr,
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index d3d90d284631..45378196e19a 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -237,7 +237,7 @@ retry:
237 nat_get_blkaddr(e) != NULL_ADDR && 237 nat_get_blkaddr(e) != NULL_ADDR &&
238 new_blkaddr == NEW_ADDR); 238 new_blkaddr == NEW_ADDR);
239 239
240 /* increament version no as node is removed */ 240 /* increment version no as node is removed */
241 if (nat_get_blkaddr(e) != NEW_ADDR && new_blkaddr == NULL_ADDR) { 241 if (nat_get_blkaddr(e) != NEW_ADDR && new_blkaddr == NULL_ADDR) {
242 unsigned char version = nat_get_version(e); 242 unsigned char version = nat_get_version(e);
243 nat_set_version(e, inc_node_version(version)); 243 nat_set_version(e, inc_node_version(version));
@@ -274,7 +274,7 @@ int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
274} 274}
275 275
276/* 276/*
277 * This function returns always success 277 * This function always returns success
278 */ 278 */
279void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni) 279void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni)
280{ 280{
@@ -650,7 +650,7 @@ static int truncate_partial_nodes(struct dnode_of_data *dn,
650 650
651 /* get indirect nodes in the path */ 651 /* get indirect nodes in the path */
652 for (i = 0; i < idx + 1; i++) { 652 for (i = 0; i < idx + 1; i++) {
653 /* refernece count'll be increased */ 653 /* reference count'll be increased */
654 pages[i] = get_node_page(sbi, nid[i]); 654 pages[i] = get_node_page(sbi, nid[i]);
655 if (IS_ERR(pages[i])) { 655 if (IS_ERR(pages[i])) {
656 err = PTR_ERR(pages[i]); 656 err = PTR_ERR(pages[i]);
@@ -823,22 +823,26 @@ int truncate_xattr_node(struct inode *inode, struct page *page)
823 */ 823 */
824void remove_inode_page(struct inode *inode) 824void remove_inode_page(struct inode *inode)
825{ 825{
826 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
827 struct page *page;
828 nid_t ino = inode->i_ino;
829 struct dnode_of_data dn; 826 struct dnode_of_data dn;
830 827
831 page = get_node_page(sbi, ino); 828 set_new_dnode(&dn, inode, NULL, NULL, inode->i_ino);
832 if (IS_ERR(page)) 829 if (get_dnode_of_data(&dn, 0, LOOKUP_NODE))
833 return; 830 return;
834 831
835 if (truncate_xattr_node(inode, page)) { 832 if (truncate_xattr_node(inode, dn.inode_page)) {
836 f2fs_put_page(page, 1); 833 f2fs_put_dnode(&dn);
837 return; 834 return;
838 } 835 }
839 /* 0 is possible, after f2fs_new_inode() is failed */ 836
837 /* remove potential inline_data blocks */
838 if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
839 S_ISLNK(inode->i_mode))
840 truncate_data_blocks_range(&dn, 1);
841
842 /* 0 is possible, after f2fs_new_inode() has failed */
840 f2fs_bug_on(inode->i_blocks != 0 && inode->i_blocks != 1); 843 f2fs_bug_on(inode->i_blocks != 0 && inode->i_blocks != 1);
841 set_new_dnode(&dn, inode, page, page, ino); 844
845 /* will put inode & node pages */
842 truncate_node(&dn); 846 truncate_node(&dn);
843} 847}
844 848
@@ -1129,8 +1133,11 @@ continue_unlock:
1129 set_fsync_mark(page, 0); 1133 set_fsync_mark(page, 0);
1130 set_dentry_mark(page, 0); 1134 set_dentry_mark(page, 0);
1131 } 1135 }
1132 NODE_MAPPING(sbi)->a_ops->writepage(page, wbc); 1136
1133 wrote++; 1137 if (NODE_MAPPING(sbi)->a_ops->writepage(page, wbc))
1138 unlock_page(page);
1139 else
1140 wrote++;
1134 1141
1135 if (--wbc->nr_to_write == 0) 1142 if (--wbc->nr_to_write == 0)
1136 break; 1143 break;
@@ -1212,6 +1219,8 @@ static int f2fs_write_node_page(struct page *page,
1212 1219
1213 if (unlikely(sbi->por_doing)) 1220 if (unlikely(sbi->por_doing))
1214 goto redirty_out; 1221 goto redirty_out;
1222 if (unlikely(f2fs_cp_error(sbi)))
1223 goto redirty_out;
1215 1224
1216 f2fs_wait_on_page_writeback(page, NODE); 1225 f2fs_wait_on_page_writeback(page, NODE);
1217 1226
@@ -1540,15 +1549,6 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid)
1540 kmem_cache_free(free_nid_slab, i); 1549 kmem_cache_free(free_nid_slab, i);
1541} 1550}
1542 1551
1543void recover_node_page(struct f2fs_sb_info *sbi, struct page *page,
1544 struct f2fs_summary *sum, struct node_info *ni,
1545 block_t new_blkaddr)
1546{
1547 rewrite_node_page(sbi, page, sum, ni->blk_addr, new_blkaddr);
1548 set_node_addr(sbi, ni, new_blkaddr, false);
1549 clear_node_page_dirty(page);
1550}
1551
1552void recover_inline_xattr(struct inode *inode, struct page *page) 1552void recover_inline_xattr(struct inode *inode, struct page *page)
1553{ 1553{
1554 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 1554 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
@@ -1557,40 +1557,33 @@ void recover_inline_xattr(struct inode *inode, struct page *page)
1557 struct page *ipage; 1557 struct page *ipage;
1558 struct f2fs_inode *ri; 1558 struct f2fs_inode *ri;
1559 1559
1560 if (!f2fs_has_inline_xattr(inode))
1561 return;
1562
1563 if (!IS_INODE(page))
1564 return;
1565
1566 ri = F2FS_INODE(page);
1567 if (!(ri->i_inline & F2FS_INLINE_XATTR))
1568 return;
1569
1570 ipage = get_node_page(sbi, inode->i_ino); 1560 ipage = get_node_page(sbi, inode->i_ino);
1571 f2fs_bug_on(IS_ERR(ipage)); 1561 f2fs_bug_on(IS_ERR(ipage));
1572 1562
1563 ri = F2FS_INODE(page);
1564 if (!(ri->i_inline & F2FS_INLINE_XATTR)) {
1565 clear_inode_flag(F2FS_I(inode), FI_INLINE_XATTR);
1566 goto update_inode;
1567 }
1568
1573 dst_addr = inline_xattr_addr(ipage); 1569 dst_addr = inline_xattr_addr(ipage);
1574 src_addr = inline_xattr_addr(page); 1570 src_addr = inline_xattr_addr(page);
1575 inline_size = inline_xattr_size(inode); 1571 inline_size = inline_xattr_size(inode);
1576 1572
1577 f2fs_wait_on_page_writeback(ipage, NODE); 1573 f2fs_wait_on_page_writeback(ipage, NODE);
1578 memcpy(dst_addr, src_addr, inline_size); 1574 memcpy(dst_addr, src_addr, inline_size);
1579 1575update_inode:
1580 update_inode(inode, ipage); 1576 update_inode(inode, ipage);
1581 f2fs_put_page(ipage, 1); 1577 f2fs_put_page(ipage, 1);
1582} 1578}
1583 1579
1584bool recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr) 1580void recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr)
1585{ 1581{
1586 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 1582 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
1587 nid_t prev_xnid = F2FS_I(inode)->i_xattr_nid; 1583 nid_t prev_xnid = F2FS_I(inode)->i_xattr_nid;
1588 nid_t new_xnid = nid_of_node(page); 1584 nid_t new_xnid = nid_of_node(page);
1589 struct node_info ni; 1585 struct node_info ni;
1590 1586
1591 if (!f2fs_has_xattr_block(ofs_of_node(page)))
1592 return false;
1593
1594 /* 1: invalidate the previous xattr nid */ 1587 /* 1: invalidate the previous xattr nid */
1595 if (!prev_xnid) 1588 if (!prev_xnid)
1596 goto recover_xnid; 1589 goto recover_xnid;
@@ -1618,7 +1611,6 @@ recover_xnid:
1618 set_node_addr(sbi, &ni, blkaddr, false); 1611 set_node_addr(sbi, &ni, blkaddr, false);
1619 1612
1620 update_inode_page(inode); 1613 update_inode_page(inode);
1621 return true;
1622} 1614}
1623 1615
1624int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) 1616int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
@@ -1637,7 +1629,7 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
1637 if (!ipage) 1629 if (!ipage)
1638 return -ENOMEM; 1630 return -ENOMEM;
1639 1631
1640 /* Should not use this inode from free nid list */ 1632 /* Should not use this inode from free nid list */
1641 remove_free_nid(NM_I(sbi), ino); 1633 remove_free_nid(NM_I(sbi), ino);
1642 1634
1643 SetPageUptodate(ipage); 1635 SetPageUptodate(ipage);
@@ -1651,6 +1643,7 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
1651 dst->i_blocks = cpu_to_le64(1); 1643 dst->i_blocks = cpu_to_le64(1);
1652 dst->i_links = cpu_to_le32(1); 1644 dst->i_links = cpu_to_le32(1);
1653 dst->i_xattr_nid = 0; 1645 dst->i_xattr_nid = 0;
1646 dst->i_inline = src->i_inline & F2FS_INLINE_XATTR;
1654 1647
1655 new_ni = old_ni; 1648 new_ni = old_ni;
1656 new_ni.ino = ino; 1649 new_ni.ino = ino;
@@ -1659,13 +1652,14 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
1659 WARN_ON(1); 1652 WARN_ON(1);
1660 set_node_addr(sbi, &new_ni, NEW_ADDR, false); 1653 set_node_addr(sbi, &new_ni, NEW_ADDR, false);
1661 inc_valid_inode_count(sbi); 1654 inc_valid_inode_count(sbi);
1655 set_page_dirty(ipage);
1662 f2fs_put_page(ipage, 1); 1656 f2fs_put_page(ipage, 1);
1663 return 0; 1657 return 0;
1664} 1658}
1665 1659
1666/* 1660/*
1667 * ra_sum_pages() merge contiguous pages into one bio and submit. 1661 * ra_sum_pages() merge contiguous pages into one bio and submit.
1668 * these pre-readed pages are alloced in bd_inode's mapping tree. 1662 * these pre-read pages are allocated in bd_inode's mapping tree.
1669 */ 1663 */
1670static int ra_sum_pages(struct f2fs_sb_info *sbi, struct page **pages, 1664static int ra_sum_pages(struct f2fs_sb_info *sbi, struct page **pages,
1671 int start, int nrpages) 1665 int start, int nrpages)
@@ -1709,7 +1703,7 @@ int restore_node_summary(struct f2fs_sb_info *sbi,
1709 for (i = 0; !err && i < last_offset; i += nrpages, addr += nrpages) { 1703 for (i = 0; !err && i < last_offset; i += nrpages, addr += nrpages) {
1710 nrpages = min(last_offset - i, bio_blocks); 1704 nrpages = min(last_offset - i, bio_blocks);
1711 1705
1712 /* read ahead node pages */ 1706 /* readahead node pages */
1713 nrpages = ra_sum_pages(sbi, pages, addr, nrpages); 1707 nrpages = ra_sum_pages(sbi, pages, addr, nrpages);
1714 if (!nrpages) 1708 if (!nrpages)
1715 return -ENOMEM; 1709 return -ENOMEM;
@@ -1967,7 +1961,7 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
1967 nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks; 1961 nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks;
1968 1962
1969 /* not used nids: 0, node, meta, (and root counted as valid node) */ 1963 /* not used nids: 0, node, meta, (and root counted as valid node) */
1970 nm_i->available_nids = nm_i->max_nid - 3; 1964 nm_i->available_nids = nm_i->max_nid - F2FS_RESERVED_NODE_NUM;
1971 nm_i->fcnt = 0; 1965 nm_i->fcnt = 0;
1972 nm_i->nat_cnt = 0; 1966 nm_i->nat_cnt = 0;
1973 nm_i->ram_thresh = DEF_RAM_THRESHOLD; 1967 nm_i->ram_thresh = DEF_RAM_THRESHOLD;
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index fe1c6d921ba2..756c41cd2582 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -62,8 +62,10 @@ static int recover_dentry(struct page *ipage, struct inode *inode)
62 } 62 }
63retry: 63retry:
64 de = f2fs_find_entry(dir, &name, &page); 64 de = f2fs_find_entry(dir, &name, &page);
65 if (de && inode->i_ino == le32_to_cpu(de->ino)) 65 if (de && inode->i_ino == le32_to_cpu(de->ino)) {
66 clear_inode_flag(F2FS_I(inode), FI_INC_LINK);
66 goto out_unmap_put; 67 goto out_unmap_put;
68 }
67 if (de) { 69 if (de) {
68 einode = f2fs_iget(inode->i_sb, le32_to_cpu(de->ino)); 70 einode = f2fs_iget(inode->i_sb, le32_to_cpu(de->ino));
69 if (IS_ERR(einode)) { 71 if (IS_ERR(einode)) {
@@ -300,14 +302,19 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
300 struct node_info ni; 302 struct node_info ni;
301 int err = 0, recovered = 0; 303 int err = 0, recovered = 0;
302 304
303 recover_inline_xattr(inode, page); 305 /* step 1: recover xattr */
304 306 if (IS_INODE(page)) {
305 if (recover_inline_data(inode, page)) 307 recover_inline_xattr(inode, page);
308 } else if (f2fs_has_xattr_block(ofs_of_node(page))) {
309 recover_xattr_data(inode, page, blkaddr);
306 goto out; 310 goto out;
311 }
307 312
308 if (recover_xattr_data(inode, page, blkaddr)) 313 /* step 2: recover inline data */
314 if (recover_inline_data(inode, page))
309 goto out; 315 goto out;
310 316
317 /* step 3: recover data indices */
311 start = start_bidx_of_node(ofs_of_node(page), fi); 318 start = start_bidx_of_node(ofs_of_node(page), fi);
312 end = start + ADDRS_PER_PAGE(page, fi); 319 end = start + ADDRS_PER_PAGE(page, fi);
313 320
@@ -364,8 +371,6 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
364 fill_node_footer(dn.node_page, dn.nid, ni.ino, 371 fill_node_footer(dn.node_page, dn.nid, ni.ino,
365 ofs_of_node(page), false); 372 ofs_of_node(page), false);
366 set_page_dirty(dn.node_page); 373 set_page_dirty(dn.node_page);
367
368 recover_node_page(sbi, dn.node_page, &sum, &ni, blkaddr);
369err: 374err:
370 f2fs_put_dnode(&dn); 375 f2fs_put_dnode(&dn);
371 f2fs_unlock_op(sbi); 376 f2fs_unlock_op(sbi);
@@ -452,6 +457,9 @@ int recover_fsync_data(struct f2fs_sb_info *sbi)
452 /* step #1: find fsynced inode numbers */ 457 /* step #1: find fsynced inode numbers */
453 sbi->por_doing = true; 458 sbi->por_doing = true;
454 459
460 /* prevent checkpoint */
461 mutex_lock(&sbi->cp_mutex);
462
455 blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); 463 blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
456 464
457 err = find_fsync_dnodes(sbi, &inode_list); 465 err = find_fsync_dnodes(sbi, &inode_list);
@@ -465,7 +473,8 @@ int recover_fsync_data(struct f2fs_sb_info *sbi)
465 473
466 /* step #2: recover data */ 474 /* step #2: recover data */
467 err = recover_data(sbi, &inode_list, CURSEG_WARM_NODE); 475 err = recover_data(sbi, &inode_list, CURSEG_WARM_NODE);
468 f2fs_bug_on(!list_empty(&inode_list)); 476 if (!err)
477 f2fs_bug_on(!list_empty(&inode_list));
469out: 478out:
470 destroy_fsync_dnodes(&inode_list); 479 destroy_fsync_dnodes(&inode_list);
471 kmem_cache_destroy(fsync_entry_slab); 480 kmem_cache_destroy(fsync_entry_slab);
@@ -482,8 +491,13 @@ out:
482 /* Flush all the NAT/SIT pages */ 491 /* Flush all the NAT/SIT pages */
483 while (get_pages(sbi, F2FS_DIRTY_META)) 492 while (get_pages(sbi, F2FS_DIRTY_META))
484 sync_meta_pages(sbi, META, LONG_MAX); 493 sync_meta_pages(sbi, META, LONG_MAX);
494 set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG);
495 mutex_unlock(&sbi->cp_mutex);
485 } else if (need_writecp) { 496 } else if (need_writecp) {
497 mutex_unlock(&sbi->cp_mutex);
486 write_checkpoint(sbi, false); 498 write_checkpoint(sbi, false);
499 } else {
500 mutex_unlock(&sbi->cp_mutex);
487 } 501 }
488 return err; 502 return err;
489} 503}
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 0dfeebae2a50..0aa337cd5bba 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -62,7 +62,7 @@ static inline unsigned long __reverse_ffs(unsigned long word)
62} 62}
63 63
64/* 64/*
65 * __find_rev_next(_zero)_bit is copied from lib/find_next_bit.c becasue 65 * __find_rev_next(_zero)_bit is copied from lib/find_next_bit.c because
66 * f2fs_set_bit makes MSB and LSB reversed in a byte. 66 * f2fs_set_bit makes MSB and LSB reversed in a byte.
67 * Example: 67 * Example:
68 * LSB <--> MSB 68 * LSB <--> MSB
@@ -808,7 +808,7 @@ static void __refresh_next_blkoff(struct f2fs_sb_info *sbi,
808} 808}
809 809
810/* 810/*
811 * This function always allocates a used segment (from dirty seglist) by SSR 811 * This function always allocates a used segment(from dirty seglist) by SSR
812 * manner, so it should recover the existing segment information of valid blocks 812 * manner, so it should recover the existing segment information of valid blocks
813 */ 813 */
814static void change_curseg(struct f2fs_sb_info *sbi, int type, bool reuse) 814static void change_curseg(struct f2fs_sb_info *sbi, int type, bool reuse)
@@ -1103,55 +1103,6 @@ void recover_data_page(struct f2fs_sb_info *sbi,
1103 mutex_unlock(&curseg->curseg_mutex); 1103 mutex_unlock(&curseg->curseg_mutex);
1104} 1104}
1105 1105
1106void rewrite_node_page(struct f2fs_sb_info *sbi,
1107 struct page *page, struct f2fs_summary *sum,
1108 block_t old_blkaddr, block_t new_blkaddr)
1109{
1110 struct sit_info *sit_i = SIT_I(sbi);
1111 int type = CURSEG_WARM_NODE;
1112 struct curseg_info *curseg;
1113 unsigned int segno, old_cursegno;
1114 block_t next_blkaddr = next_blkaddr_of_node(page);
1115 unsigned int next_segno = GET_SEGNO(sbi, next_blkaddr);
1116 struct f2fs_io_info fio = {
1117 .type = NODE,
1118 .rw = WRITE_SYNC,
1119 };
1120
1121 curseg = CURSEG_I(sbi, type);
1122
1123 mutex_lock(&curseg->curseg_mutex);
1124 mutex_lock(&sit_i->sentry_lock);
1125
1126 segno = GET_SEGNO(sbi, new_blkaddr);
1127 old_cursegno = curseg->segno;
1128
1129 /* change the current segment */
1130 if (segno != curseg->segno) {
1131 curseg->next_segno = segno;
1132 change_curseg(sbi, type, true);
1133 }
1134 curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr);
1135 __add_sum_entry(sbi, type, sum);
1136
1137 /* change the current log to the next block addr in advance */
1138 if (next_segno != segno) {
1139 curseg->next_segno = next_segno;
1140 change_curseg(sbi, type, true);
1141 }
1142 curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, next_blkaddr);
1143
1144 /* rewrite node page */
1145 set_page_writeback(page);
1146 f2fs_submit_page_mbio(sbi, page, new_blkaddr, &fio);
1147 f2fs_submit_merged_bio(sbi, NODE, WRITE);
1148 refresh_sit_entry(sbi, old_blkaddr, new_blkaddr);
1149 locate_dirty_segment(sbi, old_cursegno);
1150
1151 mutex_unlock(&sit_i->sentry_lock);
1152 mutex_unlock(&curseg->curseg_mutex);
1153}
1154
1155static inline bool is_merged_page(struct f2fs_sb_info *sbi, 1106static inline bool is_merged_page(struct f2fs_sb_info *sbi,
1156 struct page *page, enum page_type type) 1107 struct page *page, enum page_type type)
1157{ 1108{
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 55973f7b0330..ff483257283b 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -549,7 +549,7 @@ static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t blk_addr)
549} 549}
550 550
551/* 551/*
552 * Summary block is always treated as invalid block 552 * Summary block is always treated as an invalid block
553 */ 553 */
554static inline void check_block_count(struct f2fs_sb_info *sbi, 554static inline void check_block_count(struct f2fs_sb_info *sbi,
555 int segno, struct f2fs_sit_entry *raw_sit) 555 int segno, struct f2fs_sit_entry *raw_sit)
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 657582fc7601..41bdf511003d 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -432,9 +432,15 @@ static void f2fs_put_super(struct super_block *sb)
432 stop_gc_thread(sbi); 432 stop_gc_thread(sbi);
433 433
434 /* We don't need to do checkpoint when it's clean */ 434 /* We don't need to do checkpoint when it's clean */
435 if (sbi->s_dirty && get_pages(sbi, F2FS_DIRTY_NODES)) 435 if (sbi->s_dirty)
436 write_checkpoint(sbi, true); 436 write_checkpoint(sbi, true);
437 437
438 /*
439 * normally superblock is clean, so we need to release this.
440 * In addition, EIO will skip do checkpoint, we need this as well.
441 */
442 release_dirty_inode(sbi);
443
438 iput(sbi->node_inode); 444 iput(sbi->node_inode);
439 iput(sbi->meta_inode); 445 iput(sbi->meta_inode);
440 446
@@ -457,9 +463,6 @@ int f2fs_sync_fs(struct super_block *sb, int sync)
457 463
458 trace_f2fs_sync_fs(sb, sync); 464 trace_f2fs_sync_fs(sb, sync);
459 465
460 if (!sbi->s_dirty && !get_pages(sbi, F2FS_DIRTY_NODES))
461 return 0;
462
463 if (sync) { 466 if (sync) {
464 mutex_lock(&sbi->gc_mutex); 467 mutex_lock(&sbi->gc_mutex);
465 write_checkpoint(sbi, false); 468 write_checkpoint(sbi, false);
@@ -505,8 +508,8 @@ static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf)
505 buf->f_bfree = buf->f_blocks - valid_user_blocks(sbi) - ovp_count; 508 buf->f_bfree = buf->f_blocks - valid_user_blocks(sbi) - ovp_count;
506 buf->f_bavail = user_block_count - valid_user_blocks(sbi); 509 buf->f_bavail = user_block_count - valid_user_blocks(sbi);
507 510
508 buf->f_files = sbi->total_node_count; 511 buf->f_files = sbi->total_node_count - F2FS_RESERVED_NODE_NUM;
509 buf->f_ffree = sbi->total_node_count - valid_inode_count(sbi); 512 buf->f_ffree = buf->f_files - valid_inode_count(sbi);
510 513
511 buf->f_namelen = F2FS_NAME_LEN; 514 buf->f_namelen = F2FS_NAME_LEN;
512 buf->f_fsid.val[0] = (u32)id; 515 buf->f_fsid.val[0] = (u32)id;
@@ -663,7 +666,7 @@ restore_gc:
663 if (need_restart_gc) { 666 if (need_restart_gc) {
664 if (start_gc_thread(sbi)) 667 if (start_gc_thread(sbi))
665 f2fs_msg(sbi->sb, KERN_WARNING, 668 f2fs_msg(sbi->sb, KERN_WARNING,
666 "background gc thread is stop"); 669 "background gc thread has stopped");
667 } else if (need_stop_gc) { 670 } else if (need_stop_gc) {
668 stop_gc_thread(sbi); 671 stop_gc_thread(sbi);
669 } 672 }
@@ -812,7 +815,7 @@ static int sanity_check_ckpt(struct f2fs_sb_info *sbi)
812 if (unlikely(fsmeta >= total)) 815 if (unlikely(fsmeta >= total))
813 return 1; 816 return 1;
814 817
815 if (unlikely(is_set_ckpt_flags(ckpt, CP_ERROR_FLAG))) { 818 if (unlikely(f2fs_cp_error(sbi))) {
816 f2fs_msg(sbi->sb, KERN_ERR, "A bug case: need to run fsck"); 819 f2fs_msg(sbi->sb, KERN_ERR, "A bug case: need to run fsck");
817 return 1; 820 return 1;
818 } 821 }
@@ -899,8 +902,10 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
899 struct buffer_head *raw_super_buf; 902 struct buffer_head *raw_super_buf;
900 struct inode *root; 903 struct inode *root;
901 long err = -EINVAL; 904 long err = -EINVAL;
905 bool retry = true;
902 int i; 906 int i;
903 907
908try_onemore:
904 /* allocate memory for f2fs-specific super block info */ 909 /* allocate memory for f2fs-specific super block info */
905 sbi = kzalloc(sizeof(struct f2fs_sb_info), GFP_KERNEL); 910 sbi = kzalloc(sizeof(struct f2fs_sb_info), GFP_KERNEL);
906 if (!sbi) 911 if (!sbi)
@@ -1080,9 +1085,11 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
1080 /* recover fsynced data */ 1085 /* recover fsynced data */
1081 if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) { 1086 if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) {
1082 err = recover_fsync_data(sbi); 1087 err = recover_fsync_data(sbi);
1083 if (err) 1088 if (err) {
1084 f2fs_msg(sb, KERN_ERR, 1089 f2fs_msg(sb, KERN_ERR,
1085 "Cannot recover all fsync data errno=%ld", err); 1090 "Cannot recover all fsync data errno=%ld", err);
1091 goto free_kobj;
1092 }
1086 } 1093 }
1087 1094
1088 /* 1095 /*
@@ -1123,6 +1130,13 @@ free_sb_buf:
1123 brelse(raw_super_buf); 1130 brelse(raw_super_buf);
1124free_sbi: 1131free_sbi:
1125 kfree(sbi); 1132 kfree(sbi);
1133
1134 /* give only one another chance */
1135 if (retry) {
1136 retry = 0;
1137 shrink_dcache_sb(sb);
1138 goto try_onemore;
1139 }
1126 return err; 1140 return err;
1127} 1141}
1128 1142
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
index 8bea941ee309..728a5dc3dc16 100644
--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c
@@ -528,7 +528,7 @@ static int __f2fs_setxattr(struct inode *inode, int index,
528 int free; 528 int free;
529 /* 529 /*
530 * If value is NULL, it is remove operation. 530 * If value is NULL, it is remove operation.
531 * In case of update operation, we caculate free. 531 * In case of update operation, we calculate free.
532 */ 532 */
533 free = MIN_OFFSET(inode) - ((char *)last - (char *)base_addr); 533 free = MIN_OFFSET(inode) - ((char *)last - (char *)base_addr);
534 if (found) 534 if (found)
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 6fac74349856..b73e0215baa7 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -97,7 +97,7 @@ static void jbd2_commit_block_csum_set(journal_t *j, struct buffer_head *bh)
97 struct commit_header *h; 97 struct commit_header *h;
98 __u32 csum; 98 __u32 csum;
99 99
100 if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 100 if (!jbd2_journal_has_csum_v2or3(j))
101 return; 101 return;
102 102
103 h = (struct commit_header *)(bh->b_data); 103 h = (struct commit_header *)(bh->b_data);
@@ -313,11 +313,11 @@ static __u32 jbd2_checksum_data(__u32 crc32_sum, struct buffer_head *bh)
313 return checksum; 313 return checksum;
314} 314}
315 315
316static void write_tag_block(int tag_bytes, journal_block_tag_t *tag, 316static void write_tag_block(journal_t *j, journal_block_tag_t *tag,
317 unsigned long long block) 317 unsigned long long block)
318{ 318{
319 tag->t_blocknr = cpu_to_be32(block & (u32)~0); 319 tag->t_blocknr = cpu_to_be32(block & (u32)~0);
320 if (tag_bytes > JBD2_TAG_SIZE32) 320 if (JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_64BIT))
321 tag->t_blocknr_high = cpu_to_be32((block >> 31) >> 1); 321 tag->t_blocknr_high = cpu_to_be32((block >> 31) >> 1);
322} 322}
323 323
@@ -327,7 +327,7 @@ static void jbd2_descr_block_csum_set(journal_t *j,
327 struct jbd2_journal_block_tail *tail; 327 struct jbd2_journal_block_tail *tail;
328 __u32 csum; 328 __u32 csum;
329 329
330 if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 330 if (!jbd2_journal_has_csum_v2or3(j))
331 return; 331 return;
332 332
333 tail = (struct jbd2_journal_block_tail *)(bh->b_data + j->j_blocksize - 333 tail = (struct jbd2_journal_block_tail *)(bh->b_data + j->j_blocksize -
@@ -340,12 +340,13 @@ static void jbd2_descr_block_csum_set(journal_t *j,
340static void jbd2_block_tag_csum_set(journal_t *j, journal_block_tag_t *tag, 340static void jbd2_block_tag_csum_set(journal_t *j, journal_block_tag_t *tag,
341 struct buffer_head *bh, __u32 sequence) 341 struct buffer_head *bh, __u32 sequence)
342{ 342{
343 journal_block_tag3_t *tag3 = (journal_block_tag3_t *)tag;
343 struct page *page = bh->b_page; 344 struct page *page = bh->b_page;
344 __u8 *addr; 345 __u8 *addr;
345 __u32 csum32; 346 __u32 csum32;
346 __be32 seq; 347 __be32 seq;
347 348
348 if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 349 if (!jbd2_journal_has_csum_v2or3(j))
349 return; 350 return;
350 351
351 seq = cpu_to_be32(sequence); 352 seq = cpu_to_be32(sequence);
@@ -355,8 +356,10 @@ static void jbd2_block_tag_csum_set(journal_t *j, journal_block_tag_t *tag,
355 bh->b_size); 356 bh->b_size);
356 kunmap_atomic(addr); 357 kunmap_atomic(addr);
357 358
358 /* We only have space to store the lower 16 bits of the crc32c. */ 359 if (JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V3))
359 tag->t_checksum = cpu_to_be16(csum32); 360 tag3->t_checksum = cpu_to_be32(csum32);
361 else
362 tag->t_checksum = cpu_to_be16(csum32);
360} 363}
361/* 364/*
362 * jbd2_journal_commit_transaction 365 * jbd2_journal_commit_transaction
@@ -396,7 +399,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
396 LIST_HEAD(io_bufs); 399 LIST_HEAD(io_bufs);
397 LIST_HEAD(log_bufs); 400 LIST_HEAD(log_bufs);
398 401
399 if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 402 if (jbd2_journal_has_csum_v2or3(journal))
400 csum_size = sizeof(struct jbd2_journal_block_tail); 403 csum_size = sizeof(struct jbd2_journal_block_tail);
401 404
402 /* 405 /*
@@ -690,7 +693,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
690 tag_flag |= JBD2_FLAG_SAME_UUID; 693 tag_flag |= JBD2_FLAG_SAME_UUID;
691 694
692 tag = (journal_block_tag_t *) tagp; 695 tag = (journal_block_tag_t *) tagp;
693 write_tag_block(tag_bytes, tag, jh2bh(jh)->b_blocknr); 696 write_tag_block(journal, tag, jh2bh(jh)->b_blocknr);
694 tag->t_flags = cpu_to_be16(tag_flag); 697 tag->t_flags = cpu_to_be16(tag_flag);
695 jbd2_block_tag_csum_set(journal, tag, wbuf[bufs], 698 jbd2_block_tag_csum_set(journal, tag, wbuf[bufs],
696 commit_transaction->t_tid); 699 commit_transaction->t_tid);
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 67b8e303946c..19d74d86d99c 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -124,7 +124,7 @@ EXPORT_SYMBOL(__jbd2_debug);
124/* Checksumming functions */ 124/* Checksumming functions */
125static int jbd2_verify_csum_type(journal_t *j, journal_superblock_t *sb) 125static int jbd2_verify_csum_type(journal_t *j, journal_superblock_t *sb)
126{ 126{
127 if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 127 if (!jbd2_journal_has_csum_v2or3(j))
128 return 1; 128 return 1;
129 129
130 return sb->s_checksum_type == JBD2_CRC32C_CHKSUM; 130 return sb->s_checksum_type == JBD2_CRC32C_CHKSUM;
@@ -145,7 +145,7 @@ static __be32 jbd2_superblock_csum(journal_t *j, journal_superblock_t *sb)
145 145
146static int jbd2_superblock_csum_verify(journal_t *j, journal_superblock_t *sb) 146static int jbd2_superblock_csum_verify(journal_t *j, journal_superblock_t *sb)
147{ 147{
148 if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 148 if (!jbd2_journal_has_csum_v2or3(j))
149 return 1; 149 return 1;
150 150
151 return sb->s_checksum == jbd2_superblock_csum(j, sb); 151 return sb->s_checksum == jbd2_superblock_csum(j, sb);
@@ -153,7 +153,7 @@ static int jbd2_superblock_csum_verify(journal_t *j, journal_superblock_t *sb)
153 153
154static void jbd2_superblock_csum_set(journal_t *j, journal_superblock_t *sb) 154static void jbd2_superblock_csum_set(journal_t *j, journal_superblock_t *sb)
155{ 155{
156 if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 156 if (!jbd2_journal_has_csum_v2or3(j))
157 return; 157 return;
158 158
159 sb->s_checksum = jbd2_superblock_csum(j, sb); 159 sb->s_checksum = jbd2_superblock_csum(j, sb);
@@ -1522,21 +1522,29 @@ static int journal_get_superblock(journal_t *journal)
1522 goto out; 1522 goto out;
1523 } 1523 }
1524 1524
1525 if (JBD2_HAS_COMPAT_FEATURE(journal, JBD2_FEATURE_COMPAT_CHECKSUM) && 1525 if (jbd2_journal_has_csum_v2or3(journal) &&
1526 JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) { 1526 JBD2_HAS_COMPAT_FEATURE(journal, JBD2_FEATURE_COMPAT_CHECKSUM)) {
1527 /* Can't have checksum v1 and v2 on at the same time! */ 1527 /* Can't have checksum v1 and v2 on at the same time! */
1528 printk(KERN_ERR "JBD2: Can't enable checksumming v1 and v2 " 1528 printk(KERN_ERR "JBD2: Can't enable checksumming v1 and v2 "
1529 "at the same time!\n"); 1529 "at the same time!\n");
1530 goto out; 1530 goto out;
1531 } 1531 }
1532 1532
1533 if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2) &&
1534 JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V3)) {
1535 /* Can't have checksum v2 and v3 at the same time! */
1536 printk(KERN_ERR "JBD2: Can't enable checksumming v2 and v3 "
1537 "at the same time!\n");
1538 goto out;
1539 }
1540
1533 if (!jbd2_verify_csum_type(journal, sb)) { 1541 if (!jbd2_verify_csum_type(journal, sb)) {
1534 printk(KERN_ERR "JBD2: Unknown checksum type\n"); 1542 printk(KERN_ERR "JBD2: Unknown checksum type\n");
1535 goto out; 1543 goto out;
1536 } 1544 }
1537 1545
1538 /* Load the checksum driver */ 1546 /* Load the checksum driver */
1539 if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) { 1547 if (jbd2_journal_has_csum_v2or3(journal)) {
1540 journal->j_chksum_driver = crypto_alloc_shash("crc32c", 0, 0); 1548 journal->j_chksum_driver = crypto_alloc_shash("crc32c", 0, 0);
1541 if (IS_ERR(journal->j_chksum_driver)) { 1549 if (IS_ERR(journal->j_chksum_driver)) {
1542 printk(KERN_ERR "JBD2: Cannot load crc32c driver.\n"); 1550 printk(KERN_ERR "JBD2: Cannot load crc32c driver.\n");
@@ -1553,7 +1561,7 @@ static int journal_get_superblock(journal_t *journal)
1553 } 1561 }
1554 1562
1555 /* Precompute checksum seed for all metadata */ 1563 /* Precompute checksum seed for all metadata */
1556 if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 1564 if (jbd2_journal_has_csum_v2or3(journal))
1557 journal->j_csum_seed = jbd2_chksum(journal, ~0, sb->s_uuid, 1565 journal->j_csum_seed = jbd2_chksum(journal, ~0, sb->s_uuid,
1558 sizeof(sb->s_uuid)); 1566 sizeof(sb->s_uuid));
1559 1567
@@ -1813,8 +1821,14 @@ int jbd2_journal_set_features (journal_t *journal, unsigned long compat,
1813 if (!jbd2_journal_check_available_features(journal, compat, ro, incompat)) 1821 if (!jbd2_journal_check_available_features(journal, compat, ro, incompat))
1814 return 0; 1822 return 0;
1815 1823
1816 /* Asking for checksumming v2 and v1? Only give them v2. */ 1824 /* If enabling v2 checksums, turn on v3 instead */
1817 if (incompat & JBD2_FEATURE_INCOMPAT_CSUM_V2 && 1825 if (incompat & JBD2_FEATURE_INCOMPAT_CSUM_V2) {
1826 incompat &= ~JBD2_FEATURE_INCOMPAT_CSUM_V2;
1827 incompat |= JBD2_FEATURE_INCOMPAT_CSUM_V3;
1828 }
1829
1830 /* Asking for checksumming v3 and v1? Only give them v3. */
1831 if (incompat & JBD2_FEATURE_INCOMPAT_CSUM_V3 &&
1818 compat & JBD2_FEATURE_COMPAT_CHECKSUM) 1832 compat & JBD2_FEATURE_COMPAT_CHECKSUM)
1819 compat &= ~JBD2_FEATURE_COMPAT_CHECKSUM; 1833 compat &= ~JBD2_FEATURE_COMPAT_CHECKSUM;
1820 1834
@@ -1823,8 +1837,8 @@ int jbd2_journal_set_features (journal_t *journal, unsigned long compat,
1823 1837
1824 sb = journal->j_superblock; 1838 sb = journal->j_superblock;
1825 1839
1826 /* If enabling v2 checksums, update superblock */ 1840 /* If enabling v3 checksums, update superblock */
1827 if (INCOMPAT_FEATURE_ON(JBD2_FEATURE_INCOMPAT_CSUM_V2)) { 1841 if (INCOMPAT_FEATURE_ON(JBD2_FEATURE_INCOMPAT_CSUM_V3)) {
1828 sb->s_checksum_type = JBD2_CRC32C_CHKSUM; 1842 sb->s_checksum_type = JBD2_CRC32C_CHKSUM;
1829 sb->s_feature_compat &= 1843 sb->s_feature_compat &=
1830 ~cpu_to_be32(JBD2_FEATURE_COMPAT_CHECKSUM); 1844 ~cpu_to_be32(JBD2_FEATURE_COMPAT_CHECKSUM);
@@ -1842,8 +1856,7 @@ int jbd2_journal_set_features (journal_t *journal, unsigned long compat,
1842 } 1856 }
1843 1857
1844 /* Precompute checksum seed for all metadata */ 1858 /* Precompute checksum seed for all metadata */
1845 if (JBD2_HAS_INCOMPAT_FEATURE(journal, 1859 if (jbd2_journal_has_csum_v2or3(journal))
1846 JBD2_FEATURE_INCOMPAT_CSUM_V2))
1847 journal->j_csum_seed = jbd2_chksum(journal, ~0, 1860 journal->j_csum_seed = jbd2_chksum(journal, ~0,
1848 sb->s_uuid, 1861 sb->s_uuid,
1849 sizeof(sb->s_uuid)); 1862 sizeof(sb->s_uuid));
@@ -1852,7 +1865,8 @@ int jbd2_journal_set_features (journal_t *journal, unsigned long compat,
1852 /* If enabling v1 checksums, downgrade superblock */ 1865 /* If enabling v1 checksums, downgrade superblock */
1853 if (COMPAT_FEATURE_ON(JBD2_FEATURE_COMPAT_CHECKSUM)) 1866 if (COMPAT_FEATURE_ON(JBD2_FEATURE_COMPAT_CHECKSUM))
1854 sb->s_feature_incompat &= 1867 sb->s_feature_incompat &=
1855 ~cpu_to_be32(JBD2_FEATURE_INCOMPAT_CSUM_V2); 1868 ~cpu_to_be32(JBD2_FEATURE_INCOMPAT_CSUM_V2 |
1869 JBD2_FEATURE_INCOMPAT_CSUM_V3);
1856 1870
1857 sb->s_feature_compat |= cpu_to_be32(compat); 1871 sb->s_feature_compat |= cpu_to_be32(compat);
1858 sb->s_feature_ro_compat |= cpu_to_be32(ro); 1872 sb->s_feature_ro_compat |= cpu_to_be32(ro);
@@ -2165,16 +2179,20 @@ int jbd2_journal_blocks_per_page(struct inode *inode)
2165 */ 2179 */
2166size_t journal_tag_bytes(journal_t *journal) 2180size_t journal_tag_bytes(journal_t *journal)
2167{ 2181{
2168 journal_block_tag_t tag; 2182 size_t sz;
2169 size_t x = 0; 2183
2184 if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V3))
2185 return sizeof(journal_block_tag3_t);
2186
2187 sz = sizeof(journal_block_tag_t);
2170 2188
2171 if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 2189 if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2))
2172 x += sizeof(tag.t_checksum); 2190 sz += sizeof(__u16);
2173 2191
2174 if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT)) 2192 if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT))
2175 return x + JBD2_TAG_SIZE64; 2193 return sz;
2176 else 2194 else
2177 return x + JBD2_TAG_SIZE32; 2195 return sz - sizeof(__u32);
2178} 2196}
2179 2197
2180/* 2198/*
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
index 3b6bb19d60b1..9b329b55ffe3 100644
--- a/fs/jbd2/recovery.c
+++ b/fs/jbd2/recovery.c
@@ -181,7 +181,7 @@ static int jbd2_descr_block_csum_verify(journal_t *j,
181 __be32 provided; 181 __be32 provided;
182 __u32 calculated; 182 __u32 calculated;
183 183
184 if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 184 if (!jbd2_journal_has_csum_v2or3(j))
185 return 1; 185 return 1;
186 186
187 tail = (struct jbd2_journal_block_tail *)(buf + j->j_blocksize - 187 tail = (struct jbd2_journal_block_tail *)(buf + j->j_blocksize -
@@ -205,7 +205,7 @@ static int count_tags(journal_t *journal, struct buffer_head *bh)
205 int nr = 0, size = journal->j_blocksize; 205 int nr = 0, size = journal->j_blocksize;
206 int tag_bytes = journal_tag_bytes(journal); 206 int tag_bytes = journal_tag_bytes(journal);
207 207
208 if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 208 if (jbd2_journal_has_csum_v2or3(journal))
209 size -= sizeof(struct jbd2_journal_block_tail); 209 size -= sizeof(struct jbd2_journal_block_tail);
210 210
211 tagp = &bh->b_data[sizeof(journal_header_t)]; 211 tagp = &bh->b_data[sizeof(journal_header_t)];
@@ -338,10 +338,11 @@ int jbd2_journal_skip_recovery(journal_t *journal)
338 return err; 338 return err;
339} 339}
340 340
341static inline unsigned long long read_tag_block(int tag_bytes, journal_block_tag_t *tag) 341static inline unsigned long long read_tag_block(journal_t *journal,
342 journal_block_tag_t *tag)
342{ 343{
343 unsigned long long block = be32_to_cpu(tag->t_blocknr); 344 unsigned long long block = be32_to_cpu(tag->t_blocknr);
344 if (tag_bytes > JBD2_TAG_SIZE32) 345 if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT))
345 block |= (u64)be32_to_cpu(tag->t_blocknr_high) << 32; 346 block |= (u64)be32_to_cpu(tag->t_blocknr_high) << 32;
346 return block; 347 return block;
347} 348}
@@ -384,7 +385,7 @@ static int jbd2_commit_block_csum_verify(journal_t *j, void *buf)
384 __be32 provided; 385 __be32 provided;
385 __u32 calculated; 386 __u32 calculated;
386 387
387 if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 388 if (!jbd2_journal_has_csum_v2or3(j))
388 return 1; 389 return 1;
389 390
390 h = buf; 391 h = buf;
@@ -399,17 +400,21 @@ static int jbd2_commit_block_csum_verify(journal_t *j, void *buf)
399static int jbd2_block_tag_csum_verify(journal_t *j, journal_block_tag_t *tag, 400static int jbd2_block_tag_csum_verify(journal_t *j, journal_block_tag_t *tag,
400 void *buf, __u32 sequence) 401 void *buf, __u32 sequence)
401{ 402{
403 journal_block_tag3_t *tag3 = (journal_block_tag3_t *)tag;
402 __u32 csum32; 404 __u32 csum32;
403 __be32 seq; 405 __be32 seq;
404 406
405 if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 407 if (!jbd2_journal_has_csum_v2or3(j))
406 return 1; 408 return 1;
407 409
408 seq = cpu_to_be32(sequence); 410 seq = cpu_to_be32(sequence);
409 csum32 = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&seq, sizeof(seq)); 411 csum32 = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&seq, sizeof(seq));
410 csum32 = jbd2_chksum(j, csum32, buf, j->j_blocksize); 412 csum32 = jbd2_chksum(j, csum32, buf, j->j_blocksize);
411 413
412 return tag->t_checksum == cpu_to_be16(csum32); 414 if (JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V3))
415 return tag3->t_checksum == cpu_to_be32(csum32);
416 else
417 return tag->t_checksum == cpu_to_be16(csum32);
413} 418}
414 419
415static int do_one_pass(journal_t *journal, 420static int do_one_pass(journal_t *journal,
@@ -426,6 +431,7 @@ static int do_one_pass(journal_t *journal,
426 int tag_bytes = journal_tag_bytes(journal); 431 int tag_bytes = journal_tag_bytes(journal);
427 __u32 crc32_sum = ~0; /* Transactional Checksums */ 432 __u32 crc32_sum = ~0; /* Transactional Checksums */
428 int descr_csum_size = 0; 433 int descr_csum_size = 0;
434 int block_error = 0;
429 435
430 /* 436 /*
431 * First thing is to establish what we expect to find in the log 437 * First thing is to establish what we expect to find in the log
@@ -512,8 +518,7 @@ static int do_one_pass(journal_t *journal,
512 switch(blocktype) { 518 switch(blocktype) {
513 case JBD2_DESCRIPTOR_BLOCK: 519 case JBD2_DESCRIPTOR_BLOCK:
514 /* Verify checksum first */ 520 /* Verify checksum first */
515 if (JBD2_HAS_INCOMPAT_FEATURE(journal, 521 if (jbd2_journal_has_csum_v2or3(journal))
516 JBD2_FEATURE_INCOMPAT_CSUM_V2))
517 descr_csum_size = 522 descr_csum_size =
518 sizeof(struct jbd2_journal_block_tail); 523 sizeof(struct jbd2_journal_block_tail);
519 if (descr_csum_size > 0 && 524 if (descr_csum_size > 0 &&
@@ -574,7 +579,7 @@ static int do_one_pass(journal_t *journal,
574 unsigned long long blocknr; 579 unsigned long long blocknr;
575 580
576 J_ASSERT(obh != NULL); 581 J_ASSERT(obh != NULL);
577 blocknr = read_tag_block(tag_bytes, 582 blocknr = read_tag_block(journal,
578 tag); 583 tag);
579 584
580 /* If the block has been 585 /* If the block has been
@@ -598,7 +603,8 @@ static int do_one_pass(journal_t *journal,
598 "checksum recovering " 603 "checksum recovering "
599 "block %llu in log\n", 604 "block %llu in log\n",
600 blocknr); 605 blocknr);
601 continue; 606 block_error = 1;
607 goto skip_write;
602 } 608 }
603 609
604 /* Find a buffer for the new 610 /* Find a buffer for the new
@@ -797,7 +803,8 @@ static int do_one_pass(journal_t *journal,
797 success = -EIO; 803 success = -EIO;
798 } 804 }
799 } 805 }
800 806 if (block_error && success == 0)
807 success = -EIO;
801 return success; 808 return success;
802 809
803 failed: 810 failed:
@@ -811,7 +818,7 @@ static int jbd2_revoke_block_csum_verify(journal_t *j,
811 __be32 provided; 818 __be32 provided;
812 __u32 calculated; 819 __u32 calculated;
813 820
814 if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 821 if (!jbd2_journal_has_csum_v2or3(j))
815 return 1; 822 return 1;
816 823
817 tail = (struct jbd2_journal_revoke_tail *)(buf + j->j_blocksize - 824 tail = (struct jbd2_journal_revoke_tail *)(buf + j->j_blocksize -
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c
index 198c9c10276d..d5e95a175c92 100644
--- a/fs/jbd2/revoke.c
+++ b/fs/jbd2/revoke.c
@@ -91,8 +91,8 @@
91#include <linux/list.h> 91#include <linux/list.h>
92#include <linux/init.h> 92#include <linux/init.h>
93#include <linux/bio.h> 93#include <linux/bio.h>
94#endif
95#include <linux/log2.h> 94#include <linux/log2.h>
95#endif
96 96
97static struct kmem_cache *jbd2_revoke_record_cache; 97static struct kmem_cache *jbd2_revoke_record_cache;
98static struct kmem_cache *jbd2_revoke_table_cache; 98static struct kmem_cache *jbd2_revoke_table_cache;
@@ -597,7 +597,7 @@ static void write_one_revoke_record(journal_t *journal,
597 offset = *offsetp; 597 offset = *offsetp;
598 598
599 /* Do we need to leave space at the end for a checksum? */ 599 /* Do we need to leave space at the end for a checksum? */
600 if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 600 if (jbd2_journal_has_csum_v2or3(journal))
601 csum_size = sizeof(struct jbd2_journal_revoke_tail); 601 csum_size = sizeof(struct jbd2_journal_revoke_tail);
602 602
603 /* Make sure we have a descriptor with space left for the record */ 603 /* Make sure we have a descriptor with space left for the record */
@@ -644,7 +644,7 @@ static void jbd2_revoke_csum_set(journal_t *j, struct buffer_head *bh)
644 struct jbd2_journal_revoke_tail *tail; 644 struct jbd2_journal_revoke_tail *tail;
645 __u32 csum; 645 __u32 csum;
646 646
647 if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 647 if (!jbd2_journal_has_csum_v2or3(j))
648 return; 648 return;
649 649
650 tail = (struct jbd2_journal_revoke_tail *)(bh->b_data + j->j_blocksize - 650 tail = (struct jbd2_journal_revoke_tail *)(bh->b_data + j->j_blocksize -
diff --git a/fs/locks.c b/fs/locks.c
index cb66fb05ad4a..bb08857f90b5 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1619,7 +1619,7 @@ static int generic_add_lease(struct file *filp, long arg, struct file_lock **flp
1619 smp_mb(); 1619 smp_mb();
1620 error = check_conflicting_open(dentry, arg); 1620 error = check_conflicting_open(dentry, arg);
1621 if (error) 1621 if (error)
1622 locks_unlink_lock(flp); 1622 locks_unlink_lock(before);
1623out: 1623out:
1624 if (is_deleg) 1624 if (is_deleg)
1625 mutex_unlock(&inode->i_mutex); 1625 mutex_unlock(&inode->i_mutex);
diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c
index d0fec260132a..24c6898159cc 100644
--- a/fs/nfs/nfs3acl.c
+++ b/fs/nfs/nfs3acl.c
@@ -129,7 +129,10 @@ static int __nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
129 .rpc_argp = &args, 129 .rpc_argp = &args,
130 .rpc_resp = &fattr, 130 .rpc_resp = &fattr,
131 }; 131 };
132 int status; 132 int status = 0;
133
134 if (acl == NULL && (!S_ISDIR(inode->i_mode) || dfacl == NULL))
135 goto out;
133 136
134 status = -EOPNOTSUPP; 137 status = -EOPNOTSUPP;
135 if (!nfs_server_capable(inode, NFS_CAP_ACLS)) 138 if (!nfs_server_capable(inode, NFS_CAP_ACLS))
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 75ae8d22f067..7dd8aca31c29 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2560,6 +2560,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
2560 struct nfs4_closedata *calldata = data; 2560 struct nfs4_closedata *calldata = data;
2561 struct nfs4_state *state = calldata->state; 2561 struct nfs4_state *state = calldata->state;
2562 struct nfs_server *server = NFS_SERVER(calldata->inode); 2562 struct nfs_server *server = NFS_SERVER(calldata->inode);
2563 nfs4_stateid *res_stateid = NULL;
2563 2564
2564 dprintk("%s: begin!\n", __func__); 2565 dprintk("%s: begin!\n", __func__);
2565 if (!nfs4_sequence_done(task, &calldata->res.seq_res)) 2566 if (!nfs4_sequence_done(task, &calldata->res.seq_res))
@@ -2570,12 +2571,12 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
2570 */ 2571 */
2571 switch (task->tk_status) { 2572 switch (task->tk_status) {
2572 case 0: 2573 case 0:
2573 if (calldata->roc) 2574 res_stateid = &calldata->res.stateid;
2575 if (calldata->arg.fmode == 0 && calldata->roc)
2574 pnfs_roc_set_barrier(state->inode, 2576 pnfs_roc_set_barrier(state->inode,
2575 calldata->roc_barrier); 2577 calldata->roc_barrier);
2576 nfs_clear_open_stateid(state, &calldata->res.stateid, 0);
2577 renew_lease(server, calldata->timestamp); 2578 renew_lease(server, calldata->timestamp);
2578 goto out_release; 2579 break;
2579 case -NFS4ERR_ADMIN_REVOKED: 2580 case -NFS4ERR_ADMIN_REVOKED:
2580 case -NFS4ERR_STALE_STATEID: 2581 case -NFS4ERR_STALE_STATEID:
2581 case -NFS4ERR_OLD_STATEID: 2582 case -NFS4ERR_OLD_STATEID:
@@ -2589,7 +2590,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
2589 goto out_release; 2590 goto out_release;
2590 } 2591 }
2591 } 2592 }
2592 nfs_clear_open_stateid(state, NULL, calldata->arg.fmode); 2593 nfs_clear_open_stateid(state, res_stateid, calldata->arg.fmode);
2593out_release: 2594out_release:
2594 nfs_release_seqid(calldata->arg.seqid); 2595 nfs_release_seqid(calldata->arg.seqid);
2595 nfs_refresh_inode(calldata->inode, calldata->res.fattr); 2596 nfs_refresh_inode(calldata->inode, calldata->res.fattr);
@@ -2601,6 +2602,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
2601 struct nfs4_closedata *calldata = data; 2602 struct nfs4_closedata *calldata = data;
2602 struct nfs4_state *state = calldata->state; 2603 struct nfs4_state *state = calldata->state;
2603 struct inode *inode = calldata->inode; 2604 struct inode *inode = calldata->inode;
2605 bool is_rdonly, is_wronly, is_rdwr;
2604 int call_close = 0; 2606 int call_close = 0;
2605 2607
2606 dprintk("%s: begin!\n", __func__); 2608 dprintk("%s: begin!\n", __func__);
@@ -2608,18 +2610,24 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
2608 goto out_wait; 2610 goto out_wait;
2609 2611
2610 task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE]; 2612 task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE];
2611 calldata->arg.fmode = FMODE_READ|FMODE_WRITE;
2612 spin_lock(&state->owner->so_lock); 2613 spin_lock(&state->owner->so_lock);
2614 is_rdwr = test_bit(NFS_O_RDWR_STATE, &state->flags);
2615 is_rdonly = test_bit(NFS_O_RDONLY_STATE, &state->flags);
2616 is_wronly = test_bit(NFS_O_WRONLY_STATE, &state->flags);
2617 /* Calculate the current open share mode */
2618 calldata->arg.fmode = 0;
2619 if (is_rdonly || is_rdwr)
2620 calldata->arg.fmode |= FMODE_READ;
2621 if (is_wronly || is_rdwr)
2622 calldata->arg.fmode |= FMODE_WRITE;
2613 /* Calculate the change in open mode */ 2623 /* Calculate the change in open mode */
2614 if (state->n_rdwr == 0) { 2624 if (state->n_rdwr == 0) {
2615 if (state->n_rdonly == 0) { 2625 if (state->n_rdonly == 0) {
2616 call_close |= test_bit(NFS_O_RDONLY_STATE, &state->flags); 2626 call_close |= is_rdonly || is_rdwr;
2617 call_close |= test_bit(NFS_O_RDWR_STATE, &state->flags);
2618 calldata->arg.fmode &= ~FMODE_READ; 2627 calldata->arg.fmode &= ~FMODE_READ;
2619 } 2628 }
2620 if (state->n_wronly == 0) { 2629 if (state->n_wronly == 0) {
2621 call_close |= test_bit(NFS_O_WRONLY_STATE, &state->flags); 2630 call_close |= is_wronly || is_rdwr;
2622 call_close |= test_bit(NFS_O_RDWR_STATE, &state->flags);
2623 calldata->arg.fmode &= ~FMODE_WRITE; 2631 calldata->arg.fmode &= ~FMODE_WRITE;
2624 } 2632 }
2625 } 2633 }
diff --git a/fs/ocfs2/cluster/quorum.c b/fs/ocfs2/cluster/quorum.c
index 1ec141e758d7..62e8ec619b4c 100644
--- a/fs/ocfs2/cluster/quorum.c
+++ b/fs/ocfs2/cluster/quorum.c
@@ -160,9 +160,18 @@ static void o2quo_make_decision(struct work_struct *work)
160 } 160 }
161 161
162out: 162out:
163 spin_unlock(&qs->qs_lock); 163 if (fence) {
164 if (fence) 164 spin_unlock(&qs->qs_lock);
165 o2quo_fence_self(); 165 o2quo_fence_self();
166 } else {
167 mlog(ML_NOTICE, "not fencing this node, heartbeating: %d, "
168 "connected: %d, lowest: %d (%sreachable)\n",
169 qs->qs_heartbeating, qs->qs_connected, lowest_hb,
170 lowest_reachable ? "" : "un");
171 spin_unlock(&qs->qs_lock);
172
173 }
174
166} 175}
167 176
168static void o2quo_set_hold(struct o2quo_state *qs, u8 node) 177static void o2quo_set_hold(struct o2quo_state *qs, u8 node)
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index 681691bc233a..ea34952f9496 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -1480,6 +1480,14 @@ static int o2net_set_nodelay(struct socket *sock)
1480 return ret; 1480 return ret;
1481} 1481}
1482 1482
1483static int o2net_set_usertimeout(struct socket *sock)
1484{
1485 int user_timeout = O2NET_TCP_USER_TIMEOUT;
1486
1487 return kernel_setsockopt(sock, SOL_TCP, TCP_USER_TIMEOUT,
1488 (char *)&user_timeout, sizeof(user_timeout));
1489}
1490
1483static void o2net_initialize_handshake(void) 1491static void o2net_initialize_handshake(void)
1484{ 1492{
1485 o2net_hand->o2hb_heartbeat_timeout_ms = cpu_to_be32( 1493 o2net_hand->o2hb_heartbeat_timeout_ms = cpu_to_be32(
@@ -1536,16 +1544,20 @@ static void o2net_idle_timer(unsigned long data)
1536#endif 1544#endif
1537 1545
1538 printk(KERN_NOTICE "o2net: Connection to " SC_NODEF_FMT " has been " 1546 printk(KERN_NOTICE "o2net: Connection to " SC_NODEF_FMT " has been "
1539 "idle for %lu.%lu secs, shutting it down.\n", SC_NODEF_ARGS(sc), 1547 "idle for %lu.%lu secs.\n",
1540 msecs / 1000, msecs % 1000); 1548 SC_NODEF_ARGS(sc), msecs / 1000, msecs % 1000);
1541 1549
1542 /* 1550 /* idle timerout happen, don't shutdown the connection, but
1543 * Initialize the nn_timeout so that the next connection attempt 1551 * make fence decision. Maybe the connection can recover before
1544 * will continue in o2net_start_connect. 1552 * the decision is made.
1545 */ 1553 */
1546 atomic_set(&nn->nn_timeout, 1); 1554 atomic_set(&nn->nn_timeout, 1);
1555 o2quo_conn_err(o2net_num_from_nn(nn));
1556 queue_delayed_work(o2net_wq, &nn->nn_still_up,
1557 msecs_to_jiffies(O2NET_QUORUM_DELAY_MS));
1558
1559 o2net_sc_reset_idle_timer(sc);
1547 1560
1548 o2net_sc_queue_work(sc, &sc->sc_shutdown_work);
1549} 1561}
1550 1562
1551static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc) 1563static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc)
@@ -1560,6 +1572,15 @@ static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc)
1560 1572
1561static void o2net_sc_postpone_idle(struct o2net_sock_container *sc) 1573static void o2net_sc_postpone_idle(struct o2net_sock_container *sc)
1562{ 1574{
1575 struct o2net_node *nn = o2net_nn_from_num(sc->sc_node->nd_num);
1576
1577 /* clear fence decision since the connection recover from timeout*/
1578 if (atomic_read(&nn->nn_timeout)) {
1579 o2quo_conn_up(o2net_num_from_nn(nn));
1580 cancel_delayed_work(&nn->nn_still_up);
1581 atomic_set(&nn->nn_timeout, 0);
1582 }
1583
1563 /* Only push out an existing timer */ 1584 /* Only push out an existing timer */
1564 if (timer_pending(&sc->sc_idle_timeout)) 1585 if (timer_pending(&sc->sc_idle_timeout))
1565 o2net_sc_reset_idle_timer(sc); 1586 o2net_sc_reset_idle_timer(sc);
@@ -1650,6 +1671,12 @@ static void o2net_start_connect(struct work_struct *work)
1650 goto out; 1671 goto out;
1651 } 1672 }
1652 1673
1674 ret = o2net_set_usertimeout(sock);
1675 if (ret) {
1676 mlog(ML_ERROR, "set TCP_USER_TIMEOUT failed with %d\n", ret);
1677 goto out;
1678 }
1679
1653 o2net_register_callbacks(sc->sc_sock->sk, sc); 1680 o2net_register_callbacks(sc->sc_sock->sk, sc);
1654 1681
1655 spin_lock(&nn->nn_lock); 1682 spin_lock(&nn->nn_lock);
@@ -1831,6 +1858,12 @@ static int o2net_accept_one(struct socket *sock, int *more)
1831 goto out; 1858 goto out;
1832 } 1859 }
1833 1860
1861 ret = o2net_set_usertimeout(new_sock);
1862 if (ret) {
1863 mlog(ML_ERROR, "set TCP_USER_TIMEOUT failed with %d\n", ret);
1864 goto out;
1865 }
1866
1834 slen = sizeof(sin); 1867 slen = sizeof(sin);
1835 ret = new_sock->ops->getname(new_sock, (struct sockaddr *) &sin, 1868 ret = new_sock->ops->getname(new_sock, (struct sockaddr *) &sin,
1836 &slen, 1); 1869 &slen, 1);
diff --git a/fs/ocfs2/cluster/tcp.h b/fs/ocfs2/cluster/tcp.h
index 5bada2a69b50..c571e849fda4 100644
--- a/fs/ocfs2/cluster/tcp.h
+++ b/fs/ocfs2/cluster/tcp.h
@@ -63,6 +63,7 @@ typedef void (o2net_post_msg_handler_func)(int status, void *data,
63#define O2NET_KEEPALIVE_DELAY_MS_DEFAULT 2000 63#define O2NET_KEEPALIVE_DELAY_MS_DEFAULT 2000
64#define O2NET_IDLE_TIMEOUT_MS_DEFAULT 30000 64#define O2NET_IDLE_TIMEOUT_MS_DEFAULT 30000
65 65
66#define O2NET_TCP_USER_TIMEOUT 0x7fffffff
66 67
67/* TODO: figure this out.... */ 68/* TODO: figure this out.... */
68static inline int o2net_link_down(int err, struct socket *sock) 69static inline int o2net_link_down(int err, struct socket *sock)
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c
index 6f66b3751ace..53e6c40ed4c6 100644
--- a/fs/ocfs2/ioctl.c
+++ b/fs/ocfs2/ioctl.c
@@ -35,9 +35,8 @@
35 copy_to_user((typeof(a) __user *)b, &(a), sizeof(a)) 35 copy_to_user((typeof(a) __user *)b, &(a), sizeof(a))
36 36
37/* 37/*
38 * This call is void because we are already reporting an error that may 38 * This is just a best-effort to tell userspace that this request
39 * be -EFAULT. The error will be returned from the ioctl(2) call. It's 39 * caused the error.
40 * just a best-effort to tell userspace that this request caused the error.
41 */ 40 */
42static inline void o2info_set_request_error(struct ocfs2_info_request *kreq, 41static inline void o2info_set_request_error(struct ocfs2_info_request *kreq,
43 struct ocfs2_info_request __user *req) 42 struct ocfs2_info_request __user *req)
@@ -146,136 +145,105 @@ bail:
146static int ocfs2_info_handle_blocksize(struct inode *inode, 145static int ocfs2_info_handle_blocksize(struct inode *inode,
147 struct ocfs2_info_request __user *req) 146 struct ocfs2_info_request __user *req)
148{ 147{
149 int status = -EFAULT;
150 struct ocfs2_info_blocksize oib; 148 struct ocfs2_info_blocksize oib;
151 149
152 if (o2info_from_user(oib, req)) 150 if (o2info_from_user(oib, req))
153 goto bail; 151 return -EFAULT;
154 152
155 oib.ib_blocksize = inode->i_sb->s_blocksize; 153 oib.ib_blocksize = inode->i_sb->s_blocksize;
156 154
157 o2info_set_request_filled(&oib.ib_req); 155 o2info_set_request_filled(&oib.ib_req);
158 156
159 if (o2info_to_user(oib, req)) 157 if (o2info_to_user(oib, req))
160 goto bail; 158 return -EFAULT;
161
162 status = 0;
163bail:
164 if (status)
165 o2info_set_request_error(&oib.ib_req, req);
166 159
167 return status; 160 return 0;
168} 161}
169 162
170static int ocfs2_info_handle_clustersize(struct inode *inode, 163static int ocfs2_info_handle_clustersize(struct inode *inode,
171 struct ocfs2_info_request __user *req) 164 struct ocfs2_info_request __user *req)
172{ 165{
173 int status = -EFAULT;
174 struct ocfs2_info_clustersize oic; 166 struct ocfs2_info_clustersize oic;
175 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 167 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
176 168
177 if (o2info_from_user(oic, req)) 169 if (o2info_from_user(oic, req))
178 goto bail; 170 return -EFAULT;
179 171
180 oic.ic_clustersize = osb->s_clustersize; 172 oic.ic_clustersize = osb->s_clustersize;
181 173
182 o2info_set_request_filled(&oic.ic_req); 174 o2info_set_request_filled(&oic.ic_req);
183 175
184 if (o2info_to_user(oic, req)) 176 if (o2info_to_user(oic, req))
185 goto bail; 177 return -EFAULT;
186
187 status = 0;
188bail:
189 if (status)
190 o2info_set_request_error(&oic.ic_req, req);
191 178
192 return status; 179 return 0;
193} 180}
194 181
195static int ocfs2_info_handle_maxslots(struct inode *inode, 182static int ocfs2_info_handle_maxslots(struct inode *inode,
196 struct ocfs2_info_request __user *req) 183 struct ocfs2_info_request __user *req)
197{ 184{
198 int status = -EFAULT;
199 struct ocfs2_info_maxslots oim; 185 struct ocfs2_info_maxslots oim;
200 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 186 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
201 187
202 if (o2info_from_user(oim, req)) 188 if (o2info_from_user(oim, req))
203 goto bail; 189 return -EFAULT;
204 190
205 oim.im_max_slots = osb->max_slots; 191 oim.im_max_slots = osb->max_slots;
206 192
207 o2info_set_request_filled(&oim.im_req); 193 o2info_set_request_filled(&oim.im_req);
208 194
209 if (o2info_to_user(oim, req)) 195 if (o2info_to_user(oim, req))
210 goto bail; 196 return -EFAULT;
211 197
212 status = 0; 198 return 0;
213bail:
214 if (status)
215 o2info_set_request_error(&oim.im_req, req);
216
217 return status;
218} 199}
219 200
220static int ocfs2_info_handle_label(struct inode *inode, 201static int ocfs2_info_handle_label(struct inode *inode,
221 struct ocfs2_info_request __user *req) 202 struct ocfs2_info_request __user *req)
222{ 203{
223 int status = -EFAULT;
224 struct ocfs2_info_label oil; 204 struct ocfs2_info_label oil;
225 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 205 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
226 206
227 if (o2info_from_user(oil, req)) 207 if (o2info_from_user(oil, req))
228 goto bail; 208 return -EFAULT;
229 209
230 memcpy(oil.il_label, osb->vol_label, OCFS2_MAX_VOL_LABEL_LEN); 210 memcpy(oil.il_label, osb->vol_label, OCFS2_MAX_VOL_LABEL_LEN);
231 211
232 o2info_set_request_filled(&oil.il_req); 212 o2info_set_request_filled(&oil.il_req);
233 213
234 if (o2info_to_user(oil, req)) 214 if (o2info_to_user(oil, req))
235 goto bail; 215 return -EFAULT;
236 216
237 status = 0; 217 return 0;
238bail:
239 if (status)
240 o2info_set_request_error(&oil.il_req, req);
241
242 return status;
243} 218}
244 219
245static int ocfs2_info_handle_uuid(struct inode *inode, 220static int ocfs2_info_handle_uuid(struct inode *inode,
246 struct ocfs2_info_request __user *req) 221 struct ocfs2_info_request __user *req)
247{ 222{
248 int status = -EFAULT;
249 struct ocfs2_info_uuid oiu; 223 struct ocfs2_info_uuid oiu;
250 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 224 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
251 225
252 if (o2info_from_user(oiu, req)) 226 if (o2info_from_user(oiu, req))
253 goto bail; 227 return -EFAULT;
254 228
255 memcpy(oiu.iu_uuid_str, osb->uuid_str, OCFS2_TEXT_UUID_LEN + 1); 229 memcpy(oiu.iu_uuid_str, osb->uuid_str, OCFS2_TEXT_UUID_LEN + 1);
256 230
257 o2info_set_request_filled(&oiu.iu_req); 231 o2info_set_request_filled(&oiu.iu_req);
258 232
259 if (o2info_to_user(oiu, req)) 233 if (o2info_to_user(oiu, req))
260 goto bail; 234 return -EFAULT;
261
262 status = 0;
263bail:
264 if (status)
265 o2info_set_request_error(&oiu.iu_req, req);
266 235
267 return status; 236 return 0;
268} 237}
269 238
270static int ocfs2_info_handle_fs_features(struct inode *inode, 239static int ocfs2_info_handle_fs_features(struct inode *inode,
271 struct ocfs2_info_request __user *req) 240 struct ocfs2_info_request __user *req)
272{ 241{
273 int status = -EFAULT;
274 struct ocfs2_info_fs_features oif; 242 struct ocfs2_info_fs_features oif;
275 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 243 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
276 244
277 if (o2info_from_user(oif, req)) 245 if (o2info_from_user(oif, req))
278 goto bail; 246 return -EFAULT;
279 247
280 oif.if_compat_features = osb->s_feature_compat; 248 oif.if_compat_features = osb->s_feature_compat;
281 oif.if_incompat_features = osb->s_feature_incompat; 249 oif.if_incompat_features = osb->s_feature_incompat;
@@ -284,39 +252,28 @@ static int ocfs2_info_handle_fs_features(struct inode *inode,
284 o2info_set_request_filled(&oif.if_req); 252 o2info_set_request_filled(&oif.if_req);
285 253
286 if (o2info_to_user(oif, req)) 254 if (o2info_to_user(oif, req))
287 goto bail; 255 return -EFAULT;
288 256
289 status = 0; 257 return 0;
290bail:
291 if (status)
292 o2info_set_request_error(&oif.if_req, req);
293
294 return status;
295} 258}
296 259
297static int ocfs2_info_handle_journal_size(struct inode *inode, 260static int ocfs2_info_handle_journal_size(struct inode *inode,
298 struct ocfs2_info_request __user *req) 261 struct ocfs2_info_request __user *req)
299{ 262{
300 int status = -EFAULT;
301 struct ocfs2_info_journal_size oij; 263 struct ocfs2_info_journal_size oij;
302 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 264 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
303 265
304 if (o2info_from_user(oij, req)) 266 if (o2info_from_user(oij, req))
305 goto bail; 267 return -EFAULT;
306 268
307 oij.ij_journal_size = i_size_read(osb->journal->j_inode); 269 oij.ij_journal_size = i_size_read(osb->journal->j_inode);
308 270
309 o2info_set_request_filled(&oij.ij_req); 271 o2info_set_request_filled(&oij.ij_req);
310 272
311 if (o2info_to_user(oij, req)) 273 if (o2info_to_user(oij, req))
312 goto bail; 274 return -EFAULT;
313 275
314 status = 0; 276 return 0;
315bail:
316 if (status)
317 o2info_set_request_error(&oij.ij_req, req);
318
319 return status;
320} 277}
321 278
322static int ocfs2_info_scan_inode_alloc(struct ocfs2_super *osb, 279static int ocfs2_info_scan_inode_alloc(struct ocfs2_super *osb,
@@ -373,7 +330,7 @@ static int ocfs2_info_handle_freeinode(struct inode *inode,
373 u32 i; 330 u32 i;
374 u64 blkno = -1; 331 u64 blkno = -1;
375 char namebuf[40]; 332 char namebuf[40];
376 int status = -EFAULT, type = INODE_ALLOC_SYSTEM_INODE; 333 int status, type = INODE_ALLOC_SYSTEM_INODE;
377 struct ocfs2_info_freeinode *oifi = NULL; 334 struct ocfs2_info_freeinode *oifi = NULL;
378 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 335 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
379 struct inode *inode_alloc = NULL; 336 struct inode *inode_alloc = NULL;
@@ -385,8 +342,10 @@ static int ocfs2_info_handle_freeinode(struct inode *inode,
385 goto out_err; 342 goto out_err;
386 } 343 }
387 344
388 if (o2info_from_user(*oifi, req)) 345 if (o2info_from_user(*oifi, req)) {
389 goto bail; 346 status = -EFAULT;
347 goto out_free;
348 }
390 349
391 oifi->ifi_slotnum = osb->max_slots; 350 oifi->ifi_slotnum = osb->max_slots;
392 351
@@ -424,14 +383,16 @@ static int ocfs2_info_handle_freeinode(struct inode *inode,
424 383
425 o2info_set_request_filled(&oifi->ifi_req); 384 o2info_set_request_filled(&oifi->ifi_req);
426 385
427 if (o2info_to_user(*oifi, req)) 386 if (o2info_to_user(*oifi, req)) {
428 goto bail; 387 status = -EFAULT;
388 goto out_free;
389 }
429 390
430 status = 0; 391 status = 0;
431bail: 392bail:
432 if (status) 393 if (status)
433 o2info_set_request_error(&oifi->ifi_req, req); 394 o2info_set_request_error(&oifi->ifi_req, req);
434 395out_free:
435 kfree(oifi); 396 kfree(oifi);
436out_err: 397out_err:
437 return status; 398 return status;
@@ -658,7 +619,7 @@ static int ocfs2_info_handle_freefrag(struct inode *inode,
658{ 619{
659 u64 blkno = -1; 620 u64 blkno = -1;
660 char namebuf[40]; 621 char namebuf[40];
661 int status = -EFAULT, type = GLOBAL_BITMAP_SYSTEM_INODE; 622 int status, type = GLOBAL_BITMAP_SYSTEM_INODE;
662 623
663 struct ocfs2_info_freefrag *oiff; 624 struct ocfs2_info_freefrag *oiff;
664 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 625 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
@@ -671,8 +632,10 @@ static int ocfs2_info_handle_freefrag(struct inode *inode,
671 goto out_err; 632 goto out_err;
672 } 633 }
673 634
674 if (o2info_from_user(*oiff, req)) 635 if (o2info_from_user(*oiff, req)) {
675 goto bail; 636 status = -EFAULT;
637 goto out_free;
638 }
676 /* 639 /*
677 * chunksize from userspace should be power of 2. 640 * chunksize from userspace should be power of 2.
678 */ 641 */
@@ -711,14 +674,14 @@ static int ocfs2_info_handle_freefrag(struct inode *inode,
711 674
712 if (o2info_to_user(*oiff, req)) { 675 if (o2info_to_user(*oiff, req)) {
713 status = -EFAULT; 676 status = -EFAULT;
714 goto bail; 677 goto out_free;
715 } 678 }
716 679
717 status = 0; 680 status = 0;
718bail: 681bail:
719 if (status) 682 if (status)
720 o2info_set_request_error(&oiff->iff_req, req); 683 o2info_set_request_error(&oiff->iff_req, req);
721 684out_free:
722 kfree(oiff); 685 kfree(oiff);
723out_err: 686out_err:
724 return status; 687 return status;
@@ -727,23 +690,17 @@ out_err:
727static int ocfs2_info_handle_unknown(struct inode *inode, 690static int ocfs2_info_handle_unknown(struct inode *inode,
728 struct ocfs2_info_request __user *req) 691 struct ocfs2_info_request __user *req)
729{ 692{
730 int status = -EFAULT;
731 struct ocfs2_info_request oir; 693 struct ocfs2_info_request oir;
732 694
733 if (o2info_from_user(oir, req)) 695 if (o2info_from_user(oir, req))
734 goto bail; 696 return -EFAULT;
735 697
736 o2info_clear_request_filled(&oir); 698 o2info_clear_request_filled(&oir);
737 699
738 if (o2info_to_user(oir, req)) 700 if (o2info_to_user(oir, req))
739 goto bail; 701 return -EFAULT;
740 702
741 status = 0; 703 return 0;
742bail:
743 if (status)
744 o2info_set_request_error(&oir, req);
745
746 return status;
747} 704}
748 705
749/* 706/*
diff --git a/fs/sync.c b/fs/sync.c
index b28d1dd10e8b..bdc729d80e5e 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -65,7 +65,7 @@ int sync_filesystem(struct super_block *sb)
65 return ret; 65 return ret;
66 return __sync_filesystem(sb, 1); 66 return __sync_filesystem(sb, 1);
67} 67}
68EXPORT_SYMBOL_GPL(sync_filesystem); 68EXPORT_SYMBOL(sync_filesystem);
69 69
70static void sync_inodes_one_sb(struct super_block *sb, void *arg) 70static void sync_inodes_one_sb(struct super_block *sb, void *arg)
71{ 71{
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index de2d26d32844..86df952d3e24 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -5424,7 +5424,7 @@ xfs_bmap_shift_extents(
5424 struct xfs_bmap_free *flist, 5424 struct xfs_bmap_free *flist,
5425 int num_exts) 5425 int num_exts)
5426{ 5426{
5427 struct xfs_btree_cur *cur; 5427 struct xfs_btree_cur *cur = NULL;
5428 struct xfs_bmbt_rec_host *gotp; 5428 struct xfs_bmbt_rec_host *gotp;
5429 struct xfs_bmbt_irec got; 5429 struct xfs_bmbt_irec got;
5430 struct xfs_bmbt_irec left; 5430 struct xfs_bmbt_irec left;
@@ -5435,7 +5435,7 @@ xfs_bmap_shift_extents(
5435 int error = 0; 5435 int error = 0;
5436 int i; 5436 int i;
5437 int whichfork = XFS_DATA_FORK; 5437 int whichfork = XFS_DATA_FORK;
5438 int logflags; 5438 int logflags = 0;
5439 xfs_filblks_t blockcount = 0; 5439 xfs_filblks_t blockcount = 0;
5440 int total_extents; 5440 int total_extents;
5441 5441
@@ -5478,16 +5478,11 @@ xfs_bmap_shift_extents(
5478 } 5478 }
5479 } 5479 }
5480 5480
5481 /* We are going to change core inode */
5482 logflags = XFS_ILOG_CORE;
5483 if (ifp->if_flags & XFS_IFBROOT) { 5481 if (ifp->if_flags & XFS_IFBROOT) {
5484 cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork); 5482 cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
5485 cur->bc_private.b.firstblock = *firstblock; 5483 cur->bc_private.b.firstblock = *firstblock;
5486 cur->bc_private.b.flist = flist; 5484 cur->bc_private.b.flist = flist;
5487 cur->bc_private.b.flags = 0; 5485 cur->bc_private.b.flags = 0;
5488 } else {
5489 cur = NULL;
5490 logflags |= XFS_ILOG_DEXT;
5491 } 5486 }
5492 5487
5493 /* 5488 /*
@@ -5545,11 +5540,14 @@ xfs_bmap_shift_extents(
5545 blockcount = left.br_blockcount + 5540 blockcount = left.br_blockcount +
5546 got.br_blockcount; 5541 got.br_blockcount;
5547 xfs_iext_remove(ip, *current_ext, 1, 0); 5542 xfs_iext_remove(ip, *current_ext, 1, 0);
5543 logflags |= XFS_ILOG_CORE;
5548 if (cur) { 5544 if (cur) {
5549 error = xfs_btree_delete(cur, &i); 5545 error = xfs_btree_delete(cur, &i);
5550 if (error) 5546 if (error)
5551 goto del_cursor; 5547 goto del_cursor;
5552 XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor); 5548 XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor);
5549 } else {
5550 logflags |= XFS_ILOG_DEXT;
5553 } 5551 }
5554 XFS_IFORK_NEXT_SET(ip, whichfork, 5552 XFS_IFORK_NEXT_SET(ip, whichfork,
5555 XFS_IFORK_NEXTENTS(ip, whichfork) - 1); 5553 XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
@@ -5575,6 +5573,7 @@ xfs_bmap_shift_extents(
5575 got.br_startoff = startoff; 5573 got.br_startoff = startoff;
5576 } 5574 }
5577 5575
5576 logflags |= XFS_ILOG_CORE;
5578 if (cur) { 5577 if (cur) {
5579 error = xfs_bmbt_update(cur, got.br_startoff, 5578 error = xfs_bmbt_update(cur, got.br_startoff,
5580 got.br_startblock, 5579 got.br_startblock,
@@ -5582,6 +5581,8 @@ xfs_bmap_shift_extents(
5582 got.br_state); 5581 got.br_state);
5583 if (error) 5582 if (error)
5584 goto del_cursor; 5583 goto del_cursor;
5584 } else {
5585 logflags |= XFS_ILOG_DEXT;
5585 } 5586 }
5586 5587
5587 (*current_ext)++; 5588 (*current_ext)++;
@@ -5597,6 +5598,7 @@ del_cursor:
5597 xfs_btree_del_cursor(cur, 5598 xfs_btree_del_cursor(cur,
5598 error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); 5599 error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
5599 5600
5600 xfs_trans_log_inode(tp, ip, logflags); 5601 if (logflags)
5602 xfs_trans_log_inode(tp, ip, logflags);
5601 return error; 5603 return error;
5602} 5604}
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 11e9b4caa54f..b984647c24db 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -1753,11 +1753,72 @@ xfs_vm_readpages(
1753 return mpage_readpages(mapping, pages, nr_pages, xfs_get_blocks); 1753 return mpage_readpages(mapping, pages, nr_pages, xfs_get_blocks);
1754} 1754}
1755 1755
1756/*
1757 * This is basically a copy of __set_page_dirty_buffers() with one
1758 * small tweak: buffers beyond EOF do not get marked dirty. If we mark them
1759 * dirty, we'll never be able to clean them because we don't write buffers
1760 * beyond EOF, and that means we can't invalidate pages that span EOF
1761 * that have been marked dirty. Further, the dirty state can leak into
1762 * the file interior if the file is extended, resulting in all sorts of
1763 * bad things happening as the state does not match the underlying data.
1764 *
1765 * XXX: this really indicates that bufferheads in XFS need to die. Warts like
1766 * this only exist because of bufferheads and how the generic code manages them.
1767 */
1768STATIC int
1769xfs_vm_set_page_dirty(
1770 struct page *page)
1771{
1772 struct address_space *mapping = page->mapping;
1773 struct inode *inode = mapping->host;
1774 loff_t end_offset;
1775 loff_t offset;
1776 int newly_dirty;
1777
1778 if (unlikely(!mapping))
1779 return !TestSetPageDirty(page);
1780
1781 end_offset = i_size_read(inode);
1782 offset = page_offset(page);
1783
1784 spin_lock(&mapping->private_lock);
1785 if (page_has_buffers(page)) {
1786 struct buffer_head *head = page_buffers(page);
1787 struct buffer_head *bh = head;
1788
1789 do {
1790 if (offset < end_offset)
1791 set_buffer_dirty(bh);
1792 bh = bh->b_this_page;
1793 offset += 1 << inode->i_blkbits;
1794 } while (bh != head);
1795 }
1796 newly_dirty = !TestSetPageDirty(page);
1797 spin_unlock(&mapping->private_lock);
1798
1799 if (newly_dirty) {
1800 /* sigh - __set_page_dirty() is static, so copy it here, too */
1801 unsigned long flags;
1802
1803 spin_lock_irqsave(&mapping->tree_lock, flags);
1804 if (page->mapping) { /* Race with truncate? */
1805 WARN_ON_ONCE(!PageUptodate(page));
1806 account_page_dirtied(page, mapping);
1807 radix_tree_tag_set(&mapping->page_tree,
1808 page_index(page), PAGECACHE_TAG_DIRTY);
1809 }
1810 spin_unlock_irqrestore(&mapping->tree_lock, flags);
1811 __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
1812 }
1813 return newly_dirty;
1814}
1815
1756const struct address_space_operations xfs_address_space_operations = { 1816const struct address_space_operations xfs_address_space_operations = {
1757 .readpage = xfs_vm_readpage, 1817 .readpage = xfs_vm_readpage,
1758 .readpages = xfs_vm_readpages, 1818 .readpages = xfs_vm_readpages,
1759 .writepage = xfs_vm_writepage, 1819 .writepage = xfs_vm_writepage,
1760 .writepages = xfs_vm_writepages, 1820 .writepages = xfs_vm_writepages,
1821 .set_page_dirty = xfs_vm_set_page_dirty,
1761 .releasepage = xfs_vm_releasepage, 1822 .releasepage = xfs_vm_releasepage,
1762 .invalidatepage = xfs_vm_invalidatepage, 1823 .invalidatepage = xfs_vm_invalidatepage,
1763 .write_begin = xfs_vm_write_begin, 1824 .write_begin = xfs_vm_write_begin,
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 2f1e30d39a35..1707980f9a4b 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1470,6 +1470,26 @@ xfs_collapse_file_space(
1470 start_fsb = XFS_B_TO_FSB(mp, offset + len); 1470 start_fsb = XFS_B_TO_FSB(mp, offset + len);
1471 shift_fsb = XFS_B_TO_FSB(mp, len); 1471 shift_fsb = XFS_B_TO_FSB(mp, len);
1472 1472
1473 /*
1474 * Writeback the entire file and force remove any post-eof blocks. The
1475 * writeback prevents changes to the extent list via concurrent
1476 * writeback and the eofblocks trim prevents the extent shift algorithm
1477 * from running into a post-eof delalloc extent.
1478 *
1479 * XXX: This is a temporary fix until the extent shift loop below is
1480 * converted to use offsets and lookups within the ILOCK rather than
1481 * carrying around the index into the extent list for the next
1482 * iteration.
1483 */
1484 error = filemap_write_and_wait(VFS_I(ip)->i_mapping);
1485 if (error)
1486 return error;
1487 if (xfs_can_free_eofblocks(ip, true)) {
1488 error = xfs_free_eofblocks(mp, ip, false);
1489 if (error)
1490 return error;
1491 }
1492
1473 error = xfs_free_file_space(ip, offset, len); 1493 error = xfs_free_file_space(ip, offset, len);
1474 if (error) 1494 if (error)
1475 return error; 1495 return error;
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 076b1708d134..de5368c803f9 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -291,12 +291,22 @@ xfs_file_read_iter(
291 if (inode->i_mapping->nrpages) { 291 if (inode->i_mapping->nrpages) {
292 ret = filemap_write_and_wait_range( 292 ret = filemap_write_and_wait_range(
293 VFS_I(ip)->i_mapping, 293 VFS_I(ip)->i_mapping,
294 pos, -1); 294 pos, pos + size - 1);
295 if (ret) { 295 if (ret) {
296 xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL); 296 xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL);
297 return ret; 297 return ret;
298 } 298 }
299 truncate_pagecache_range(VFS_I(ip), pos, -1); 299
300 /*
301 * Invalidate whole pages. This can return an error if
302 * we fail to invalidate a page, but this should never
303 * happen on XFS. Warn if it does fail.
304 */
305 ret = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping,
306 pos >> PAGE_CACHE_SHIFT,
307 (pos + size - 1) >> PAGE_CACHE_SHIFT);
308 WARN_ON_ONCE(ret);
309 ret = 0;
300 } 310 }
301 xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL); 311 xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
302 } 312 }
@@ -632,10 +642,19 @@ xfs_file_dio_aio_write(
632 642
633 if (mapping->nrpages) { 643 if (mapping->nrpages) {
634 ret = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, 644 ret = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
635 pos, -1); 645 pos, pos + count - 1);
636 if (ret) 646 if (ret)
637 goto out; 647 goto out;
638 truncate_pagecache_range(VFS_I(ip), pos, -1); 648 /*
649 * Invalidate whole pages. This can return an error if
650 * we fail to invalidate a page, but this should never
651 * happen on XFS. Warn if it does fail.
652 */
653 ret = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping,
654 pos >> PAGE_CACHE_SHIFT,
655 (pos + count - 1) >> PAGE_CACHE_SHIFT);
656 WARN_ON_ONCE(ret);
657 ret = 0;
639 } 658 }
640 659
641 /* 660 /*