aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/aio.c90
-rw-r--r--fs/btrfs/async-thread.c44
-rw-r--r--fs/btrfs/async-thread.h28
-rw-r--r--fs/btrfs/btrfs_inode.h13
-rw-r--r--fs/btrfs/delayed-inode.c4
-rw-r--r--fs/btrfs/disk-io.c56
-rw-r--r--fs/btrfs/extent-tree.c23
-rw-r--r--fs/btrfs/extent_io.c5
-rw-r--r--fs/btrfs/file.c19
-rw-r--r--fs/btrfs/inode.c300
-rw-r--r--fs/btrfs/ioctl.c68
-rw-r--r--fs/btrfs/ordered-data.c1
-rw-r--r--fs/btrfs/qgroup.c3
-rw-r--r--fs/btrfs/raid56.c9
-rw-r--r--fs/btrfs/reada.c3
-rw-r--r--fs/btrfs/scrub.c25
-rw-r--r--fs/btrfs/sysfs.c2
-rw-r--r--fs/btrfs/tree-log.c80
-rw-r--r--fs/btrfs/tree-log.h2
-rw-r--r--fs/btrfs/volumes.c64
-rw-r--r--fs/cifs/Kconfig35
-rw-r--r--fs/cifs/cifsfs.c24
-rw-r--r--fs/cifs/cifsfs.h2
-rw-r--r--fs/cifs/cifsglob.h10
-rw-r--r--fs/cifs/cifspdu.h23
-rw-r--r--fs/cifs/connect.c2
-rw-r--r--fs/cifs/dir.c8
-rw-r--r--fs/cifs/file.c10
-rw-r--r--fs/cifs/inode.c11
-rw-r--r--fs/cifs/link.c12
-rw-r--r--fs/cifs/misc.c7
-rw-r--r--fs/cifs/netmisc.c20
-rw-r--r--fs/cifs/readdir.c6
-rw-r--r--fs/cifs/sess.c24
-rw-r--r--fs/cifs/smb1ops.c7
-rw-r--r--fs/cifs/smb2file.c2
-rw-r--r--fs/cifs/smb2inode.c2
-rw-r--r--fs/cifs/smb2maperror.c4
-rw-r--r--fs/cifs/smb2misc.c17
-rw-r--r--fs/cifs/smb2ops.c172
-rw-r--r--fs/cifs/smb2pdu.c23
-rw-r--r--fs/cifs/smb2pdu.h6
-rw-r--r--fs/cifs/smbfsctl.h2
-rw-r--r--fs/dcache.c11
-rw-r--r--fs/eventpoll.c3
-rw-r--r--fs/ext3/super.c5
-rw-r--r--fs/ext4/ext4.h18
-rw-r--r--fs/ext4/extents.c88
-rw-r--r--fs/ext4/inode.c44
-rw-r--r--fs/ext4/mballoc.c5
-rw-r--r--fs/ext4/namei.c58
-rw-r--r--fs/ext4/resize.c2
-rw-r--r--fs/ext4/super.c5
-rw-r--r--fs/f2fs/Kconfig4
-rw-r--r--fs/f2fs/checkpoint.c80
-rw-r--r--fs/f2fs/data.c19
-rw-r--r--fs/f2fs/debug.c4
-rw-r--r--fs/f2fs/dir.c6
-rw-r--r--fs/f2fs/f2fs.h26
-rw-r--r--fs/f2fs/file.c60
-rw-r--r--fs/f2fs/gc.c8
-rw-r--r--fs/f2fs/gc.h2
-rw-r--r--fs/f2fs/hash.c7
-rw-r--r--fs/f2fs/inline.c38
-rw-r--r--fs/f2fs/namei.c23
-rw-r--r--fs/f2fs/node.c80
-rw-r--r--fs/f2fs/recovery.c30
-rw-r--r--fs/f2fs/segment.c53
-rw-r--r--fs/f2fs/segment.h2
-rw-r--r--fs/f2fs/super.c32
-rw-r--r--fs/f2fs/xattr.c2
-rw-r--r--fs/gfs2/bmap.c9
-rw-r--r--fs/gfs2/file.c15
-rw-r--r--fs/gfs2/incore.h7
-rw-r--r--fs/gfs2/inode.c9
-rw-r--r--fs/gfs2/super.c20
-rw-r--r--fs/isofs/inode.c15
-rw-r--r--fs/isofs/isofs.h23
-rw-r--r--fs/isofs/rock.c39
-rw-r--r--fs/jbd2/commit.c21
-rw-r--r--fs/jbd2/journal.c56
-rw-r--r--fs/jbd2/recovery.c33
-rw-r--r--fs/jbd2/revoke.c6
-rw-r--r--fs/lockd/svc.c4
-rw-r--r--fs/locks.c2
-rw-r--r--fs/namei.c96
-rw-r--r--fs/namespace.c10
-rw-r--r--fs/nfs/client.c12
-rw-r--r--fs/nfs/filelayout/filelayout.c5
-rw-r--r--fs/nfs/nfs3acl.c5
-rw-r--r--fs/nfs/nfs4_fs.h13
-rw-r--r--fs/nfs/nfs4client.c38
-rw-r--r--fs/nfs/nfs4proc.c50
-rw-r--r--fs/nfs/nfs4state.c24
-rw-r--r--fs/nfs/pagelist.c84
-rw-r--r--fs/nfs/write.c21
-rw-r--r--fs/nfsd/nfs4xdr.c14
-rw-r--r--fs/notify/fdinfo.c4
-rw-r--r--fs/ocfs2/cluster/quorum.c13
-rw-r--r--fs/ocfs2/cluster/tcp.c45
-rw-r--r--fs/ocfs2/cluster/tcp.h1
-rw-r--r--fs/ocfs2/ioctl.c129
-rw-r--r--fs/pnode.c1
-rw-r--r--fs/sync.c2
-rw-r--r--fs/udf/ialloc.c28
-rw-r--r--fs/udf/inode.c161
-rw-r--r--fs/udf/namei.c156
-rw-r--r--fs/udf/super.c69
-rw-r--r--fs/udf/udfdecl.h3
-rw-r--r--fs/ufs/inode.c7
-rw-r--r--fs/ufs/namei.c14
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c18
-rw-r--r--fs/xfs/xfs_aops.c61
-rw-r--r--fs/xfs/xfs_bmap_util.c20
-rw-r--r--fs/xfs/xfs_file.c27
115 files changed, 2148 insertions, 1225 deletions
diff --git a/fs/aio.c b/fs/aio.c
index ae635872affb..733750096b71 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -141,6 +141,7 @@ struct kioctx {
141 141
142 struct { 142 struct {
143 unsigned tail; 143 unsigned tail;
144 unsigned completed_events;
144 spinlock_t completion_lock; 145 spinlock_t completion_lock;
145 } ____cacheline_aligned_in_smp; 146 } ____cacheline_aligned_in_smp;
146 147
@@ -792,6 +793,8 @@ void exit_aio(struct mm_struct *mm)
792 793
793 for (i = 0; i < table->nr; ++i) { 794 for (i = 0; i < table->nr; ++i) {
794 struct kioctx *ctx = table->table[i]; 795 struct kioctx *ctx = table->table[i];
796 struct completion requests_done =
797 COMPLETION_INITIALIZER_ONSTACK(requests_done);
795 798
796 if (!ctx) 799 if (!ctx)
797 continue; 800 continue;
@@ -803,7 +806,10 @@ void exit_aio(struct mm_struct *mm)
803 * that it needs to unmap the area, just set it to 0. 806 * that it needs to unmap the area, just set it to 0.
804 */ 807 */
805 ctx->mmap_size = 0; 808 ctx->mmap_size = 0;
806 kill_ioctx(mm, ctx, NULL); 809 kill_ioctx(mm, ctx, &requests_done);
810
811 /* Wait until all IO for the context is done. */
812 wait_for_completion(&requests_done);
807 } 813 }
808 814
809 RCU_INIT_POINTER(mm->ioctx_table, NULL); 815 RCU_INIT_POINTER(mm->ioctx_table, NULL);
@@ -857,6 +863,68 @@ out:
857 return ret; 863 return ret;
858} 864}
859 865
866/* refill_reqs_available
867 * Updates the reqs_available reference counts used for tracking the
868 * number of free slots in the completion ring. This can be called
869 * from aio_complete() (to optimistically update reqs_available) or
870 * from aio_get_req() (the "we're out of events" case). It must be
871 * called holding ctx->completion_lock.
872 */
873static void refill_reqs_available(struct kioctx *ctx, unsigned head,
874 unsigned tail)
875{
876 unsigned events_in_ring, completed;
877
878 /* Clamp head since userland can write to it. */
879 head %= ctx->nr_events;
880 if (head <= tail)
881 events_in_ring = tail - head;
882 else
883 events_in_ring = ctx->nr_events - (head - tail);
884
885 completed = ctx->completed_events;
886 if (events_in_ring < completed)
887 completed -= events_in_ring;
888 else
889 completed = 0;
890
891 if (!completed)
892 return;
893
894 ctx->completed_events -= completed;
895 put_reqs_available(ctx, completed);
896}
897
898/* user_refill_reqs_available
899 * Called to refill reqs_available when aio_get_req() encounters an
900 * out-of-space condition in the completion ring.
901 */
902static void user_refill_reqs_available(struct kioctx *ctx)
903{
904 spin_lock_irq(&ctx->completion_lock);
905 if (ctx->completed_events) {
906 struct aio_ring *ring;
907 unsigned head;
908
909 /* Access of ring->head may race with aio_read_events_ring()
910 * here, but that's okay: whether we read the old version
911 * or the new version, either will be valid. The important
912 * part is that head cannot pass tail since we prevent
913 * aio_complete() from updating tail by holding
914 * ctx->completion_lock. Even if head is invalid, the check
915 * against ctx->completed_events below will make sure we do the
916 * safe/right thing.
917 */
918 ring = kmap_atomic(ctx->ring_pages[0]);
919 head = ring->head;
920 kunmap_atomic(ring);
921
922 refill_reqs_available(ctx, head, ctx->tail);
923 }
924
925 spin_unlock_irq(&ctx->completion_lock);
926}
927
860/* aio_get_req 928/* aio_get_req
861 * Allocate a slot for an aio request. 929 * Allocate a slot for an aio request.
862 * Returns NULL if no requests are free. 930 * Returns NULL if no requests are free.
@@ -865,8 +933,11 @@ static inline struct kiocb *aio_get_req(struct kioctx *ctx)
865{ 933{
866 struct kiocb *req; 934 struct kiocb *req;
867 935
868 if (!get_reqs_available(ctx)) 936 if (!get_reqs_available(ctx)) {
869 return NULL; 937 user_refill_reqs_available(ctx);
938 if (!get_reqs_available(ctx))
939 return NULL;
940 }
870 941
871 req = kmem_cache_alloc(kiocb_cachep, GFP_KERNEL|__GFP_ZERO); 942 req = kmem_cache_alloc(kiocb_cachep, GFP_KERNEL|__GFP_ZERO);
872 if (unlikely(!req)) 943 if (unlikely(!req))
@@ -925,8 +996,8 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
925 struct kioctx *ctx = iocb->ki_ctx; 996 struct kioctx *ctx = iocb->ki_ctx;
926 struct aio_ring *ring; 997 struct aio_ring *ring;
927 struct io_event *ev_page, *event; 998 struct io_event *ev_page, *event;
999 unsigned tail, pos, head;
928 unsigned long flags; 1000 unsigned long flags;
929 unsigned tail, pos;
930 1001
931 /* 1002 /*
932 * Special case handling for sync iocbs: 1003 * Special case handling for sync iocbs:
@@ -987,10 +1058,14 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
987 ctx->tail = tail; 1058 ctx->tail = tail;
988 1059
989 ring = kmap_atomic(ctx->ring_pages[0]); 1060 ring = kmap_atomic(ctx->ring_pages[0]);
1061 head = ring->head;
990 ring->tail = tail; 1062 ring->tail = tail;
991 kunmap_atomic(ring); 1063 kunmap_atomic(ring);
992 flush_dcache_page(ctx->ring_pages[0]); 1064 flush_dcache_page(ctx->ring_pages[0]);
993 1065
1066 ctx->completed_events++;
1067 if (ctx->completed_events > 1)
1068 refill_reqs_available(ctx, head, tail);
994 spin_unlock_irqrestore(&ctx->completion_lock, flags); 1069 spin_unlock_irqrestore(&ctx->completion_lock, flags);
995 1070
996 pr_debug("added to ring %p at [%u]\n", iocb, tail); 1071 pr_debug("added to ring %p at [%u]\n", iocb, tail);
@@ -1005,7 +1080,6 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
1005 1080
1006 /* everything turned out well, dispose of the aiocb. */ 1081 /* everything turned out well, dispose of the aiocb. */
1007 kiocb_free(iocb); 1082 kiocb_free(iocb);
1008 put_reqs_available(ctx, 1);
1009 1083
1010 /* 1084 /*
1011 * We have to order our ring_info tail store above and test 1085 * We have to order our ring_info tail store above and test
@@ -1042,6 +1116,12 @@ static long aio_read_events_ring(struct kioctx *ctx,
1042 tail = ring->tail; 1116 tail = ring->tail;
1043 kunmap_atomic(ring); 1117 kunmap_atomic(ring);
1044 1118
1119 /*
1120 * Ensure that once we've read the current tail pointer,
1121 * we also see the events that were stored up to the tail.
1122 */
1123 smp_rmb();
1124
1045 pr_debug("h%u t%u m%u\n", head, tail, ctx->nr_events); 1125 pr_debug("h%u t%u m%u\n", head, tail, ctx->nr_events);
1046 1126
1047 if (head == tail) 1127 if (head == tail)
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index 5a201d81049c..fbd76ded9a34 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -22,7 +22,6 @@
22#include <linux/list.h> 22#include <linux/list.h>
23#include <linux/spinlock.h> 23#include <linux/spinlock.h>
24#include <linux/freezer.h> 24#include <linux/freezer.h>
25#include <linux/workqueue.h>
26#include "async-thread.h" 25#include "async-thread.h"
27#include "ctree.h" 26#include "ctree.h"
28 27
@@ -55,8 +54,39 @@ struct btrfs_workqueue {
55 struct __btrfs_workqueue *high; 54 struct __btrfs_workqueue *high;
56}; 55};
57 56
58static inline struct __btrfs_workqueue 57static void normal_work_helper(struct btrfs_work *work);
59*__btrfs_alloc_workqueue(const char *name, int flags, int max_active, 58
59#define BTRFS_WORK_HELPER(name) \
60void btrfs_##name(struct work_struct *arg) \
61{ \
62 struct btrfs_work *work = container_of(arg, struct btrfs_work, \
63 normal_work); \
64 normal_work_helper(work); \
65}
66
67BTRFS_WORK_HELPER(worker_helper);
68BTRFS_WORK_HELPER(delalloc_helper);
69BTRFS_WORK_HELPER(flush_delalloc_helper);
70BTRFS_WORK_HELPER(cache_helper);
71BTRFS_WORK_HELPER(submit_helper);
72BTRFS_WORK_HELPER(fixup_helper);
73BTRFS_WORK_HELPER(endio_helper);
74BTRFS_WORK_HELPER(endio_meta_helper);
75BTRFS_WORK_HELPER(endio_meta_write_helper);
76BTRFS_WORK_HELPER(endio_raid56_helper);
77BTRFS_WORK_HELPER(rmw_helper);
78BTRFS_WORK_HELPER(endio_write_helper);
79BTRFS_WORK_HELPER(freespace_write_helper);
80BTRFS_WORK_HELPER(delayed_meta_helper);
81BTRFS_WORK_HELPER(readahead_helper);
82BTRFS_WORK_HELPER(qgroup_rescan_helper);
83BTRFS_WORK_HELPER(extent_refs_helper);
84BTRFS_WORK_HELPER(scrub_helper);
85BTRFS_WORK_HELPER(scrubwrc_helper);
86BTRFS_WORK_HELPER(scrubnc_helper);
87
88static struct __btrfs_workqueue *
89__btrfs_alloc_workqueue(const char *name, int flags, int max_active,
60 int thresh) 90 int thresh)
61{ 91{
62 struct __btrfs_workqueue *ret = kzalloc(sizeof(*ret), GFP_NOFS); 92 struct __btrfs_workqueue *ret = kzalloc(sizeof(*ret), GFP_NOFS);
@@ -232,13 +262,11 @@ static void run_ordered_work(struct __btrfs_workqueue *wq)
232 spin_unlock_irqrestore(lock, flags); 262 spin_unlock_irqrestore(lock, flags);
233} 263}
234 264
235static void normal_work_helper(struct work_struct *arg) 265static void normal_work_helper(struct btrfs_work *work)
236{ 266{
237 struct btrfs_work *work;
238 struct __btrfs_workqueue *wq; 267 struct __btrfs_workqueue *wq;
239 int need_order = 0; 268 int need_order = 0;
240 269
241 work = container_of(arg, struct btrfs_work, normal_work);
242 /* 270 /*
243 * We should not touch things inside work in the following cases: 271 * We should not touch things inside work in the following cases:
244 * 1) after work->func() if it has no ordered_free 272 * 1) after work->func() if it has no ordered_free
@@ -262,7 +290,7 @@ static void normal_work_helper(struct work_struct *arg)
262 trace_btrfs_all_work_done(work); 290 trace_btrfs_all_work_done(work);
263} 291}
264 292
265void btrfs_init_work(struct btrfs_work *work, 293void btrfs_init_work(struct btrfs_work *work, btrfs_work_func_t uniq_func,
266 btrfs_func_t func, 294 btrfs_func_t func,
267 btrfs_func_t ordered_func, 295 btrfs_func_t ordered_func,
268 btrfs_func_t ordered_free) 296 btrfs_func_t ordered_free)
@@ -270,7 +298,7 @@ void btrfs_init_work(struct btrfs_work *work,
270 work->func = func; 298 work->func = func;
271 work->ordered_func = ordered_func; 299 work->ordered_func = ordered_func;
272 work->ordered_free = ordered_free; 300 work->ordered_free = ordered_free;
273 INIT_WORK(&work->normal_work, normal_work_helper); 301 INIT_WORK(&work->normal_work, uniq_func);
274 INIT_LIST_HEAD(&work->ordered_list); 302 INIT_LIST_HEAD(&work->ordered_list);
275 work->flags = 0; 303 work->flags = 0;
276} 304}
diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h
index 9c6b66d15fb0..e9e31c94758f 100644
--- a/fs/btrfs/async-thread.h
+++ b/fs/btrfs/async-thread.h
@@ -19,12 +19,14 @@
19 19
20#ifndef __BTRFS_ASYNC_THREAD_ 20#ifndef __BTRFS_ASYNC_THREAD_
21#define __BTRFS_ASYNC_THREAD_ 21#define __BTRFS_ASYNC_THREAD_
22#include <linux/workqueue.h>
22 23
23struct btrfs_workqueue; 24struct btrfs_workqueue;
24/* Internal use only */ 25/* Internal use only */
25struct __btrfs_workqueue; 26struct __btrfs_workqueue;
26struct btrfs_work; 27struct btrfs_work;
27typedef void (*btrfs_func_t)(struct btrfs_work *arg); 28typedef void (*btrfs_func_t)(struct btrfs_work *arg);
29typedef void (*btrfs_work_func_t)(struct work_struct *arg);
28 30
29struct btrfs_work { 31struct btrfs_work {
30 btrfs_func_t func; 32 btrfs_func_t func;
@@ -38,11 +40,35 @@ struct btrfs_work {
38 unsigned long flags; 40 unsigned long flags;
39}; 41};
40 42
43#define BTRFS_WORK_HELPER_PROTO(name) \
44void btrfs_##name(struct work_struct *arg)
45
46BTRFS_WORK_HELPER_PROTO(worker_helper);
47BTRFS_WORK_HELPER_PROTO(delalloc_helper);
48BTRFS_WORK_HELPER_PROTO(flush_delalloc_helper);
49BTRFS_WORK_HELPER_PROTO(cache_helper);
50BTRFS_WORK_HELPER_PROTO(submit_helper);
51BTRFS_WORK_HELPER_PROTO(fixup_helper);
52BTRFS_WORK_HELPER_PROTO(endio_helper);
53BTRFS_WORK_HELPER_PROTO(endio_meta_helper);
54BTRFS_WORK_HELPER_PROTO(endio_meta_write_helper);
55BTRFS_WORK_HELPER_PROTO(endio_raid56_helper);
56BTRFS_WORK_HELPER_PROTO(rmw_helper);
57BTRFS_WORK_HELPER_PROTO(endio_write_helper);
58BTRFS_WORK_HELPER_PROTO(freespace_write_helper);
59BTRFS_WORK_HELPER_PROTO(delayed_meta_helper);
60BTRFS_WORK_HELPER_PROTO(readahead_helper);
61BTRFS_WORK_HELPER_PROTO(qgroup_rescan_helper);
62BTRFS_WORK_HELPER_PROTO(extent_refs_helper);
63BTRFS_WORK_HELPER_PROTO(scrub_helper);
64BTRFS_WORK_HELPER_PROTO(scrubwrc_helper);
65BTRFS_WORK_HELPER_PROTO(scrubnc_helper);
66
41struct btrfs_workqueue *btrfs_alloc_workqueue(const char *name, 67struct btrfs_workqueue *btrfs_alloc_workqueue(const char *name,
42 int flags, 68 int flags,
43 int max_active, 69 int max_active,
44 int thresh); 70 int thresh);
45void btrfs_init_work(struct btrfs_work *work, 71void btrfs_init_work(struct btrfs_work *work, btrfs_work_func_t helper,
46 btrfs_func_t func, 72 btrfs_func_t func,
47 btrfs_func_t ordered_func, 73 btrfs_func_t ordered_func,
48 btrfs_func_t ordered_free); 74 btrfs_func_t ordered_free);
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 43527fd78825..56b8522d5767 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -234,8 +234,17 @@ static inline int btrfs_inode_in_log(struct inode *inode, u64 generation)
234 BTRFS_I(inode)->last_sub_trans <= 234 BTRFS_I(inode)->last_sub_trans <=
235 BTRFS_I(inode)->last_log_commit && 235 BTRFS_I(inode)->last_log_commit &&
236 BTRFS_I(inode)->last_sub_trans <= 236 BTRFS_I(inode)->last_sub_trans <=
237 BTRFS_I(inode)->root->last_log_commit) 237 BTRFS_I(inode)->root->last_log_commit) {
238 return 1; 238 /*
239 * After a ranged fsync we might have left some extent maps
240 * (that fall outside the fsync's range). So return false
241 * here if the list isn't empty, to make sure btrfs_log_inode()
242 * will be called and process those extent maps.
243 */
244 smp_mb();
245 if (list_empty(&BTRFS_I(inode)->extent_tree.modified_extents))
246 return 1;
247 }
239 return 0; 248 return 0;
240} 249}
241 250
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index da775bfdebc9..a2e90f855d7d 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -1395,8 +1395,8 @@ static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root,
1395 return -ENOMEM; 1395 return -ENOMEM;
1396 1396
1397 async_work->delayed_root = delayed_root; 1397 async_work->delayed_root = delayed_root;
1398 btrfs_init_work(&async_work->work, btrfs_async_run_delayed_root, 1398 btrfs_init_work(&async_work->work, btrfs_delayed_meta_helper,
1399 NULL, NULL); 1399 btrfs_async_run_delayed_root, NULL, NULL);
1400 async_work->nr = nr; 1400 async_work->nr = nr;
1401 1401
1402 btrfs_queue_work(root->fs_info->delayed_workers, &async_work->work); 1402 btrfs_queue_work(root->fs_info->delayed_workers, &async_work->work);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index d0ed9e664f7d..a1d36e62179c 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -39,7 +39,6 @@
39#include "btrfs_inode.h" 39#include "btrfs_inode.h"
40#include "volumes.h" 40#include "volumes.h"
41#include "print-tree.h" 41#include "print-tree.h"
42#include "async-thread.h"
43#include "locking.h" 42#include "locking.h"
44#include "tree-log.h" 43#include "tree-log.h"
45#include "free-space-cache.h" 44#include "free-space-cache.h"
@@ -693,35 +692,41 @@ static void end_workqueue_bio(struct bio *bio, int err)
693{ 692{
694 struct end_io_wq *end_io_wq = bio->bi_private; 693 struct end_io_wq *end_io_wq = bio->bi_private;
695 struct btrfs_fs_info *fs_info; 694 struct btrfs_fs_info *fs_info;
695 struct btrfs_workqueue *wq;
696 btrfs_work_func_t func;
696 697
697 fs_info = end_io_wq->info; 698 fs_info = end_io_wq->info;
698 end_io_wq->error = err; 699 end_io_wq->error = err;
699 btrfs_init_work(&end_io_wq->work, end_workqueue_fn, NULL, NULL);
700 700
701 if (bio->bi_rw & REQ_WRITE) { 701 if (bio->bi_rw & REQ_WRITE) {
702 if (end_io_wq->metadata == BTRFS_WQ_ENDIO_METADATA) 702 if (end_io_wq->metadata == BTRFS_WQ_ENDIO_METADATA) {
703 btrfs_queue_work(fs_info->endio_meta_write_workers, 703 wq = fs_info->endio_meta_write_workers;
704 &end_io_wq->work); 704 func = btrfs_endio_meta_write_helper;
705 else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_FREE_SPACE) 705 } else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_FREE_SPACE) {
706 btrfs_queue_work(fs_info->endio_freespace_worker, 706 wq = fs_info->endio_freespace_worker;
707 &end_io_wq->work); 707 func = btrfs_freespace_write_helper;
708 else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56) 708 } else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56) {
709 btrfs_queue_work(fs_info->endio_raid56_workers, 709 wq = fs_info->endio_raid56_workers;
710 &end_io_wq->work); 710 func = btrfs_endio_raid56_helper;
711 else 711 } else {
712 btrfs_queue_work(fs_info->endio_write_workers, 712 wq = fs_info->endio_write_workers;
713 &end_io_wq->work); 713 func = btrfs_endio_write_helper;
714 }
714 } else { 715 } else {
715 if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56) 716 if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56) {
716 btrfs_queue_work(fs_info->endio_raid56_workers, 717 wq = fs_info->endio_raid56_workers;
717 &end_io_wq->work); 718 func = btrfs_endio_raid56_helper;
718 else if (end_io_wq->metadata) 719 } else if (end_io_wq->metadata) {
719 btrfs_queue_work(fs_info->endio_meta_workers, 720 wq = fs_info->endio_meta_workers;
720 &end_io_wq->work); 721 func = btrfs_endio_meta_helper;
721 else 722 } else {
722 btrfs_queue_work(fs_info->endio_workers, 723 wq = fs_info->endio_workers;
723 &end_io_wq->work); 724 func = btrfs_endio_helper;
725 }
724 } 726 }
727
728 btrfs_init_work(&end_io_wq->work, func, end_workqueue_fn, NULL, NULL);
729 btrfs_queue_work(wq, &end_io_wq->work);
725} 730}
726 731
727/* 732/*
@@ -828,7 +833,7 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
828 async->submit_bio_start = submit_bio_start; 833 async->submit_bio_start = submit_bio_start;
829 async->submit_bio_done = submit_bio_done; 834 async->submit_bio_done = submit_bio_done;
830 835
831 btrfs_init_work(&async->work, run_one_async_start, 836 btrfs_init_work(&async->work, btrfs_worker_helper, run_one_async_start,
832 run_one_async_done, run_one_async_free); 837 run_one_async_done, run_one_async_free);
833 838
834 async->bio_flags = bio_flags; 839 async->bio_flags = bio_flags;
@@ -3450,7 +3455,8 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors)
3450 btrfs_set_stack_device_generation(dev_item, 0); 3455 btrfs_set_stack_device_generation(dev_item, 0);
3451 btrfs_set_stack_device_type(dev_item, dev->type); 3456 btrfs_set_stack_device_type(dev_item, dev->type);
3452 btrfs_set_stack_device_id(dev_item, dev->devid); 3457 btrfs_set_stack_device_id(dev_item, dev->devid);
3453 btrfs_set_stack_device_total_bytes(dev_item, dev->total_bytes); 3458 btrfs_set_stack_device_total_bytes(dev_item,
3459 dev->disk_total_bytes);
3454 btrfs_set_stack_device_bytes_used(dev_item, dev->bytes_used); 3460 btrfs_set_stack_device_bytes_used(dev_item, dev->bytes_used);
3455 btrfs_set_stack_device_io_align(dev_item, dev->io_align); 3461 btrfs_set_stack_device_io_align(dev_item, dev->io_align);
3456 btrfs_set_stack_device_io_width(dev_item, dev->io_width); 3462 btrfs_set_stack_device_io_width(dev_item, dev->io_width);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 102ed3143976..3efe1c3877bf 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -552,7 +552,8 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
552 caching_ctl->block_group = cache; 552 caching_ctl->block_group = cache;
553 caching_ctl->progress = cache->key.objectid; 553 caching_ctl->progress = cache->key.objectid;
554 atomic_set(&caching_ctl->count, 1); 554 atomic_set(&caching_ctl->count, 1);
555 btrfs_init_work(&caching_ctl->work, caching_thread, NULL, NULL); 555 btrfs_init_work(&caching_ctl->work, btrfs_cache_helper,
556 caching_thread, NULL, NULL);
556 557
557 spin_lock(&cache->lock); 558 spin_lock(&cache->lock);
558 /* 559 /*
@@ -2749,8 +2750,8 @@ int btrfs_async_run_delayed_refs(struct btrfs_root *root,
2749 async->sync = 0; 2750 async->sync = 0;
2750 init_completion(&async->wait); 2751 init_completion(&async->wait);
2751 2752
2752 btrfs_init_work(&async->work, delayed_ref_async_start, 2753 btrfs_init_work(&async->work, btrfs_extent_refs_helper,
2753 NULL, NULL); 2754 delayed_ref_async_start, NULL, NULL);
2754 2755
2755 btrfs_queue_work(root->fs_info->extent_workers, &async->work); 2756 btrfs_queue_work(root->fs_info->extent_workers, &async->work);
2756 2757
@@ -3586,13 +3587,7 @@ static u64 get_restripe_target(struct btrfs_fs_info *fs_info, u64 flags)
3586 */ 3587 */
3587static u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags) 3588static u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
3588{ 3589{
3589 /* 3590 u64 num_devices = root->fs_info->fs_devices->rw_devices;
3590 * we add in the count of missing devices because we want
3591 * to make sure that any RAID levels on a degraded FS
3592 * continue to be honored.
3593 */
3594 u64 num_devices = root->fs_info->fs_devices->rw_devices +
3595 root->fs_info->fs_devices->missing_devices;
3596 u64 target; 3591 u64 target;
3597 u64 tmp; 3592 u64 tmp;
3598 3593
@@ -8440,13 +8435,7 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
8440 if (stripped) 8435 if (stripped)
8441 return extended_to_chunk(stripped); 8436 return extended_to_chunk(stripped);
8442 8437
8443 /* 8438 num_devices = root->fs_info->fs_devices->rw_devices;
8444 * we add in the count of missing devices because we want
8445 * to make sure that any RAID levels on a degraded FS
8446 * continue to be honored.
8447 */
8448 num_devices = root->fs_info->fs_devices->rw_devices +
8449 root->fs_info->fs_devices->missing_devices;
8450 8439
8451 stripped = BTRFS_BLOCK_GROUP_RAID0 | 8440 stripped = BTRFS_BLOCK_GROUP_RAID0 |
8452 BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6 | 8441 BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6 |
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 3e11aab9f391..af0359dcf337 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2532,6 +2532,7 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
2532 test_bit(BIO_UPTODATE, &bio->bi_flags); 2532 test_bit(BIO_UPTODATE, &bio->bi_flags);
2533 if (err) 2533 if (err)
2534 uptodate = 0; 2534 uptodate = 0;
2535 offset += len;
2535 continue; 2536 continue;
2536 } 2537 }
2537 } 2538 }
@@ -4207,8 +4208,8 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
4207 return -ENOMEM; 4208 return -ENOMEM;
4208 path->leave_spinning = 1; 4209 path->leave_spinning = 1;
4209 4210
4210 start = ALIGN(start, BTRFS_I(inode)->root->sectorsize); 4211 start = round_down(start, BTRFS_I(inode)->root->sectorsize);
4211 len = ALIGN(len, BTRFS_I(inode)->root->sectorsize); 4212 len = round_up(max, BTRFS_I(inode)->root->sectorsize) - start;
4212 4213
4213 /* 4214 /*
4214 * lookup the last file extent. We're not using i_size here 4215 * lookup the last file extent. We're not using i_size here
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index d3afac292d67..ff1cc0399b9a 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1840,7 +1840,15 @@ int btrfs_release_file(struct inode *inode, struct file *filp)
1840{ 1840{
1841 if (filp->private_data) 1841 if (filp->private_data)
1842 btrfs_ioctl_trans_end(filp); 1842 btrfs_ioctl_trans_end(filp);
1843 filemap_flush(inode->i_mapping); 1843 /*
1844 * ordered_data_close is set by setattr when we are about to truncate
1845 * a file from a non-zero size to a zero size. This tries to
1846 * flush down new bytes that may have been written if the
1847 * application were using truncate to replace a file in place.
1848 */
1849 if (test_and_clear_bit(BTRFS_INODE_ORDERED_DATA_CLOSE,
1850 &BTRFS_I(inode)->runtime_flags))
1851 filemap_flush(inode->i_mapping);
1844 return 0; 1852 return 0;
1845} 1853}
1846 1854
@@ -1958,7 +1966,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
1958 1966
1959 btrfs_init_log_ctx(&ctx); 1967 btrfs_init_log_ctx(&ctx);
1960 1968
1961 ret = btrfs_log_dentry_safe(trans, root, dentry, &ctx); 1969 ret = btrfs_log_dentry_safe(trans, root, dentry, start, end, &ctx);
1962 if (ret < 0) { 1970 if (ret < 0) {
1963 /* Fallthrough and commit/free transaction. */ 1971 /* Fallthrough and commit/free transaction. */
1964 ret = 1; 1972 ret = 1;
@@ -2088,10 +2096,9 @@ static int fill_holes(struct btrfs_trans_handle *trans, struct inode *inode,
2088 goto out; 2096 goto out;
2089 } 2097 }
2090 2098
2091 if (hole_mergeable(inode, leaf, path->slots[0]+1, offset, end)) { 2099 if (hole_mergeable(inode, leaf, path->slots[0], offset, end)) {
2092 u64 num_bytes; 2100 u64 num_bytes;
2093 2101
2094 path->slots[0]++;
2095 key.offset = offset; 2102 key.offset = offset;
2096 btrfs_set_item_key_safe(root, path, &key); 2103 btrfs_set_item_key_safe(root, path, &key);
2097 fi = btrfs_item_ptr(leaf, path->slots[0], 2104 fi = btrfs_item_ptr(leaf, path->slots[0],
@@ -2216,7 +2223,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
2216 goto out_only_mutex; 2223 goto out_only_mutex;
2217 } 2224 }
2218 2225
2219 lockstart = round_up(offset , BTRFS_I(inode)->root->sectorsize); 2226 lockstart = round_up(offset, BTRFS_I(inode)->root->sectorsize);
2220 lockend = round_down(offset + len, 2227 lockend = round_down(offset + len,
2221 BTRFS_I(inode)->root->sectorsize) - 1; 2228 BTRFS_I(inode)->root->sectorsize) - 1;
2222 same_page = ((offset >> PAGE_CACHE_SHIFT) == 2229 same_page = ((offset >> PAGE_CACHE_SHIFT) ==
@@ -2277,7 +2284,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
2277 tail_start + tail_len, 0, 1); 2284 tail_start + tail_len, 0, 1);
2278 if (ret) 2285 if (ret)
2279 goto out_only_mutex; 2286 goto out_only_mutex;
2280 } 2287 }
2281 } 2288 }
2282 } 2289 }
2283 2290
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 03708ef3deef..016c403bfe7e 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -778,8 +778,12 @@ retry:
778 ins.offset, 778 ins.offset,
779 BTRFS_ORDERED_COMPRESSED, 779 BTRFS_ORDERED_COMPRESSED,
780 async_extent->compress_type); 780 async_extent->compress_type);
781 if (ret) 781 if (ret) {
782 btrfs_drop_extent_cache(inode, async_extent->start,
783 async_extent->start +
784 async_extent->ram_size - 1, 0);
782 goto out_free_reserve; 785 goto out_free_reserve;
786 }
783 787
784 /* 788 /*
785 * clear dirty, set writeback and unlock the pages. 789 * clear dirty, set writeback and unlock the pages.
@@ -971,14 +975,14 @@ static noinline int cow_file_range(struct inode *inode,
971 ret = btrfs_add_ordered_extent(inode, start, ins.objectid, 975 ret = btrfs_add_ordered_extent(inode, start, ins.objectid,
972 ram_size, cur_alloc_size, 0); 976 ram_size, cur_alloc_size, 0);
973 if (ret) 977 if (ret)
974 goto out_reserve; 978 goto out_drop_extent_cache;
975 979
976 if (root->root_key.objectid == 980 if (root->root_key.objectid ==
977 BTRFS_DATA_RELOC_TREE_OBJECTID) { 981 BTRFS_DATA_RELOC_TREE_OBJECTID) {
978 ret = btrfs_reloc_clone_csums(inode, start, 982 ret = btrfs_reloc_clone_csums(inode, start,
979 cur_alloc_size); 983 cur_alloc_size);
980 if (ret) 984 if (ret)
981 goto out_reserve; 985 goto out_drop_extent_cache;
982 } 986 }
983 987
984 if (disk_num_bytes < cur_alloc_size) 988 if (disk_num_bytes < cur_alloc_size)
@@ -1006,6 +1010,8 @@ static noinline int cow_file_range(struct inode *inode,
1006out: 1010out:
1007 return ret; 1011 return ret;
1008 1012
1013out_drop_extent_cache:
1014 btrfs_drop_extent_cache(inode, start, start + ram_size - 1, 0);
1009out_reserve: 1015out_reserve:
1010 btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1); 1016 btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
1011out_unlock: 1017out_unlock:
@@ -1096,8 +1102,10 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page,
1096 async_cow->end = cur_end; 1102 async_cow->end = cur_end;
1097 INIT_LIST_HEAD(&async_cow->extents); 1103 INIT_LIST_HEAD(&async_cow->extents);
1098 1104
1099 btrfs_init_work(&async_cow->work, async_cow_start, 1105 btrfs_init_work(&async_cow->work,
1100 async_cow_submit, async_cow_free); 1106 btrfs_delalloc_helper,
1107 async_cow_start, async_cow_submit,
1108 async_cow_free);
1101 1109
1102 nr_pages = (cur_end - start + PAGE_CACHE_SIZE) >> 1110 nr_pages = (cur_end - start + PAGE_CACHE_SIZE) >>
1103 PAGE_CACHE_SHIFT; 1111 PAGE_CACHE_SHIFT;
@@ -1881,7 +1889,8 @@ static int btrfs_writepage_start_hook(struct page *page, u64 start, u64 end)
1881 1889
1882 SetPageChecked(page); 1890 SetPageChecked(page);
1883 page_cache_get(page); 1891 page_cache_get(page);
1884 btrfs_init_work(&fixup->work, btrfs_writepage_fixup_worker, NULL, NULL); 1892 btrfs_init_work(&fixup->work, btrfs_fixup_helper,
1893 btrfs_writepage_fixup_worker, NULL, NULL);
1885 fixup->page = page; 1894 fixup->page = page;
1886 btrfs_queue_work(root->fs_info->fixup_workers, &fixup->work); 1895 btrfs_queue_work(root->fs_info->fixup_workers, &fixup->work);
1887 return -EBUSY; 1896 return -EBUSY;
@@ -2822,7 +2831,8 @@ static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end,
2822 struct inode *inode = page->mapping->host; 2831 struct inode *inode = page->mapping->host;
2823 struct btrfs_root *root = BTRFS_I(inode)->root; 2832 struct btrfs_root *root = BTRFS_I(inode)->root;
2824 struct btrfs_ordered_extent *ordered_extent = NULL; 2833 struct btrfs_ordered_extent *ordered_extent = NULL;
2825 struct btrfs_workqueue *workers; 2834 struct btrfs_workqueue *wq;
2835 btrfs_work_func_t func;
2826 2836
2827 trace_btrfs_writepage_end_io_hook(page, start, end, uptodate); 2837 trace_btrfs_writepage_end_io_hook(page, start, end, uptodate);
2828 2838
@@ -2831,13 +2841,17 @@ static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end,
2831 end - start + 1, uptodate)) 2841 end - start + 1, uptodate))
2832 return 0; 2842 return 0;
2833 2843
2834 btrfs_init_work(&ordered_extent->work, finish_ordered_fn, NULL, NULL); 2844 if (btrfs_is_free_space_inode(inode)) {
2845 wq = root->fs_info->endio_freespace_worker;
2846 func = btrfs_freespace_write_helper;
2847 } else {
2848 wq = root->fs_info->endio_write_workers;
2849 func = btrfs_endio_write_helper;
2850 }
2835 2851
2836 if (btrfs_is_free_space_inode(inode)) 2852 btrfs_init_work(&ordered_extent->work, func, finish_ordered_fn, NULL,
2837 workers = root->fs_info->endio_freespace_worker; 2853 NULL);
2838 else 2854 btrfs_queue_work(wq, &ordered_extent->work);
2839 workers = root->fs_info->endio_write_workers;
2840 btrfs_queue_work(workers, &ordered_extent->work);
2841 2855
2842 return 0; 2856 return 0;
2843} 2857}
@@ -4234,7 +4248,8 @@ out:
4234 btrfs_abort_transaction(trans, root, ret); 4248 btrfs_abort_transaction(trans, root, ret);
4235 } 4249 }
4236error: 4250error:
4237 if (last_size != (u64)-1) 4251 if (last_size != (u64)-1 &&
4252 root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID)
4238 btrfs_ordered_update_i_size(inode, last_size, NULL); 4253 btrfs_ordered_update_i_size(inode, last_size, NULL);
4239 btrfs_free_path(path); 4254 btrfs_free_path(path);
4240 return err; 4255 return err;
@@ -4674,6 +4689,11 @@ static void evict_inode_truncate_pages(struct inode *inode)
4674 clear_bit(EXTENT_FLAG_LOGGING, &em->flags); 4689 clear_bit(EXTENT_FLAG_LOGGING, &em->flags);
4675 remove_extent_mapping(map_tree, em); 4690 remove_extent_mapping(map_tree, em);
4676 free_extent_map(em); 4691 free_extent_map(em);
4692 if (need_resched()) {
4693 write_unlock(&map_tree->lock);
4694 cond_resched();
4695 write_lock(&map_tree->lock);
4696 }
4677 } 4697 }
4678 write_unlock(&map_tree->lock); 4698 write_unlock(&map_tree->lock);
4679 4699
@@ -4696,6 +4716,7 @@ static void evict_inode_truncate_pages(struct inode *inode)
4696 &cached_state, GFP_NOFS); 4716 &cached_state, GFP_NOFS);
4697 free_extent_state(state); 4717 free_extent_state(state);
4698 4718
4719 cond_resched();
4699 spin_lock(&io_tree->lock); 4720 spin_lock(&io_tree->lock);
4700 } 4721 }
4701 spin_unlock(&io_tree->lock); 4722 spin_unlock(&io_tree->lock);
@@ -5181,6 +5202,42 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
5181 iput(inode); 5202 iput(inode);
5182 inode = ERR_PTR(ret); 5203 inode = ERR_PTR(ret);
5183 } 5204 }
5205 /*
5206 * If orphan cleanup did remove any orphans, it means the tree
5207 * was modified and therefore the commit root is not the same as
5208 * the current root anymore. This is a problem, because send
5209 * uses the commit root and therefore can see inode items that
5210 * don't exist in the current root anymore, and for example make
5211 * calls to btrfs_iget, which will do tree lookups based on the
5212 * current root and not on the commit root. Those lookups will
5213 * fail, returning a -ESTALE error, and making send fail with
5214 * that error. So make sure a send does not see any orphans we
5215 * have just removed, and that it will see the same inodes
5216 * regardless of whether a transaction commit happened before
5217 * it started (meaning that the commit root will be the same as
5218 * the current root) or not.
5219 */
5220 if (sub_root->node != sub_root->commit_root) {
5221 u64 sub_flags = btrfs_root_flags(&sub_root->root_item);
5222
5223 if (sub_flags & BTRFS_ROOT_SUBVOL_RDONLY) {
5224 struct extent_buffer *eb;
5225
5226 /*
5227 * Assert we can't have races between dentry
5228 * lookup called through the snapshot creation
5229 * ioctl and the VFS.
5230 */
5231 ASSERT(mutex_is_locked(&dir->i_mutex));
5232
5233 down_write(&root->fs_info->commit_root_sem);
5234 eb = sub_root->commit_root;
5235 sub_root->commit_root =
5236 btrfs_root_node(sub_root);
5237 up_write(&root->fs_info->commit_root_sem);
5238 free_extent_buffer(eb);
5239 }
5240 }
5184 } 5241 }
5185 5242
5186 return inode; 5243 return inode;
@@ -5577,6 +5634,17 @@ int btrfs_set_inode_index(struct inode *dir, u64 *index)
5577 return ret; 5634 return ret;
5578} 5635}
5579 5636
5637static int btrfs_insert_inode_locked(struct inode *inode)
5638{
5639 struct btrfs_iget_args args;
5640 args.location = &BTRFS_I(inode)->location;
5641 args.root = BTRFS_I(inode)->root;
5642
5643 return insert_inode_locked4(inode,
5644 btrfs_inode_hash(inode->i_ino, BTRFS_I(inode)->root),
5645 btrfs_find_actor, &args);
5646}
5647
5580static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, 5648static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
5581 struct btrfs_root *root, 5649 struct btrfs_root *root,
5582 struct inode *dir, 5650 struct inode *dir,
@@ -5606,6 +5674,13 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
5606 } 5674 }
5607 5675
5608 /* 5676 /*
5677 * O_TMPFILE, set link count to 0, so that after this point,
5678 * we fill in an inode item with the correct link count.
5679 */
5680 if (!name)
5681 set_nlink(inode, 0);
5682
5683 /*
5609 * we have to initialize this early, so we can reclaim the inode 5684 * we have to initialize this early, so we can reclaim the inode
5610 * number if we fail afterwards in this function. 5685 * number if we fail afterwards in this function.
5611 */ 5686 */
@@ -5662,10 +5737,19 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
5662 sizes[1] = name_len + sizeof(*ref); 5737 sizes[1] = name_len + sizeof(*ref);
5663 } 5738 }
5664 5739
5740 location = &BTRFS_I(inode)->location;
5741 location->objectid = objectid;
5742 location->offset = 0;
5743 btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
5744
5745 ret = btrfs_insert_inode_locked(inode);
5746 if (ret < 0)
5747 goto fail;
5748
5665 path->leave_spinning = 1; 5749 path->leave_spinning = 1;
5666 ret = btrfs_insert_empty_items(trans, root, path, key, sizes, nitems); 5750 ret = btrfs_insert_empty_items(trans, root, path, key, sizes, nitems);
5667 if (ret != 0) 5751 if (ret != 0)
5668 goto fail; 5752 goto fail_unlock;
5669 5753
5670 inode_init_owner(inode, dir, mode); 5754 inode_init_owner(inode, dir, mode);
5671 inode_set_bytes(inode, 0); 5755 inode_set_bytes(inode, 0);
@@ -5688,11 +5772,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
5688 btrfs_mark_buffer_dirty(path->nodes[0]); 5772 btrfs_mark_buffer_dirty(path->nodes[0]);
5689 btrfs_free_path(path); 5773 btrfs_free_path(path);
5690 5774
5691 location = &BTRFS_I(inode)->location;
5692 location->objectid = objectid;
5693 location->offset = 0;
5694 btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
5695
5696 btrfs_inherit_iflags(inode, dir); 5775 btrfs_inherit_iflags(inode, dir);
5697 5776
5698 if (S_ISREG(mode)) { 5777 if (S_ISREG(mode)) {
@@ -5703,7 +5782,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
5703 BTRFS_INODE_NODATASUM; 5782 BTRFS_INODE_NODATASUM;
5704 } 5783 }
5705 5784
5706 btrfs_insert_inode_hash(inode);
5707 inode_tree_add(inode); 5785 inode_tree_add(inode);
5708 5786
5709 trace_btrfs_inode_new(inode); 5787 trace_btrfs_inode_new(inode);
@@ -5718,6 +5796,9 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
5718 btrfs_ino(inode), root->root_key.objectid, ret); 5796 btrfs_ino(inode), root->root_key.objectid, ret);
5719 5797
5720 return inode; 5798 return inode;
5799
5800fail_unlock:
5801 unlock_new_inode(inode);
5721fail: 5802fail:
5722 if (dir && name) 5803 if (dir && name)
5723 BTRFS_I(dir)->index_cnt--; 5804 BTRFS_I(dir)->index_cnt--;
@@ -5852,28 +5933,28 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
5852 goto out_unlock; 5933 goto out_unlock;
5853 } 5934 }
5854 5935
5855 err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
5856 if (err) {
5857 drop_inode = 1;
5858 goto out_unlock;
5859 }
5860
5861 /* 5936 /*
5862 * If the active LSM wants to access the inode during 5937 * If the active LSM wants to access the inode during
5863 * d_instantiate it needs these. Smack checks to see 5938 * d_instantiate it needs these. Smack checks to see
5864 * if the filesystem supports xattrs by looking at the 5939 * if the filesystem supports xattrs by looking at the
5865 * ops vector. 5940 * ops vector.
5866 */ 5941 */
5867
5868 inode->i_op = &btrfs_special_inode_operations; 5942 inode->i_op = &btrfs_special_inode_operations;
5869 err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); 5943 init_special_inode(inode, inode->i_mode, rdev);
5944
5945 err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
5870 if (err) 5946 if (err)
5871 drop_inode = 1; 5947 goto out_unlock_inode;
5872 else { 5948
5873 init_special_inode(inode, inode->i_mode, rdev); 5949 err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
5950 if (err) {
5951 goto out_unlock_inode;
5952 } else {
5874 btrfs_update_inode(trans, root, inode); 5953 btrfs_update_inode(trans, root, inode);
5954 unlock_new_inode(inode);
5875 d_instantiate(dentry, inode); 5955 d_instantiate(dentry, inode);
5876 } 5956 }
5957
5877out_unlock: 5958out_unlock:
5878 btrfs_end_transaction(trans, root); 5959 btrfs_end_transaction(trans, root);
5879 btrfs_balance_delayed_items(root); 5960 btrfs_balance_delayed_items(root);
@@ -5883,6 +5964,12 @@ out_unlock:
5883 iput(inode); 5964 iput(inode);
5884 } 5965 }
5885 return err; 5966 return err;
5967
5968out_unlock_inode:
5969 drop_inode = 1;
5970 unlock_new_inode(inode);
5971 goto out_unlock;
5972
5886} 5973}
5887 5974
5888static int btrfs_create(struct inode *dir, struct dentry *dentry, 5975static int btrfs_create(struct inode *dir, struct dentry *dentry,
@@ -5917,15 +6004,6 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
5917 goto out_unlock; 6004 goto out_unlock;
5918 } 6005 }
5919 drop_inode_on_err = 1; 6006 drop_inode_on_err = 1;
5920
5921 err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
5922 if (err)
5923 goto out_unlock;
5924
5925 err = btrfs_update_inode(trans, root, inode);
5926 if (err)
5927 goto out_unlock;
5928
5929 /* 6007 /*
5930 * If the active LSM wants to access the inode during 6008 * If the active LSM wants to access the inode during
5931 * d_instantiate it needs these. Smack checks to see 6009 * d_instantiate it needs these. Smack checks to see
@@ -5934,14 +6012,23 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
5934 */ 6012 */
5935 inode->i_fop = &btrfs_file_operations; 6013 inode->i_fop = &btrfs_file_operations;
5936 inode->i_op = &btrfs_file_inode_operations; 6014 inode->i_op = &btrfs_file_inode_operations;
6015 inode->i_mapping->a_ops = &btrfs_aops;
6016 inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
6017
6018 err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
6019 if (err)
6020 goto out_unlock_inode;
6021
6022 err = btrfs_update_inode(trans, root, inode);
6023 if (err)
6024 goto out_unlock_inode;
5937 6025
5938 err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); 6026 err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
5939 if (err) 6027 if (err)
5940 goto out_unlock; 6028 goto out_unlock_inode;
5941 6029
5942 inode->i_mapping->a_ops = &btrfs_aops;
5943 inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
5944 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; 6030 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
6031 unlock_new_inode(inode);
5945 d_instantiate(dentry, inode); 6032 d_instantiate(dentry, inode);
5946 6033
5947out_unlock: 6034out_unlock:
@@ -5953,6 +6040,11 @@ out_unlock:
5953 btrfs_balance_delayed_items(root); 6040 btrfs_balance_delayed_items(root);
5954 btrfs_btree_balance_dirty(root); 6041 btrfs_btree_balance_dirty(root);
5955 return err; 6042 return err;
6043
6044out_unlock_inode:
6045 unlock_new_inode(inode);
6046 goto out_unlock;
6047
5956} 6048}
5957 6049
5958static int btrfs_link(struct dentry *old_dentry, struct inode *dir, 6050static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
@@ -6060,25 +6152,30 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
6060 } 6152 }
6061 6153
6062 drop_on_err = 1; 6154 drop_on_err = 1;
6155 /* these must be set before we unlock the inode */
6156 inode->i_op = &btrfs_dir_inode_operations;
6157 inode->i_fop = &btrfs_dir_file_operations;
6063 6158
6064 err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name); 6159 err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
6065 if (err) 6160 if (err)
6066 goto out_fail; 6161 goto out_fail_inode;
6067
6068 inode->i_op = &btrfs_dir_inode_operations;
6069 inode->i_fop = &btrfs_dir_file_operations;
6070 6162
6071 btrfs_i_size_write(inode, 0); 6163 btrfs_i_size_write(inode, 0);
6072 err = btrfs_update_inode(trans, root, inode); 6164 err = btrfs_update_inode(trans, root, inode);
6073 if (err) 6165 if (err)
6074 goto out_fail; 6166 goto out_fail_inode;
6075 6167
6076 err = btrfs_add_link(trans, dir, inode, dentry->d_name.name, 6168 err = btrfs_add_link(trans, dir, inode, dentry->d_name.name,
6077 dentry->d_name.len, 0, index); 6169 dentry->d_name.len, 0, index);
6078 if (err) 6170 if (err)
6079 goto out_fail; 6171 goto out_fail_inode;
6080 6172
6081 d_instantiate(dentry, inode); 6173 d_instantiate(dentry, inode);
6174 /*
6175 * mkdir is special. We're unlocking after we call d_instantiate
6176 * to avoid a race with nfsd calling d_instantiate.
6177 */
6178 unlock_new_inode(inode);
6082 drop_on_err = 0; 6179 drop_on_err = 0;
6083 6180
6084out_fail: 6181out_fail:
@@ -6088,6 +6185,10 @@ out_fail:
6088 btrfs_balance_delayed_items(root); 6185 btrfs_balance_delayed_items(root);
6089 btrfs_btree_balance_dirty(root); 6186 btrfs_btree_balance_dirty(root);
6090 return err; 6187 return err;
6188
6189out_fail_inode:
6190 unlock_new_inode(inode);
6191 goto out_fail;
6091} 6192}
6092 6193
6093/* helper for btfs_get_extent. Given an existing extent in the tree, 6194/* helper for btfs_get_extent. Given an existing extent in the tree,
@@ -6097,14 +6198,14 @@ out_fail:
6097static int merge_extent_mapping(struct extent_map_tree *em_tree, 6198static int merge_extent_mapping(struct extent_map_tree *em_tree,
6098 struct extent_map *existing, 6199 struct extent_map *existing,
6099 struct extent_map *em, 6200 struct extent_map *em,
6100 u64 map_start, u64 map_len) 6201 u64 map_start)
6101{ 6202{
6102 u64 start_diff; 6203 u64 start_diff;
6103 6204
6104 BUG_ON(map_start < em->start || map_start >= extent_map_end(em)); 6205 BUG_ON(map_start < em->start || map_start >= extent_map_end(em));
6105 start_diff = map_start - em->start; 6206 start_diff = map_start - em->start;
6106 em->start = map_start; 6207 em->start = map_start;
6107 em->len = map_len; 6208 em->len = existing->start - em->start;
6108 if (em->block_start < EXTENT_MAP_LAST_BYTE && 6209 if (em->block_start < EXTENT_MAP_LAST_BYTE &&
6109 !test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) { 6210 !test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
6110 em->block_start += start_diff; 6211 em->block_start += start_diff;
@@ -6275,6 +6376,8 @@ next:
6275 goto not_found; 6376 goto not_found;
6276 if (start + len <= found_key.offset) 6377 if (start + len <= found_key.offset)
6277 goto not_found; 6378 goto not_found;
6379 if (start > found_key.offset)
6380 goto next;
6278 em->start = start; 6381 em->start = start;
6279 em->orig_start = start; 6382 em->orig_start = start;
6280 em->len = found_key.offset - start; 6383 em->len = found_key.offset - start;
@@ -6390,8 +6493,7 @@ insert:
6390 em->len); 6493 em->len);
6391 if (existing) { 6494 if (existing) {
6392 err = merge_extent_mapping(em_tree, existing, 6495 err = merge_extent_mapping(em_tree, existing,
6393 em, start, 6496 em, start);
6394 root->sectorsize);
6395 free_extent_map(existing); 6497 free_extent_map(existing);
6396 if (err) { 6498 if (err) {
6397 free_extent_map(em); 6499 free_extent_map(em);
@@ -7158,7 +7260,8 @@ again:
7158 if (!ret) 7260 if (!ret)
7159 goto out_test; 7261 goto out_test;
7160 7262
7161 btrfs_init_work(&ordered->work, finish_ordered_fn, NULL, NULL); 7263 btrfs_init_work(&ordered->work, btrfs_endio_write_helper,
7264 finish_ordered_fn, NULL, NULL);
7162 btrfs_queue_work(root->fs_info->endio_write_workers, 7265 btrfs_queue_work(root->fs_info->endio_write_workers,
7163 &ordered->work); 7266 &ordered->work);
7164out_test: 7267out_test:
@@ -7306,10 +7409,8 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
7306 map_length = orig_bio->bi_iter.bi_size; 7409 map_length = orig_bio->bi_iter.bi_size;
7307 ret = btrfs_map_block(root->fs_info, rw, start_sector << 9, 7410 ret = btrfs_map_block(root->fs_info, rw, start_sector << 9,
7308 &map_length, NULL, 0); 7411 &map_length, NULL, 0);
7309 if (ret) { 7412 if (ret)
7310 bio_put(orig_bio);
7311 return -EIO; 7413 return -EIO;
7312 }
7313 7414
7314 if (map_length >= orig_bio->bi_iter.bi_size) { 7415 if (map_length >= orig_bio->bi_iter.bi_size) {
7315 bio = orig_bio; 7416 bio = orig_bio;
@@ -7326,6 +7427,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
7326 bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, start_sector, GFP_NOFS); 7427 bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, start_sector, GFP_NOFS);
7327 if (!bio) 7428 if (!bio)
7328 return -ENOMEM; 7429 return -ENOMEM;
7430
7329 bio->bi_private = dip; 7431 bio->bi_private = dip;
7330 bio->bi_end_io = btrfs_end_dio_bio; 7432 bio->bi_end_io = btrfs_end_dio_bio;
7331 atomic_inc(&dip->pending_bios); 7433 atomic_inc(&dip->pending_bios);
@@ -7534,7 +7636,8 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
7534 count = iov_iter_count(iter); 7636 count = iov_iter_count(iter);
7535 if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, 7637 if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
7536 &BTRFS_I(inode)->runtime_flags)) 7638 &BTRFS_I(inode)->runtime_flags))
7537 filemap_fdatawrite_range(inode->i_mapping, offset, count); 7639 filemap_fdatawrite_range(inode->i_mapping, offset,
7640 offset + count - 1);
7538 7641
7539 if (rw & WRITE) { 7642 if (rw & WRITE) {
7540 /* 7643 /*
@@ -8041,6 +8144,7 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
8041 8144
8042 set_nlink(inode, 1); 8145 set_nlink(inode, 1);
8043 btrfs_i_size_write(inode, 0); 8146 btrfs_i_size_write(inode, 0);
8147 unlock_new_inode(inode);
8044 8148
8045 err = btrfs_subvol_inherit_props(trans, new_root, parent_root); 8149 err = btrfs_subvol_inherit_props(trans, new_root, parent_root);
8046 if (err) 8150 if (err)
@@ -8495,7 +8599,9 @@ struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode,
8495 work->inode = inode; 8599 work->inode = inode;
8496 work->wait = wait; 8600 work->wait = wait;
8497 work->delay_iput = delay_iput; 8601 work->delay_iput = delay_iput;
8498 btrfs_init_work(&work->work, btrfs_run_delalloc_work, NULL, NULL); 8602 WARN_ON_ONCE(!inode);
8603 btrfs_init_work(&work->work, btrfs_flush_delalloc_helper,
8604 btrfs_run_delalloc_work, NULL, NULL);
8499 8605
8500 return work; 8606 return work;
8501} 8607}
@@ -8699,12 +8805,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
8699 goto out_unlock; 8805 goto out_unlock;
8700 } 8806 }
8701 8807
8702 err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
8703 if (err) {
8704 drop_inode = 1;
8705 goto out_unlock;
8706 }
8707
8708 /* 8808 /*
8709 * If the active LSM wants to access the inode during 8809 * If the active LSM wants to access the inode during
8710 * d_instantiate it needs these. Smack checks to see 8810 * d_instantiate it needs these. Smack checks to see
@@ -8713,23 +8813,22 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
8713 */ 8813 */
8714 inode->i_fop = &btrfs_file_operations; 8814 inode->i_fop = &btrfs_file_operations;
8715 inode->i_op = &btrfs_file_inode_operations; 8815 inode->i_op = &btrfs_file_inode_operations;
8816 inode->i_mapping->a_ops = &btrfs_aops;
8817 inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
8818 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
8819
8820 err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
8821 if (err)
8822 goto out_unlock_inode;
8716 8823
8717 err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); 8824 err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
8718 if (err) 8825 if (err)
8719 drop_inode = 1; 8826 goto out_unlock_inode;
8720 else {
8721 inode->i_mapping->a_ops = &btrfs_aops;
8722 inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
8723 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
8724 }
8725 if (drop_inode)
8726 goto out_unlock;
8727 8827
8728 path = btrfs_alloc_path(); 8828 path = btrfs_alloc_path();
8729 if (!path) { 8829 if (!path) {
8730 err = -ENOMEM; 8830 err = -ENOMEM;
8731 drop_inode = 1; 8831 goto out_unlock_inode;
8732 goto out_unlock;
8733 } 8832 }
8734 key.objectid = btrfs_ino(inode); 8833 key.objectid = btrfs_ino(inode);
8735 key.offset = 0; 8834 key.offset = 0;
@@ -8738,9 +8837,8 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
8738 err = btrfs_insert_empty_item(trans, root, path, &key, 8837 err = btrfs_insert_empty_item(trans, root, path, &key,
8739 datasize); 8838 datasize);
8740 if (err) { 8839 if (err) {
8741 drop_inode = 1;
8742 btrfs_free_path(path); 8840 btrfs_free_path(path);
8743 goto out_unlock; 8841 goto out_unlock_inode;
8744 } 8842 }
8745 leaf = path->nodes[0]; 8843 leaf = path->nodes[0];
8746 ei = btrfs_item_ptr(leaf, path->slots[0], 8844 ei = btrfs_item_ptr(leaf, path->slots[0],
@@ -8764,12 +8862,15 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
8764 inode_set_bytes(inode, name_len); 8862 inode_set_bytes(inode, name_len);
8765 btrfs_i_size_write(inode, name_len); 8863 btrfs_i_size_write(inode, name_len);
8766 err = btrfs_update_inode(trans, root, inode); 8864 err = btrfs_update_inode(trans, root, inode);
8767 if (err) 8865 if (err) {
8768 drop_inode = 1; 8866 drop_inode = 1;
8867 goto out_unlock_inode;
8868 }
8869
8870 unlock_new_inode(inode);
8871 d_instantiate(dentry, inode);
8769 8872
8770out_unlock: 8873out_unlock:
8771 if (!err)
8772 d_instantiate(dentry, inode);
8773 btrfs_end_transaction(trans, root); 8874 btrfs_end_transaction(trans, root);
8774 if (drop_inode) { 8875 if (drop_inode) {
8775 inode_dec_link_count(inode); 8876 inode_dec_link_count(inode);
@@ -8777,6 +8878,11 @@ out_unlock:
8777 } 8878 }
8778 btrfs_btree_balance_dirty(root); 8879 btrfs_btree_balance_dirty(root);
8779 return err; 8880 return err;
8881
8882out_unlock_inode:
8883 drop_inode = 1;
8884 unlock_new_inode(inode);
8885 goto out_unlock;
8780} 8886}
8781 8887
8782static int __btrfs_prealloc_file_range(struct inode *inode, int mode, 8888static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
@@ -8960,14 +9066,6 @@ static int btrfs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
8960 goto out; 9066 goto out;
8961 } 9067 }
8962 9068
8963 ret = btrfs_init_inode_security(trans, inode, dir, NULL);
8964 if (ret)
8965 goto out;
8966
8967 ret = btrfs_update_inode(trans, root, inode);
8968 if (ret)
8969 goto out;
8970
8971 inode->i_fop = &btrfs_file_operations; 9069 inode->i_fop = &btrfs_file_operations;
8972 inode->i_op = &btrfs_file_inode_operations; 9070 inode->i_op = &btrfs_file_inode_operations;
8973 9071
@@ -8975,10 +9073,26 @@ static int btrfs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
8975 inode->i_mapping->backing_dev_info = &root->fs_info->bdi; 9073 inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
8976 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; 9074 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
8977 9075
9076 ret = btrfs_init_inode_security(trans, inode, dir, NULL);
9077 if (ret)
9078 goto out_inode;
9079
9080 ret = btrfs_update_inode(trans, root, inode);
9081 if (ret)
9082 goto out_inode;
8978 ret = btrfs_orphan_add(trans, inode); 9083 ret = btrfs_orphan_add(trans, inode);
8979 if (ret) 9084 if (ret)
8980 goto out; 9085 goto out_inode;
8981 9086
9087 /*
9088 * We set number of links to 0 in btrfs_new_inode(), and here we set
9089 * it to 1 because d_tmpfile() will issue a warning if the count is 0,
9090 * through:
9091 *
9092 * d_tmpfile() -> inode_dec_link_count() -> drop_nlink()
9093 */
9094 set_nlink(inode, 1);
9095 unlock_new_inode(inode);
8982 d_tmpfile(dentry, inode); 9096 d_tmpfile(dentry, inode);
8983 mark_inode_dirty(inode); 9097 mark_inode_dirty(inode);
8984 9098
@@ -8988,8 +9102,12 @@ out:
8988 iput(inode); 9102 iput(inode);
8989 btrfs_balance_delayed_items(root); 9103 btrfs_balance_delayed_items(root);
8990 btrfs_btree_balance_dirty(root); 9104 btrfs_btree_balance_dirty(root);
8991
8992 return ret; 9105 return ret;
9106
9107out_inode:
9108 unlock_new_inode(inode);
9109 goto out;
9110
8993} 9111}
8994 9112
8995static const struct inode_operations btrfs_dir_inode_operations = { 9113static const struct inode_operations btrfs_dir_inode_operations = {
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 47aceb494d1d..8a8e29878c34 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -711,39 +711,6 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
711 if (ret) 711 if (ret)
712 goto fail; 712 goto fail;
713 713
714 ret = btrfs_orphan_cleanup(pending_snapshot->snap);
715 if (ret)
716 goto fail;
717
718 /*
719 * If orphan cleanup did remove any orphans, it means the tree was
720 * modified and therefore the commit root is not the same as the
721 * current root anymore. This is a problem, because send uses the
722 * commit root and therefore can see inode items that don't exist
723 * in the current root anymore, and for example make calls to
724 * btrfs_iget, which will do tree lookups based on the current root
725 * and not on the commit root. Those lookups will fail, returning a
726 * -ESTALE error, and making send fail with that error. So make sure
727 * a send does not see any orphans we have just removed, and that it
728 * will see the same inodes regardless of whether a transaction
729 * commit happened before it started (meaning that the commit root
730 * will be the same as the current root) or not.
731 */
732 if (readonly && pending_snapshot->snap->node !=
733 pending_snapshot->snap->commit_root) {
734 trans = btrfs_join_transaction(pending_snapshot->snap);
735 if (IS_ERR(trans) && PTR_ERR(trans) != -ENOENT) {
736 ret = PTR_ERR(trans);
737 goto fail;
738 }
739 if (!IS_ERR(trans)) {
740 ret = btrfs_commit_transaction(trans,
741 pending_snapshot->snap);
742 if (ret)
743 goto fail;
744 }
745 }
746
747 inode = btrfs_lookup_dentry(dentry->d_parent->d_inode, dentry); 714 inode = btrfs_lookup_dentry(dentry->d_parent->d_inode, dentry);
748 if (IS_ERR(inode)) { 715 if (IS_ERR(inode)) {
749 ret = PTR_ERR(inode); 716 ret = PTR_ERR(inode);
@@ -1052,8 +1019,10 @@ static bool defrag_check_next_extent(struct inode *inode, struct extent_map *em)
1052 return false; 1019 return false;
1053 1020
1054 next = defrag_lookup_extent(inode, em->start + em->len); 1021 next = defrag_lookup_extent(inode, em->start + em->len);
1055 if (!next || next->block_start >= EXTENT_MAP_LAST_BYTE || 1022 if (!next || next->block_start >= EXTENT_MAP_LAST_BYTE)
1056 (em->block_start + em->block_len == next->block_start)) 1023 ret = false;
1024 else if ((em->block_start + em->block_len == next->block_start) &&
1025 (em->block_len > 128 * 1024 && next->block_len > 128 * 1024))
1057 ret = false; 1026 ret = false;
1058 1027
1059 free_extent_map(next); 1028 free_extent_map(next);
@@ -1088,7 +1057,6 @@ static int should_defrag_range(struct inode *inode, u64 start, int thresh,
1088 } 1057 }
1089 1058
1090 next_mergeable = defrag_check_next_extent(inode, em); 1059 next_mergeable = defrag_check_next_extent(inode, em);
1091
1092 /* 1060 /*
1093 * we hit a real extent, if it is big or the next extent is not a 1061 * we hit a real extent, if it is big or the next extent is not a
1094 * real extent, don't bother defragging it 1062 * real extent, don't bother defragging it
@@ -1735,7 +1703,7 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file,
1735 ~(BTRFS_SUBVOL_CREATE_ASYNC | BTRFS_SUBVOL_RDONLY | 1703 ~(BTRFS_SUBVOL_CREATE_ASYNC | BTRFS_SUBVOL_RDONLY |
1736 BTRFS_SUBVOL_QGROUP_INHERIT)) { 1704 BTRFS_SUBVOL_QGROUP_INHERIT)) {
1737 ret = -EOPNOTSUPP; 1705 ret = -EOPNOTSUPP;
1738 goto out; 1706 goto free_args;
1739 } 1707 }
1740 1708
1741 if (vol_args->flags & BTRFS_SUBVOL_CREATE_ASYNC) 1709 if (vol_args->flags & BTRFS_SUBVOL_CREATE_ASYNC)
@@ -1745,27 +1713,31 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file,
1745 if (vol_args->flags & BTRFS_SUBVOL_QGROUP_INHERIT) { 1713 if (vol_args->flags & BTRFS_SUBVOL_QGROUP_INHERIT) {
1746 if (vol_args->size > PAGE_CACHE_SIZE) { 1714 if (vol_args->size > PAGE_CACHE_SIZE) {
1747 ret = -EINVAL; 1715 ret = -EINVAL;
1748 goto out; 1716 goto free_args;
1749 } 1717 }
1750 inherit = memdup_user(vol_args->qgroup_inherit, vol_args->size); 1718 inherit = memdup_user(vol_args->qgroup_inherit, vol_args->size);
1751 if (IS_ERR(inherit)) { 1719 if (IS_ERR(inherit)) {
1752 ret = PTR_ERR(inherit); 1720 ret = PTR_ERR(inherit);
1753 goto out; 1721 goto free_args;
1754 } 1722 }
1755 } 1723 }
1756 1724
1757 ret = btrfs_ioctl_snap_create_transid(file, vol_args->name, 1725 ret = btrfs_ioctl_snap_create_transid(file, vol_args->name,
1758 vol_args->fd, subvol, ptr, 1726 vol_args->fd, subvol, ptr,
1759 readonly, inherit); 1727 readonly, inherit);
1728 if (ret)
1729 goto free_inherit;
1760 1730
1761 if (ret == 0 && ptr && 1731 if (ptr && copy_to_user(arg +
1762 copy_to_user(arg + 1732 offsetof(struct btrfs_ioctl_vol_args_v2,
1763 offsetof(struct btrfs_ioctl_vol_args_v2, 1733 transid),
1764 transid), ptr, sizeof(*ptr))) 1734 ptr, sizeof(*ptr)))
1765 ret = -EFAULT; 1735 ret = -EFAULT;
1766out: 1736
1767 kfree(vol_args); 1737free_inherit:
1768 kfree(inherit); 1738 kfree(inherit);
1739free_args:
1740 kfree(vol_args);
1769 return ret; 1741 return ret;
1770} 1742}
1771 1743
@@ -2685,7 +2657,7 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg)
2685 vol_args = memdup_user(arg, sizeof(*vol_args)); 2657 vol_args = memdup_user(arg, sizeof(*vol_args));
2686 if (IS_ERR(vol_args)) { 2658 if (IS_ERR(vol_args)) {
2687 ret = PTR_ERR(vol_args); 2659 ret = PTR_ERR(vol_args);
2688 goto out; 2660 goto err_drop;
2689 } 2661 }
2690 2662
2691 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; 2663 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
@@ -2703,6 +2675,7 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg)
2703 2675
2704out: 2676out:
2705 kfree(vol_args); 2677 kfree(vol_args);
2678err_drop:
2706 mnt_drop_write_file(file); 2679 mnt_drop_write_file(file);
2707 return ret; 2680 return ret;
2708} 2681}
@@ -3527,7 +3500,8 @@ process_slot:
3527 btrfs_mark_buffer_dirty(leaf); 3500 btrfs_mark_buffer_dirty(leaf);
3528 btrfs_release_path(path); 3501 btrfs_release_path(path);
3529 3502
3530 last_dest_end = new_key.offset + datal; 3503 last_dest_end = ALIGN(new_key.offset + datal,
3504 root->sectorsize);
3531 ret = clone_finish_inode_update(trans, inode, 3505 ret = clone_finish_inode_update(trans, inode,
3532 last_dest_end, 3506 last_dest_end,
3533 destoff, olen); 3507 destoff, olen);
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 963895c1f801..ac734ec4cc20 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -615,6 +615,7 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr)
615 spin_unlock(&root->ordered_extent_lock); 615 spin_unlock(&root->ordered_extent_lock);
616 616
617 btrfs_init_work(&ordered->flush_work, 617 btrfs_init_work(&ordered->flush_work,
618 btrfs_flush_delalloc_helper,
618 btrfs_run_ordered_extent_work, NULL, NULL); 619 btrfs_run_ordered_extent_work, NULL, NULL);
619 list_add_tail(&ordered->work_list, &works); 620 list_add_tail(&ordered->work_list, &works);
620 btrfs_queue_work(root->fs_info->flush_workers, 621 btrfs_queue_work(root->fs_info->flush_workers,
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index b497498484be..ded5c601d916 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1973,7 +1973,7 @@ static int qgroup_subtree_accounting(struct btrfs_trans_handle *trans,
1973 elem.seq, &roots); 1973 elem.seq, &roots);
1974 btrfs_put_tree_mod_seq(fs_info, &elem); 1974 btrfs_put_tree_mod_seq(fs_info, &elem);
1975 if (ret < 0) 1975 if (ret < 0)
1976 return ret; 1976 goto out;
1977 1977
1978 if (roots->nnodes != 1) 1978 if (roots->nnodes != 1)
1979 goto out; 1979 goto out;
@@ -2720,6 +2720,7 @@ qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
2720 memset(&fs_info->qgroup_rescan_work, 0, 2720 memset(&fs_info->qgroup_rescan_work, 0,
2721 sizeof(fs_info->qgroup_rescan_work)); 2721 sizeof(fs_info->qgroup_rescan_work));
2722 btrfs_init_work(&fs_info->qgroup_rescan_work, 2722 btrfs_init_work(&fs_info->qgroup_rescan_work,
2723 btrfs_qgroup_rescan_helper,
2723 btrfs_qgroup_rescan_worker, NULL, NULL); 2724 btrfs_qgroup_rescan_worker, NULL, NULL);
2724 2725
2725 if (ret) { 2726 if (ret) {
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index 4a88f073fdd7..0a6b6e4bcbb9 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -1416,7 +1416,8 @@ cleanup:
1416 1416
1417static void async_rmw_stripe(struct btrfs_raid_bio *rbio) 1417static void async_rmw_stripe(struct btrfs_raid_bio *rbio)
1418{ 1418{
1419 btrfs_init_work(&rbio->work, rmw_work, NULL, NULL); 1419 btrfs_init_work(&rbio->work, btrfs_rmw_helper,
1420 rmw_work, NULL, NULL);
1420 1421
1421 btrfs_queue_work(rbio->fs_info->rmw_workers, 1422 btrfs_queue_work(rbio->fs_info->rmw_workers,
1422 &rbio->work); 1423 &rbio->work);
@@ -1424,7 +1425,8 @@ static void async_rmw_stripe(struct btrfs_raid_bio *rbio)
1424 1425
1425static void async_read_rebuild(struct btrfs_raid_bio *rbio) 1426static void async_read_rebuild(struct btrfs_raid_bio *rbio)
1426{ 1427{
1427 btrfs_init_work(&rbio->work, read_rebuild_work, NULL, NULL); 1428 btrfs_init_work(&rbio->work, btrfs_rmw_helper,
1429 read_rebuild_work, NULL, NULL);
1428 1430
1429 btrfs_queue_work(rbio->fs_info->rmw_workers, 1431 btrfs_queue_work(rbio->fs_info->rmw_workers,
1430 &rbio->work); 1432 &rbio->work);
@@ -1665,7 +1667,8 @@ static void btrfs_raid_unplug(struct blk_plug_cb *cb, bool from_schedule)
1665 plug = container_of(cb, struct btrfs_plug_cb, cb); 1667 plug = container_of(cb, struct btrfs_plug_cb, cb);
1666 1668
1667 if (from_schedule) { 1669 if (from_schedule) {
1668 btrfs_init_work(&plug->work, unplug_work, NULL, NULL); 1670 btrfs_init_work(&plug->work, btrfs_rmw_helper,
1671 unplug_work, NULL, NULL);
1669 btrfs_queue_work(plug->info->rmw_workers, 1672 btrfs_queue_work(plug->info->rmw_workers,
1670 &plug->work); 1673 &plug->work);
1671 return; 1674 return;
diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c
index 09230cf3a244..20408c6b665a 100644
--- a/fs/btrfs/reada.c
+++ b/fs/btrfs/reada.c
@@ -798,7 +798,8 @@ static void reada_start_machine(struct btrfs_fs_info *fs_info)
798 /* FIXME we cannot handle this properly right now */ 798 /* FIXME we cannot handle this properly right now */
799 BUG(); 799 BUG();
800 } 800 }
801 btrfs_init_work(&rmw->work, reada_start_machine_worker, NULL, NULL); 801 btrfs_init_work(&rmw->work, btrfs_readahead_helper,
802 reada_start_machine_worker, NULL, NULL);
802 rmw->fs_info = fs_info; 803 rmw->fs_info = fs_info;
803 804
804 btrfs_queue_work(fs_info->readahead_workers, &rmw->work); 805 btrfs_queue_work(fs_info->readahead_workers, &rmw->work);
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index b6d198f5181e..f4a41f37be22 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -428,8 +428,8 @@ struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace)
428 sbio->index = i; 428 sbio->index = i;
429 sbio->sctx = sctx; 429 sbio->sctx = sctx;
430 sbio->page_count = 0; 430 sbio->page_count = 0;
431 btrfs_init_work(&sbio->work, scrub_bio_end_io_worker, 431 btrfs_init_work(&sbio->work, btrfs_scrub_helper,
432 NULL, NULL); 432 scrub_bio_end_io_worker, NULL, NULL);
433 433
434 if (i != SCRUB_BIOS_PER_SCTX - 1) 434 if (i != SCRUB_BIOS_PER_SCTX - 1)
435 sctx->bios[i]->next_free = i + 1; 435 sctx->bios[i]->next_free = i + 1;
@@ -999,8 +999,8 @@ nodatasum_case:
999 fixup_nodatasum->root = fs_info->extent_root; 999 fixup_nodatasum->root = fs_info->extent_root;
1000 fixup_nodatasum->mirror_num = failed_mirror_index + 1; 1000 fixup_nodatasum->mirror_num = failed_mirror_index + 1;
1001 scrub_pending_trans_workers_inc(sctx); 1001 scrub_pending_trans_workers_inc(sctx);
1002 btrfs_init_work(&fixup_nodatasum->work, scrub_fixup_nodatasum, 1002 btrfs_init_work(&fixup_nodatasum->work, btrfs_scrub_helper,
1003 NULL, NULL); 1003 scrub_fixup_nodatasum, NULL, NULL);
1004 btrfs_queue_work(fs_info->scrub_workers, 1004 btrfs_queue_work(fs_info->scrub_workers,
1005 &fixup_nodatasum->work); 1005 &fixup_nodatasum->work);
1006 goto out; 1006 goto out;
@@ -1616,7 +1616,8 @@ static void scrub_wr_bio_end_io(struct bio *bio, int err)
1616 sbio->err = err; 1616 sbio->err = err;
1617 sbio->bio = bio; 1617 sbio->bio = bio;
1618 1618
1619 btrfs_init_work(&sbio->work, scrub_wr_bio_end_io_worker, NULL, NULL); 1619 btrfs_init_work(&sbio->work, btrfs_scrubwrc_helper,
1620 scrub_wr_bio_end_io_worker, NULL, NULL);
1620 btrfs_queue_work(fs_info->scrub_wr_completion_workers, &sbio->work); 1621 btrfs_queue_work(fs_info->scrub_wr_completion_workers, &sbio->work);
1621} 1622}
1622 1623
@@ -2904,6 +2905,7 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
2904 struct scrub_ctx *sctx; 2905 struct scrub_ctx *sctx;
2905 int ret; 2906 int ret;
2906 struct btrfs_device *dev; 2907 struct btrfs_device *dev;
2908 struct rcu_string *name;
2907 2909
2908 if (btrfs_fs_closing(fs_info)) 2910 if (btrfs_fs_closing(fs_info))
2909 return -EINVAL; 2911 return -EINVAL;
@@ -2965,6 +2967,16 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
2965 return -ENODEV; 2967 return -ENODEV;
2966 } 2968 }
2967 2969
2970 if (!is_dev_replace && !readonly && !dev->writeable) {
2971 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
2972 rcu_read_lock();
2973 name = rcu_dereference(dev->name);
2974 btrfs_err(fs_info, "scrub: device %s is not writable",
2975 name->str);
2976 rcu_read_unlock();
2977 return -EROFS;
2978 }
2979
2968 mutex_lock(&fs_info->scrub_lock); 2980 mutex_lock(&fs_info->scrub_lock);
2969 if (!dev->in_fs_metadata || dev->is_tgtdev_for_dev_replace) { 2981 if (!dev->in_fs_metadata || dev->is_tgtdev_for_dev_replace) {
2970 mutex_unlock(&fs_info->scrub_lock); 2982 mutex_unlock(&fs_info->scrub_lock);
@@ -3203,7 +3215,8 @@ static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
3203 nocow_ctx->len = len; 3215 nocow_ctx->len = len;
3204 nocow_ctx->mirror_num = mirror_num; 3216 nocow_ctx->mirror_num = mirror_num;
3205 nocow_ctx->physical_for_dev_replace = physical_for_dev_replace; 3217 nocow_ctx->physical_for_dev_replace = physical_for_dev_replace;
3206 btrfs_init_work(&nocow_ctx->work, copy_nocow_pages_worker, NULL, NULL); 3218 btrfs_init_work(&nocow_ctx->work, btrfs_scrubnc_helper,
3219 copy_nocow_pages_worker, NULL, NULL);
3207 INIT_LIST_HEAD(&nocow_ctx->inodes); 3220 INIT_LIST_HEAD(&nocow_ctx->inodes);
3208 btrfs_queue_work(fs_info->scrub_nocow_workers, 3221 btrfs_queue_work(fs_info->scrub_nocow_workers,
3209 &nocow_ctx->work); 3222 &nocow_ctx->work);
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index 78699364f537..12e53556e214 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -614,7 +614,7 @@ int btrfs_kobj_rm_device(struct btrfs_fs_info *fs_info,
614 if (!fs_info->device_dir_kobj) 614 if (!fs_info->device_dir_kobj)
615 return -EINVAL; 615 return -EINVAL;
616 616
617 if (one_device) { 617 if (one_device && one_device->bdev) {
618 disk = one_device->bdev->bd_part; 618 disk = one_device->bdev->bd_part;
619 disk_kobj = &part_to_dev(disk)->kobj; 619 disk_kobj = &part_to_dev(disk)->kobj;
620 620
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 9e1f2cd5e67a..1d1ba083ca6e 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -94,8 +94,10 @@
94#define LOG_WALK_REPLAY_ALL 3 94#define LOG_WALK_REPLAY_ALL 3
95 95
96static int btrfs_log_inode(struct btrfs_trans_handle *trans, 96static int btrfs_log_inode(struct btrfs_trans_handle *trans,
97 struct btrfs_root *root, struct inode *inode, 97 struct btrfs_root *root, struct inode *inode,
98 int inode_only); 98 int inode_only,
99 const loff_t start,
100 const loff_t end);
99static int link_to_fixup_dir(struct btrfs_trans_handle *trans, 101static int link_to_fixup_dir(struct btrfs_trans_handle *trans,
100 struct btrfs_root *root, 102 struct btrfs_root *root,
101 struct btrfs_path *path, u64 objectid); 103 struct btrfs_path *path, u64 objectid);
@@ -3298,7 +3300,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
3298 struct list_head ordered_sums; 3300 struct list_head ordered_sums;
3299 int skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; 3301 int skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
3300 bool has_extents = false; 3302 bool has_extents = false;
3301 bool need_find_last_extent = (*last_extent == 0); 3303 bool need_find_last_extent = true;
3302 bool done = false; 3304 bool done = false;
3303 3305
3304 INIT_LIST_HEAD(&ordered_sums); 3306 INIT_LIST_HEAD(&ordered_sums);
@@ -3352,8 +3354,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
3352 */ 3354 */
3353 if (ins_keys[i].type == BTRFS_EXTENT_DATA_KEY) { 3355 if (ins_keys[i].type == BTRFS_EXTENT_DATA_KEY) {
3354 has_extents = true; 3356 has_extents = true;
3355 if (need_find_last_extent && 3357 if (first_key.objectid == (u64)-1)
3356 first_key.objectid == (u64)-1)
3357 first_key = ins_keys[i]; 3358 first_key = ins_keys[i];
3358 } else { 3359 } else {
3359 need_find_last_extent = false; 3360 need_find_last_extent = false;
@@ -3427,6 +3428,16 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
3427 if (!has_extents) 3428 if (!has_extents)
3428 return ret; 3429 return ret;
3429 3430
3431 if (need_find_last_extent && *last_extent == first_key.offset) {
3432 /*
3433 * We don't have any leaves between our current one and the one
3434 * we processed before that can have file extent items for our
3435 * inode (and have a generation number smaller than our current
3436 * transaction id).
3437 */
3438 need_find_last_extent = false;
3439 }
3440
3430 /* 3441 /*
3431 * Because we use btrfs_search_forward we could skip leaves that were 3442 * Because we use btrfs_search_forward we could skip leaves that were
3432 * not modified and then assume *last_extent is valid when it really 3443 * not modified and then assume *last_extent is valid when it really
@@ -3537,7 +3548,7 @@ fill_holes:
3537 0, 0); 3548 0, 0);
3538 if (ret) 3549 if (ret)
3539 break; 3550 break;
3540 *last_extent = offset + len; 3551 *last_extent = extent_end;
3541 } 3552 }
3542 /* 3553 /*
3543 * Need to let the callers know we dropped the path so they should 3554 * Need to let the callers know we dropped the path so they should
@@ -3849,8 +3860,10 @@ process:
3849 * This handles both files and directories. 3860 * This handles both files and directories.
3850 */ 3861 */
3851static int btrfs_log_inode(struct btrfs_trans_handle *trans, 3862static int btrfs_log_inode(struct btrfs_trans_handle *trans,
3852 struct btrfs_root *root, struct inode *inode, 3863 struct btrfs_root *root, struct inode *inode,
3853 int inode_only) 3864 int inode_only,
3865 const loff_t start,
3866 const loff_t end)
3854{ 3867{
3855 struct btrfs_path *path; 3868 struct btrfs_path *path;
3856 struct btrfs_path *dst_path; 3869 struct btrfs_path *dst_path;
@@ -3867,6 +3880,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
3867 int ins_nr; 3880 int ins_nr;
3868 bool fast_search = false; 3881 bool fast_search = false;
3869 u64 ino = btrfs_ino(inode); 3882 u64 ino = btrfs_ino(inode);
3883 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
3870 3884
3871 path = btrfs_alloc_path(); 3885 path = btrfs_alloc_path();
3872 if (!path) 3886 if (!path)
@@ -4040,13 +4054,35 @@ log_extents:
4040 goto out_unlock; 4054 goto out_unlock;
4041 } 4055 }
4042 } else if (inode_only == LOG_INODE_ALL) { 4056 } else if (inode_only == LOG_INODE_ALL) {
4043 struct extent_map_tree *tree = &BTRFS_I(inode)->extent_tree;
4044 struct extent_map *em, *n; 4057 struct extent_map *em, *n;
4045 4058
4046 write_lock(&tree->lock); 4059 write_lock(&em_tree->lock);
4047 list_for_each_entry_safe(em, n, &tree->modified_extents, list) 4060 /*
4048 list_del_init(&em->list); 4061 * We can't just remove every em if we're called for a ranged
4049 write_unlock(&tree->lock); 4062 * fsync - that is, one that doesn't cover the whole possible
4063 * file range (0 to LLONG_MAX). This is because we can have
4064 * em's that fall outside the range we're logging and therefore
4065 * their ordered operations haven't completed yet
4066 * (btrfs_finish_ordered_io() not invoked yet). This means we
4067 * didn't get their respective file extent item in the fs/subvol
4068 * tree yet, and need to let the next fast fsync (one which
4069 * consults the list of modified extent maps) find the em so
4070 * that it logs a matching file extent item and waits for the
4071 * respective ordered operation to complete (if it's still
4072 * running).
4073 *
4074 * Removing every em outside the range we're logging would make
4075 * the next fast fsync not log their matching file extent items,
4076 * therefore making us lose data after a log replay.
4077 */
4078 list_for_each_entry_safe(em, n, &em_tree->modified_extents,
4079 list) {
4080 const u64 mod_end = em->mod_start + em->mod_len - 1;
4081
4082 if (em->mod_start >= start && mod_end <= end)
4083 list_del_init(&em->list);
4084 }
4085 write_unlock(&em_tree->lock);
4050 } 4086 }
4051 4087
4052 if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) { 4088 if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) {
@@ -4056,6 +4092,7 @@ log_extents:
4056 goto out_unlock; 4092 goto out_unlock;
4057 } 4093 }
4058 } 4094 }
4095
4059 BTRFS_I(inode)->logged_trans = trans->transid; 4096 BTRFS_I(inode)->logged_trans = trans->transid;
4060 BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->last_sub_trans; 4097 BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->last_sub_trans;
4061out_unlock: 4098out_unlock:
@@ -4152,7 +4189,10 @@ out:
4152 */ 4189 */
4153static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, 4190static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
4154 struct btrfs_root *root, struct inode *inode, 4191 struct btrfs_root *root, struct inode *inode,
4155 struct dentry *parent, int exists_only, 4192 struct dentry *parent,
4193 const loff_t start,
4194 const loff_t end,
4195 int exists_only,
4156 struct btrfs_log_ctx *ctx) 4196 struct btrfs_log_ctx *ctx)
4157{ 4197{
4158 int inode_only = exists_only ? LOG_INODE_EXISTS : LOG_INODE_ALL; 4198 int inode_only = exists_only ? LOG_INODE_EXISTS : LOG_INODE_ALL;
@@ -4198,7 +4238,7 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
4198 if (ret) 4238 if (ret)
4199 goto end_no_trans; 4239 goto end_no_trans;
4200 4240
4201 ret = btrfs_log_inode(trans, root, inode, inode_only); 4241 ret = btrfs_log_inode(trans, root, inode, inode_only, start, end);
4202 if (ret) 4242 if (ret)
4203 goto end_trans; 4243 goto end_trans;
4204 4244
@@ -4226,7 +4266,8 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
4226 4266
4227 if (BTRFS_I(inode)->generation > 4267 if (BTRFS_I(inode)->generation >
4228 root->fs_info->last_trans_committed) { 4268 root->fs_info->last_trans_committed) {
4229 ret = btrfs_log_inode(trans, root, inode, inode_only); 4269 ret = btrfs_log_inode(trans, root, inode, inode_only,
4270 0, LLONG_MAX);
4230 if (ret) 4271 if (ret)
4231 goto end_trans; 4272 goto end_trans;
4232 } 4273 }
@@ -4260,13 +4301,15 @@ end_no_trans:
4260 */ 4301 */
4261int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, 4302int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans,
4262 struct btrfs_root *root, struct dentry *dentry, 4303 struct btrfs_root *root, struct dentry *dentry,
4304 const loff_t start,
4305 const loff_t end,
4263 struct btrfs_log_ctx *ctx) 4306 struct btrfs_log_ctx *ctx)
4264{ 4307{
4265 struct dentry *parent = dget_parent(dentry); 4308 struct dentry *parent = dget_parent(dentry);
4266 int ret; 4309 int ret;
4267 4310
4268 ret = btrfs_log_inode_parent(trans, root, dentry->d_inode, parent, 4311 ret = btrfs_log_inode_parent(trans, root, dentry->d_inode, parent,
4269 0, ctx); 4312 start, end, 0, ctx);
4270 dput(parent); 4313 dput(parent);
4271 4314
4272 return ret; 4315 return ret;
@@ -4503,6 +4546,7 @@ int btrfs_log_new_name(struct btrfs_trans_handle *trans,
4503 root->fs_info->last_trans_committed)) 4546 root->fs_info->last_trans_committed))
4504 return 0; 4547 return 0;
4505 4548
4506 return btrfs_log_inode_parent(trans, root, inode, parent, 1, NULL); 4549 return btrfs_log_inode_parent(trans, root, inode, parent, 0,
4550 LLONG_MAX, 1, NULL);
4507} 4551}
4508 4552
diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h
index 7f5b41bd5373..e2e798ae7cd7 100644
--- a/fs/btrfs/tree-log.h
+++ b/fs/btrfs/tree-log.h
@@ -59,6 +59,8 @@ int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans,
59int btrfs_recover_log_trees(struct btrfs_root *tree_root); 59int btrfs_recover_log_trees(struct btrfs_root *tree_root);
60int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, 60int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans,
61 struct btrfs_root *root, struct dentry *dentry, 61 struct btrfs_root *root, struct dentry *dentry,
62 const loff_t start,
63 const loff_t end,
62 struct btrfs_log_ctx *ctx); 64 struct btrfs_log_ctx *ctx);
63int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, 65int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
64 struct btrfs_root *root, 66 struct btrfs_root *root,
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 6cb82f62cb7c..2c2d6d1d8eee 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -508,6 +508,43 @@ static noinline int device_list_add(const char *path,
508 ret = 1; 508 ret = 1;
509 device->fs_devices = fs_devices; 509 device->fs_devices = fs_devices;
510 } else if (!device->name || strcmp(device->name->str, path)) { 510 } else if (!device->name || strcmp(device->name->str, path)) {
511 /*
512 * When FS is already mounted.
513 * 1. If you are here and if the device->name is NULL that
514 * means this device was missing at time of FS mount.
515 * 2. If you are here and if the device->name is different
516 * from 'path' that means either
517 * a. The same device disappeared and reappeared with
518 * a different name, or
519 * b. The missing-disk-which-was-replaced has
520 * reappeared now.
521 *
522 * We must allow 1 and 2a above. But 2b would be spurious
523 * and unintentional.
524 *
525 * Further, in case of 1 and 2a above, the disk at 'path'
526 * would have missed some transactions while it was away, and
527 * in case of 2a the stale bdev has to be updated as well.
528 * 2b must not be allowed at any time.
529 */
530
531 /*
532 * For now, we do allow update to btrfs_fs_device through the
533 * btrfs dev scan cli after FS has been mounted. We're still
534 * tracking a problem where systems fail mount by subvolume id
535 * when we reject replacement on a mounted FS.
536 */
537 if (!fs_devices->opened && found_transid < device->generation) {
538 /*
539 * That is if the FS is _not_ mounted and if you
540 * are here, that means there is more than one
541 * disk with same uuid and devid.We keep the one
542 * with larger generation number or the last-in if
543 * generation are equal.
544 */
545 return -EEXIST;
546 }
547
511 name = rcu_string_strdup(path, GFP_NOFS); 548 name = rcu_string_strdup(path, GFP_NOFS);
512 if (!name) 549 if (!name)
513 return -ENOMEM; 550 return -ENOMEM;
@@ -519,6 +556,15 @@ static noinline int device_list_add(const char *path,
519 } 556 }
520 } 557 }
521 558
559 /*
560 * Unmount does not free the btrfs_device struct but would zero
561 * generation along with most of the other members. So just update
562 * it back. We need it to pick the disk with largest generation
563 * (as above).
564 */
565 if (!fs_devices->opened)
566 device->generation = found_transid;
567
522 if (found_transid > fs_devices->latest_trans) { 568 if (found_transid > fs_devices->latest_trans) {
523 fs_devices->latest_devid = devid; 569 fs_devices->latest_devid = devid;
524 fs_devices->latest_trans = found_transid; 570 fs_devices->latest_trans = found_transid;
@@ -1436,7 +1482,7 @@ static int btrfs_add_device(struct btrfs_trans_handle *trans,
1436 btrfs_set_device_io_align(leaf, dev_item, device->io_align); 1482 btrfs_set_device_io_align(leaf, dev_item, device->io_align);
1437 btrfs_set_device_io_width(leaf, dev_item, device->io_width); 1483 btrfs_set_device_io_width(leaf, dev_item, device->io_width);
1438 btrfs_set_device_sector_size(leaf, dev_item, device->sector_size); 1484 btrfs_set_device_sector_size(leaf, dev_item, device->sector_size);
1439 btrfs_set_device_total_bytes(leaf, dev_item, device->total_bytes); 1485 btrfs_set_device_total_bytes(leaf, dev_item, device->disk_total_bytes);
1440 btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used); 1486 btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used);
1441 btrfs_set_device_group(leaf, dev_item, 0); 1487 btrfs_set_device_group(leaf, dev_item, 0);
1442 btrfs_set_device_seek_speed(leaf, dev_item, 0); 1488 btrfs_set_device_seek_speed(leaf, dev_item, 0);
@@ -1671,7 +1717,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
1671 device->fs_devices->total_devices--; 1717 device->fs_devices->total_devices--;
1672 1718
1673 if (device->missing) 1719 if (device->missing)
1674 root->fs_info->fs_devices->missing_devices--; 1720 device->fs_devices->missing_devices--;
1675 1721
1676 next_device = list_entry(root->fs_info->fs_devices->devices.next, 1722 next_device = list_entry(root->fs_info->fs_devices->devices.next,
1677 struct btrfs_device, dev_list); 1723 struct btrfs_device, dev_list);
@@ -1801,8 +1847,12 @@ void btrfs_rm_dev_replace_srcdev(struct btrfs_fs_info *fs_info,
1801 if (srcdev->bdev) { 1847 if (srcdev->bdev) {
1802 fs_info->fs_devices->open_devices--; 1848 fs_info->fs_devices->open_devices--;
1803 1849
1804 /* zero out the old super */ 1850 /*
1805 btrfs_scratch_superblock(srcdev); 1851 * zero out the old super if it is writable
1852 * (a non-writable device, e.g. a seed device, is left alone)
1853 */
1854 if (srcdev->writeable)
1855 btrfs_scratch_superblock(srcdev);
1806 } 1856 }
1807 1857
1808 call_rcu(&srcdev->rcu, free_device); 1858 call_rcu(&srcdev->rcu, free_device);
@@ -1941,6 +1991,9 @@ static int btrfs_prepare_sprout(struct btrfs_root *root)
1941 fs_devices->seeding = 0; 1991 fs_devices->seeding = 0;
1942 fs_devices->num_devices = 0; 1992 fs_devices->num_devices = 0;
1943 fs_devices->open_devices = 0; 1993 fs_devices->open_devices = 0;
1994 fs_devices->missing_devices = 0;
1995 fs_devices->num_can_discard = 0;
1996 fs_devices->rotating = 0;
1944 fs_devices->seed = seed_devices; 1997 fs_devices->seed = seed_devices;
1945 1998
1946 generate_random_uuid(fs_devices->fsid); 1999 generate_random_uuid(fs_devices->fsid);
@@ -5800,7 +5853,8 @@ struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
5800 else 5853 else
5801 generate_random_uuid(dev->uuid); 5854 generate_random_uuid(dev->uuid);
5802 5855
5803 btrfs_init_work(&dev->work, pending_bios_fn, NULL, NULL); 5856 btrfs_init_work(&dev->work, btrfs_submit_helper,
5857 pending_bios_fn, NULL, NULL);
5804 5858
5805 return dev; 5859 return dev;
5806} 5860}
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
index 603f18a65c12..a2172f3f69e3 100644
--- a/fs/cifs/Kconfig
+++ b/fs/cifs/Kconfig
@@ -22,6 +22,11 @@ config CIFS
22 support for OS/2 and Windows ME and similar servers is provided as 22 support for OS/2 and Windows ME and similar servers is provided as
23 well. 23 well.
24 24
25 The module also provides optional support for the follow-on
26 protocols for CIFS including SMB3, which enables
27 useful performance and security features (see the description
28 of CONFIG_CIFS_SMB2).
29
25 The cifs module provides an advanced network file system 30 The cifs module provides an advanced network file system
26 client for mounting to CIFS compliant servers. It includes 31 client for mounting to CIFS compliant servers. It includes
27 support for DFS (hierarchical name space), secure per-user 32 support for DFS (hierarchical name space), secure per-user
@@ -121,7 +126,8 @@ config CIFS_ACL
121 depends on CIFS_XATTR && KEYS 126 depends on CIFS_XATTR && KEYS
122 help 127 help
123 Allows fetching CIFS/NTFS ACL from the server. The DACL blob 128 Allows fetching CIFS/NTFS ACL from the server. The DACL blob
124 is handed over to the application/caller. 129 is handed over to the application/caller. See the man
130 page for getcifsacl for more information.
125 131
126config CIFS_DEBUG 132config CIFS_DEBUG
127 bool "Enable CIFS debugging routines" 133 bool "Enable CIFS debugging routines"
@@ -162,7 +168,7 @@ config CIFS_NFSD_EXPORT
162 Allows NFS server to export a CIFS mounted share (nfsd over cifs) 168 Allows NFS server to export a CIFS mounted share (nfsd over cifs)
163 169
164config CIFS_SMB2 170config CIFS_SMB2
165 bool "SMB2 network file system support" 171 bool "SMB2 and SMB3 network file system support"
166 depends on CIFS && INET 172 depends on CIFS && INET
167 select NLS 173 select NLS
168 select KEYS 174 select KEYS
@@ -170,16 +176,21 @@ config CIFS_SMB2
170 select DNS_RESOLVER 176 select DNS_RESOLVER
171 177
172 help 178 help
173 This enables experimental support for the SMB2 (Server Message Block 179 This enables support for the Server Message Block version 2
174 version 2) protocol. The SMB2 protocol is the successor to the 180 family of protocols, including SMB3. SMB3 support is
175 popular CIFS and SMB network file sharing protocols. SMB2 is the 181 enabled on mount by specifying "vers=3.0" in the mount
176 native file sharing mechanism for recent versions of Windows 182 options. These protocols are the successors to the popular
177 operating systems (since Vista). SMB2 enablement will eventually 183 CIFS and SMB network file sharing protocols. SMB3 is the
178 allow users better performance, security and features, than would be 184 native file sharing mechanism for the more recent
179 possible with cifs. Note that smb2 mount options also are simpler 185 versions of Windows (Windows 8 and Windows 2012 and
180 (compared to cifs) due to protocol improvements. 186 later) and Samba server and many others support SMB3 well.
181 187 In general SMB3 enables better performance, security
182 Unless you are a developer or tester, say N. 188 and features, than would be possible with CIFS (Note that
189 when mounting to Samba, due to the CIFS POSIX extensions,
190 CIFS mounts can provide slightly better POSIX compatibility
191 than SMB3 mounts do though). Note that SMB2/SMB3 mount
192 options are also slightly simpler (compared to CIFS) due
193 to protocol improvements.
183 194
184config CIFS_FSCACHE 195config CIFS_FSCACHE
185 bool "Provide CIFS client caching support" 196 bool "Provide CIFS client caching support"
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index ac4f260155c8..889b98455750 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -207,6 +207,19 @@ cifs_statfs(struct dentry *dentry, struct kstatfs *buf)
207 return 0; 207 return 0;
208} 208}
209 209
210static long cifs_fallocate(struct file *file, int mode, loff_t off, loff_t len)
211{
212 struct super_block *sb = file->f_path.dentry->d_sb;
213 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
214 struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
215 struct TCP_Server_Info *server = tcon->ses->server;
216
217 if (server->ops->fallocate)
218 return server->ops->fallocate(file, tcon, mode, off, len);
219
220 return -EOPNOTSUPP;
221}
222
210static int cifs_permission(struct inode *inode, int mask) 223static int cifs_permission(struct inode *inode, int mask)
211{ 224{
212 struct cifs_sb_info *cifs_sb; 225 struct cifs_sb_info *cifs_sb;
@@ -812,8 +825,9 @@ static int cifs_setlease(struct file *file, long arg, struct file_lock **lease)
812 if (!(S_ISREG(inode->i_mode))) 825 if (!(S_ISREG(inode->i_mode)))
813 return -EINVAL; 826 return -EINVAL;
814 827
815 /* check if file is oplocked */ 828 /* Check if file is oplocked if this is a request for a new lease */
816 if (((arg == F_RDLCK) && CIFS_CACHE_READ(CIFS_I(inode))) || 829 if (arg == F_UNLCK ||
830 ((arg == F_RDLCK) && CIFS_CACHE_READ(CIFS_I(inode))) ||
817 ((arg == F_WRLCK) && CIFS_CACHE_WRITE(CIFS_I(inode)))) 831 ((arg == F_WRLCK) && CIFS_CACHE_WRITE(CIFS_I(inode))))
818 return generic_setlease(file, arg, lease); 832 return generic_setlease(file, arg, lease);
819 else if (tlink_tcon(cfile->tlink)->local_lease && 833 else if (tlink_tcon(cfile->tlink)->local_lease &&
@@ -908,6 +922,7 @@ const struct file_operations cifs_file_ops = {
908 .unlocked_ioctl = cifs_ioctl, 922 .unlocked_ioctl = cifs_ioctl,
909#endif /* CONFIG_CIFS_POSIX */ 923#endif /* CONFIG_CIFS_POSIX */
910 .setlease = cifs_setlease, 924 .setlease = cifs_setlease,
925 .fallocate = cifs_fallocate,
911}; 926};
912 927
913const struct file_operations cifs_file_strict_ops = { 928const struct file_operations cifs_file_strict_ops = {
@@ -927,6 +942,7 @@ const struct file_operations cifs_file_strict_ops = {
927 .unlocked_ioctl = cifs_ioctl, 942 .unlocked_ioctl = cifs_ioctl,
928#endif /* CONFIG_CIFS_POSIX */ 943#endif /* CONFIG_CIFS_POSIX */
929 .setlease = cifs_setlease, 944 .setlease = cifs_setlease,
945 .fallocate = cifs_fallocate,
930}; 946};
931 947
932const struct file_operations cifs_file_direct_ops = { 948const struct file_operations cifs_file_direct_ops = {
@@ -947,6 +963,7 @@ const struct file_operations cifs_file_direct_ops = {
947#endif /* CONFIG_CIFS_POSIX */ 963#endif /* CONFIG_CIFS_POSIX */
948 .llseek = cifs_llseek, 964 .llseek = cifs_llseek,
949 .setlease = cifs_setlease, 965 .setlease = cifs_setlease,
966 .fallocate = cifs_fallocate,
950}; 967};
951 968
952const struct file_operations cifs_file_nobrl_ops = { 969const struct file_operations cifs_file_nobrl_ops = {
@@ -965,6 +982,7 @@ const struct file_operations cifs_file_nobrl_ops = {
965 .unlocked_ioctl = cifs_ioctl, 982 .unlocked_ioctl = cifs_ioctl,
966#endif /* CONFIG_CIFS_POSIX */ 983#endif /* CONFIG_CIFS_POSIX */
967 .setlease = cifs_setlease, 984 .setlease = cifs_setlease,
985 .fallocate = cifs_fallocate,
968}; 986};
969 987
970const struct file_operations cifs_file_strict_nobrl_ops = { 988const struct file_operations cifs_file_strict_nobrl_ops = {
@@ -983,6 +1001,7 @@ const struct file_operations cifs_file_strict_nobrl_ops = {
983 .unlocked_ioctl = cifs_ioctl, 1001 .unlocked_ioctl = cifs_ioctl,
984#endif /* CONFIG_CIFS_POSIX */ 1002#endif /* CONFIG_CIFS_POSIX */
985 .setlease = cifs_setlease, 1003 .setlease = cifs_setlease,
1004 .fallocate = cifs_fallocate,
986}; 1005};
987 1006
988const struct file_operations cifs_file_direct_nobrl_ops = { 1007const struct file_operations cifs_file_direct_nobrl_ops = {
@@ -1002,6 +1021,7 @@ const struct file_operations cifs_file_direct_nobrl_ops = {
1002#endif /* CONFIG_CIFS_POSIX */ 1021#endif /* CONFIG_CIFS_POSIX */
1003 .llseek = cifs_llseek, 1022 .llseek = cifs_llseek,
1004 .setlease = cifs_setlease, 1023 .setlease = cifs_setlease,
1024 .fallocate = cifs_fallocate,
1005}; 1025};
1006 1026
1007const struct file_operations cifs_dir_ops = { 1027const struct file_operations cifs_dir_ops = {
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index b0fafa499505..002e0c173939 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -136,5 +136,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
136extern const struct export_operations cifs_export_ops; 136extern const struct export_operations cifs_export_ops;
137#endif /* CONFIG_CIFS_NFSD_EXPORT */ 137#endif /* CONFIG_CIFS_NFSD_EXPORT */
138 138
139#define CIFS_VERSION "2.04" 139#define CIFS_VERSION "2.05"
140#endif /* _CIFSFS_H */ 140#endif /* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 0012e1e291d4..25b8392bfdd2 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -70,11 +70,6 @@
70#define SERVER_NAME_LENGTH 40 70#define SERVER_NAME_LENGTH 40
71#define SERVER_NAME_LEN_WITH_NULL (SERVER_NAME_LENGTH + 1) 71#define SERVER_NAME_LEN_WITH_NULL (SERVER_NAME_LENGTH + 1)
72 72
73/* used to define string lengths for reversing unicode strings */
74/* (256+1)*2 = 514 */
75/* (max path length + 1 for null) * 2 for unicode */
76#define MAX_NAME 514
77
78/* SMB echo "timeout" -- FIXME: tunable? */ 73/* SMB echo "timeout" -- FIXME: tunable? */
79#define SMB_ECHO_INTERVAL (60 * HZ) 74#define SMB_ECHO_INTERVAL (60 * HZ)
80 75
@@ -409,6 +404,10 @@ struct smb_version_operations {
409 /* get mtu credits */ 404 /* get mtu credits */
410 int (*wait_mtu_credits)(struct TCP_Server_Info *, unsigned int, 405 int (*wait_mtu_credits)(struct TCP_Server_Info *, unsigned int,
411 unsigned int *, unsigned int *); 406 unsigned int *, unsigned int *);
407 /* check if we need to issue closedir */
408 bool (*dir_needs_close)(struct cifsFileInfo *);
409 long (*fallocate)(struct file *, struct cifs_tcon *, int, loff_t,
410 loff_t);
412}; 411};
413 412
414struct smb_version_values { 413struct smb_version_values {
@@ -883,6 +882,7 @@ struct cifs_tcon {
883 for this mount even if server would support */ 882 for this mount even if server would support */
884 bool local_lease:1; /* check leases (only) on local system not remote */ 883 bool local_lease:1; /* check leases (only) on local system not remote */
885 bool broken_posix_open; /* e.g. Samba server versions < 3.3.2, 3.2.9 */ 884 bool broken_posix_open; /* e.g. Samba server versions < 3.3.2, 3.2.9 */
885 bool broken_sparse_sup; /* if server or share does not support sparse */
886 bool need_reconnect:1; /* connection reset, tid now invalid */ 886 bool need_reconnect:1; /* connection reset, tid now invalid */
887#ifdef CONFIG_CIFS_SMB2 887#ifdef CONFIG_CIFS_SMB2
888 bool print:1; /* set if connection to printer share */ 888 bool print:1; /* set if connection to printer share */
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index 33df36ef9d52..5f9822ac0245 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -2253,6 +2253,29 @@ typedef struct {
2253/* minimum includes first three fields, and empty FS Name */ 2253/* minimum includes first three fields, and empty FS Name */
2254#define MIN_FS_ATTR_INFO_SIZE 12 2254#define MIN_FS_ATTR_INFO_SIZE 12
2255 2255
2256
2257/* List of FileSystemAttributes - see 2.5.1 of MS-FSCC */
2258#define FILE_SUPPORT_INTEGRITY_STREAMS 0x04000000
2259#define FILE_SUPPORTS_USN_JOURNAL 0x02000000
2260#define FILE_SUPPORTS_OPEN_BY_FILE_ID 0x01000000
2261#define FILE_SUPPORTS_EXTENDED_ATTRIBUTES 0x00800000
2262#define FILE_SUPPORTS_HARD_LINKS 0x00400000
2263#define FILE_SUPPORTS_TRANSACTIONS 0x00200000
2264#define FILE_SEQUENTIAL_WRITE_ONCE 0x00100000
2265#define FILE_READ_ONLY_VOLUME 0x00080000
2266#define FILE_NAMED_STREAMS 0x00040000
2267#define FILE_SUPPORTS_ENCRYPTION 0x00020000
2268#define FILE_SUPPORTS_OBJECT_IDS 0x00010000
2269#define FILE_VOLUME_IS_COMPRESSED 0x00008000
2270#define FILE_SUPPORTS_REMOTE_STORAGE 0x00000100
2271#define FILE_SUPPORTS_REPARSE_POINTS 0x00000080
2272#define FILE_SUPPORTS_SPARSE_FILES 0x00000040
2273#define FILE_VOLUME_QUOTAS 0x00000020
2274#define FILE_FILE_COMPRESSION 0x00000010
2275#define FILE_PERSISTENT_ACLS 0x00000008
2276#define FILE_UNICODE_ON_DISK 0x00000004
2277#define FILE_CASE_PRESERVED_NAMES 0x00000002
2278#define FILE_CASE_SENSITIVE_SEARCH 0x00000001
2256typedef struct { 2279typedef struct {
2257 __le32 Attributes; 2280 __le32 Attributes;
2258 __le32 MaxPathNameComponentLength; 2281 __le32 MaxPathNameComponentLength;
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 03ed8a09581c..36ca2045009b 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -1600,6 +1600,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
1600 tmp_end++; 1600 tmp_end++;
1601 if (!(tmp_end < end && tmp_end[1] == delim)) { 1601 if (!(tmp_end < end && tmp_end[1] == delim)) {
1602 /* No it is not. Set the password to NULL */ 1602 /* No it is not. Set the password to NULL */
1603 kfree(vol->password);
1603 vol->password = NULL; 1604 vol->password = NULL;
1604 break; 1605 break;
1605 } 1606 }
@@ -1637,6 +1638,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
1637 options = end; 1638 options = end;
1638 } 1639 }
1639 1640
1641 kfree(vol->password);
1640 /* Now build new password string */ 1642 /* Now build new password string */
1641 temp_len = strlen(value); 1643 temp_len = strlen(value);
1642 vol->password = kzalloc(temp_len+1, GFP_KERNEL); 1644 vol->password = kzalloc(temp_len+1, GFP_KERNEL);
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 3db0c5fd9a11..6cbd9c688cfe 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -497,6 +497,14 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry,
497 goto out; 497 goto out;
498 } 498 }
499 499
500 if (file->f_flags & O_DIRECT &&
501 CIFS_SB(inode->i_sb)->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
502 if (CIFS_SB(inode->i_sb)->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
503 file->f_op = &cifs_file_direct_nobrl_ops;
504 else
505 file->f_op = &cifs_file_direct_ops;
506 }
507
500 file_info = cifs_new_fileinfo(&fid, file, tlink, oplock); 508 file_info = cifs_new_fileinfo(&fid, file, tlink, oplock);
501 if (file_info == NULL) { 509 if (file_info == NULL) {
502 if (server->ops->close) 510 if (server->ops->close)
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 4ab2f79ffa7a..7c018a1c52f7 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -467,6 +467,14 @@ int cifs_open(struct inode *inode, struct file *file)
467 cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n", 467 cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
468 inode, file->f_flags, full_path); 468 inode, file->f_flags, full_path);
469 469
470 if (file->f_flags & O_DIRECT &&
471 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
472 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
473 file->f_op = &cifs_file_direct_nobrl_ops;
474 else
475 file->f_op = &cifs_file_direct_ops;
476 }
477
470 if (server->oplocks) 478 if (server->oplocks)
471 oplock = REQ_OPLOCK; 479 oplock = REQ_OPLOCK;
472 else 480 else
@@ -762,7 +770,7 @@ int cifs_closedir(struct inode *inode, struct file *file)
762 770
763 cifs_dbg(FYI, "Freeing private data in close dir\n"); 771 cifs_dbg(FYI, "Freeing private data in close dir\n");
764 spin_lock(&cifs_file_list_lock); 772 spin_lock(&cifs_file_list_lock);
765 if (!cfile->srch_inf.endOfSearch && !cfile->invalidHandle) { 773 if (server->ops->dir_needs_close(cfile)) {
766 cfile->invalidHandle = true; 774 cfile->invalidHandle = true;
767 spin_unlock(&cifs_file_list_lock); 775 spin_unlock(&cifs_file_list_lock);
768 if (server->ops->close_dir) 776 if (server->ops->close_dir)
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 426d6c6ad8bf..7899a40465b3 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1720,13 +1720,22 @@ cifs_rename2(struct inode *source_dir, struct dentry *source_dentry,
1720unlink_target: 1720unlink_target:
1721 /* Try unlinking the target dentry if it's not negative */ 1721 /* Try unlinking the target dentry if it's not negative */
1722 if (target_dentry->d_inode && (rc == -EACCES || rc == -EEXIST)) { 1722 if (target_dentry->d_inode && (rc == -EACCES || rc == -EEXIST)) {
1723 tmprc = cifs_unlink(target_dir, target_dentry); 1723 if (d_is_dir(target_dentry))
1724 tmprc = cifs_rmdir(target_dir, target_dentry);
1725 else
1726 tmprc = cifs_unlink(target_dir, target_dentry);
1724 if (tmprc) 1727 if (tmprc)
1725 goto cifs_rename_exit; 1728 goto cifs_rename_exit;
1726 rc = cifs_do_rename(xid, source_dentry, from_name, 1729 rc = cifs_do_rename(xid, source_dentry, from_name,
1727 target_dentry, to_name); 1730 target_dentry, to_name);
1728 } 1731 }
1729 1732
1733 /* force revalidate to go get info when needed */
1734 CIFS_I(source_dir)->time = CIFS_I(target_dir)->time = 0;
1735
1736 source_dir->i_ctime = source_dir->i_mtime = target_dir->i_ctime =
1737 target_dir->i_mtime = current_fs_time(source_dir->i_sb);
1738
1730cifs_rename_exit: 1739cifs_rename_exit:
1731 kfree(info_buf_source); 1740 kfree(info_buf_source);
1732 kfree(from_name); 1741 kfree(from_name);
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index 68559fd557fb..5657416d3483 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -213,8 +213,12 @@ create_mf_symlink(const unsigned int xid, struct cifs_tcon *tcon,
213 if (rc) 213 if (rc)
214 goto out; 214 goto out;
215 215
216 rc = tcon->ses->server->ops->create_mf_symlink(xid, tcon, cifs_sb, 216 if (tcon->ses->server->ops->create_mf_symlink)
217 fromName, buf, &bytes_written); 217 rc = tcon->ses->server->ops->create_mf_symlink(xid, tcon,
218 cifs_sb, fromName, buf, &bytes_written);
219 else
220 rc = -EOPNOTSUPP;
221
218 if (rc) 222 if (rc)
219 goto out; 223 goto out;
220 224
@@ -339,9 +343,11 @@ cifs_query_mf_symlink(unsigned int xid, struct cifs_tcon *tcon,
339 if (rc) 343 if (rc)
340 return rc; 344 return rc;
341 345
342 if (file_info.EndOfFile != cpu_to_le64(CIFS_MF_SYMLINK_FILE_SIZE)) 346 if (file_info.EndOfFile != cpu_to_le64(CIFS_MF_SYMLINK_FILE_SIZE)) {
347 rc = -ENOENT;
343 /* it's not a symlink */ 348 /* it's not a symlink */
344 goto out; 349 goto out;
350 }
345 351
346 io_parms.netfid = fid.netfid; 352 io_parms.netfid = fid.netfid;
347 io_parms.pid = current->tgid; 353 io_parms.pid = current->tgid;
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 81340c6253eb..b7415d596dbd 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -574,13 +574,6 @@ void cifs_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock)
574 cinode->oplock = 0; 574 cinode->oplock = 0;
575} 575}
576 576
577static int
578cifs_oplock_break_wait(void *unused)
579{
580 schedule();
581 return signal_pending(current) ? -ERESTARTSYS : 0;
582}
583
584/* 577/*
585 * We wait for oplock breaks to be processed before we attempt to perform 578 * We wait for oplock breaks to be processed before we attempt to perform
586 * writes. 579 * writes.
diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c
index 6834b9c3bec1..b333ff60781d 100644
--- a/fs/cifs/netmisc.c
+++ b/fs/cifs/netmisc.c
@@ -925,11 +925,23 @@ cifs_NTtimeToUnix(__le64 ntutc)
925 /* BB what about the timezone? BB */ 925 /* BB what about the timezone? BB */
926 926
927 /* Subtract the NTFS time offset, then convert to 1s intervals. */ 927 /* Subtract the NTFS time offset, then convert to 1s intervals. */
928 u64 t; 928 s64 t = le64_to_cpu(ntutc) - NTFS_TIME_OFFSET;
929
930 /*
931 * Unfortunately can not use normal 64 bit division on 32 bit arch, but
932 * the alternative, do_div, does not work with negative numbers so have
933 * to special case them
934 */
935 if (t < 0) {
936 t = -t;
937 ts.tv_nsec = (long)(do_div(t, 10000000) * 100);
938 ts.tv_nsec = -ts.tv_nsec;
939 ts.tv_sec = -t;
940 } else {
941 ts.tv_nsec = (long)do_div(t, 10000000) * 100;
942 ts.tv_sec = t;
943 }
929 944
930 t = le64_to_cpu(ntutc) - NTFS_TIME_OFFSET;
931 ts.tv_nsec = do_div(t, 10000000) * 100;
932 ts.tv_sec = t;
933 return ts; 945 return ts;
934} 946}
935 947
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index b15862e0f68c..b334a89d6a66 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -593,11 +593,11 @@ find_cifs_entry(const unsigned int xid, struct cifs_tcon *tcon, loff_t pos,
593 /* close and restart search */ 593 /* close and restart search */
594 cifs_dbg(FYI, "search backing up - close and restart search\n"); 594 cifs_dbg(FYI, "search backing up - close and restart search\n");
595 spin_lock(&cifs_file_list_lock); 595 spin_lock(&cifs_file_list_lock);
596 if (!cfile->srch_inf.endOfSearch && !cfile->invalidHandle) { 596 if (server->ops->dir_needs_close(cfile)) {
597 cfile->invalidHandle = true; 597 cfile->invalidHandle = true;
598 spin_unlock(&cifs_file_list_lock); 598 spin_unlock(&cifs_file_list_lock);
599 if (server->ops->close) 599 if (server->ops->close_dir)
600 server->ops->close(xid, tcon, &cfile->fid); 600 server->ops->close_dir(xid, tcon, &cfile->fid);
601 } else 601 } else
602 spin_unlock(&cifs_file_list_lock); 602 spin_unlock(&cifs_file_list_lock);
603 if (cfile->srch_inf.ntwrk_buf_start) { 603 if (cfile->srch_inf.ntwrk_buf_start) {
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 39ee32688eac..57db63ff88da 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -243,10 +243,11 @@ static void decode_ascii_ssetup(char **pbcc_area, __u16 bleft,
243 kfree(ses->serverOS); 243 kfree(ses->serverOS);
244 244
245 ses->serverOS = kzalloc(len + 1, GFP_KERNEL); 245 ses->serverOS = kzalloc(len + 1, GFP_KERNEL);
246 if (ses->serverOS) 246 if (ses->serverOS) {
247 strncpy(ses->serverOS, bcc_ptr, len); 247 strncpy(ses->serverOS, bcc_ptr, len);
248 if (strncmp(ses->serverOS, "OS/2", 4) == 0) 248 if (strncmp(ses->serverOS, "OS/2", 4) == 0)
249 cifs_dbg(FYI, "OS/2 server\n"); 249 cifs_dbg(FYI, "OS/2 server\n");
250 }
250 251
251 bcc_ptr += len + 1; 252 bcc_ptr += len + 1;
252 bleft -= len + 1; 253 bleft -= len + 1;
@@ -744,14 +745,6 @@ out:
744 sess_free_buffer(sess_data); 745 sess_free_buffer(sess_data);
745} 746}
746 747
747#else
748
749static void
750sess_auth_lanman(struct sess_data *sess_data)
751{
752 sess_data->result = -EOPNOTSUPP;
753 sess_data->func = NULL;
754}
755#endif 748#endif
756 749
757static void 750static void
@@ -1102,15 +1095,6 @@ out:
1102 ses->auth_key.response = NULL; 1095 ses->auth_key.response = NULL;
1103} 1096}
1104 1097
1105#else
1106
1107static void
1108sess_auth_kerberos(struct sess_data *sess_data)
1109{
1110 cifs_dbg(VFS, "Kerberos negotiated but upcall support disabled!\n");
1111 sess_data->result = -ENOSYS;
1112 sess_data->func = NULL;
1113}
1114#endif /* ! CONFIG_CIFS_UPCALL */ 1098#endif /* ! CONFIG_CIFS_UPCALL */
1115 1099
1116/* 1100/*
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index 5e8c22d6c7b9..1a6df4b03f67 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -1015,6 +1015,12 @@ cifs_wp_retry_size(struct inode *inode)
1015 return CIFS_SB(inode->i_sb)->wsize; 1015 return CIFS_SB(inode->i_sb)->wsize;
1016} 1016}
1017 1017
1018static bool
1019cifs_dir_needs_close(struct cifsFileInfo *cfile)
1020{
1021 return !cfile->srch_inf.endOfSearch && !cfile->invalidHandle;
1022}
1023
1018struct smb_version_operations smb1_operations = { 1024struct smb_version_operations smb1_operations = {
1019 .send_cancel = send_nt_cancel, 1025 .send_cancel = send_nt_cancel,
1020 .compare_fids = cifs_compare_fids, 1026 .compare_fids = cifs_compare_fids,
@@ -1086,6 +1092,7 @@ struct smb_version_operations smb1_operations = {
1086 .create_mf_symlink = cifs_create_mf_symlink, 1092 .create_mf_symlink = cifs_create_mf_symlink,
1087 .is_read_op = cifs_is_read_op, 1093 .is_read_op = cifs_is_read_op,
1088 .wp_retry_size = cifs_wp_retry_size, 1094 .wp_retry_size = cifs_wp_retry_size,
1095 .dir_needs_close = cifs_dir_needs_close,
1089#ifdef CONFIG_CIFS_XATTR 1096#ifdef CONFIG_CIFS_XATTR
1090 .query_all_EAs = CIFSSMBQAllEAs, 1097 .query_all_EAs = CIFSSMBQAllEAs,
1091 .set_EA = CIFSSMBSetEA, 1098 .set_EA = CIFSSMBSetEA,
diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c
index 3f17b4550831..45992944e238 100644
--- a/fs/cifs/smb2file.c
+++ b/fs/cifs/smb2file.c
@@ -50,7 +50,7 @@ smb2_open_file(const unsigned int xid, struct cifs_open_parms *oparms,
50 goto out; 50 goto out;
51 } 51 }
52 52
53 smb2_data = kzalloc(sizeof(struct smb2_file_all_info) + MAX_NAME * 2, 53 smb2_data = kzalloc(sizeof(struct smb2_file_all_info) + PATH_MAX * 2,
54 GFP_KERNEL); 54 GFP_KERNEL);
55 if (smb2_data == NULL) { 55 if (smb2_data == NULL) {
56 rc = -ENOMEM; 56 rc = -ENOMEM;
diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c
index 0150182a4494..899bbc86f73e 100644
--- a/fs/cifs/smb2inode.c
+++ b/fs/cifs/smb2inode.c
@@ -131,7 +131,7 @@ smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon,
131 *adjust_tz = false; 131 *adjust_tz = false;
132 *symlink = false; 132 *symlink = false;
133 133
134 smb2_data = kzalloc(sizeof(struct smb2_file_all_info) + MAX_NAME * 2, 134 smb2_data = kzalloc(sizeof(struct smb2_file_all_info) + PATH_MAX * 2,
135 GFP_KERNEL); 135 GFP_KERNEL);
136 if (smb2_data == NULL) 136 if (smb2_data == NULL)
137 return -ENOMEM; 137 return -ENOMEM;
diff --git a/fs/cifs/smb2maperror.c b/fs/cifs/smb2maperror.c
index e31a9dfdcd39..af59d03db492 100644
--- a/fs/cifs/smb2maperror.c
+++ b/fs/cifs/smb2maperror.c
@@ -214,7 +214,7 @@ static const struct status_to_posix_error smb2_error_map_table[] = {
214 {STATUS_BREAKPOINT, -EIO, "STATUS_BREAKPOINT"}, 214 {STATUS_BREAKPOINT, -EIO, "STATUS_BREAKPOINT"},
215 {STATUS_SINGLE_STEP, -EIO, "STATUS_SINGLE_STEP"}, 215 {STATUS_SINGLE_STEP, -EIO, "STATUS_SINGLE_STEP"},
216 {STATUS_BUFFER_OVERFLOW, -EIO, "STATUS_BUFFER_OVERFLOW"}, 216 {STATUS_BUFFER_OVERFLOW, -EIO, "STATUS_BUFFER_OVERFLOW"},
217 {STATUS_NO_MORE_FILES, -EIO, "STATUS_NO_MORE_FILES"}, 217 {STATUS_NO_MORE_FILES, -ENODATA, "STATUS_NO_MORE_FILES"},
218 {STATUS_WAKE_SYSTEM_DEBUGGER, -EIO, "STATUS_WAKE_SYSTEM_DEBUGGER"}, 218 {STATUS_WAKE_SYSTEM_DEBUGGER, -EIO, "STATUS_WAKE_SYSTEM_DEBUGGER"},
219 {STATUS_HANDLES_CLOSED, -EIO, "STATUS_HANDLES_CLOSED"}, 219 {STATUS_HANDLES_CLOSED, -EIO, "STATUS_HANDLES_CLOSED"},
220 {STATUS_NO_INHERITANCE, -EIO, "STATUS_NO_INHERITANCE"}, 220 {STATUS_NO_INHERITANCE, -EIO, "STATUS_NO_INHERITANCE"},
@@ -298,7 +298,7 @@ static const struct status_to_posix_error smb2_error_map_table[] = {
298 {STATUS_INVALID_PARAMETER, -EINVAL, "STATUS_INVALID_PARAMETER"}, 298 {STATUS_INVALID_PARAMETER, -EINVAL, "STATUS_INVALID_PARAMETER"},
299 {STATUS_NO_SUCH_DEVICE, -ENODEV, "STATUS_NO_SUCH_DEVICE"}, 299 {STATUS_NO_SUCH_DEVICE, -ENODEV, "STATUS_NO_SUCH_DEVICE"},
300 {STATUS_NO_SUCH_FILE, -ENOENT, "STATUS_NO_SUCH_FILE"}, 300 {STATUS_NO_SUCH_FILE, -ENOENT, "STATUS_NO_SUCH_FILE"},
301 {STATUS_INVALID_DEVICE_REQUEST, -EIO, "STATUS_INVALID_DEVICE_REQUEST"}, 301 {STATUS_INVALID_DEVICE_REQUEST, -EOPNOTSUPP, "STATUS_INVALID_DEVICE_REQUEST"},
302 {STATUS_END_OF_FILE, -ENODATA, "STATUS_END_OF_FILE"}, 302 {STATUS_END_OF_FILE, -ENODATA, "STATUS_END_OF_FILE"},
303 {STATUS_WRONG_VOLUME, -EIO, "STATUS_WRONG_VOLUME"}, 303 {STATUS_WRONG_VOLUME, -EIO, "STATUS_WRONG_VOLUME"},
304 {STATUS_NO_MEDIA_IN_DEVICE, -EIO, "STATUS_NO_MEDIA_IN_DEVICE"}, 304 {STATUS_NO_MEDIA_IN_DEVICE, -EIO, "STATUS_NO_MEDIA_IN_DEVICE"},
diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c
index f2e6ac29a8d6..4aa7a0f07d6e 100644
--- a/fs/cifs/smb2misc.c
+++ b/fs/cifs/smb2misc.c
@@ -178,9 +178,24 @@ smb2_check_message(char *buf, unsigned int length)
178 /* Windows 7 server returns 24 bytes more */ 178 /* Windows 7 server returns 24 bytes more */
179 if (clc_len + 20 == len && command == SMB2_OPLOCK_BREAK_HE) 179 if (clc_len + 20 == len && command == SMB2_OPLOCK_BREAK_HE)
180 return 0; 180 return 0;
181 /* server can return one byte more */ 181 /* server can return one byte more due to implied bcc[0] */
182 if (clc_len == 4 + len + 1) 182 if (clc_len == 4 + len + 1)
183 return 0; 183 return 0;
184
185 /*
186 * MacOS server pads after SMB2.1 write response with 3 bytes
187 * of junk. Other servers match RFC1001 len to actual
188 * SMB2/SMB3 frame length (header + smb2 response specific data)
189 * Log the server error (once), but allow it and continue
190 * since the frame is parseable.
191 */
192 if (clc_len < 4 /* RFC1001 header size */ + len) {
193 printk_once(KERN_WARNING
194 "SMB2 server sent bad RFC1001 len %d not %d\n",
195 len, clc_len - 4);
196 return 0;
197 }
198
184 return 1; 199 return 1;
185 } 200 }
186 return 0; 201 return 0;
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 77f8aeb9c2fc..f522193b7184 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -389,7 +389,7 @@ smb2_query_file_info(const unsigned int xid, struct cifs_tcon *tcon,
389 int rc; 389 int rc;
390 struct smb2_file_all_info *smb2_data; 390 struct smb2_file_all_info *smb2_data;
391 391
392 smb2_data = kzalloc(sizeof(struct smb2_file_all_info) + MAX_NAME * 2, 392 smb2_data = kzalloc(sizeof(struct smb2_file_all_info) + PATH_MAX * 2,
393 GFP_KERNEL); 393 GFP_KERNEL);
394 if (smb2_data == NULL) 394 if (smb2_data == NULL)
395 return -ENOMEM; 395 return -ENOMEM;
@@ -731,11 +731,72 @@ smb2_sync_write(const unsigned int xid, struct cifsFileInfo *cfile,
731 return SMB2_write(xid, parms, written, iov, nr_segs); 731 return SMB2_write(xid, parms, written, iov, nr_segs);
732} 732}
733 733
734/* Set or clear the SPARSE_FILE attribute based on value passed in setsparse */
735static bool smb2_set_sparse(const unsigned int xid, struct cifs_tcon *tcon,
736 struct cifsFileInfo *cfile, struct inode *inode, __u8 setsparse)
737{
738 struct cifsInodeInfo *cifsi;
739 int rc;
740
741 cifsi = CIFS_I(inode);
742
743 /* if file already sparse don't bother setting sparse again */
744 if ((cifsi->cifsAttrs & FILE_ATTRIBUTE_SPARSE_FILE) && setsparse)
745 return true; /* already sparse */
746
747 if (!(cifsi->cifsAttrs & FILE_ATTRIBUTE_SPARSE_FILE) && !setsparse)
748 return true; /* already not sparse */
749
750 /*
751 * Can't check for sparse support on share the usual way via the
752 * FS attribute info (FILE_SUPPORTS_SPARSE_FILES) on the share
753 * since Samba server doesn't set the flag on the share, yet
754 * supports the set sparse FSCTL and returns sparse correctly
755 * in the file attributes. If we fail setting sparse though we
756 * mark that server does not support sparse files for this share
757 * to avoid repeatedly sending the unsupported fsctl to server
758 * if the file is repeatedly extended.
759 */
760 if (tcon->broken_sparse_sup)
761 return false;
762
763 rc = SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid,
764 cfile->fid.volatile_fid, FSCTL_SET_SPARSE,
765 true /* is_fctl */, &setsparse, 1, NULL, NULL);
766 if (rc) {
767 tcon->broken_sparse_sup = true;
768 cifs_dbg(FYI, "set sparse rc = %d\n", rc);
769 return false;
770 }
771
772 if (setsparse)
773 cifsi->cifsAttrs |= FILE_ATTRIBUTE_SPARSE_FILE;
774 else
775 cifsi->cifsAttrs &= (~FILE_ATTRIBUTE_SPARSE_FILE);
776
777 return true;
778}
779
734static int 780static int
735smb2_set_file_size(const unsigned int xid, struct cifs_tcon *tcon, 781smb2_set_file_size(const unsigned int xid, struct cifs_tcon *tcon,
736 struct cifsFileInfo *cfile, __u64 size, bool set_alloc) 782 struct cifsFileInfo *cfile, __u64 size, bool set_alloc)
737{ 783{
738 __le64 eof = cpu_to_le64(size); 784 __le64 eof = cpu_to_le64(size);
785 struct inode *inode;
786
787 /*
788 * If extending file more than one page make sparse. Many Linux fs
789 * make files sparse by default when extending via ftruncate
790 */
791 inode = cfile->dentry->d_inode;
792
793 if (!set_alloc && (size > inode->i_size + 8192)) {
794 __u8 set_sparse = 1;
795
796 /* whether set sparse succeeds or not, extend the file */
797 smb2_set_sparse(xid, tcon, cfile, inode, set_sparse);
798 }
799
739 return SMB2_set_eof(xid, tcon, cfile->fid.persistent_fid, 800 return SMB2_set_eof(xid, tcon, cfile->fid.persistent_fid,
740 cfile->fid.volatile_fid, cfile->pid, &eof, false); 801 cfile->fid.volatile_fid, cfile->pid, &eof, false);
741} 802}
@@ -954,6 +1015,105 @@ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon,
954 return rc; 1015 return rc;
955} 1016}
956 1017
1018static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon,
1019 loff_t offset, loff_t len, bool keep_size)
1020{
1021 struct inode *inode;
1022 struct cifsInodeInfo *cifsi;
1023 struct cifsFileInfo *cfile = file->private_data;
1024 struct file_zero_data_information fsctl_buf;
1025 long rc;
1026 unsigned int xid;
1027
1028 xid = get_xid();
1029
1030 inode = cfile->dentry->d_inode;
1031 cifsi = CIFS_I(inode);
1032
1033 /* if file not oplocked can't be sure whether asking to extend size */
1034 if (!CIFS_CACHE_READ(cifsi))
1035 if (keep_size == false)
1036 return -EOPNOTSUPP;
1037
1038 /*
1039 * Must check if file sparse since fallocate -z (zero range) assumes
1040 * non-sparse allocation
1041 */
1042 if (!(cifsi->cifsAttrs & FILE_ATTRIBUTE_SPARSE_FILE))
1043 return -EOPNOTSUPP;
1044
1045 /*
1046 * need to make sure we are not asked to extend the file since the SMB3
1047 * fsctl does not change the file size. In the future we could change
1048 * this to zero the first part of the range then set the file size
1049 * which for a non sparse file would zero the newly extended range
1050 */
1051 if (keep_size == false)
1052 if (i_size_read(inode) < offset + len)
1053 return -EOPNOTSUPP;
1054
1055 cifs_dbg(FYI, "offset %lld len %lld", offset, len);
1056
1057 fsctl_buf.FileOffset = cpu_to_le64(offset);
1058 fsctl_buf.BeyondFinalZero = cpu_to_le64(offset + len);
1059
1060 rc = SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid,
1061 cfile->fid.volatile_fid, FSCTL_SET_ZERO_DATA,
1062 true /* is_fctl */, (char *)&fsctl_buf,
1063 sizeof(struct file_zero_data_information), NULL, NULL);
1064 free_xid(xid);
1065 return rc;
1066}
1067
1068static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon,
1069 loff_t offset, loff_t len)
1070{
1071 struct inode *inode;
1072 struct cifsInodeInfo *cifsi;
1073 struct cifsFileInfo *cfile = file->private_data;
1074 struct file_zero_data_information fsctl_buf;
1075 long rc;
1076 unsigned int xid;
1077 __u8 set_sparse = 1;
1078
1079 xid = get_xid();
1080
1081 inode = cfile->dentry->d_inode;
1082 cifsi = CIFS_I(inode);
1083
1084 /* Need to make file sparse, if not already, before freeing range. */
1085 /* Consider adding equivalent for compressed since it could also work */
1086 if (!smb2_set_sparse(xid, tcon, cfile, inode, set_sparse))
1087 return -EOPNOTSUPP;
1088
1089 cifs_dbg(FYI, "offset %lld len %lld", offset, len);
1090
1091 fsctl_buf.FileOffset = cpu_to_le64(offset);
1092 fsctl_buf.BeyondFinalZero = cpu_to_le64(offset + len);
1093
1094 rc = SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid,
1095 cfile->fid.volatile_fid, FSCTL_SET_ZERO_DATA,
1096 true /* is_fctl */, (char *)&fsctl_buf,
1097 sizeof(struct file_zero_data_information), NULL, NULL);
1098 free_xid(xid);
1099 return rc;
1100}
1101
1102static long smb3_fallocate(struct file *file, struct cifs_tcon *tcon, int mode,
1103 loff_t off, loff_t len)
1104{
1105 /* KEEP_SIZE already checked for by do_fallocate */
1106 if (mode & FALLOC_FL_PUNCH_HOLE)
1107 return smb3_punch_hole(file, tcon, off, len);
1108 else if (mode & FALLOC_FL_ZERO_RANGE) {
1109 if (mode & FALLOC_FL_KEEP_SIZE)
1110 return smb3_zero_range(file, tcon, off, len, true);
1111 return smb3_zero_range(file, tcon, off, len, false);
1112 }
1113
1114 return -EOPNOTSUPP;
1115}
1116
957static void 1117static void
958smb2_downgrade_oplock(struct TCP_Server_Info *server, 1118smb2_downgrade_oplock(struct TCP_Server_Info *server,
959 struct cifsInodeInfo *cinode, bool set_level2) 1119 struct cifsInodeInfo *cinode, bool set_level2)
@@ -1161,6 +1321,12 @@ smb2_wp_retry_size(struct inode *inode)
1161 SMB2_MAX_BUFFER_SIZE); 1321 SMB2_MAX_BUFFER_SIZE);
1162} 1322}
1163 1323
1324static bool
1325smb2_dir_needs_close(struct cifsFileInfo *cfile)
1326{
1327 return !cfile->invalidHandle;
1328}
1329
1164struct smb_version_operations smb20_operations = { 1330struct smb_version_operations smb20_operations = {
1165 .compare_fids = smb2_compare_fids, 1331 .compare_fids = smb2_compare_fids,
1166 .setup_request = smb2_setup_request, 1332 .setup_request = smb2_setup_request,
@@ -1236,6 +1402,7 @@ struct smb_version_operations smb20_operations = {
1236 .parse_lease_buf = smb2_parse_lease_buf, 1402 .parse_lease_buf = smb2_parse_lease_buf,
1237 .clone_range = smb2_clone_range, 1403 .clone_range = smb2_clone_range,
1238 .wp_retry_size = smb2_wp_retry_size, 1404 .wp_retry_size = smb2_wp_retry_size,
1405 .dir_needs_close = smb2_dir_needs_close,
1239}; 1406};
1240 1407
1241struct smb_version_operations smb21_operations = { 1408struct smb_version_operations smb21_operations = {
@@ -1313,6 +1480,7 @@ struct smb_version_operations smb21_operations = {
1313 .parse_lease_buf = smb2_parse_lease_buf, 1480 .parse_lease_buf = smb2_parse_lease_buf,
1314 .clone_range = smb2_clone_range, 1481 .clone_range = smb2_clone_range,
1315 .wp_retry_size = smb2_wp_retry_size, 1482 .wp_retry_size = smb2_wp_retry_size,
1483 .dir_needs_close = smb2_dir_needs_close,
1316}; 1484};
1317 1485
1318struct smb_version_operations smb30_operations = { 1486struct smb_version_operations smb30_operations = {
@@ -1393,6 +1561,8 @@ struct smb_version_operations smb30_operations = {
1393 .clone_range = smb2_clone_range, 1561 .clone_range = smb2_clone_range,
1394 .validate_negotiate = smb3_validate_negotiate, 1562 .validate_negotiate = smb3_validate_negotiate,
1395 .wp_retry_size = smb2_wp_retry_size, 1563 .wp_retry_size = smb2_wp_retry_size,
1564 .dir_needs_close = smb2_dir_needs_close,
1565 .fallocate = smb3_fallocate,
1396}; 1566};
1397 1567
1398struct smb_version_values smb20_values = { 1568struct smb_version_values smb20_values = {
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 42ebc1a8be6c..74b3a6684383 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -530,7 +530,7 @@ SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses,
530 struct smb2_sess_setup_rsp *rsp = NULL; 530 struct smb2_sess_setup_rsp *rsp = NULL;
531 struct kvec iov[2]; 531 struct kvec iov[2];
532 int rc = 0; 532 int rc = 0;
533 int resp_buftype; 533 int resp_buftype = CIFS_NO_BUFFER;
534 __le32 phase = NtLmNegotiate; /* NTLMSSP, if needed, is multistage */ 534 __le32 phase = NtLmNegotiate; /* NTLMSSP, if needed, is multistage */
535 struct TCP_Server_Info *server = ses->server; 535 struct TCP_Server_Info *server = ses->server;
536 u16 blob_length = 0; 536 u16 blob_length = 0;
@@ -907,7 +907,8 @@ tcon_exit:
907tcon_error_exit: 907tcon_error_exit:
908 if (rsp->hdr.Status == STATUS_BAD_NETWORK_NAME) { 908 if (rsp->hdr.Status == STATUS_BAD_NETWORK_NAME) {
909 cifs_dbg(VFS, "BAD_NETWORK_NAME: %s\n", tree); 909 cifs_dbg(VFS, "BAD_NETWORK_NAME: %s\n", tree);
910 tcon->bad_network_name = true; 910 if (tcon)
911 tcon->bad_network_name = true;
911 } 912 }
912 goto tcon_exit; 913 goto tcon_exit;
913} 914}
@@ -1224,7 +1225,9 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
1224 1225
1225 cifs_dbg(FYI, "SMB2 IOCTL\n"); 1226 cifs_dbg(FYI, "SMB2 IOCTL\n");
1226 1227
1227 *out_data = NULL; 1228 if (out_data != NULL)
1229 *out_data = NULL;
1230
1228 /* zero out returned data len, in case of error */ 1231 /* zero out returned data len, in case of error */
1229 if (plen) 1232 if (plen)
1230 *plen = 0; 1233 *plen = 0;
@@ -1400,8 +1403,7 @@ SMB2_close(const unsigned int xid, struct cifs_tcon *tcon,
1400 rsp = (struct smb2_close_rsp *)iov[0].iov_base; 1403 rsp = (struct smb2_close_rsp *)iov[0].iov_base;
1401 1404
1402 if (rc != 0) { 1405 if (rc != 0) {
1403 if (tcon) 1406 cifs_stats_fail_inc(tcon, SMB2_CLOSE_HE);
1404 cifs_stats_fail_inc(tcon, SMB2_CLOSE_HE);
1405 goto close_exit; 1407 goto close_exit;
1406 } 1408 }
1407 1409
@@ -1530,7 +1532,7 @@ SMB2_query_info(const unsigned int xid, struct cifs_tcon *tcon,
1530{ 1532{
1531 return query_info(xid, tcon, persistent_fid, volatile_fid, 1533 return query_info(xid, tcon, persistent_fid, volatile_fid,
1532 FILE_ALL_INFORMATION, 1534 FILE_ALL_INFORMATION,
1533 sizeof(struct smb2_file_all_info) + MAX_NAME * 2, 1535 sizeof(struct smb2_file_all_info) + PATH_MAX * 2,
1534 sizeof(struct smb2_file_all_info), data); 1536 sizeof(struct smb2_file_all_info), data);
1535} 1537}
1536 1538
@@ -2177,6 +2179,10 @@ SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon,
2177 rsp = (struct smb2_query_directory_rsp *)iov[0].iov_base; 2179 rsp = (struct smb2_query_directory_rsp *)iov[0].iov_base;
2178 2180
2179 if (rc) { 2181 if (rc) {
2182 if (rc == -ENODATA && rsp->hdr.Status == STATUS_NO_MORE_FILES) {
2183 srch_inf->endOfSearch = true;
2184 rc = 0;
2185 }
2180 cifs_stats_fail_inc(tcon, SMB2_QUERY_DIRECTORY_HE); 2186 cifs_stats_fail_inc(tcon, SMB2_QUERY_DIRECTORY_HE);
2181 goto qdir_exit; 2187 goto qdir_exit;
2182 } 2188 }
@@ -2214,11 +2220,6 @@ SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon,
2214 else 2220 else
2215 cifs_dbg(VFS, "illegal search buffer type\n"); 2221 cifs_dbg(VFS, "illegal search buffer type\n");
2216 2222
2217 if (rsp->hdr.Status == STATUS_NO_MORE_FILES)
2218 srch_inf->endOfSearch = 1;
2219 else
2220 srch_inf->endOfSearch = 0;
2221
2222 return rc; 2223 return rc;
2223 2224
2224qdir_exit: 2225qdir_exit:
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index 69f3595d3952..fbe486c285a9 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -573,6 +573,12 @@ struct copychunk_ioctl {
573 __u32 Reserved2; 573 __u32 Reserved2;
574} __packed; 574} __packed;
575 575
576/* this goes in the ioctl buffer when doing FSCTL_SET_ZERO_DATA */
577struct file_zero_data_information {
578 __le64 FileOffset;
579 __le64 BeyondFinalZero;
580} __packed;
581
576struct copychunk_ioctl_rsp { 582struct copychunk_ioctl_rsp {
577 __le32 ChunksWritten; 583 __le32 ChunksWritten;
578 __le32 ChunkBytesWritten; 584 __le32 ChunkBytesWritten;
diff --git a/fs/cifs/smbfsctl.h b/fs/cifs/smbfsctl.h
index 0e538b5c9622..83efa59535be 100644
--- a/fs/cifs/smbfsctl.h
+++ b/fs/cifs/smbfsctl.h
@@ -63,7 +63,7 @@
63#define FSCTL_SET_OBJECT_ID_EXTENDED 0x000900BC /* BB add struct */ 63#define FSCTL_SET_OBJECT_ID_EXTENDED 0x000900BC /* BB add struct */
64#define FSCTL_CREATE_OR_GET_OBJECT_ID 0x000900C0 /* BB add struct */ 64#define FSCTL_CREATE_OR_GET_OBJECT_ID 0x000900C0 /* BB add struct */
65#define FSCTL_SET_SPARSE 0x000900C4 /* BB add struct */ 65#define FSCTL_SET_SPARSE 0x000900C4 /* BB add struct */
66#define FSCTL_SET_ZERO_DATA 0x000900C8 /* BB add struct */ 66#define FSCTL_SET_ZERO_DATA 0x000980C8
67#define FSCTL_SET_ENCRYPTION 0x000900D7 /* BB add struct */ 67#define FSCTL_SET_ENCRYPTION 0x000900D7 /* BB add struct */
68#define FSCTL_ENCRYPTION_FSCTL_IO 0x000900DB /* BB add struct */ 68#define FSCTL_ENCRYPTION_FSCTL_IO 0x000900DB /* BB add struct */
69#define FSCTL_WRITE_RAW_ENCRYPTED 0x000900DF /* BB add struct */ 69#define FSCTL_WRITE_RAW_ENCRYPTED 0x000900DF /* BB add struct */
diff --git a/fs/dcache.c b/fs/dcache.c
index d30ce699ae4b..7a5b51440afa 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -106,8 +106,7 @@ static inline struct hlist_bl_head *d_hash(const struct dentry *parent,
106 unsigned int hash) 106 unsigned int hash)
107{ 107{
108 hash += (unsigned long) parent / L1_CACHE_BYTES; 108 hash += (unsigned long) parent / L1_CACHE_BYTES;
109 hash = hash + (hash >> d_hash_shift); 109 return dentry_hashtable + hash_32(hash, d_hash_shift);
110 return dentry_hashtable + (hash & d_hash_mask);
111} 110}
112 111
113/* Statistics gathering. */ 112/* Statistics gathering. */
@@ -2656,6 +2655,12 @@ static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon)
2656 dentry->d_parent = dentry; 2655 dentry->d_parent = dentry;
2657 list_del_init(&dentry->d_u.d_child); 2656 list_del_init(&dentry->d_u.d_child);
2658 anon->d_parent = dparent; 2657 anon->d_parent = dparent;
2658 if (likely(!d_unhashed(anon))) {
2659 hlist_bl_lock(&anon->d_sb->s_anon);
2660 __hlist_bl_del(&anon->d_hash);
2661 anon->d_hash.pprev = NULL;
2662 hlist_bl_unlock(&anon->d_sb->s_anon);
2663 }
2659 list_move(&anon->d_u.d_child, &dparent->d_subdirs); 2664 list_move(&anon->d_u.d_child, &dparent->d_subdirs);
2660 2665
2661 write_seqcount_end(&dentry->d_seq); 2666 write_seqcount_end(&dentry->d_seq);
@@ -2714,7 +2719,6 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
2714 write_seqlock(&rename_lock); 2719 write_seqlock(&rename_lock);
2715 __d_materialise_dentry(dentry, new); 2720 __d_materialise_dentry(dentry, new);
2716 write_sequnlock(&rename_lock); 2721 write_sequnlock(&rename_lock);
2717 __d_drop(new);
2718 _d_rehash(new); 2722 _d_rehash(new);
2719 spin_unlock(&new->d_lock); 2723 spin_unlock(&new->d_lock);
2720 spin_unlock(&inode->i_lock); 2724 spin_unlock(&inode->i_lock);
@@ -2778,7 +2782,6 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
2778 * could splice into our tree? */ 2782 * could splice into our tree? */
2779 __d_materialise_dentry(dentry, alias); 2783 __d_materialise_dentry(dentry, alias);
2780 write_sequnlock(&rename_lock); 2784 write_sequnlock(&rename_lock);
2781 __d_drop(alias);
2782 goto found; 2785 goto found;
2783 } else { 2786 } else {
2784 /* Nope, but we must(!) avoid directory 2787 /* Nope, but we must(!) avoid directory
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index b10b48c2a7af..7bcfff900f05 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1852,7 +1852,8 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
1852 goto error_tgt_fput; 1852 goto error_tgt_fput;
1853 1853
1854 /* Check if EPOLLWAKEUP is allowed */ 1854 /* Check if EPOLLWAKEUP is allowed */
1855 ep_take_care_of_epollwakeup(&epds); 1855 if (ep_op_has_event(op))
1856 ep_take_care_of_epollwakeup(&epds);
1856 1857
1857 /* 1858 /*
1858 * We have to check that the file structure underneath the file descriptor 1859 * We have to check that the file structure underneath the file descriptor
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 08cdfe5461e3..622e88249024 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -2828,8 +2828,9 @@ static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf)
2828 */ 2828 */
2829 overhead += ngroups * (2 + sbi->s_itb_per_group); 2829 overhead += ngroups * (2 + sbi->s_itb_per_group);
2830 2830
2831 /* Add the journal blocks as well */ 2831 /* Add the internal journal blocks as well */
2832 overhead += sbi->s_journal->j_maxlen; 2832 if (sbi->s_journal && !sbi->journal_bdev)
2833 overhead += sbi->s_journal->j_maxlen;
2833 2834
2834 sbi->s_overhead_last = overhead; 2835 sbi->s_overhead_last = overhead;
2835 smp_wmb(); 2836 smp_wmb();
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 5b19760b1de5..b0c225cdb52c 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1825,7 +1825,7 @@ ext4_group_first_block_no(struct super_block *sb, ext4_group_t group_no)
1825/* 1825/*
1826 * Special error return code only used by dx_probe() and its callers. 1826 * Special error return code only used by dx_probe() and its callers.
1827 */ 1827 */
1828#define ERR_BAD_DX_DIR -75000 1828#define ERR_BAD_DX_DIR (-(MAX_ERRNO - 1))
1829 1829
1830/* 1830/*
1831 * Timeout and state flag for lazy initialization inode thread. 1831 * Timeout and state flag for lazy initialization inode thread.
@@ -2454,6 +2454,22 @@ static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize)
2454 up_write(&EXT4_I(inode)->i_data_sem); 2454 up_write(&EXT4_I(inode)->i_data_sem);
2455} 2455}
2456 2456
2457/* Update i_size, i_disksize. Requires i_mutex to avoid races with truncate */
2458static inline int ext4_update_inode_size(struct inode *inode, loff_t newsize)
2459{
2460 int changed = 0;
2461
2462 if (newsize > inode->i_size) {
2463 i_size_write(inode, newsize);
2464 changed = 1;
2465 }
2466 if (newsize > EXT4_I(inode)->i_disksize) {
2467 ext4_update_i_disksize(inode, newsize);
2468 changed |= 2;
2469 }
2470 return changed;
2471}
2472
2457struct ext4_group_info { 2473struct ext4_group_info {
2458 unsigned long bb_state; 2474 unsigned long bb_state;
2459 struct rb_root bb_free_root; 2475 struct rb_root bb_free_root;
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 76c2df382b7d..74292a71b384 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -4665,7 +4665,8 @@ retry:
4665} 4665}
4666 4666
4667static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset, 4667static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
4668 ext4_lblk_t len, int flags, int mode) 4668 ext4_lblk_t len, loff_t new_size,
4669 int flags, int mode)
4669{ 4670{
4670 struct inode *inode = file_inode(file); 4671 struct inode *inode = file_inode(file);
4671 handle_t *handle; 4672 handle_t *handle;
@@ -4674,8 +4675,10 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
4674 int retries = 0; 4675 int retries = 0;
4675 struct ext4_map_blocks map; 4676 struct ext4_map_blocks map;
4676 unsigned int credits; 4677 unsigned int credits;
4678 loff_t epos;
4677 4679
4678 map.m_lblk = offset; 4680 map.m_lblk = offset;
4681 map.m_len = len;
4679 /* 4682 /*
4680 * Don't normalize the request if it can fit in one extent so 4683 * Don't normalize the request if it can fit in one extent so
4681 * that it doesn't get unnecessarily split into multiple 4684 * that it doesn't get unnecessarily split into multiple
@@ -4690,9 +4693,7 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
4690 credits = ext4_chunk_trans_blocks(inode, len); 4693 credits = ext4_chunk_trans_blocks(inode, len);
4691 4694
4692retry: 4695retry:
4693 while (ret >= 0 && ret < len) { 4696 while (ret >= 0 && len) {
4694 map.m_lblk = map.m_lblk + ret;
4695 map.m_len = len = len - ret;
4696 handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, 4697 handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS,
4697 credits); 4698 credits);
4698 if (IS_ERR(handle)) { 4699 if (IS_ERR(handle)) {
@@ -4709,6 +4710,21 @@ retry:
4709 ret2 = ext4_journal_stop(handle); 4710 ret2 = ext4_journal_stop(handle);
4710 break; 4711 break;
4711 } 4712 }
4713 map.m_lblk += ret;
4714 map.m_len = len = len - ret;
4715 epos = (loff_t)map.m_lblk << inode->i_blkbits;
4716 inode->i_ctime = ext4_current_time(inode);
4717 if (new_size) {
4718 if (epos > new_size)
4719 epos = new_size;
4720 if (ext4_update_inode_size(inode, epos) & 0x1)
4721 inode->i_mtime = inode->i_ctime;
4722 } else {
4723 if (epos > inode->i_size)
4724 ext4_set_inode_flag(inode,
4725 EXT4_INODE_EOFBLOCKS);
4726 }
4727 ext4_mark_inode_dirty(handle, inode);
4712 ret2 = ext4_journal_stop(handle); 4728 ret2 = ext4_journal_stop(handle);
4713 if (ret2) 4729 if (ret2)
4714 break; 4730 break;
@@ -4731,7 +4747,8 @@ static long ext4_zero_range(struct file *file, loff_t offset,
4731 loff_t new_size = 0; 4747 loff_t new_size = 0;
4732 int ret = 0; 4748 int ret = 0;
4733 int flags; 4749 int flags;
4734 int partial; 4750 int credits;
4751 int partial_begin, partial_end;
4735 loff_t start, end; 4752 loff_t start, end;
4736 ext4_lblk_t lblk; 4753 ext4_lblk_t lblk;
4737 struct address_space *mapping = inode->i_mapping; 4754 struct address_space *mapping = inode->i_mapping;
@@ -4771,7 +4788,8 @@ static long ext4_zero_range(struct file *file, loff_t offset,
4771 4788
4772 if (start < offset || end > offset + len) 4789 if (start < offset || end > offset + len)
4773 return -EINVAL; 4790 return -EINVAL;
4774 partial = (offset + len) & ((1 << blkbits) - 1); 4791 partial_begin = offset & ((1 << blkbits) - 1);
4792 partial_end = (offset + len) & ((1 << blkbits) - 1);
4775 4793
4776 lblk = start >> blkbits; 4794 lblk = start >> blkbits;
4777 max_blocks = (end >> blkbits); 4795 max_blocks = (end >> blkbits);
@@ -4805,7 +4823,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
4805 * If we have a partial block after EOF we have to allocate 4823 * If we have a partial block after EOF we have to allocate
4806 * the entire block. 4824 * the entire block.
4807 */ 4825 */
4808 if (partial) 4826 if (partial_end)
4809 max_blocks += 1; 4827 max_blocks += 1;
4810 } 4828 }
4811 4829
@@ -4813,6 +4831,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
4813 4831
4814 /* Now release the pages and zero block aligned part of pages*/ 4832 /* Now release the pages and zero block aligned part of pages*/
4815 truncate_pagecache_range(inode, start, end - 1); 4833 truncate_pagecache_range(inode, start, end - 1);
4834 inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
4816 4835
4817 /* Wait all existing dio workers, newcomers will block on i_mutex */ 4836 /* Wait all existing dio workers, newcomers will block on i_mutex */
4818 ext4_inode_block_unlocked_dio(inode); 4837 ext4_inode_block_unlocked_dio(inode);
@@ -4825,13 +4844,22 @@ static long ext4_zero_range(struct file *file, loff_t offset,
4825 if (ret) 4844 if (ret)
4826 goto out_dio; 4845 goto out_dio;
4827 4846
4828 ret = ext4_alloc_file_blocks(file, lblk, max_blocks, flags, 4847 ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
4829 mode); 4848 flags, mode);
4830 if (ret) 4849 if (ret)
4831 goto out_dio; 4850 goto out_dio;
4832 } 4851 }
4852 if (!partial_begin && !partial_end)
4853 goto out_dio;
4833 4854
4834 handle = ext4_journal_start(inode, EXT4_HT_MISC, 4); 4855 /*
4856 * In worst case we have to writeout two nonadjacent unwritten
4857 * blocks and update the inode
4858 */
4859 credits = (2 * ext4_ext_index_trans_blocks(inode, 2)) + 1;
4860 if (ext4_should_journal_data(inode))
4861 credits += 2;
4862 handle = ext4_journal_start(inode, EXT4_HT_MISC, credits);
4835 if (IS_ERR(handle)) { 4863 if (IS_ERR(handle)) {
4836 ret = PTR_ERR(handle); 4864 ret = PTR_ERR(handle);
4837 ext4_std_error(inode->i_sb, ret); 4865 ext4_std_error(inode->i_sb, ret);
@@ -4839,12 +4867,8 @@ static long ext4_zero_range(struct file *file, loff_t offset,
4839 } 4867 }
4840 4868
4841 inode->i_mtime = inode->i_ctime = ext4_current_time(inode); 4869 inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
4842
4843 if (new_size) { 4870 if (new_size) {
4844 if (new_size > i_size_read(inode)) 4871 ext4_update_inode_size(inode, new_size);
4845 i_size_write(inode, new_size);
4846 if (new_size > EXT4_I(inode)->i_disksize)
4847 ext4_update_i_disksize(inode, new_size);
4848 } else { 4872 } else {
4849 /* 4873 /*
4850 * Mark that we allocate beyond EOF so the subsequent truncate 4874 * Mark that we allocate beyond EOF so the subsequent truncate
@@ -4853,7 +4877,6 @@ static long ext4_zero_range(struct file *file, loff_t offset,
4853 if ((offset + len) > i_size_read(inode)) 4877 if ((offset + len) > i_size_read(inode))
4854 ext4_set_inode_flag(inode, EXT4_INODE_EOFBLOCKS); 4878 ext4_set_inode_flag(inode, EXT4_INODE_EOFBLOCKS);
4855 } 4879 }
4856
4857 ext4_mark_inode_dirty(handle, inode); 4880 ext4_mark_inode_dirty(handle, inode);
4858 4881
4859 /* Zero out partial block at the edges of the range */ 4882 /* Zero out partial block at the edges of the range */
@@ -4880,13 +4903,11 @@ out_mutex:
4880long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) 4903long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
4881{ 4904{
4882 struct inode *inode = file_inode(file); 4905 struct inode *inode = file_inode(file);
4883 handle_t *handle;
4884 loff_t new_size = 0; 4906 loff_t new_size = 0;
4885 unsigned int max_blocks; 4907 unsigned int max_blocks;
4886 int ret = 0; 4908 int ret = 0;
4887 int flags; 4909 int flags;
4888 ext4_lblk_t lblk; 4910 ext4_lblk_t lblk;
4889 struct timespec tv;
4890 unsigned int blkbits = inode->i_blkbits; 4911 unsigned int blkbits = inode->i_blkbits;
4891 4912
4892 /* Return error if mode is not supported */ 4913 /* Return error if mode is not supported */
@@ -4937,36 +4958,15 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
4937 goto out; 4958 goto out;
4938 } 4959 }
4939 4960
4940 ret = ext4_alloc_file_blocks(file, lblk, max_blocks, flags, mode); 4961 ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
4962 flags, mode);
4941 if (ret) 4963 if (ret)
4942 goto out; 4964 goto out;
4943 4965
4944 handle = ext4_journal_start(inode, EXT4_HT_INODE, 2); 4966 if (file->f_flags & O_SYNC && EXT4_SB(inode->i_sb)->s_journal) {
4945 if (IS_ERR(handle)) 4967 ret = jbd2_complete_transaction(EXT4_SB(inode->i_sb)->s_journal,
4946 goto out; 4968 EXT4_I(inode)->i_sync_tid);
4947
4948 tv = inode->i_ctime = ext4_current_time(inode);
4949
4950 if (new_size) {
4951 if (new_size > i_size_read(inode)) {
4952 i_size_write(inode, new_size);
4953 inode->i_mtime = tv;
4954 }
4955 if (new_size > EXT4_I(inode)->i_disksize)
4956 ext4_update_i_disksize(inode, new_size);
4957 } else {
4958 /*
4959 * Mark that we allocate beyond EOF so the subsequent truncate
4960 * can proceed even if the new size is the same as i_size.
4961 */
4962 if ((offset + len) > i_size_read(inode))
4963 ext4_set_inode_flag(inode, EXT4_INODE_EOFBLOCKS);
4964 } 4969 }
4965 ext4_mark_inode_dirty(handle, inode);
4966 if (file->f_flags & O_SYNC)
4967 ext4_handle_sync(handle);
4968
4969 ext4_journal_stop(handle);
4970out: 4970out:
4971 mutex_unlock(&inode->i_mutex); 4971 mutex_unlock(&inode->i_mutex);
4972 trace_ext4_fallocate_exit(inode, offset, max_blocks, ret); 4972 trace_ext4_fallocate_exit(inode, offset, max_blocks, ret);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 367a60c07cf0..3aa26e9117c4 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1055,27 +1055,11 @@ static int ext4_write_end(struct file *file,
1055 } else 1055 } else
1056 copied = block_write_end(file, mapping, pos, 1056 copied = block_write_end(file, mapping, pos,
1057 len, copied, page, fsdata); 1057 len, copied, page, fsdata);
1058
1059 /* 1058 /*
1060 * No need to use i_size_read() here, the i_size 1059 * it's important to update i_size while still holding page lock:
1061 * cannot change under us because we hole i_mutex.
1062 *
1063 * But it's important to update i_size while still holding page lock:
1064 * page writeout could otherwise come in and zero beyond i_size. 1060 * page writeout could otherwise come in and zero beyond i_size.
1065 */ 1061 */
1066 if (pos + copied > inode->i_size) { 1062 i_size_changed = ext4_update_inode_size(inode, pos + copied);
1067 i_size_write(inode, pos + copied);
1068 i_size_changed = 1;
1069 }
1070
1071 if (pos + copied > EXT4_I(inode)->i_disksize) {
1072 /* We need to mark inode dirty even if
1073 * new_i_size is less that inode->i_size
1074 * but greater than i_disksize. (hint delalloc)
1075 */
1076 ext4_update_i_disksize(inode, (pos + copied));
1077 i_size_changed = 1;
1078 }
1079 unlock_page(page); 1063 unlock_page(page);
1080 page_cache_release(page); 1064 page_cache_release(page);
1081 1065
@@ -1123,7 +1107,7 @@ static int ext4_journalled_write_end(struct file *file,
1123 int ret = 0, ret2; 1107 int ret = 0, ret2;
1124 int partial = 0; 1108 int partial = 0;
1125 unsigned from, to; 1109 unsigned from, to;
1126 loff_t new_i_size; 1110 int size_changed = 0;
1127 1111
1128 trace_ext4_journalled_write_end(inode, pos, len, copied); 1112 trace_ext4_journalled_write_end(inode, pos, len, copied);
1129 from = pos & (PAGE_CACHE_SIZE - 1); 1113 from = pos & (PAGE_CACHE_SIZE - 1);
@@ -1146,20 +1130,18 @@ static int ext4_journalled_write_end(struct file *file,
1146 if (!partial) 1130 if (!partial)
1147 SetPageUptodate(page); 1131 SetPageUptodate(page);
1148 } 1132 }
1149 new_i_size = pos + copied; 1133 size_changed = ext4_update_inode_size(inode, pos + copied);
1150 if (new_i_size > inode->i_size)
1151 i_size_write(inode, pos+copied);
1152 ext4_set_inode_state(inode, EXT4_STATE_JDATA); 1134 ext4_set_inode_state(inode, EXT4_STATE_JDATA);
1153 EXT4_I(inode)->i_datasync_tid = handle->h_transaction->t_tid; 1135 EXT4_I(inode)->i_datasync_tid = handle->h_transaction->t_tid;
1154 if (new_i_size > EXT4_I(inode)->i_disksize) { 1136 unlock_page(page);
1155 ext4_update_i_disksize(inode, new_i_size); 1137 page_cache_release(page);
1138
1139 if (size_changed) {
1156 ret2 = ext4_mark_inode_dirty(handle, inode); 1140 ret2 = ext4_mark_inode_dirty(handle, inode);
1157 if (!ret) 1141 if (!ret)
1158 ret = ret2; 1142 ret = ret2;
1159 } 1143 }
1160 1144
1161 unlock_page(page);
1162 page_cache_release(page);
1163 if (pos + len > inode->i_size && ext4_can_truncate(inode)) 1145 if (pos + len > inode->i_size && ext4_can_truncate(inode))
1164 /* if we have allocated more blocks and copied 1146 /* if we have allocated more blocks and copied
1165 * less. We will have blocks allocated outside 1147 * less. We will have blocks allocated outside
@@ -2095,6 +2077,7 @@ static int mpage_map_and_submit_extent(handle_t *handle,
2095 struct ext4_map_blocks *map = &mpd->map; 2077 struct ext4_map_blocks *map = &mpd->map;
2096 int err; 2078 int err;
2097 loff_t disksize; 2079 loff_t disksize;
2080 int progress = 0;
2098 2081
2099 mpd->io_submit.io_end->offset = 2082 mpd->io_submit.io_end->offset =
2100 ((loff_t)map->m_lblk) << inode->i_blkbits; 2083 ((loff_t)map->m_lblk) << inode->i_blkbits;
@@ -2111,8 +2094,11 @@ static int mpage_map_and_submit_extent(handle_t *handle,
2111 * is non-zero, a commit should free up blocks. 2094 * is non-zero, a commit should free up blocks.
2112 */ 2095 */
2113 if ((err == -ENOMEM) || 2096 if ((err == -ENOMEM) ||
2114 (err == -ENOSPC && ext4_count_free_clusters(sb))) 2097 (err == -ENOSPC && ext4_count_free_clusters(sb))) {
2098 if (progress)
2099 goto update_disksize;
2115 return err; 2100 return err;
2101 }
2116 ext4_msg(sb, KERN_CRIT, 2102 ext4_msg(sb, KERN_CRIT,
2117 "Delayed block allocation failed for " 2103 "Delayed block allocation failed for "
2118 "inode %lu at logical offset %llu with" 2104 "inode %lu at logical offset %llu with"
@@ -2129,15 +2115,17 @@ static int mpage_map_and_submit_extent(handle_t *handle,
2129 *give_up_on_write = true; 2115 *give_up_on_write = true;
2130 return err; 2116 return err;
2131 } 2117 }
2118 progress = 1;
2132 /* 2119 /*
2133 * Update buffer state, submit mapped pages, and get us new 2120 * Update buffer state, submit mapped pages, and get us new
2134 * extent to map 2121 * extent to map
2135 */ 2122 */
2136 err = mpage_map_and_submit_buffers(mpd); 2123 err = mpage_map_and_submit_buffers(mpd);
2137 if (err < 0) 2124 if (err < 0)
2138 return err; 2125 goto update_disksize;
2139 } while (map->m_len); 2126 } while (map->m_len);
2140 2127
2128update_disksize:
2141 /* 2129 /*
2142 * Update on-disk size after IO is submitted. Races with 2130 * Update on-disk size after IO is submitted. Races with
2143 * truncate are avoided by checking i_size under i_data_sem. 2131 * truncate are avoided by checking i_size under i_data_sem.
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 956027711faf..8b0f9ef517d6 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -1412,6 +1412,8 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
1412 int last = first + count - 1; 1412 int last = first + count - 1;
1413 struct super_block *sb = e4b->bd_sb; 1413 struct super_block *sb = e4b->bd_sb;
1414 1414
1415 if (WARN_ON(count == 0))
1416 return;
1415 BUG_ON(last >= (sb->s_blocksize << 3)); 1417 BUG_ON(last >= (sb->s_blocksize << 3));
1416 assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group)); 1418 assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group));
1417 /* Don't bother if the block group is corrupt. */ 1419 /* Don't bother if the block group is corrupt. */
@@ -3221,6 +3223,8 @@ static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac)
3221 int err; 3223 int err;
3222 3224
3223 if (pa == NULL) { 3225 if (pa == NULL) {
3226 if (ac->ac_f_ex.fe_len == 0)
3227 return;
3224 err = ext4_mb_load_buddy(ac->ac_sb, ac->ac_f_ex.fe_group, &e4b); 3228 err = ext4_mb_load_buddy(ac->ac_sb, ac->ac_f_ex.fe_group, &e4b);
3225 if (err) { 3229 if (err) {
3226 /* 3230 /*
@@ -3235,6 +3239,7 @@ static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac)
3235 mb_free_blocks(ac->ac_inode, &e4b, ac->ac_f_ex.fe_start, 3239 mb_free_blocks(ac->ac_inode, &e4b, ac->ac_f_ex.fe_start,
3236 ac->ac_f_ex.fe_len); 3240 ac->ac_f_ex.fe_len);
3237 ext4_unlock_group(ac->ac_sb, ac->ac_f_ex.fe_group); 3241 ext4_unlock_group(ac->ac_sb, ac->ac_f_ex.fe_group);
3242 ext4_mb_unload_buddy(&e4b);
3238 return; 3243 return;
3239 } 3244 }
3240 if (pa->pa_type == MB_INODE_PA) 3245 if (pa->pa_type == MB_INODE_PA)
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index b147a67baa0d..603e4ebbd0ac 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1227,7 +1227,7 @@ static struct buffer_head * ext4_find_entry (struct inode *dir,
1227 buffer */ 1227 buffer */
1228 int num = 0; 1228 int num = 0;
1229 ext4_lblk_t nblocks; 1229 ext4_lblk_t nblocks;
1230 int i, err; 1230 int i, err = 0;
1231 int namelen; 1231 int namelen;
1232 1232
1233 *res_dir = NULL; 1233 *res_dir = NULL;
@@ -1264,7 +1264,11 @@ static struct buffer_head * ext4_find_entry (struct inode *dir,
1264 * return. Otherwise, fall back to doing a search the 1264 * return. Otherwise, fall back to doing a search the
1265 * old fashioned way. 1265 * old fashioned way.
1266 */ 1266 */
1267 if (bh || (err != ERR_BAD_DX_DIR)) 1267 if (err == -ENOENT)
1268 return NULL;
1269 if (err && err != ERR_BAD_DX_DIR)
1270 return ERR_PTR(err);
1271 if (bh)
1268 return bh; 1272 return bh;
1269 dxtrace(printk(KERN_DEBUG "ext4_find_entry: dx failed, " 1273 dxtrace(printk(KERN_DEBUG "ext4_find_entry: dx failed, "
1270 "falling back\n")); 1274 "falling back\n"));
@@ -1295,6 +1299,11 @@ restart:
1295 } 1299 }
1296 num++; 1300 num++;
1297 bh = ext4_getblk(NULL, dir, b++, 0, &err); 1301 bh = ext4_getblk(NULL, dir, b++, 0, &err);
1302 if (unlikely(err)) {
1303 if (ra_max == 0)
1304 return ERR_PTR(err);
1305 break;
1306 }
1298 bh_use[ra_max] = bh; 1307 bh_use[ra_max] = bh;
1299 if (bh) 1308 if (bh)
1300 ll_rw_block(READ | REQ_META | REQ_PRIO, 1309 ll_rw_block(READ | REQ_META | REQ_PRIO,
@@ -1417,6 +1426,8 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi
1417 return ERR_PTR(-ENAMETOOLONG); 1426 return ERR_PTR(-ENAMETOOLONG);
1418 1427
1419 bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL); 1428 bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL);
1429 if (IS_ERR(bh))
1430 return (struct dentry *) bh;
1420 inode = NULL; 1431 inode = NULL;
1421 if (bh) { 1432 if (bh) {
1422 __u32 ino = le32_to_cpu(de->inode); 1433 __u32 ino = le32_to_cpu(de->inode);
@@ -1450,6 +1461,8 @@ struct dentry *ext4_get_parent(struct dentry *child)
1450 struct buffer_head *bh; 1461 struct buffer_head *bh;
1451 1462
1452 bh = ext4_find_entry(child->d_inode, &dotdot, &de, NULL); 1463 bh = ext4_find_entry(child->d_inode, &dotdot, &de, NULL);
1464 if (IS_ERR(bh))
1465 return (struct dentry *) bh;
1453 if (!bh) 1466 if (!bh)
1454 return ERR_PTR(-ENOENT); 1467 return ERR_PTR(-ENOENT);
1455 ino = le32_to_cpu(de->inode); 1468 ino = le32_to_cpu(de->inode);
@@ -2727,6 +2740,8 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry)
2727 2740
2728 retval = -ENOENT; 2741 retval = -ENOENT;
2729 bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL); 2742 bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL);
2743 if (IS_ERR(bh))
2744 return PTR_ERR(bh);
2730 if (!bh) 2745 if (!bh)
2731 goto end_rmdir; 2746 goto end_rmdir;
2732 2747
@@ -2794,6 +2809,8 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
2794 2809
2795 retval = -ENOENT; 2810 retval = -ENOENT;
2796 bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL); 2811 bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL);
2812 if (IS_ERR(bh))
2813 return PTR_ERR(bh);
2797 if (!bh) 2814 if (!bh)
2798 goto end_unlink; 2815 goto end_unlink;
2799 2816
@@ -3121,6 +3138,8 @@ static int ext4_find_delete_entry(handle_t *handle, struct inode *dir,
3121 struct ext4_dir_entry_2 *de; 3138 struct ext4_dir_entry_2 *de;
3122 3139
3123 bh = ext4_find_entry(dir, d_name, &de, NULL); 3140 bh = ext4_find_entry(dir, d_name, &de, NULL);
3141 if (IS_ERR(bh))
3142 return PTR_ERR(bh);
3124 if (bh) { 3143 if (bh) {
3125 retval = ext4_delete_entry(handle, dir, de, bh); 3144 retval = ext4_delete_entry(handle, dir, de, bh);
3126 brelse(bh); 3145 brelse(bh);
@@ -3128,7 +3147,8 @@ static int ext4_find_delete_entry(handle_t *handle, struct inode *dir,
3128 return retval; 3147 return retval;
3129} 3148}
3130 3149
3131static void ext4_rename_delete(handle_t *handle, struct ext4_renament *ent) 3150static void ext4_rename_delete(handle_t *handle, struct ext4_renament *ent,
3151 int force_reread)
3132{ 3152{
3133 int retval; 3153 int retval;
3134 /* 3154 /*
@@ -3140,7 +3160,8 @@ static void ext4_rename_delete(handle_t *handle, struct ext4_renament *ent)
3140 if (le32_to_cpu(ent->de->inode) != ent->inode->i_ino || 3160 if (le32_to_cpu(ent->de->inode) != ent->inode->i_ino ||
3141 ent->de->name_len != ent->dentry->d_name.len || 3161 ent->de->name_len != ent->dentry->d_name.len ||
3142 strncmp(ent->de->name, ent->dentry->d_name.name, 3162 strncmp(ent->de->name, ent->dentry->d_name.name,
3143 ent->de->name_len)) { 3163 ent->de->name_len) ||
3164 force_reread) {
3144 retval = ext4_find_delete_entry(handle, ent->dir, 3165 retval = ext4_find_delete_entry(handle, ent->dir,
3145 &ent->dentry->d_name); 3166 &ent->dentry->d_name);
3146 } else { 3167 } else {
@@ -3191,6 +3212,7 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
3191 .dentry = new_dentry, 3212 .dentry = new_dentry,
3192 .inode = new_dentry->d_inode, 3213 .inode = new_dentry->d_inode,
3193 }; 3214 };
3215 int force_reread;
3194 int retval; 3216 int retval;
3195 3217
3196 dquot_initialize(old.dir); 3218 dquot_initialize(old.dir);
@@ -3202,6 +3224,8 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
3202 dquot_initialize(new.inode); 3224 dquot_initialize(new.inode);
3203 3225
3204 old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de, NULL); 3226 old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de, NULL);
3227 if (IS_ERR(old.bh))
3228 return PTR_ERR(old.bh);
3205 /* 3229 /*
3206 * Check for inode number is _not_ due to possible IO errors. 3230 * Check for inode number is _not_ due to possible IO errors.
3207 * We might rmdir the source, keep it as pwd of some process 3231 * We might rmdir the source, keep it as pwd of some process
@@ -3214,6 +3238,11 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
3214 3238
3215 new.bh = ext4_find_entry(new.dir, &new.dentry->d_name, 3239 new.bh = ext4_find_entry(new.dir, &new.dentry->d_name,
3216 &new.de, &new.inlined); 3240 &new.de, &new.inlined);
3241 if (IS_ERR(new.bh)) {
3242 retval = PTR_ERR(new.bh);
3243 new.bh = NULL;
3244 goto end_rename;
3245 }
3217 if (new.bh) { 3246 if (new.bh) {
3218 if (!new.inode) { 3247 if (!new.inode) {
3219 brelse(new.bh); 3248 brelse(new.bh);
@@ -3246,6 +3275,15 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
3246 if (retval) 3275 if (retval)
3247 goto end_rename; 3276 goto end_rename;
3248 } 3277 }
3278 /*
3279 * If we're renaming a file within an inline_data dir and adding or
3280 * setting the new dirent causes a conversion from inline_data to
3281 * extents/blockmap, we need to force the dirent delete code to
3282 * re-read the directory, or else we end up trying to delete a dirent
3283 * from what is now the extent tree root (or a block map).
3284 */
3285 force_reread = (new.dir->i_ino == old.dir->i_ino &&
3286 ext4_test_inode_flag(new.dir, EXT4_INODE_INLINE_DATA));
3249 if (!new.bh) { 3287 if (!new.bh) {
3250 retval = ext4_add_entry(handle, new.dentry, old.inode); 3288 retval = ext4_add_entry(handle, new.dentry, old.inode);
3251 if (retval) 3289 if (retval)
@@ -3256,6 +3294,9 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
3256 if (retval) 3294 if (retval)
3257 goto end_rename; 3295 goto end_rename;
3258 } 3296 }
3297 if (force_reread)
3298 force_reread = !ext4_test_inode_flag(new.dir,
3299 EXT4_INODE_INLINE_DATA);
3259 3300
3260 /* 3301 /*
3261 * Like most other Unix systems, set the ctime for inodes on a 3302 * Like most other Unix systems, set the ctime for inodes on a
@@ -3267,7 +3308,7 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
3267 /* 3308 /*
3268 * ok, that's it 3309 * ok, that's it
3269 */ 3310 */
3270 ext4_rename_delete(handle, &old); 3311 ext4_rename_delete(handle, &old, force_reread);
3271 3312
3272 if (new.inode) { 3313 if (new.inode) {
3273 ext4_dec_count(handle, new.inode); 3314 ext4_dec_count(handle, new.inode);
@@ -3330,6 +3371,8 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
3330 3371
3331 old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, 3372 old.bh = ext4_find_entry(old.dir, &old.dentry->d_name,
3332 &old.de, &old.inlined); 3373 &old.de, &old.inlined);
3374 if (IS_ERR(old.bh))
3375 return PTR_ERR(old.bh);
3333 /* 3376 /*
3334 * Check for inode number is _not_ due to possible IO errors. 3377 * Check for inode number is _not_ due to possible IO errors.
3335 * We might rmdir the source, keep it as pwd of some process 3378 * We might rmdir the source, keep it as pwd of some process
@@ -3342,6 +3385,11 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
3342 3385
3343 new.bh = ext4_find_entry(new.dir, &new.dentry->d_name, 3386 new.bh = ext4_find_entry(new.dir, &new.dentry->d_name,
3344 &new.de, &new.inlined); 3387 &new.de, &new.inlined);
3388 if (IS_ERR(new.bh)) {
3389 retval = PTR_ERR(new.bh);
3390 new.bh = NULL;
3391 goto end_rename;
3392 }
3345 3393
3346 /* RENAME_EXCHANGE case: old *and* new must both exist */ 3394 /* RENAME_EXCHANGE case: old *and* new must both exist */
3347 if (!new.bh || le32_to_cpu(new.de->inode) != new.inode->i_ino) 3395 if (!new.bh || le32_to_cpu(new.de->inode) != new.inode->i_ino)
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index bb0e80f03e2e..1e43b905ff98 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -575,6 +575,7 @@ handle_bb:
575 bh = bclean(handle, sb, block); 575 bh = bclean(handle, sb, block);
576 if (IS_ERR(bh)) { 576 if (IS_ERR(bh)) {
577 err = PTR_ERR(bh); 577 err = PTR_ERR(bh);
578 bh = NULL;
578 goto out; 579 goto out;
579 } 580 }
580 overhead = ext4_group_overhead_blocks(sb, group); 581 overhead = ext4_group_overhead_blocks(sb, group);
@@ -603,6 +604,7 @@ handle_ib:
603 bh = bclean(handle, sb, block); 604 bh = bclean(handle, sb, block);
604 if (IS_ERR(bh)) { 605 if (IS_ERR(bh)) {
605 err = PTR_ERR(bh); 606 err = PTR_ERR(bh);
607 bh = NULL;
606 goto out; 608 goto out;
607 } 609 }
608 610
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 32b43ad154b9..0b28b36e7915 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -3181,9 +3181,9 @@ static int set_journal_csum_feature_set(struct super_block *sb)
3181 3181
3182 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, 3182 if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
3183 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) { 3183 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) {
3184 /* journal checksum v2 */ 3184 /* journal checksum v3 */
3185 compat = 0; 3185 compat = 0;
3186 incompat = JBD2_FEATURE_INCOMPAT_CSUM_V2; 3186 incompat = JBD2_FEATURE_INCOMPAT_CSUM_V3;
3187 } else { 3187 } else {
3188 /* journal checksum v1 */ 3188 /* journal checksum v1 */
3189 compat = JBD2_FEATURE_COMPAT_CHECKSUM; 3189 compat = JBD2_FEATURE_COMPAT_CHECKSUM;
@@ -3205,6 +3205,7 @@ static int set_journal_csum_feature_set(struct super_block *sb)
3205 jbd2_journal_clear_features(sbi->s_journal, 3205 jbd2_journal_clear_features(sbi->s_journal,
3206 JBD2_FEATURE_COMPAT_CHECKSUM, 0, 3206 JBD2_FEATURE_COMPAT_CHECKSUM, 0,
3207 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT | 3207 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT |
3208 JBD2_FEATURE_INCOMPAT_CSUM_V3 |
3208 JBD2_FEATURE_INCOMPAT_CSUM_V2); 3209 JBD2_FEATURE_INCOMPAT_CSUM_V2);
3209 } 3210 }
3210 3211
diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig
index 214fe1054fce..736a348509f7 100644
--- a/fs/f2fs/Kconfig
+++ b/fs/f2fs/Kconfig
@@ -23,7 +23,7 @@ config F2FS_STAT_FS
23 mounted as f2fs. Each file shows the whole f2fs information. 23 mounted as f2fs. Each file shows the whole f2fs information.
24 24
25 /sys/kernel/debug/f2fs/status includes: 25 /sys/kernel/debug/f2fs/status includes:
26 - major file system information managed by f2fs currently 26 - major filesystem information managed by f2fs currently
27 - average SIT information about whole segments 27 - average SIT information about whole segments
28 - current memory footprint consumed by f2fs. 28 - current memory footprint consumed by f2fs.
29 29
@@ -68,6 +68,6 @@ config F2FS_CHECK_FS
68 bool "F2FS consistency checking feature" 68 bool "F2FS consistency checking feature"
69 depends on F2FS_FS 69 depends on F2FS_FS
70 help 70 help
71 Enables BUG_ONs which check the file system consistency in runtime. 71 Enables BUG_ONs which check the filesystem consistency in runtime.
72 72
73 If you want to improve the performance, say N. 73 If you want to improve the performance, say N.
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 6aeed5bada52..ec3b7a5381fa 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -160,14 +160,11 @@ static int f2fs_write_meta_page(struct page *page,
160 goto redirty_out; 160 goto redirty_out;
161 if (wbc->for_reclaim) 161 if (wbc->for_reclaim)
162 goto redirty_out; 162 goto redirty_out;
163 163 if (unlikely(f2fs_cp_error(sbi)))
164 /* Should not write any meta pages, if any IO error was occurred */ 164 goto redirty_out;
165 if (unlikely(is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ERROR_FLAG)))
166 goto no_write;
167 165
168 f2fs_wait_on_page_writeback(page, META); 166 f2fs_wait_on_page_writeback(page, META);
169 write_meta_page(sbi, page); 167 write_meta_page(sbi, page);
170no_write:
171 dec_page_count(sbi, F2FS_DIRTY_META); 168 dec_page_count(sbi, F2FS_DIRTY_META);
172 unlock_page(page); 169 unlock_page(page);
173 return 0; 170 return 0;
@@ -348,7 +345,7 @@ bool exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode)
348 return e ? true : false; 345 return e ? true : false;
349} 346}
350 347
351static void release_dirty_inode(struct f2fs_sb_info *sbi) 348void release_dirty_inode(struct f2fs_sb_info *sbi)
352{ 349{
353 struct ino_entry *e, *tmp; 350 struct ino_entry *e, *tmp;
354 int i; 351 int i;
@@ -446,8 +443,8 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
446 struct f2fs_orphan_block *orphan_blk = NULL; 443 struct f2fs_orphan_block *orphan_blk = NULL;
447 unsigned int nentries = 0; 444 unsigned int nentries = 0;
448 unsigned short index; 445 unsigned short index;
449 unsigned short orphan_blocks = (unsigned short)((sbi->n_orphans + 446 unsigned short orphan_blocks =
450 (F2FS_ORPHANS_PER_BLOCK - 1)) / F2FS_ORPHANS_PER_BLOCK); 447 (unsigned short)GET_ORPHAN_BLOCKS(sbi->n_orphans);
451 struct page *page = NULL; 448 struct page *page = NULL;
452 struct ino_entry *orphan = NULL; 449 struct ino_entry *orphan = NULL;
453 450
@@ -737,7 +734,7 @@ retry:
737/* 734/*
738 * Freeze all the FS-operations for checkpoint. 735 * Freeze all the FS-operations for checkpoint.
739 */ 736 */
740static void block_operations(struct f2fs_sb_info *sbi) 737static int block_operations(struct f2fs_sb_info *sbi)
741{ 738{
742 struct writeback_control wbc = { 739 struct writeback_control wbc = {
743 .sync_mode = WB_SYNC_ALL, 740 .sync_mode = WB_SYNC_ALL,
@@ -745,6 +742,7 @@ static void block_operations(struct f2fs_sb_info *sbi)
745 .for_reclaim = 0, 742 .for_reclaim = 0,
746 }; 743 };
747 struct blk_plug plug; 744 struct blk_plug plug;
745 int err = 0;
748 746
749 blk_start_plug(&plug); 747 blk_start_plug(&plug);
750 748
@@ -754,11 +752,15 @@ retry_flush_dents:
754 if (get_pages(sbi, F2FS_DIRTY_DENTS)) { 752 if (get_pages(sbi, F2FS_DIRTY_DENTS)) {
755 f2fs_unlock_all(sbi); 753 f2fs_unlock_all(sbi);
756 sync_dirty_dir_inodes(sbi); 754 sync_dirty_dir_inodes(sbi);
755 if (unlikely(f2fs_cp_error(sbi))) {
756 err = -EIO;
757 goto out;
758 }
757 goto retry_flush_dents; 759 goto retry_flush_dents;
758 } 760 }
759 761
760 /* 762 /*
761 * POR: we should ensure that there is no dirty node pages 763 * POR: we should ensure that there are no dirty node pages
762 * until finishing nat/sit flush. 764 * until finishing nat/sit flush.
763 */ 765 */
764retry_flush_nodes: 766retry_flush_nodes:
@@ -767,9 +769,16 @@ retry_flush_nodes:
767 if (get_pages(sbi, F2FS_DIRTY_NODES)) { 769 if (get_pages(sbi, F2FS_DIRTY_NODES)) {
768 up_write(&sbi->node_write); 770 up_write(&sbi->node_write);
769 sync_node_pages(sbi, 0, &wbc); 771 sync_node_pages(sbi, 0, &wbc);
772 if (unlikely(f2fs_cp_error(sbi))) {
773 f2fs_unlock_all(sbi);
774 err = -EIO;
775 goto out;
776 }
770 goto retry_flush_nodes; 777 goto retry_flush_nodes;
771 } 778 }
779out:
772 blk_finish_plug(&plug); 780 blk_finish_plug(&plug);
781 return err;
773} 782}
774 783
775static void unblock_operations(struct f2fs_sb_info *sbi) 784static void unblock_operations(struct f2fs_sb_info *sbi)
@@ -813,8 +822,11 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
813 discard_next_dnode(sbi, NEXT_FREE_BLKADDR(sbi, curseg)); 822 discard_next_dnode(sbi, NEXT_FREE_BLKADDR(sbi, curseg));
814 823
815 /* Flush all the NAT/SIT pages */ 824 /* Flush all the NAT/SIT pages */
816 while (get_pages(sbi, F2FS_DIRTY_META)) 825 while (get_pages(sbi, F2FS_DIRTY_META)) {
817 sync_meta_pages(sbi, META, LONG_MAX); 826 sync_meta_pages(sbi, META, LONG_MAX);
827 if (unlikely(f2fs_cp_error(sbi)))
828 return;
829 }
818 830
819 next_free_nid(sbi, &last_nid); 831 next_free_nid(sbi, &last_nid);
820 832
@@ -825,7 +837,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
825 ckpt->elapsed_time = cpu_to_le64(get_mtime(sbi)); 837 ckpt->elapsed_time = cpu_to_le64(get_mtime(sbi));
826 ckpt->valid_block_count = cpu_to_le64(valid_user_blocks(sbi)); 838 ckpt->valid_block_count = cpu_to_le64(valid_user_blocks(sbi));
827 ckpt->free_segment_count = cpu_to_le32(free_segments(sbi)); 839 ckpt->free_segment_count = cpu_to_le32(free_segments(sbi));
828 for (i = 0; i < 3; i++) { 840 for (i = 0; i < NR_CURSEG_NODE_TYPE; i++) {
829 ckpt->cur_node_segno[i] = 841 ckpt->cur_node_segno[i] =
830 cpu_to_le32(curseg_segno(sbi, i + CURSEG_HOT_NODE)); 842 cpu_to_le32(curseg_segno(sbi, i + CURSEG_HOT_NODE));
831 ckpt->cur_node_blkoff[i] = 843 ckpt->cur_node_blkoff[i] =
@@ -833,7 +845,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
833 ckpt->alloc_type[i + CURSEG_HOT_NODE] = 845 ckpt->alloc_type[i + CURSEG_HOT_NODE] =
834 curseg_alloc_type(sbi, i + CURSEG_HOT_NODE); 846 curseg_alloc_type(sbi, i + CURSEG_HOT_NODE);
835 } 847 }
836 for (i = 0; i < 3; i++) { 848 for (i = 0; i < NR_CURSEG_DATA_TYPE; i++) {
837 ckpt->cur_data_segno[i] = 849 ckpt->cur_data_segno[i] =
838 cpu_to_le32(curseg_segno(sbi, i + CURSEG_HOT_DATA)); 850 cpu_to_le32(curseg_segno(sbi, i + CURSEG_HOT_DATA));
839 ckpt->cur_data_blkoff[i] = 851 ckpt->cur_data_blkoff[i] =
@@ -848,24 +860,23 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
848 860
849 /* 2 cp + n data seg summary + orphan inode blocks */ 861 /* 2 cp + n data seg summary + orphan inode blocks */
850 data_sum_blocks = npages_for_summary_flush(sbi); 862 data_sum_blocks = npages_for_summary_flush(sbi);
851 if (data_sum_blocks < 3) 863 if (data_sum_blocks < NR_CURSEG_DATA_TYPE)
852 set_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG); 864 set_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);
853 else 865 else
854 clear_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG); 866 clear_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);
855 867
856 orphan_blocks = (sbi->n_orphans + F2FS_ORPHANS_PER_BLOCK - 1) 868 orphan_blocks = GET_ORPHAN_BLOCKS(sbi->n_orphans);
857 / F2FS_ORPHANS_PER_BLOCK;
858 ckpt->cp_pack_start_sum = cpu_to_le32(1 + cp_payload_blks + 869 ckpt->cp_pack_start_sum = cpu_to_le32(1 + cp_payload_blks +
859 orphan_blocks); 870 orphan_blocks);
860 871
861 if (is_umount) { 872 if (is_umount) {
862 set_ckpt_flags(ckpt, CP_UMOUNT_FLAG); 873 set_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
863 ckpt->cp_pack_total_block_count = cpu_to_le32(2 + 874 ckpt->cp_pack_total_block_count = cpu_to_le32(F2FS_CP_PACKS+
864 cp_payload_blks + data_sum_blocks + 875 cp_payload_blks + data_sum_blocks +
865 orphan_blocks + NR_CURSEG_NODE_TYPE); 876 orphan_blocks + NR_CURSEG_NODE_TYPE);
866 } else { 877 } else {
867 clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG); 878 clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
868 ckpt->cp_pack_total_block_count = cpu_to_le32(2 + 879 ckpt->cp_pack_total_block_count = cpu_to_le32(F2FS_CP_PACKS +
869 cp_payload_blks + data_sum_blocks + 880 cp_payload_blks + data_sum_blocks +
870 orphan_blocks); 881 orphan_blocks);
871 } 882 }
@@ -924,6 +935,9 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
924 /* wait for previous submitted node/meta pages writeback */ 935 /* wait for previous submitted node/meta pages writeback */
925 wait_on_all_pages_writeback(sbi); 936 wait_on_all_pages_writeback(sbi);
926 937
938 if (unlikely(f2fs_cp_error(sbi)))
939 return;
940
927 filemap_fdatawait_range(NODE_MAPPING(sbi), 0, LONG_MAX); 941 filemap_fdatawait_range(NODE_MAPPING(sbi), 0, LONG_MAX);
928 filemap_fdatawait_range(META_MAPPING(sbi), 0, LONG_MAX); 942 filemap_fdatawait_range(META_MAPPING(sbi), 0, LONG_MAX);
929 943
@@ -934,15 +948,17 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
934 /* Here, we only have one bio having CP pack */ 948 /* Here, we only have one bio having CP pack */
935 sync_meta_pages(sbi, META_FLUSH, LONG_MAX); 949 sync_meta_pages(sbi, META_FLUSH, LONG_MAX);
936 950
937 if (!is_set_ckpt_flags(ckpt, CP_ERROR_FLAG)) { 951 release_dirty_inode(sbi);
938 clear_prefree_segments(sbi); 952
939 release_dirty_inode(sbi); 953 if (unlikely(f2fs_cp_error(sbi)))
940 F2FS_RESET_SB_DIRT(sbi); 954 return;
941 } 955
956 clear_prefree_segments(sbi);
957 F2FS_RESET_SB_DIRT(sbi);
942} 958}
943 959
944/* 960/*
945 * We guarantee that this checkpoint procedure should not fail. 961 * We guarantee that this checkpoint procedure will not fail.
946 */ 962 */
947void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) 963void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
948{ 964{
@@ -952,7 +968,13 @@ void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
952 trace_f2fs_write_checkpoint(sbi->sb, is_umount, "start block_ops"); 968 trace_f2fs_write_checkpoint(sbi->sb, is_umount, "start block_ops");
953 969
954 mutex_lock(&sbi->cp_mutex); 970 mutex_lock(&sbi->cp_mutex);
955 block_operations(sbi); 971
972 if (!sbi->s_dirty)
973 goto out;
974 if (unlikely(f2fs_cp_error(sbi)))
975 goto out;
976 if (block_operations(sbi))
977 goto out;
956 978
957 trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish block_ops"); 979 trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish block_ops");
958 980
@@ -976,9 +998,9 @@ void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
976 do_checkpoint(sbi, is_umount); 998 do_checkpoint(sbi, is_umount);
977 999
978 unblock_operations(sbi); 1000 unblock_operations(sbi);
979 mutex_unlock(&sbi->cp_mutex);
980
981 stat_inc_cp_count(sbi->stat_info); 1001 stat_inc_cp_count(sbi->stat_info);
1002out:
1003 mutex_unlock(&sbi->cp_mutex);
982 trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish checkpoint"); 1004 trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish checkpoint");
983} 1005}
984 1006
@@ -999,8 +1021,8 @@ void init_ino_entry_info(struct f2fs_sb_info *sbi)
999 * for cp pack we can have max 1020*504 orphan entries 1021 * for cp pack we can have max 1020*504 orphan entries
1000 */ 1022 */
1001 sbi->n_orphans = 0; 1023 sbi->n_orphans = 0;
1002 sbi->max_orphans = (sbi->blocks_per_seg - 2 - NR_CURSEG_TYPE) 1024 sbi->max_orphans = (sbi->blocks_per_seg - F2FS_CP_PACKS -
1003 * F2FS_ORPHANS_PER_BLOCK; 1025 NR_CURSEG_TYPE) * F2FS_ORPHANS_PER_BLOCK;
1004} 1026}
1005 1027
1006int __init create_checkpoint_caches(void) 1028int __init create_checkpoint_caches(void)
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 03313099c51c..76de83e25a89 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -53,7 +53,7 @@ static void f2fs_write_end_io(struct bio *bio, int err)
53 struct page *page = bvec->bv_page; 53 struct page *page = bvec->bv_page;
54 54
55 if (unlikely(err)) { 55 if (unlikely(err)) {
56 SetPageError(page); 56 set_page_dirty(page);
57 set_bit(AS_EIO, &page->mapping->flags); 57 set_bit(AS_EIO, &page->mapping->flags);
58 f2fs_stop_checkpoint(sbi); 58 f2fs_stop_checkpoint(sbi);
59 } 59 }
@@ -691,7 +691,7 @@ get_next:
691 allocated = true; 691 allocated = true;
692 blkaddr = dn.data_blkaddr; 692 blkaddr = dn.data_blkaddr;
693 } 693 }
694 /* Give more consecutive addresses for the read ahead */ 694 /* Give more consecutive addresses for the readahead */
695 if (blkaddr == (bh_result->b_blocknr + ofs)) { 695 if (blkaddr == (bh_result->b_blocknr + ofs)) {
696 ofs++; 696 ofs++;
697 dn.ofs_in_node++; 697 dn.ofs_in_node++;
@@ -739,7 +739,7 @@ static int f2fs_read_data_page(struct file *file, struct page *page)
739 739
740 trace_f2fs_readpage(page, DATA); 740 trace_f2fs_readpage(page, DATA);
741 741
742 /* If the file has inline data, try to read it directlly */ 742 /* If the file has inline data, try to read it directly */
743 if (f2fs_has_inline_data(inode)) 743 if (f2fs_has_inline_data(inode))
744 ret = f2fs_read_inline_data(inode, page); 744 ret = f2fs_read_inline_data(inode, page);
745 else 745 else
@@ -836,10 +836,19 @@ write:
836 836
837 /* Dentry blocks are controlled by checkpoint */ 837 /* Dentry blocks are controlled by checkpoint */
838 if (S_ISDIR(inode->i_mode)) { 838 if (S_ISDIR(inode->i_mode)) {
839 if (unlikely(f2fs_cp_error(sbi)))
840 goto redirty_out;
839 err = do_write_data_page(page, &fio); 841 err = do_write_data_page(page, &fio);
840 goto done; 842 goto done;
841 } 843 }
842 844
845 /* we should bypass data pages to proceed the kworkder jobs */
846 if (unlikely(f2fs_cp_error(sbi))) {
847 SetPageError(page);
848 unlock_page(page);
849 return 0;
850 }
851
843 if (!wbc->for_reclaim) 852 if (!wbc->for_reclaim)
844 need_balance_fs = true; 853 need_balance_fs = true;
845 else if (has_not_enough_free_secs(sbi, 0)) 854 else if (has_not_enough_free_secs(sbi, 0))
@@ -927,7 +936,7 @@ static void f2fs_write_failed(struct address_space *mapping, loff_t to)
927 936
928 if (to > inode->i_size) { 937 if (to > inode->i_size) {
929 truncate_pagecache(inode, inode->i_size); 938 truncate_pagecache(inode, inode->i_size);
930 truncate_blocks(inode, inode->i_size); 939 truncate_blocks(inode, inode->i_size, true);
931 } 940 }
932} 941}
933 942
@@ -946,7 +955,7 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
946 955
947 f2fs_balance_fs(sbi); 956 f2fs_balance_fs(sbi);
948repeat: 957repeat:
949 err = f2fs_convert_inline_data(inode, pos + len); 958 err = f2fs_convert_inline_data(inode, pos + len, NULL);
950 if (err) 959 if (err)
951 goto fail; 960 goto fail;
952 961
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index a441ba33be11..fecebdbfd781 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -32,7 +32,7 @@ static void update_general_status(struct f2fs_sb_info *sbi)
32 struct f2fs_stat_info *si = F2FS_STAT(sbi); 32 struct f2fs_stat_info *si = F2FS_STAT(sbi);
33 int i; 33 int i;
34 34
35 /* valid check of the segment numbers */ 35 /* validation check of the segment numbers */
36 si->hit_ext = sbi->read_hit_ext; 36 si->hit_ext = sbi->read_hit_ext;
37 si->total_ext = sbi->total_hit_ext; 37 si->total_ext = sbi->total_hit_ext;
38 si->ndirty_node = get_pages(sbi, F2FS_DIRTY_NODES); 38 si->ndirty_node = get_pages(sbi, F2FS_DIRTY_NODES);
@@ -152,7 +152,7 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
152 si->base_mem += NR_DIRTY_TYPE * f2fs_bitmap_size(TOTAL_SEGS(sbi)); 152 si->base_mem += NR_DIRTY_TYPE * f2fs_bitmap_size(TOTAL_SEGS(sbi));
153 si->base_mem += f2fs_bitmap_size(TOTAL_SECS(sbi)); 153 si->base_mem += f2fs_bitmap_size(TOTAL_SECS(sbi));
154 154
155 /* buld nm */ 155 /* build nm */
156 si->base_mem += sizeof(struct f2fs_nm_info); 156 si->base_mem += sizeof(struct f2fs_nm_info);
157 si->base_mem += __bitmap_size(sbi, NAT_BITMAP); 157 si->base_mem += __bitmap_size(sbi, NAT_BITMAP);
158 158
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index bcf893c3d903..155fb056b7f1 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -124,7 +124,7 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,
124 124
125 /* 125 /*
126 * For the most part, it should be a bug when name_len is zero. 126 * For the most part, it should be a bug when name_len is zero.
127 * We stop here for figuring out where the bugs are occurred. 127 * We stop here for figuring out where the bugs has occurred.
128 */ 128 */
129 f2fs_bug_on(!de->name_len); 129 f2fs_bug_on(!de->name_len);
130 130
@@ -391,7 +391,7 @@ put_error:
391error: 391error:
392 /* once the failed inode becomes a bad inode, i_mode is S_IFREG */ 392 /* once the failed inode becomes a bad inode, i_mode is S_IFREG */
393 truncate_inode_pages(&inode->i_data, 0); 393 truncate_inode_pages(&inode->i_data, 0);
394 truncate_blocks(inode, 0); 394 truncate_blocks(inode, 0, false);
395 remove_dirty_dir_inode(inode); 395 remove_dirty_dir_inode(inode);
396 remove_inode_page(inode); 396 remove_inode_page(inode);
397 return ERR_PTR(err); 397 return ERR_PTR(err);
@@ -563,7 +563,7 @@ fail:
563} 563}
564 564
565/* 565/*
566 * It only removes the dentry from the dentry page,corresponding name 566 * It only removes the dentry from the dentry page, corresponding name
567 * entry in name page does not need to be touched during deletion. 567 * entry in name page does not need to be touched during deletion.
568 */ 568 */
569void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, 569void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 4dab5338a97a..e921242186f6 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -24,7 +24,7 @@
24#define f2fs_bug_on(condition) BUG_ON(condition) 24#define f2fs_bug_on(condition) BUG_ON(condition)
25#define f2fs_down_write(x, y) down_write_nest_lock(x, y) 25#define f2fs_down_write(x, y) down_write_nest_lock(x, y)
26#else 26#else
27#define f2fs_bug_on(condition) 27#define f2fs_bug_on(condition) WARN_ON(condition)
28#define f2fs_down_write(x, y) down_write(x) 28#define f2fs_down_write(x, y) down_write(x)
29#endif 29#endif
30 30
@@ -395,7 +395,7 @@ enum count_type {
395}; 395};
396 396
397/* 397/*
398 * The below are the page types of bios used in submti_bio(). 398 * The below are the page types of bios used in submit_bio().
399 * The available types are: 399 * The available types are:
400 * DATA User data pages. It operates as async mode. 400 * DATA User data pages. It operates as async mode.
401 * NODE Node pages. It operates as async mode. 401 * NODE Node pages. It operates as async mode.
@@ -470,7 +470,7 @@ struct f2fs_sb_info {
470 struct list_head dir_inode_list; /* dir inode list */ 470 struct list_head dir_inode_list; /* dir inode list */
471 spinlock_t dir_inode_lock; /* for dir inode list lock */ 471 spinlock_t dir_inode_lock; /* for dir inode list lock */
472 472
473 /* basic file system units */ 473 /* basic filesystem units */
474 unsigned int log_sectors_per_block; /* log2 sectors per block */ 474 unsigned int log_sectors_per_block; /* log2 sectors per block */
475 unsigned int log_blocksize; /* log2 block size */ 475 unsigned int log_blocksize; /* log2 block size */
476 unsigned int blocksize; /* block size */ 476 unsigned int blocksize; /* block size */
@@ -799,7 +799,7 @@ static inline block_t __start_cp_addr(struct f2fs_sb_info *sbi)
799 799
800 /* 800 /*
801 * odd numbered checkpoint should at cp segment 0 801 * odd numbered checkpoint should at cp segment 0
802 * and even segent must be at cp segment 1 802 * and even segment must be at cp segment 1
803 */ 803 */
804 if (!(ckpt_version & 1)) 804 if (!(ckpt_version & 1))
805 start_addr += sbi->blocks_per_seg; 805 start_addr += sbi->blocks_per_seg;
@@ -1096,6 +1096,11 @@ static inline int f2fs_readonly(struct super_block *sb)
1096 return sb->s_flags & MS_RDONLY; 1096 return sb->s_flags & MS_RDONLY;
1097} 1097}
1098 1098
1099static inline bool f2fs_cp_error(struct f2fs_sb_info *sbi)
1100{
1101 return is_set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG);
1102}
1103
1099static inline void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi) 1104static inline void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi)
1100{ 1105{
1101 set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG); 1106 set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG);
@@ -1117,7 +1122,7 @@ static inline void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi)
1117 */ 1122 */
1118int f2fs_sync_file(struct file *, loff_t, loff_t, int); 1123int f2fs_sync_file(struct file *, loff_t, loff_t, int);
1119void truncate_data_blocks(struct dnode_of_data *); 1124void truncate_data_blocks(struct dnode_of_data *);
1120int truncate_blocks(struct inode *, u64); 1125int truncate_blocks(struct inode *, u64, bool);
1121void f2fs_truncate(struct inode *); 1126void f2fs_truncate(struct inode *);
1122int f2fs_getattr(struct vfsmount *, struct dentry *, struct kstat *); 1127int f2fs_getattr(struct vfsmount *, struct dentry *, struct kstat *);
1123int f2fs_setattr(struct dentry *, struct iattr *); 1128int f2fs_setattr(struct dentry *, struct iattr *);
@@ -1202,10 +1207,8 @@ int sync_node_pages(struct f2fs_sb_info *, nid_t, struct writeback_control *);
1202bool alloc_nid(struct f2fs_sb_info *, nid_t *); 1207bool alloc_nid(struct f2fs_sb_info *, nid_t *);
1203void alloc_nid_done(struct f2fs_sb_info *, nid_t); 1208void alloc_nid_done(struct f2fs_sb_info *, nid_t);
1204void alloc_nid_failed(struct f2fs_sb_info *, nid_t); 1209void alloc_nid_failed(struct f2fs_sb_info *, nid_t);
1205void recover_node_page(struct f2fs_sb_info *, struct page *,
1206 struct f2fs_summary *, struct node_info *, block_t);
1207void recover_inline_xattr(struct inode *, struct page *); 1210void recover_inline_xattr(struct inode *, struct page *);
1208bool recover_xattr_data(struct inode *, struct page *, block_t); 1211void recover_xattr_data(struct inode *, struct page *, block_t);
1209int recover_inode_page(struct f2fs_sb_info *, struct page *); 1212int recover_inode_page(struct f2fs_sb_info *, struct page *);
1210int restore_node_summary(struct f2fs_sb_info *, unsigned int, 1213int restore_node_summary(struct f2fs_sb_info *, unsigned int,
1211 struct f2fs_summary_block *); 1214 struct f2fs_summary_block *);
@@ -1238,8 +1241,6 @@ void write_data_page(struct page *, struct dnode_of_data *, block_t *,
1238void rewrite_data_page(struct page *, block_t, struct f2fs_io_info *); 1241void rewrite_data_page(struct page *, block_t, struct f2fs_io_info *);
1239void recover_data_page(struct f2fs_sb_info *, struct page *, 1242void recover_data_page(struct f2fs_sb_info *, struct page *,
1240 struct f2fs_summary *, block_t, block_t); 1243 struct f2fs_summary *, block_t, block_t);
1241void rewrite_node_page(struct f2fs_sb_info *, struct page *,
1242 struct f2fs_summary *, block_t, block_t);
1243void allocate_data_block(struct f2fs_sb_info *, struct page *, 1244void allocate_data_block(struct f2fs_sb_info *, struct page *,
1244 block_t, block_t *, struct f2fs_summary *, int); 1245 block_t, block_t *, struct f2fs_summary *, int);
1245void f2fs_wait_on_page_writeback(struct page *, enum page_type); 1246void f2fs_wait_on_page_writeback(struct page *, enum page_type);
@@ -1262,6 +1263,7 @@ int ra_meta_pages(struct f2fs_sb_info *, int, int, int);
1262long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long); 1263long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long);
1263void add_dirty_inode(struct f2fs_sb_info *, nid_t, int type); 1264void add_dirty_inode(struct f2fs_sb_info *, nid_t, int type);
1264void remove_dirty_inode(struct f2fs_sb_info *, nid_t, int type); 1265void remove_dirty_inode(struct f2fs_sb_info *, nid_t, int type);
1266void release_dirty_inode(struct f2fs_sb_info *);
1265bool exist_written_data(struct f2fs_sb_info *, nid_t, int); 1267bool exist_written_data(struct f2fs_sb_info *, nid_t, int);
1266int acquire_orphan_inode(struct f2fs_sb_info *); 1268int acquire_orphan_inode(struct f2fs_sb_info *);
1267void release_orphan_inode(struct f2fs_sb_info *); 1269void release_orphan_inode(struct f2fs_sb_info *);
@@ -1439,8 +1441,8 @@ extern const struct inode_operations f2fs_special_inode_operations;
1439 */ 1441 */
1440bool f2fs_may_inline(struct inode *); 1442bool f2fs_may_inline(struct inode *);
1441int f2fs_read_inline_data(struct inode *, struct page *); 1443int f2fs_read_inline_data(struct inode *, struct page *);
1442int f2fs_convert_inline_data(struct inode *, pgoff_t); 1444int f2fs_convert_inline_data(struct inode *, pgoff_t, struct page *);
1443int f2fs_write_inline_data(struct inode *, struct page *, unsigned int); 1445int f2fs_write_inline_data(struct inode *, struct page *, unsigned int);
1444void truncate_inline_data(struct inode *, u64); 1446void truncate_inline_data(struct inode *, u64);
1445int recover_inline_data(struct inode *, struct page *); 1447bool recover_inline_data(struct inode *, struct page *);
1446#endif 1448#endif
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 208f1a9bd569..060aee65aee8 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -41,6 +41,11 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma,
41 41
42 sb_start_pagefault(inode->i_sb); 42 sb_start_pagefault(inode->i_sb);
43 43
44 /* force to convert with normal data indices */
45 err = f2fs_convert_inline_data(inode, MAX_INLINE_DATA + 1, page);
46 if (err)
47 goto out;
48
44 /* block allocation */ 49 /* block allocation */
45 f2fs_lock_op(sbi); 50 f2fs_lock_op(sbi);
46 set_new_dnode(&dn, inode, NULL, NULL, 0); 51 set_new_dnode(&dn, inode, NULL, NULL, 0);
@@ -110,6 +115,25 @@ static int get_parent_ino(struct inode *inode, nid_t *pino)
110 return 1; 115 return 1;
111} 116}
112 117
118static inline bool need_do_checkpoint(struct inode *inode)
119{
120 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
121 bool need_cp = false;
122
123 if (!S_ISREG(inode->i_mode) || inode->i_nlink != 1)
124 need_cp = true;
125 else if (file_wrong_pino(inode))
126 need_cp = true;
127 else if (!space_for_roll_forward(sbi))
128 need_cp = true;
129 else if (!is_checkpointed_node(sbi, F2FS_I(inode)->i_pino))
130 need_cp = true;
131 else if (F2FS_I(inode)->xattr_ver == cur_cp_version(F2FS_CKPT(sbi)))
132 need_cp = true;
133
134 return need_cp;
135}
136
113int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) 137int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
114{ 138{
115 struct inode *inode = file->f_mapping->host; 139 struct inode *inode = file->f_mapping->host;
@@ -154,23 +178,12 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
154 /* guarantee free sections for fsync */ 178 /* guarantee free sections for fsync */
155 f2fs_balance_fs(sbi); 179 f2fs_balance_fs(sbi);
156 180
157 down_read(&fi->i_sem);
158
159 /* 181 /*
160 * Both of fdatasync() and fsync() are able to be recovered from 182 * Both of fdatasync() and fsync() are able to be recovered from
161 * sudden-power-off. 183 * sudden-power-off.
162 */ 184 */
163 if (!S_ISREG(inode->i_mode) || inode->i_nlink != 1) 185 down_read(&fi->i_sem);
164 need_cp = true; 186 need_cp = need_do_checkpoint(inode);
165 else if (file_wrong_pino(inode))
166 need_cp = true;
167 else if (!space_for_roll_forward(sbi))
168 need_cp = true;
169 else if (!is_checkpointed_node(sbi, F2FS_I(inode)->i_pino))
170 need_cp = true;
171 else if (F2FS_I(inode)->xattr_ver == cur_cp_version(F2FS_CKPT(sbi)))
172 need_cp = true;
173
174 up_read(&fi->i_sem); 187 up_read(&fi->i_sem);
175 188
176 if (need_cp) { 189 if (need_cp) {
@@ -288,7 +301,7 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence)
288 if (err && err != -ENOENT) { 301 if (err && err != -ENOENT) {
289 goto fail; 302 goto fail;
290 } else if (err == -ENOENT) { 303 } else if (err == -ENOENT) {
291 /* direct node is not exist */ 304 /* direct node does not exists */
292 if (whence == SEEK_DATA) { 305 if (whence == SEEK_DATA) {
293 pgofs = PGOFS_OF_NEXT_DNODE(pgofs, 306 pgofs = PGOFS_OF_NEXT_DNODE(pgofs,
294 F2FS_I(inode)); 307 F2FS_I(inode));
@@ -417,7 +430,7 @@ out:
417 f2fs_put_page(page, 1); 430 f2fs_put_page(page, 1);
418} 431}
419 432
420int truncate_blocks(struct inode *inode, u64 from) 433int truncate_blocks(struct inode *inode, u64 from, bool lock)
421{ 434{
422 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 435 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
423 unsigned int blocksize = inode->i_sb->s_blocksize; 436 unsigned int blocksize = inode->i_sb->s_blocksize;
@@ -433,14 +446,16 @@ int truncate_blocks(struct inode *inode, u64 from)
433 free_from = (pgoff_t) 446 free_from = (pgoff_t)
434 ((from + blocksize - 1) >> (sbi->log_blocksize)); 447 ((from + blocksize - 1) >> (sbi->log_blocksize));
435 448
436 f2fs_lock_op(sbi); 449 if (lock)
450 f2fs_lock_op(sbi);
437 451
438 set_new_dnode(&dn, inode, NULL, NULL, 0); 452 set_new_dnode(&dn, inode, NULL, NULL, 0);
439 err = get_dnode_of_data(&dn, free_from, LOOKUP_NODE); 453 err = get_dnode_of_data(&dn, free_from, LOOKUP_NODE);
440 if (err) { 454 if (err) {
441 if (err == -ENOENT) 455 if (err == -ENOENT)
442 goto free_next; 456 goto free_next;
443 f2fs_unlock_op(sbi); 457 if (lock)
458 f2fs_unlock_op(sbi);
444 trace_f2fs_truncate_blocks_exit(inode, err); 459 trace_f2fs_truncate_blocks_exit(inode, err);
445 return err; 460 return err;
446 } 461 }
@@ -458,7 +473,8 @@ int truncate_blocks(struct inode *inode, u64 from)
458 f2fs_put_dnode(&dn); 473 f2fs_put_dnode(&dn);
459free_next: 474free_next:
460 err = truncate_inode_blocks(inode, free_from); 475 err = truncate_inode_blocks(inode, free_from);
461 f2fs_unlock_op(sbi); 476 if (lock)
477 f2fs_unlock_op(sbi);
462done: 478done:
463 /* lastly zero out the first data page */ 479 /* lastly zero out the first data page */
464 truncate_partial_data_page(inode, from); 480 truncate_partial_data_page(inode, from);
@@ -475,7 +491,7 @@ void f2fs_truncate(struct inode *inode)
475 491
476 trace_f2fs_truncate(inode); 492 trace_f2fs_truncate(inode);
477 493
478 if (!truncate_blocks(inode, i_size_read(inode))) { 494 if (!truncate_blocks(inode, i_size_read(inode), true)) {
479 inode->i_mtime = inode->i_ctime = CURRENT_TIME; 495 inode->i_mtime = inode->i_ctime = CURRENT_TIME;
480 mark_inode_dirty(inode); 496 mark_inode_dirty(inode);
481 } 497 }
@@ -533,7 +549,7 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
533 549
534 if ((attr->ia_valid & ATTR_SIZE) && 550 if ((attr->ia_valid & ATTR_SIZE) &&
535 attr->ia_size != i_size_read(inode)) { 551 attr->ia_size != i_size_read(inode)) {
536 err = f2fs_convert_inline_data(inode, attr->ia_size); 552 err = f2fs_convert_inline_data(inode, attr->ia_size, NULL);
537 if (err) 553 if (err)
538 return err; 554 return err;
539 555
@@ -622,7 +638,7 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len)
622 loff_t off_start, off_end; 638 loff_t off_start, off_end;
623 int ret = 0; 639 int ret = 0;
624 640
625 ret = f2fs_convert_inline_data(inode, MAX_INLINE_DATA + 1); 641 ret = f2fs_convert_inline_data(inode, MAX_INLINE_DATA + 1, NULL);
626 if (ret) 642 if (ret)
627 return ret; 643 return ret;
628 644
@@ -678,7 +694,7 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
678 if (ret) 694 if (ret)
679 return ret; 695 return ret;
680 696
681 ret = f2fs_convert_inline_data(inode, offset + len); 697 ret = f2fs_convert_inline_data(inode, offset + len, NULL);
682 if (ret) 698 if (ret)
683 return ret; 699 return ret;
684 700
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index d7947d90ccc3..943a31db7cc3 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -58,7 +58,7 @@ static int gc_thread_func(void *data)
58 * 3. IO subsystem is idle by checking the # of requests in 58 * 3. IO subsystem is idle by checking the # of requests in
59 * bdev's request list. 59 * bdev's request list.
60 * 60 *
61 * Note) We have to avoid triggering GCs too much frequently. 61 * Note) We have to avoid triggering GCs frequently.
62 * Because it is possible that some segments can be 62 * Because it is possible that some segments can be
63 * invalidated soon after by user update or deletion. 63 * invalidated soon after by user update or deletion.
64 * So, I'd like to wait some time to collect dirty segments. 64 * So, I'd like to wait some time to collect dirty segments.
@@ -222,7 +222,7 @@ static unsigned int get_cb_cost(struct f2fs_sb_info *sbi, unsigned int segno)
222 222
223 u = (vblocks * 100) >> sbi->log_blocks_per_seg; 223 u = (vblocks * 100) >> sbi->log_blocks_per_seg;
224 224
225 /* Handle if the system time is changed by user */ 225 /* Handle if the system time has changed by the user */
226 if (mtime < sit_i->min_mtime) 226 if (mtime < sit_i->min_mtime)
227 sit_i->min_mtime = mtime; 227 sit_i->min_mtime = mtime;
228 if (mtime > sit_i->max_mtime) 228 if (mtime > sit_i->max_mtime)
@@ -593,7 +593,7 @@ next_step:
593 593
594 if (phase == 2) { 594 if (phase == 2) {
595 inode = f2fs_iget(sb, dni.ino); 595 inode = f2fs_iget(sb, dni.ino);
596 if (IS_ERR(inode)) 596 if (IS_ERR(inode) || is_bad_inode(inode))
597 continue; 597 continue;
598 598
599 start_bidx = start_bidx_of_node(nofs, F2FS_I(inode)); 599 start_bidx = start_bidx_of_node(nofs, F2FS_I(inode));
@@ -693,7 +693,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi)
693gc_more: 693gc_more:
694 if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE))) 694 if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE)))
695 goto stop; 695 goto stop;
696 if (unlikely(is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ERROR_FLAG))) 696 if (unlikely(f2fs_cp_error(sbi)))
697 goto stop; 697 goto stop;
698 698
699 if (gc_type == BG_GC && has_not_enough_free_secs(sbi, nfree)) { 699 if (gc_type == BG_GC && has_not_enough_free_secs(sbi, nfree)) {
diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h
index 5d5eb6047bf4..16f0b2b22999 100644
--- a/fs/f2fs/gc.h
+++ b/fs/f2fs/gc.h
@@ -91,7 +91,7 @@ static inline bool has_enough_invalid_blocks(struct f2fs_sb_info *sbi)
91 block_t invalid_user_blocks = sbi->user_block_count - 91 block_t invalid_user_blocks = sbi->user_block_count -
92 written_block_count(sbi); 92 written_block_count(sbi);
93 /* 93 /*
94 * Background GC is triggered with the following condition. 94 * Background GC is triggered with the following conditions.
95 * 1. There are a number of invalid blocks. 95 * 1. There are a number of invalid blocks.
96 * 2. There is not enough free space. 96 * 2. There is not enough free space.
97 */ 97 */
diff --git a/fs/f2fs/hash.c b/fs/f2fs/hash.c
index 948d17bf7281..a844fcfb9a8d 100644
--- a/fs/f2fs/hash.c
+++ b/fs/f2fs/hash.c
@@ -42,7 +42,8 @@ static void TEA_transform(unsigned int buf[4], unsigned int const in[])
42 buf[1] += b1; 42 buf[1] += b1;
43} 43}
44 44
45static void str2hashbuf(const char *msg, size_t len, unsigned int *buf, int num) 45static void str2hashbuf(const unsigned char *msg, size_t len,
46 unsigned int *buf, int num)
46{ 47{
47 unsigned pad, val; 48 unsigned pad, val;
48 int i; 49 int i;
@@ -73,9 +74,9 @@ f2fs_hash_t f2fs_dentry_hash(const struct qstr *name_info)
73{ 74{
74 __u32 hash; 75 __u32 hash;
75 f2fs_hash_t f2fs_hash; 76 f2fs_hash_t f2fs_hash;
76 const char *p; 77 const unsigned char *p;
77 __u32 in[8], buf[4]; 78 __u32 in[8], buf[4];
78 const char *name = name_info->name; 79 const unsigned char *name = name_info->name;
79 size_t len = name_info->len; 80 size_t len = name_info->len;
80 81
81 if ((len <= 2) && (name[0] == '.') && 82 if ((len <= 2) && (name[0] == '.') &&
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index 5beeccef9ae1..3e8ecdf3742b 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -68,7 +68,7 @@ out:
68 68
69static int __f2fs_convert_inline_data(struct inode *inode, struct page *page) 69static int __f2fs_convert_inline_data(struct inode *inode, struct page *page)
70{ 70{
71 int err; 71 int err = 0;
72 struct page *ipage; 72 struct page *ipage;
73 struct dnode_of_data dn; 73 struct dnode_of_data dn;
74 void *src_addr, *dst_addr; 74 void *src_addr, *dst_addr;
@@ -86,6 +86,10 @@ static int __f2fs_convert_inline_data(struct inode *inode, struct page *page)
86 goto out; 86 goto out;
87 } 87 }
88 88
89 /* someone else converted inline_data already */
90 if (!f2fs_has_inline_data(inode))
91 goto out;
92
89 /* 93 /*
90 * i_addr[0] is not used for inline data, 94 * i_addr[0] is not used for inline data,
91 * so reserving new block will not destroy inline data 95 * so reserving new block will not destroy inline data
@@ -124,9 +128,10 @@ out:
124 return err; 128 return err;
125} 129}
126 130
127int f2fs_convert_inline_data(struct inode *inode, pgoff_t to_size) 131int f2fs_convert_inline_data(struct inode *inode, pgoff_t to_size,
132 struct page *page)
128{ 133{
129 struct page *page; 134 struct page *new_page = page;
130 int err; 135 int err;
131 136
132 if (!f2fs_has_inline_data(inode)) 137 if (!f2fs_has_inline_data(inode))
@@ -134,17 +139,20 @@ int f2fs_convert_inline_data(struct inode *inode, pgoff_t to_size)
134 else if (to_size <= MAX_INLINE_DATA) 139 else if (to_size <= MAX_INLINE_DATA)
135 return 0; 140 return 0;
136 141
137 page = grab_cache_page(inode->i_mapping, 0); 142 if (!page || page->index != 0) {
138 if (!page) 143 new_page = grab_cache_page(inode->i_mapping, 0);
139 return -ENOMEM; 144 if (!new_page)
145 return -ENOMEM;
146 }
140 147
141 err = __f2fs_convert_inline_data(inode, page); 148 err = __f2fs_convert_inline_data(inode, new_page);
142 f2fs_put_page(page, 1); 149 if (!page || page->index != 0)
150 f2fs_put_page(new_page, 1);
143 return err; 151 return err;
144} 152}
145 153
146int f2fs_write_inline_data(struct inode *inode, 154int f2fs_write_inline_data(struct inode *inode,
147 struct page *page, unsigned size) 155 struct page *page, unsigned size)
148{ 156{
149 void *src_addr, *dst_addr; 157 void *src_addr, *dst_addr;
150 struct page *ipage; 158 struct page *ipage;
@@ -199,7 +207,7 @@ void truncate_inline_data(struct inode *inode, u64 from)
199 f2fs_put_page(ipage, 1); 207 f2fs_put_page(ipage, 1);
200} 208}
201 209
202int recover_inline_data(struct inode *inode, struct page *npage) 210bool recover_inline_data(struct inode *inode, struct page *npage)
203{ 211{
204 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 212 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
205 struct f2fs_inode *ri = NULL; 213 struct f2fs_inode *ri = NULL;
@@ -218,7 +226,7 @@ int recover_inline_data(struct inode *inode, struct page *npage)
218 ri = F2FS_INODE(npage); 226 ri = F2FS_INODE(npage);
219 227
220 if (f2fs_has_inline_data(inode) && 228 if (f2fs_has_inline_data(inode) &&
221 ri && ri->i_inline & F2FS_INLINE_DATA) { 229 ri && (ri->i_inline & F2FS_INLINE_DATA)) {
222process_inline: 230process_inline:
223 ipage = get_node_page(sbi, inode->i_ino); 231 ipage = get_node_page(sbi, inode->i_ino);
224 f2fs_bug_on(IS_ERR(ipage)); 232 f2fs_bug_on(IS_ERR(ipage));
@@ -230,7 +238,7 @@ process_inline:
230 memcpy(dst_addr, src_addr, MAX_INLINE_DATA); 238 memcpy(dst_addr, src_addr, MAX_INLINE_DATA);
231 update_inode(inode, ipage); 239 update_inode(inode, ipage);
232 f2fs_put_page(ipage, 1); 240 f2fs_put_page(ipage, 1);
233 return -1; 241 return true;
234 } 242 }
235 243
236 if (f2fs_has_inline_data(inode)) { 244 if (f2fs_has_inline_data(inode)) {
@@ -242,10 +250,10 @@ process_inline:
242 clear_inode_flag(F2FS_I(inode), FI_INLINE_DATA); 250 clear_inode_flag(F2FS_I(inode), FI_INLINE_DATA);
243 update_inode(inode, ipage); 251 update_inode(inode, ipage);
244 f2fs_put_page(ipage, 1); 252 f2fs_put_page(ipage, 1);
245 } else if (ri && ri->i_inline & F2FS_INLINE_DATA) { 253 } else if (ri && (ri->i_inline & F2FS_INLINE_DATA)) {
246 truncate_blocks(inode, 0); 254 truncate_blocks(inode, 0, false);
247 set_inode_flag(F2FS_I(inode), FI_INLINE_DATA); 255 set_inode_flag(F2FS_I(inode), FI_INLINE_DATA);
248 goto process_inline; 256 goto process_inline;
249 } 257 }
250 return 0; 258 return false;
251} 259}
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 27b03776ffd2..ee103fd7283c 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -134,9 +134,7 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
134 return 0; 134 return 0;
135out: 135out:
136 clear_nlink(inode); 136 clear_nlink(inode);
137 unlock_new_inode(inode); 137 iget_failed(inode);
138 make_bad_inode(inode);
139 iput(inode);
140 alloc_nid_failed(sbi, ino); 138 alloc_nid_failed(sbi, ino);
141 return err; 139 return err;
142} 140}
@@ -229,7 +227,7 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry)
229 f2fs_delete_entry(de, page, inode); 227 f2fs_delete_entry(de, page, inode);
230 f2fs_unlock_op(sbi); 228 f2fs_unlock_op(sbi);
231 229
232 /* In order to evict this inode, we set it dirty */ 230 /* In order to evict this inode, we set it dirty */
233 mark_inode_dirty(inode); 231 mark_inode_dirty(inode);
234fail: 232fail:
235 trace_f2fs_unlink_exit(inode, err); 233 trace_f2fs_unlink_exit(inode, err);
@@ -267,9 +265,7 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
267 return err; 265 return err;
268out: 266out:
269 clear_nlink(inode); 267 clear_nlink(inode);
270 unlock_new_inode(inode); 268 iget_failed(inode);
271 make_bad_inode(inode);
272 iput(inode);
273 alloc_nid_failed(sbi, inode->i_ino); 269 alloc_nid_failed(sbi, inode->i_ino);
274 return err; 270 return err;
275} 271}
@@ -308,9 +304,7 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
308out_fail: 304out_fail:
309 clear_inode_flag(F2FS_I(inode), FI_INC_LINK); 305 clear_inode_flag(F2FS_I(inode), FI_INC_LINK);
310 clear_nlink(inode); 306 clear_nlink(inode);
311 unlock_new_inode(inode); 307 iget_failed(inode);
312 make_bad_inode(inode);
313 iput(inode);
314 alloc_nid_failed(sbi, inode->i_ino); 308 alloc_nid_failed(sbi, inode->i_ino);
315 return err; 309 return err;
316} 310}
@@ -354,9 +348,7 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry,
354 return 0; 348 return 0;
355out: 349out:
356 clear_nlink(inode); 350 clear_nlink(inode);
357 unlock_new_inode(inode); 351 iget_failed(inode);
358 make_bad_inode(inode);
359 iput(inode);
360 alloc_nid_failed(sbi, inode->i_ino); 352 alloc_nid_failed(sbi, inode->i_ino);
361 return err; 353 return err;
362} 354}
@@ -688,9 +680,7 @@ release_out:
688out: 680out:
689 f2fs_unlock_op(sbi); 681 f2fs_unlock_op(sbi);
690 clear_nlink(inode); 682 clear_nlink(inode);
691 unlock_new_inode(inode); 683 iget_failed(inode);
692 make_bad_inode(inode);
693 iput(inode);
694 alloc_nid_failed(sbi, inode->i_ino); 684 alloc_nid_failed(sbi, inode->i_ino);
695 return err; 685 return err;
696} 686}
@@ -704,7 +694,6 @@ const struct inode_operations f2fs_dir_inode_operations = {
704 .mkdir = f2fs_mkdir, 694 .mkdir = f2fs_mkdir,
705 .rmdir = f2fs_rmdir, 695 .rmdir = f2fs_rmdir,
706 .mknod = f2fs_mknod, 696 .mknod = f2fs_mknod,
707 .rename = f2fs_rename,
708 .rename2 = f2fs_rename2, 697 .rename2 = f2fs_rename2,
709 .tmpfile = f2fs_tmpfile, 698 .tmpfile = f2fs_tmpfile,
710 .getattr = f2fs_getattr, 699 .getattr = f2fs_getattr,
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index d3d90d284631..45378196e19a 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -237,7 +237,7 @@ retry:
237 nat_get_blkaddr(e) != NULL_ADDR && 237 nat_get_blkaddr(e) != NULL_ADDR &&
238 new_blkaddr == NEW_ADDR); 238 new_blkaddr == NEW_ADDR);
239 239
240 /* increament version no as node is removed */ 240 /* increment version no as node is removed */
241 if (nat_get_blkaddr(e) != NEW_ADDR && new_blkaddr == NULL_ADDR) { 241 if (nat_get_blkaddr(e) != NEW_ADDR && new_blkaddr == NULL_ADDR) {
242 unsigned char version = nat_get_version(e); 242 unsigned char version = nat_get_version(e);
243 nat_set_version(e, inc_node_version(version)); 243 nat_set_version(e, inc_node_version(version));
@@ -274,7 +274,7 @@ int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
274} 274}
275 275
276/* 276/*
277 * This function returns always success 277 * This function always returns success
278 */ 278 */
279void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni) 279void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni)
280{ 280{
@@ -650,7 +650,7 @@ static int truncate_partial_nodes(struct dnode_of_data *dn,
650 650
651 /* get indirect nodes in the path */ 651 /* get indirect nodes in the path */
652 for (i = 0; i < idx + 1; i++) { 652 for (i = 0; i < idx + 1; i++) {
653 /* refernece count'll be increased */ 653 /* reference count'll be increased */
654 pages[i] = get_node_page(sbi, nid[i]); 654 pages[i] = get_node_page(sbi, nid[i]);
655 if (IS_ERR(pages[i])) { 655 if (IS_ERR(pages[i])) {
656 err = PTR_ERR(pages[i]); 656 err = PTR_ERR(pages[i]);
@@ -823,22 +823,26 @@ int truncate_xattr_node(struct inode *inode, struct page *page)
823 */ 823 */
824void remove_inode_page(struct inode *inode) 824void remove_inode_page(struct inode *inode)
825{ 825{
826 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
827 struct page *page;
828 nid_t ino = inode->i_ino;
829 struct dnode_of_data dn; 826 struct dnode_of_data dn;
830 827
831 page = get_node_page(sbi, ino); 828 set_new_dnode(&dn, inode, NULL, NULL, inode->i_ino);
832 if (IS_ERR(page)) 829 if (get_dnode_of_data(&dn, 0, LOOKUP_NODE))
833 return; 830 return;
834 831
835 if (truncate_xattr_node(inode, page)) { 832 if (truncate_xattr_node(inode, dn.inode_page)) {
836 f2fs_put_page(page, 1); 833 f2fs_put_dnode(&dn);
837 return; 834 return;
838 } 835 }
839 /* 0 is possible, after f2fs_new_inode() is failed */ 836
837 /* remove potential inline_data blocks */
838 if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
839 S_ISLNK(inode->i_mode))
840 truncate_data_blocks_range(&dn, 1);
841
842 /* 0 is possible, after f2fs_new_inode() has failed */
840 f2fs_bug_on(inode->i_blocks != 0 && inode->i_blocks != 1); 843 f2fs_bug_on(inode->i_blocks != 0 && inode->i_blocks != 1);
841 set_new_dnode(&dn, inode, page, page, ino); 844
845 /* will put inode & node pages */
842 truncate_node(&dn); 846 truncate_node(&dn);
843} 847}
844 848
@@ -1129,8 +1133,11 @@ continue_unlock:
1129 set_fsync_mark(page, 0); 1133 set_fsync_mark(page, 0);
1130 set_dentry_mark(page, 0); 1134 set_dentry_mark(page, 0);
1131 } 1135 }
1132 NODE_MAPPING(sbi)->a_ops->writepage(page, wbc); 1136
1133 wrote++; 1137 if (NODE_MAPPING(sbi)->a_ops->writepage(page, wbc))
1138 unlock_page(page);
1139 else
1140 wrote++;
1134 1141
1135 if (--wbc->nr_to_write == 0) 1142 if (--wbc->nr_to_write == 0)
1136 break; 1143 break;
@@ -1212,6 +1219,8 @@ static int f2fs_write_node_page(struct page *page,
1212 1219
1213 if (unlikely(sbi->por_doing)) 1220 if (unlikely(sbi->por_doing))
1214 goto redirty_out; 1221 goto redirty_out;
1222 if (unlikely(f2fs_cp_error(sbi)))
1223 goto redirty_out;
1215 1224
1216 f2fs_wait_on_page_writeback(page, NODE); 1225 f2fs_wait_on_page_writeback(page, NODE);
1217 1226
@@ -1540,15 +1549,6 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid)
1540 kmem_cache_free(free_nid_slab, i); 1549 kmem_cache_free(free_nid_slab, i);
1541} 1550}
1542 1551
1543void recover_node_page(struct f2fs_sb_info *sbi, struct page *page,
1544 struct f2fs_summary *sum, struct node_info *ni,
1545 block_t new_blkaddr)
1546{
1547 rewrite_node_page(sbi, page, sum, ni->blk_addr, new_blkaddr);
1548 set_node_addr(sbi, ni, new_blkaddr, false);
1549 clear_node_page_dirty(page);
1550}
1551
1552void recover_inline_xattr(struct inode *inode, struct page *page) 1552void recover_inline_xattr(struct inode *inode, struct page *page)
1553{ 1553{
1554 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 1554 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
@@ -1557,40 +1557,33 @@ void recover_inline_xattr(struct inode *inode, struct page *page)
1557 struct page *ipage; 1557 struct page *ipage;
1558 struct f2fs_inode *ri; 1558 struct f2fs_inode *ri;
1559 1559
1560 if (!f2fs_has_inline_xattr(inode))
1561 return;
1562
1563 if (!IS_INODE(page))
1564 return;
1565
1566 ri = F2FS_INODE(page);
1567 if (!(ri->i_inline & F2FS_INLINE_XATTR))
1568 return;
1569
1570 ipage = get_node_page(sbi, inode->i_ino); 1560 ipage = get_node_page(sbi, inode->i_ino);
1571 f2fs_bug_on(IS_ERR(ipage)); 1561 f2fs_bug_on(IS_ERR(ipage));
1572 1562
1563 ri = F2FS_INODE(page);
1564 if (!(ri->i_inline & F2FS_INLINE_XATTR)) {
1565 clear_inode_flag(F2FS_I(inode), FI_INLINE_XATTR);
1566 goto update_inode;
1567 }
1568
1573 dst_addr = inline_xattr_addr(ipage); 1569 dst_addr = inline_xattr_addr(ipage);
1574 src_addr = inline_xattr_addr(page); 1570 src_addr = inline_xattr_addr(page);
1575 inline_size = inline_xattr_size(inode); 1571 inline_size = inline_xattr_size(inode);
1576 1572
1577 f2fs_wait_on_page_writeback(ipage, NODE); 1573 f2fs_wait_on_page_writeback(ipage, NODE);
1578 memcpy(dst_addr, src_addr, inline_size); 1574 memcpy(dst_addr, src_addr, inline_size);
1579 1575update_inode:
1580 update_inode(inode, ipage); 1576 update_inode(inode, ipage);
1581 f2fs_put_page(ipage, 1); 1577 f2fs_put_page(ipage, 1);
1582} 1578}
1583 1579
1584bool recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr) 1580void recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr)
1585{ 1581{
1586 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 1582 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
1587 nid_t prev_xnid = F2FS_I(inode)->i_xattr_nid; 1583 nid_t prev_xnid = F2FS_I(inode)->i_xattr_nid;
1588 nid_t new_xnid = nid_of_node(page); 1584 nid_t new_xnid = nid_of_node(page);
1589 struct node_info ni; 1585 struct node_info ni;
1590 1586
1591 if (!f2fs_has_xattr_block(ofs_of_node(page)))
1592 return false;
1593
1594 /* 1: invalidate the previous xattr nid */ 1587 /* 1: invalidate the previous xattr nid */
1595 if (!prev_xnid) 1588 if (!prev_xnid)
1596 goto recover_xnid; 1589 goto recover_xnid;
@@ -1618,7 +1611,6 @@ recover_xnid:
1618 set_node_addr(sbi, &ni, blkaddr, false); 1611 set_node_addr(sbi, &ni, blkaddr, false);
1619 1612
1620 update_inode_page(inode); 1613 update_inode_page(inode);
1621 return true;
1622} 1614}
1623 1615
1624int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) 1616int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
@@ -1637,7 +1629,7 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
1637 if (!ipage) 1629 if (!ipage)
1638 return -ENOMEM; 1630 return -ENOMEM;
1639 1631
1640 /* Should not use this inode from free nid list */ 1632 /* Should not use this inode from free nid list */
1641 remove_free_nid(NM_I(sbi), ino); 1633 remove_free_nid(NM_I(sbi), ino);
1642 1634
1643 SetPageUptodate(ipage); 1635 SetPageUptodate(ipage);
@@ -1651,6 +1643,7 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
1651 dst->i_blocks = cpu_to_le64(1); 1643 dst->i_blocks = cpu_to_le64(1);
1652 dst->i_links = cpu_to_le32(1); 1644 dst->i_links = cpu_to_le32(1);
1653 dst->i_xattr_nid = 0; 1645 dst->i_xattr_nid = 0;
1646 dst->i_inline = src->i_inline & F2FS_INLINE_XATTR;
1654 1647
1655 new_ni = old_ni; 1648 new_ni = old_ni;
1656 new_ni.ino = ino; 1649 new_ni.ino = ino;
@@ -1659,13 +1652,14 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
1659 WARN_ON(1); 1652 WARN_ON(1);
1660 set_node_addr(sbi, &new_ni, NEW_ADDR, false); 1653 set_node_addr(sbi, &new_ni, NEW_ADDR, false);
1661 inc_valid_inode_count(sbi); 1654 inc_valid_inode_count(sbi);
1655 set_page_dirty(ipage);
1662 f2fs_put_page(ipage, 1); 1656 f2fs_put_page(ipage, 1);
1663 return 0; 1657 return 0;
1664} 1658}
1665 1659
1666/* 1660/*
1667 * ra_sum_pages() merge contiguous pages into one bio and submit. 1661 * ra_sum_pages() merge contiguous pages into one bio and submit.
1668 * these pre-readed pages are alloced in bd_inode's mapping tree. 1662 * these pre-read pages are allocated in bd_inode's mapping tree.
1669 */ 1663 */
1670static int ra_sum_pages(struct f2fs_sb_info *sbi, struct page **pages, 1664static int ra_sum_pages(struct f2fs_sb_info *sbi, struct page **pages,
1671 int start, int nrpages) 1665 int start, int nrpages)
@@ -1709,7 +1703,7 @@ int restore_node_summary(struct f2fs_sb_info *sbi,
1709 for (i = 0; !err && i < last_offset; i += nrpages, addr += nrpages) { 1703 for (i = 0; !err && i < last_offset; i += nrpages, addr += nrpages) {
1710 nrpages = min(last_offset - i, bio_blocks); 1704 nrpages = min(last_offset - i, bio_blocks);
1711 1705
1712 /* read ahead node pages */ 1706 /* readahead node pages */
1713 nrpages = ra_sum_pages(sbi, pages, addr, nrpages); 1707 nrpages = ra_sum_pages(sbi, pages, addr, nrpages);
1714 if (!nrpages) 1708 if (!nrpages)
1715 return -ENOMEM; 1709 return -ENOMEM;
@@ -1967,7 +1961,7 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
1967 nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks; 1961 nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks;
1968 1962
1969 /* not used nids: 0, node, meta, (and root counted as valid node) */ 1963 /* not used nids: 0, node, meta, (and root counted as valid node) */
1970 nm_i->available_nids = nm_i->max_nid - 3; 1964 nm_i->available_nids = nm_i->max_nid - F2FS_RESERVED_NODE_NUM;
1971 nm_i->fcnt = 0; 1965 nm_i->fcnt = 0;
1972 nm_i->nat_cnt = 0; 1966 nm_i->nat_cnt = 0;
1973 nm_i->ram_thresh = DEF_RAM_THRESHOLD; 1967 nm_i->ram_thresh = DEF_RAM_THRESHOLD;
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index fe1c6d921ba2..756c41cd2582 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -62,8 +62,10 @@ static int recover_dentry(struct page *ipage, struct inode *inode)
62 } 62 }
63retry: 63retry:
64 de = f2fs_find_entry(dir, &name, &page); 64 de = f2fs_find_entry(dir, &name, &page);
65 if (de && inode->i_ino == le32_to_cpu(de->ino)) 65 if (de && inode->i_ino == le32_to_cpu(de->ino)) {
66 clear_inode_flag(F2FS_I(inode), FI_INC_LINK);
66 goto out_unmap_put; 67 goto out_unmap_put;
68 }
67 if (de) { 69 if (de) {
68 einode = f2fs_iget(inode->i_sb, le32_to_cpu(de->ino)); 70 einode = f2fs_iget(inode->i_sb, le32_to_cpu(de->ino));
69 if (IS_ERR(einode)) { 71 if (IS_ERR(einode)) {
@@ -300,14 +302,19 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
300 struct node_info ni; 302 struct node_info ni;
301 int err = 0, recovered = 0; 303 int err = 0, recovered = 0;
302 304
303 recover_inline_xattr(inode, page); 305 /* step 1: recover xattr */
304 306 if (IS_INODE(page)) {
305 if (recover_inline_data(inode, page)) 307 recover_inline_xattr(inode, page);
308 } else if (f2fs_has_xattr_block(ofs_of_node(page))) {
309 recover_xattr_data(inode, page, blkaddr);
306 goto out; 310 goto out;
311 }
307 312
308 if (recover_xattr_data(inode, page, blkaddr)) 313 /* step 2: recover inline data */
314 if (recover_inline_data(inode, page))
309 goto out; 315 goto out;
310 316
317 /* step 3: recover data indices */
311 start = start_bidx_of_node(ofs_of_node(page), fi); 318 start = start_bidx_of_node(ofs_of_node(page), fi);
312 end = start + ADDRS_PER_PAGE(page, fi); 319 end = start + ADDRS_PER_PAGE(page, fi);
313 320
@@ -364,8 +371,6 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
364 fill_node_footer(dn.node_page, dn.nid, ni.ino, 371 fill_node_footer(dn.node_page, dn.nid, ni.ino,
365 ofs_of_node(page), false); 372 ofs_of_node(page), false);
366 set_page_dirty(dn.node_page); 373 set_page_dirty(dn.node_page);
367
368 recover_node_page(sbi, dn.node_page, &sum, &ni, blkaddr);
369err: 374err:
370 f2fs_put_dnode(&dn); 375 f2fs_put_dnode(&dn);
371 f2fs_unlock_op(sbi); 376 f2fs_unlock_op(sbi);
@@ -452,6 +457,9 @@ int recover_fsync_data(struct f2fs_sb_info *sbi)
452 /* step #1: find fsynced inode numbers */ 457 /* step #1: find fsynced inode numbers */
453 sbi->por_doing = true; 458 sbi->por_doing = true;
454 459
460 /* prevent checkpoint */
461 mutex_lock(&sbi->cp_mutex);
462
455 blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); 463 blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
456 464
457 err = find_fsync_dnodes(sbi, &inode_list); 465 err = find_fsync_dnodes(sbi, &inode_list);
@@ -465,7 +473,8 @@ int recover_fsync_data(struct f2fs_sb_info *sbi)
465 473
466 /* step #2: recover data */ 474 /* step #2: recover data */
467 err = recover_data(sbi, &inode_list, CURSEG_WARM_NODE); 475 err = recover_data(sbi, &inode_list, CURSEG_WARM_NODE);
468 f2fs_bug_on(!list_empty(&inode_list)); 476 if (!err)
477 f2fs_bug_on(!list_empty(&inode_list));
469out: 478out:
470 destroy_fsync_dnodes(&inode_list); 479 destroy_fsync_dnodes(&inode_list);
471 kmem_cache_destroy(fsync_entry_slab); 480 kmem_cache_destroy(fsync_entry_slab);
@@ -482,8 +491,13 @@ out:
482 /* Flush all the NAT/SIT pages */ 491 /* Flush all the NAT/SIT pages */
483 while (get_pages(sbi, F2FS_DIRTY_META)) 492 while (get_pages(sbi, F2FS_DIRTY_META))
484 sync_meta_pages(sbi, META, LONG_MAX); 493 sync_meta_pages(sbi, META, LONG_MAX);
494 set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG);
495 mutex_unlock(&sbi->cp_mutex);
485 } else if (need_writecp) { 496 } else if (need_writecp) {
497 mutex_unlock(&sbi->cp_mutex);
486 write_checkpoint(sbi, false); 498 write_checkpoint(sbi, false);
499 } else {
500 mutex_unlock(&sbi->cp_mutex);
487 } 501 }
488 return err; 502 return err;
489} 503}
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 0dfeebae2a50..0aa337cd5bba 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -62,7 +62,7 @@ static inline unsigned long __reverse_ffs(unsigned long word)
62} 62}
63 63
64/* 64/*
65 * __find_rev_next(_zero)_bit is copied from lib/find_next_bit.c becasue 65 * __find_rev_next(_zero)_bit is copied from lib/find_next_bit.c because
66 * f2fs_set_bit makes MSB and LSB reversed in a byte. 66 * f2fs_set_bit makes MSB and LSB reversed in a byte.
67 * Example: 67 * Example:
68 * LSB <--> MSB 68 * LSB <--> MSB
@@ -808,7 +808,7 @@ static void __refresh_next_blkoff(struct f2fs_sb_info *sbi,
808} 808}
809 809
810/* 810/*
811 * This function always allocates a used segment (from dirty seglist) by SSR 811 * This function always allocates a used segment(from dirty seglist) by SSR
812 * manner, so it should recover the existing segment information of valid blocks 812 * manner, so it should recover the existing segment information of valid blocks
813 */ 813 */
814static void change_curseg(struct f2fs_sb_info *sbi, int type, bool reuse) 814static void change_curseg(struct f2fs_sb_info *sbi, int type, bool reuse)
@@ -1103,55 +1103,6 @@ void recover_data_page(struct f2fs_sb_info *sbi,
1103 mutex_unlock(&curseg->curseg_mutex); 1103 mutex_unlock(&curseg->curseg_mutex);
1104} 1104}
1105 1105
1106void rewrite_node_page(struct f2fs_sb_info *sbi,
1107 struct page *page, struct f2fs_summary *sum,
1108 block_t old_blkaddr, block_t new_blkaddr)
1109{
1110 struct sit_info *sit_i = SIT_I(sbi);
1111 int type = CURSEG_WARM_NODE;
1112 struct curseg_info *curseg;
1113 unsigned int segno, old_cursegno;
1114 block_t next_blkaddr = next_blkaddr_of_node(page);
1115 unsigned int next_segno = GET_SEGNO(sbi, next_blkaddr);
1116 struct f2fs_io_info fio = {
1117 .type = NODE,
1118 .rw = WRITE_SYNC,
1119 };
1120
1121 curseg = CURSEG_I(sbi, type);
1122
1123 mutex_lock(&curseg->curseg_mutex);
1124 mutex_lock(&sit_i->sentry_lock);
1125
1126 segno = GET_SEGNO(sbi, new_blkaddr);
1127 old_cursegno = curseg->segno;
1128
1129 /* change the current segment */
1130 if (segno != curseg->segno) {
1131 curseg->next_segno = segno;
1132 change_curseg(sbi, type, true);
1133 }
1134 curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr);
1135 __add_sum_entry(sbi, type, sum);
1136
1137 /* change the current log to the next block addr in advance */
1138 if (next_segno != segno) {
1139 curseg->next_segno = next_segno;
1140 change_curseg(sbi, type, true);
1141 }
1142 curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, next_blkaddr);
1143
1144 /* rewrite node page */
1145 set_page_writeback(page);
1146 f2fs_submit_page_mbio(sbi, page, new_blkaddr, &fio);
1147 f2fs_submit_merged_bio(sbi, NODE, WRITE);
1148 refresh_sit_entry(sbi, old_blkaddr, new_blkaddr);
1149 locate_dirty_segment(sbi, old_cursegno);
1150
1151 mutex_unlock(&sit_i->sentry_lock);
1152 mutex_unlock(&curseg->curseg_mutex);
1153}
1154
1155static inline bool is_merged_page(struct f2fs_sb_info *sbi, 1106static inline bool is_merged_page(struct f2fs_sb_info *sbi,
1156 struct page *page, enum page_type type) 1107 struct page *page, enum page_type type)
1157{ 1108{
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 55973f7b0330..ff483257283b 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -549,7 +549,7 @@ static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t blk_addr)
549} 549}
550 550
551/* 551/*
552 * Summary block is always treated as invalid block 552 * Summary block is always treated as an invalid block
553 */ 553 */
554static inline void check_block_count(struct f2fs_sb_info *sbi, 554static inline void check_block_count(struct f2fs_sb_info *sbi,
555 int segno, struct f2fs_sit_entry *raw_sit) 555 int segno, struct f2fs_sit_entry *raw_sit)
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 657582fc7601..41bdf511003d 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -432,9 +432,15 @@ static void f2fs_put_super(struct super_block *sb)
432 stop_gc_thread(sbi); 432 stop_gc_thread(sbi);
433 433
434 /* We don't need to do checkpoint when it's clean */ 434 /* We don't need to do checkpoint when it's clean */
435 if (sbi->s_dirty && get_pages(sbi, F2FS_DIRTY_NODES)) 435 if (sbi->s_dirty)
436 write_checkpoint(sbi, true); 436 write_checkpoint(sbi, true);
437 437
438 /*
439 * normally superblock is clean, so we need to release this.
440 * In addition, EIO will skip do checkpoint, we need this as well.
441 */
442 release_dirty_inode(sbi);
443
438 iput(sbi->node_inode); 444 iput(sbi->node_inode);
439 iput(sbi->meta_inode); 445 iput(sbi->meta_inode);
440 446
@@ -457,9 +463,6 @@ int f2fs_sync_fs(struct super_block *sb, int sync)
457 463
458 trace_f2fs_sync_fs(sb, sync); 464 trace_f2fs_sync_fs(sb, sync);
459 465
460 if (!sbi->s_dirty && !get_pages(sbi, F2FS_DIRTY_NODES))
461 return 0;
462
463 if (sync) { 466 if (sync) {
464 mutex_lock(&sbi->gc_mutex); 467 mutex_lock(&sbi->gc_mutex);
465 write_checkpoint(sbi, false); 468 write_checkpoint(sbi, false);
@@ -505,8 +508,8 @@ static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf)
505 buf->f_bfree = buf->f_blocks - valid_user_blocks(sbi) - ovp_count; 508 buf->f_bfree = buf->f_blocks - valid_user_blocks(sbi) - ovp_count;
506 buf->f_bavail = user_block_count - valid_user_blocks(sbi); 509 buf->f_bavail = user_block_count - valid_user_blocks(sbi);
507 510
508 buf->f_files = sbi->total_node_count; 511 buf->f_files = sbi->total_node_count - F2FS_RESERVED_NODE_NUM;
509 buf->f_ffree = sbi->total_node_count - valid_inode_count(sbi); 512 buf->f_ffree = buf->f_files - valid_inode_count(sbi);
510 513
511 buf->f_namelen = F2FS_NAME_LEN; 514 buf->f_namelen = F2FS_NAME_LEN;
512 buf->f_fsid.val[0] = (u32)id; 515 buf->f_fsid.val[0] = (u32)id;
@@ -663,7 +666,7 @@ restore_gc:
663 if (need_restart_gc) { 666 if (need_restart_gc) {
664 if (start_gc_thread(sbi)) 667 if (start_gc_thread(sbi))
665 f2fs_msg(sbi->sb, KERN_WARNING, 668 f2fs_msg(sbi->sb, KERN_WARNING,
666 "background gc thread is stop"); 669 "background gc thread has stopped");
667 } else if (need_stop_gc) { 670 } else if (need_stop_gc) {
668 stop_gc_thread(sbi); 671 stop_gc_thread(sbi);
669 } 672 }
@@ -812,7 +815,7 @@ static int sanity_check_ckpt(struct f2fs_sb_info *sbi)
812 if (unlikely(fsmeta >= total)) 815 if (unlikely(fsmeta >= total))
813 return 1; 816 return 1;
814 817
815 if (unlikely(is_set_ckpt_flags(ckpt, CP_ERROR_FLAG))) { 818 if (unlikely(f2fs_cp_error(sbi))) {
816 f2fs_msg(sbi->sb, KERN_ERR, "A bug case: need to run fsck"); 819 f2fs_msg(sbi->sb, KERN_ERR, "A bug case: need to run fsck");
817 return 1; 820 return 1;
818 } 821 }
@@ -899,8 +902,10 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
899 struct buffer_head *raw_super_buf; 902 struct buffer_head *raw_super_buf;
900 struct inode *root; 903 struct inode *root;
901 long err = -EINVAL; 904 long err = -EINVAL;
905 bool retry = true;
902 int i; 906 int i;
903 907
908try_onemore:
904 /* allocate memory for f2fs-specific super block info */ 909 /* allocate memory for f2fs-specific super block info */
905 sbi = kzalloc(sizeof(struct f2fs_sb_info), GFP_KERNEL); 910 sbi = kzalloc(sizeof(struct f2fs_sb_info), GFP_KERNEL);
906 if (!sbi) 911 if (!sbi)
@@ -1080,9 +1085,11 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
1080 /* recover fsynced data */ 1085 /* recover fsynced data */
1081 if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) { 1086 if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) {
1082 err = recover_fsync_data(sbi); 1087 err = recover_fsync_data(sbi);
1083 if (err) 1088 if (err) {
1084 f2fs_msg(sb, KERN_ERR, 1089 f2fs_msg(sb, KERN_ERR,
1085 "Cannot recover all fsync data errno=%ld", err); 1090 "Cannot recover all fsync data errno=%ld", err);
1091 goto free_kobj;
1092 }
1086 } 1093 }
1087 1094
1088 /* 1095 /*
@@ -1123,6 +1130,13 @@ free_sb_buf:
1123 brelse(raw_super_buf); 1130 brelse(raw_super_buf);
1124free_sbi: 1131free_sbi:
1125 kfree(sbi); 1132 kfree(sbi);
1133
1134 /* give only one another chance */
1135 if (retry) {
1136 retry = 0;
1137 shrink_dcache_sb(sb);
1138 goto try_onemore;
1139 }
1126 return err; 1140 return err;
1127} 1141}
1128 1142
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
index 8bea941ee309..728a5dc3dc16 100644
--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c
@@ -528,7 +528,7 @@ static int __f2fs_setxattr(struct inode *inode, int index,
528 int free; 528 int free;
529 /* 529 /*
530 * If value is NULL, it is remove operation. 530 * If value is NULL, it is remove operation.
531 * In case of update operation, we caculate free. 531 * In case of update operation, we calculate free.
532 */ 532 */
533 free = MIN_OFFSET(inode) - ((char *)last - (char *)base_addr); 533 free = MIN_OFFSET(inode) - ((char *)last - (char *)base_addr);
534 if (found) 534 if (found)
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index e6ee5b6e8d99..f0b945ab853e 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -359,7 +359,7 @@ static inline void release_metapath(struct metapath *mp)
359 * Returns: The length of the extent (minimum of one block) 359 * Returns: The length of the extent (minimum of one block)
360 */ 360 */
361 361
362static inline unsigned int gfs2_extent_length(void *start, unsigned int len, __be64 *ptr, unsigned limit, int *eob) 362static inline unsigned int gfs2_extent_length(void *start, unsigned int len, __be64 *ptr, size_t limit, int *eob)
363{ 363{
364 const __be64 *end = (start + len); 364 const __be64 *end = (start + len);
365 const __be64 *first = ptr; 365 const __be64 *first = ptr;
@@ -449,7 +449,7 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock,
449 struct buffer_head *bh_map, struct metapath *mp, 449 struct buffer_head *bh_map, struct metapath *mp,
450 const unsigned int sheight, 450 const unsigned int sheight,
451 const unsigned int height, 451 const unsigned int height,
452 const unsigned int maxlen) 452 const size_t maxlen)
453{ 453{
454 struct gfs2_inode *ip = GFS2_I(inode); 454 struct gfs2_inode *ip = GFS2_I(inode);
455 struct gfs2_sbd *sdp = GFS2_SB(inode); 455 struct gfs2_sbd *sdp = GFS2_SB(inode);
@@ -483,7 +483,8 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock,
483 } else { 483 } else {
484 /* Need to allocate indirect blocks */ 484 /* Need to allocate indirect blocks */
485 ptrs_per_blk = height > 1 ? sdp->sd_inptrs : sdp->sd_diptrs; 485 ptrs_per_blk = height > 1 ? sdp->sd_inptrs : sdp->sd_diptrs;
486 dblks = min(maxlen, ptrs_per_blk - mp->mp_list[end_of_metadata]); 486 dblks = min(maxlen, (size_t)(ptrs_per_blk -
487 mp->mp_list[end_of_metadata]));
487 if (height == ip->i_height) { 488 if (height == ip->i_height) {
488 /* Writing into existing tree, extend tree down */ 489 /* Writing into existing tree, extend tree down */
489 iblks = height - sheight; 490 iblks = height - sheight;
@@ -605,7 +606,7 @@ int gfs2_block_map(struct inode *inode, sector_t lblock,
605 struct gfs2_inode *ip = GFS2_I(inode); 606 struct gfs2_inode *ip = GFS2_I(inode);
606 struct gfs2_sbd *sdp = GFS2_SB(inode); 607 struct gfs2_sbd *sdp = GFS2_SB(inode);
607 unsigned int bsize = sdp->sd_sb.sb_bsize; 608 unsigned int bsize = sdp->sd_sb.sb_bsize;
608 const unsigned int maxlen = bh_map->b_size >> inode->i_blkbits; 609 const size_t maxlen = bh_map->b_size >> inode->i_blkbits;
609 const u64 *arr = sdp->sd_heightsize; 610 const u64 *arr = sdp->sd_heightsize;
610 __be64 *ptr; 611 __be64 *ptr;
611 u64 size; 612 u64 size;
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 26b3f952e6b1..7f4ed3daa38c 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -26,6 +26,7 @@
26#include <linux/dlm.h> 26#include <linux/dlm.h>
27#include <linux/dlm_plock.h> 27#include <linux/dlm_plock.h>
28#include <linux/aio.h> 28#include <linux/aio.h>
29#include <linux/delay.h>
29 30
30#include "gfs2.h" 31#include "gfs2.h"
31#include "incore.h" 32#include "incore.h"
@@ -979,9 +980,10 @@ static int do_flock(struct file *file, int cmd, struct file_lock *fl)
979 unsigned int state; 980 unsigned int state;
980 int flags; 981 int flags;
981 int error = 0; 982 int error = 0;
983 int sleeptime;
982 984
983 state = (fl->fl_type == F_WRLCK) ? LM_ST_EXCLUSIVE : LM_ST_SHARED; 985 state = (fl->fl_type == F_WRLCK) ? LM_ST_EXCLUSIVE : LM_ST_SHARED;
984 flags = (IS_SETLKW(cmd) ? 0 : LM_FLAG_TRY) | GL_EXACT; 986 flags = (IS_SETLKW(cmd) ? 0 : LM_FLAG_TRY_1CB) | GL_EXACT;
985 987
986 mutex_lock(&fp->f_fl_mutex); 988 mutex_lock(&fp->f_fl_mutex);
987 989
@@ -1001,7 +1003,14 @@ static int do_flock(struct file *file, int cmd, struct file_lock *fl)
1001 gfs2_holder_init(gl, state, flags, fl_gh); 1003 gfs2_holder_init(gl, state, flags, fl_gh);
1002 gfs2_glock_put(gl); 1004 gfs2_glock_put(gl);
1003 } 1005 }
1004 error = gfs2_glock_nq(fl_gh); 1006 for (sleeptime = 1; sleeptime <= 4; sleeptime <<= 1) {
1007 error = gfs2_glock_nq(fl_gh);
1008 if (error != GLR_TRYFAILED)
1009 break;
1010 fl_gh->gh_flags = LM_FLAG_TRY | GL_EXACT;
1011 fl_gh->gh_error = 0;
1012 msleep(sleeptime);
1013 }
1005 if (error) { 1014 if (error) {
1006 gfs2_holder_uninit(fl_gh); 1015 gfs2_holder_uninit(fl_gh);
1007 if (error == GLR_TRYFAILED) 1016 if (error == GLR_TRYFAILED)
@@ -1024,7 +1033,7 @@ static void do_unflock(struct file *file, struct file_lock *fl)
1024 mutex_lock(&fp->f_fl_mutex); 1033 mutex_lock(&fp->f_fl_mutex);
1025 flock_lock_file_wait(file, fl); 1034 flock_lock_file_wait(file, fl);
1026 if (fl_gh->gh_gl) { 1035 if (fl_gh->gh_gl) {
1027 gfs2_glock_dq_wait(fl_gh); 1036 gfs2_glock_dq(fl_gh);
1028 gfs2_holder_uninit(fl_gh); 1037 gfs2_holder_uninit(fl_gh);
1029 } 1038 }
1030 mutex_unlock(&fp->f_fl_mutex); 1039 mutex_unlock(&fp->f_fl_mutex);
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 67d310c9ada3..39e7e9959b74 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -262,6 +262,9 @@ struct gfs2_holder {
262 unsigned long gh_ip; 262 unsigned long gh_ip;
263}; 263};
264 264
265/* Number of quota types we support */
266#define GFS2_MAXQUOTAS 2
267
265/* Resource group multi-block reservation, in order of appearance: 268/* Resource group multi-block reservation, in order of appearance:
266 269
267 Step 1. Function prepares to write, allocates a mb, sets the size hint. 270 Step 1. Function prepares to write, allocates a mb, sets the size hint.
@@ -282,8 +285,8 @@ struct gfs2_blkreserv {
282 u64 rs_inum; /* Inode number for reservation */ 285 u64 rs_inum; /* Inode number for reservation */
283 286
284 /* ancillary quota stuff */ 287 /* ancillary quota stuff */
285 struct gfs2_quota_data *rs_qa_qd[2 * MAXQUOTAS]; 288 struct gfs2_quota_data *rs_qa_qd[2 * GFS2_MAXQUOTAS];
286 struct gfs2_holder rs_qa_qd_ghs[2 * MAXQUOTAS]; 289 struct gfs2_holder rs_qa_qd_ghs[2 * GFS2_MAXQUOTAS];
287 unsigned int rs_qa_qd_num; 290 unsigned int rs_qa_qd_num;
288}; 291};
289 292
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index e62e59477884..fc8ac2ee0667 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -626,8 +626,10 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
626 if (!IS_ERR(inode)) { 626 if (!IS_ERR(inode)) {
627 d = d_splice_alias(inode, dentry); 627 d = d_splice_alias(inode, dentry);
628 error = PTR_ERR(d); 628 error = PTR_ERR(d);
629 if (IS_ERR(d)) 629 if (IS_ERR(d)) {
630 inode = ERR_CAST(d);
630 goto fail_gunlock; 631 goto fail_gunlock;
632 }
631 error = 0; 633 error = 0;
632 if (file) { 634 if (file) {
633 if (S_ISREG(inode->i_mode)) { 635 if (S_ISREG(inode->i_mode)) {
@@ -840,8 +842,10 @@ static struct dentry *__gfs2_lookup(struct inode *dir, struct dentry *dentry,
840 int error; 842 int error;
841 843
842 inode = gfs2_lookupi(dir, &dentry->d_name, 0); 844 inode = gfs2_lookupi(dir, &dentry->d_name, 0);
843 if (!inode) 845 if (inode == NULL) {
846 d_add(dentry, NULL);
844 return NULL; 847 return NULL;
848 }
845 if (IS_ERR(inode)) 849 if (IS_ERR(inode))
846 return ERR_CAST(inode); 850 return ERR_CAST(inode);
847 851
@@ -854,7 +858,6 @@ static struct dentry *__gfs2_lookup(struct inode *dir, struct dentry *dentry,
854 858
855 d = d_splice_alias(inode, dentry); 859 d = d_splice_alias(inode, dentry);
856 if (IS_ERR(d)) { 860 if (IS_ERR(d)) {
857 iput(inode);
858 gfs2_glock_dq_uninit(&gh); 861 gfs2_glock_dq_uninit(&gh);
859 return d; 862 return d;
860 } 863 }
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 2607ff13d486..a346f56c4c6d 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -1294,7 +1294,7 @@ static int gfs2_show_options(struct seq_file *s, struct dentry *root)
1294 int val; 1294 int val;
1295 1295
1296 if (is_ancestor(root, sdp->sd_master_dir)) 1296 if (is_ancestor(root, sdp->sd_master_dir))
1297 seq_printf(s, ",meta"); 1297 seq_puts(s, ",meta");
1298 if (args->ar_lockproto[0]) 1298 if (args->ar_lockproto[0])
1299 seq_printf(s, ",lockproto=%s", args->ar_lockproto); 1299 seq_printf(s, ",lockproto=%s", args->ar_lockproto);
1300 if (args->ar_locktable[0]) 1300 if (args->ar_locktable[0])
@@ -1302,13 +1302,13 @@ static int gfs2_show_options(struct seq_file *s, struct dentry *root)
1302 if (args->ar_hostdata[0]) 1302 if (args->ar_hostdata[0])
1303 seq_printf(s, ",hostdata=%s", args->ar_hostdata); 1303 seq_printf(s, ",hostdata=%s", args->ar_hostdata);
1304 if (args->ar_spectator) 1304 if (args->ar_spectator)
1305 seq_printf(s, ",spectator"); 1305 seq_puts(s, ",spectator");
1306 if (args->ar_localflocks) 1306 if (args->ar_localflocks)
1307 seq_printf(s, ",localflocks"); 1307 seq_puts(s, ",localflocks");
1308 if (args->ar_debug) 1308 if (args->ar_debug)
1309 seq_printf(s, ",debug"); 1309 seq_puts(s, ",debug");
1310 if (args->ar_posix_acl) 1310 if (args->ar_posix_acl)
1311 seq_printf(s, ",acl"); 1311 seq_puts(s, ",acl");
1312 if (args->ar_quota != GFS2_QUOTA_DEFAULT) { 1312 if (args->ar_quota != GFS2_QUOTA_DEFAULT) {
1313 char *state; 1313 char *state;
1314 switch (args->ar_quota) { 1314 switch (args->ar_quota) {
@@ -1328,7 +1328,7 @@ static int gfs2_show_options(struct seq_file *s, struct dentry *root)
1328 seq_printf(s, ",quota=%s", state); 1328 seq_printf(s, ",quota=%s", state);
1329 } 1329 }
1330 if (args->ar_suiddir) 1330 if (args->ar_suiddir)
1331 seq_printf(s, ",suiddir"); 1331 seq_puts(s, ",suiddir");
1332 if (args->ar_data != GFS2_DATA_DEFAULT) { 1332 if (args->ar_data != GFS2_DATA_DEFAULT) {
1333 char *state; 1333 char *state;
1334 switch (args->ar_data) { 1334 switch (args->ar_data) {
@@ -1345,7 +1345,7 @@ static int gfs2_show_options(struct seq_file *s, struct dentry *root)
1345 seq_printf(s, ",data=%s", state); 1345 seq_printf(s, ",data=%s", state);
1346 } 1346 }
1347 if (args->ar_discard) 1347 if (args->ar_discard)
1348 seq_printf(s, ",discard"); 1348 seq_puts(s, ",discard");
1349 val = sdp->sd_tune.gt_logd_secs; 1349 val = sdp->sd_tune.gt_logd_secs;
1350 if (val != 30) 1350 if (val != 30)
1351 seq_printf(s, ",commit=%d", val); 1351 seq_printf(s, ",commit=%d", val);
@@ -1376,11 +1376,11 @@ static int gfs2_show_options(struct seq_file *s, struct dentry *root)
1376 seq_printf(s, ",errors=%s", state); 1376 seq_printf(s, ",errors=%s", state);
1377 } 1377 }
1378 if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags)) 1378 if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags))
1379 seq_printf(s, ",nobarrier"); 1379 seq_puts(s, ",nobarrier");
1380 if (test_bit(SDF_DEMOTE, &sdp->sd_flags)) 1380 if (test_bit(SDF_DEMOTE, &sdp->sd_flags))
1381 seq_printf(s, ",demote_interface_used"); 1381 seq_puts(s, ",demote_interface_used");
1382 if (args->ar_rgrplvb) 1382 if (args->ar_rgrplvb)
1383 seq_printf(s, ",rgrplvb"); 1383 seq_puts(s, ",rgrplvb");
1384 return 0; 1384 return 0;
1385} 1385}
1386 1386
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index 4556ce1af5b0..5ddaf8625d3b 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -61,7 +61,7 @@ static void isofs_put_super(struct super_block *sb)
61 return; 61 return;
62} 62}
63 63
64static int isofs_read_inode(struct inode *); 64static int isofs_read_inode(struct inode *, int relocated);
65static int isofs_statfs (struct dentry *, struct kstatfs *); 65static int isofs_statfs (struct dentry *, struct kstatfs *);
66 66
67static struct kmem_cache *isofs_inode_cachep; 67static struct kmem_cache *isofs_inode_cachep;
@@ -1259,7 +1259,7 @@ out_toomany:
1259 goto out; 1259 goto out;
1260} 1260}
1261 1261
1262static int isofs_read_inode(struct inode *inode) 1262static int isofs_read_inode(struct inode *inode, int relocated)
1263{ 1263{
1264 struct super_block *sb = inode->i_sb; 1264 struct super_block *sb = inode->i_sb;
1265 struct isofs_sb_info *sbi = ISOFS_SB(sb); 1265 struct isofs_sb_info *sbi = ISOFS_SB(sb);
@@ -1404,7 +1404,7 @@ static int isofs_read_inode(struct inode *inode)
1404 */ 1404 */
1405 1405
1406 if (!high_sierra) { 1406 if (!high_sierra) {
1407 parse_rock_ridge_inode(de, inode); 1407 parse_rock_ridge_inode(de, inode, relocated);
1408 /* if we want uid/gid set, override the rock ridge setting */ 1408 /* if we want uid/gid set, override the rock ridge setting */
1409 if (sbi->s_uid_set) 1409 if (sbi->s_uid_set)
1410 inode->i_uid = sbi->s_uid; 1410 inode->i_uid = sbi->s_uid;
@@ -1483,9 +1483,10 @@ static int isofs_iget5_set(struct inode *ino, void *data)
1483 * offset that point to the underlying meta-data for the inode. The 1483 * offset that point to the underlying meta-data for the inode. The
1484 * code below is otherwise similar to the iget() code in 1484 * code below is otherwise similar to the iget() code in
1485 * include/linux/fs.h */ 1485 * include/linux/fs.h */
1486struct inode *isofs_iget(struct super_block *sb, 1486struct inode *__isofs_iget(struct super_block *sb,
1487 unsigned long block, 1487 unsigned long block,
1488 unsigned long offset) 1488 unsigned long offset,
1489 int relocated)
1489{ 1490{
1490 unsigned long hashval; 1491 unsigned long hashval;
1491 struct inode *inode; 1492 struct inode *inode;
@@ -1507,7 +1508,7 @@ struct inode *isofs_iget(struct super_block *sb,
1507 return ERR_PTR(-ENOMEM); 1508 return ERR_PTR(-ENOMEM);
1508 1509
1509 if (inode->i_state & I_NEW) { 1510 if (inode->i_state & I_NEW) {
1510 ret = isofs_read_inode(inode); 1511 ret = isofs_read_inode(inode, relocated);
1511 if (ret < 0) { 1512 if (ret < 0) {
1512 iget_failed(inode); 1513 iget_failed(inode);
1513 inode = ERR_PTR(ret); 1514 inode = ERR_PTR(ret);
diff --git a/fs/isofs/isofs.h b/fs/isofs/isofs.h
index 99167238518d..0ac4c1f73fbd 100644
--- a/fs/isofs/isofs.h
+++ b/fs/isofs/isofs.h
@@ -107,7 +107,7 @@ extern int iso_date(char *, int);
107 107
108struct inode; /* To make gcc happy */ 108struct inode; /* To make gcc happy */
109 109
110extern int parse_rock_ridge_inode(struct iso_directory_record *, struct inode *); 110extern int parse_rock_ridge_inode(struct iso_directory_record *, struct inode *, int relocated);
111extern int get_rock_ridge_filename(struct iso_directory_record *, char *, struct inode *); 111extern int get_rock_ridge_filename(struct iso_directory_record *, char *, struct inode *);
112extern int isofs_name_translate(struct iso_directory_record *, char *, struct inode *); 112extern int isofs_name_translate(struct iso_directory_record *, char *, struct inode *);
113 113
@@ -118,9 +118,24 @@ extern struct dentry *isofs_lookup(struct inode *, struct dentry *, unsigned int
118extern struct buffer_head *isofs_bread(struct inode *, sector_t); 118extern struct buffer_head *isofs_bread(struct inode *, sector_t);
119extern int isofs_get_blocks(struct inode *, sector_t, struct buffer_head **, unsigned long); 119extern int isofs_get_blocks(struct inode *, sector_t, struct buffer_head **, unsigned long);
120 120
121extern struct inode *isofs_iget(struct super_block *sb, 121struct inode *__isofs_iget(struct super_block *sb,
122 unsigned long block, 122 unsigned long block,
123 unsigned long offset); 123 unsigned long offset,
124 int relocated);
125
126static inline struct inode *isofs_iget(struct super_block *sb,
127 unsigned long block,
128 unsigned long offset)
129{
130 return __isofs_iget(sb, block, offset, 0);
131}
132
133static inline struct inode *isofs_iget_reloc(struct super_block *sb,
134 unsigned long block,
135 unsigned long offset)
136{
137 return __isofs_iget(sb, block, offset, 1);
138}
124 139
125/* Because the inode number is no longer relevant to finding the 140/* Because the inode number is no longer relevant to finding the
126 * underlying meta-data for an inode, we are free to choose a more 141 * underlying meta-data for an inode, we are free to choose a more
diff --git a/fs/isofs/rock.c b/fs/isofs/rock.c
index c0bf42472e40..f488bbae541a 100644
--- a/fs/isofs/rock.c
+++ b/fs/isofs/rock.c
@@ -288,12 +288,16 @@ eio:
288 goto out; 288 goto out;
289} 289}
290 290
291#define RR_REGARD_XA 1
292#define RR_RELOC_DE 2
293
291static int 294static int
292parse_rock_ridge_inode_internal(struct iso_directory_record *de, 295parse_rock_ridge_inode_internal(struct iso_directory_record *de,
293 struct inode *inode, int regard_xa) 296 struct inode *inode, int flags)
294{ 297{
295 int symlink_len = 0; 298 int symlink_len = 0;
296 int cnt, sig; 299 int cnt, sig;
300 unsigned int reloc_block;
297 struct inode *reloc; 301 struct inode *reloc;
298 struct rock_ridge *rr; 302 struct rock_ridge *rr;
299 int rootflag; 303 int rootflag;
@@ -305,7 +309,7 @@ parse_rock_ridge_inode_internal(struct iso_directory_record *de,
305 309
306 init_rock_state(&rs, inode); 310 init_rock_state(&rs, inode);
307 setup_rock_ridge(de, inode, &rs); 311 setup_rock_ridge(de, inode, &rs);
308 if (regard_xa) { 312 if (flags & RR_REGARD_XA) {
309 rs.chr += 14; 313 rs.chr += 14;
310 rs.len -= 14; 314 rs.len -= 14;
311 if (rs.len < 0) 315 if (rs.len < 0)
@@ -485,12 +489,22 @@ repeat:
485 "relocated directory\n"); 489 "relocated directory\n");
486 goto out; 490 goto out;
487 case SIG('C', 'L'): 491 case SIG('C', 'L'):
488 ISOFS_I(inode)->i_first_extent = 492 if (flags & RR_RELOC_DE) {
489 isonum_733(rr->u.CL.location); 493 printk(KERN_ERR
490 reloc = 494 "ISOFS: Recursive directory relocation "
491 isofs_iget(inode->i_sb, 495 "is not supported\n");
492 ISOFS_I(inode)->i_first_extent, 496 goto eio;
493 0); 497 }
498 reloc_block = isonum_733(rr->u.CL.location);
499 if (reloc_block == ISOFS_I(inode)->i_iget5_block &&
500 ISOFS_I(inode)->i_iget5_offset == 0) {
501 printk(KERN_ERR
502 "ISOFS: Directory relocation points to "
503 "itself\n");
504 goto eio;
505 }
506 ISOFS_I(inode)->i_first_extent = reloc_block;
507 reloc = isofs_iget_reloc(inode->i_sb, reloc_block, 0);
494 if (IS_ERR(reloc)) { 508 if (IS_ERR(reloc)) {
495 ret = PTR_ERR(reloc); 509 ret = PTR_ERR(reloc);
496 goto out; 510 goto out;
@@ -637,9 +651,11 @@ static char *get_symlink_chunk(char *rpnt, struct rock_ridge *rr, char *plimit)
637 return rpnt; 651 return rpnt;
638} 652}
639 653
640int parse_rock_ridge_inode(struct iso_directory_record *de, struct inode *inode) 654int parse_rock_ridge_inode(struct iso_directory_record *de, struct inode *inode,
655 int relocated)
641{ 656{
642 int result = parse_rock_ridge_inode_internal(de, inode, 0); 657 int flags = relocated ? RR_RELOC_DE : 0;
658 int result = parse_rock_ridge_inode_internal(de, inode, flags);
643 659
644 /* 660 /*
645 * if rockridge flag was reset and we didn't look for attributes 661 * if rockridge flag was reset and we didn't look for attributes
@@ -647,7 +663,8 @@ int parse_rock_ridge_inode(struct iso_directory_record *de, struct inode *inode)
647 */ 663 */
648 if ((ISOFS_SB(inode->i_sb)->s_rock_offset == -1) 664 if ((ISOFS_SB(inode->i_sb)->s_rock_offset == -1)
649 && (ISOFS_SB(inode->i_sb)->s_rock == 2)) { 665 && (ISOFS_SB(inode->i_sb)->s_rock == 2)) {
650 result = parse_rock_ridge_inode_internal(de, inode, 14); 666 result = parse_rock_ridge_inode_internal(de, inode,
667 flags | RR_REGARD_XA);
651 } 668 }
652 return result; 669 return result;
653} 670}
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 6fac74349856..b73e0215baa7 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -97,7 +97,7 @@ static void jbd2_commit_block_csum_set(journal_t *j, struct buffer_head *bh)
97 struct commit_header *h; 97 struct commit_header *h;
98 __u32 csum; 98 __u32 csum;
99 99
100 if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 100 if (!jbd2_journal_has_csum_v2or3(j))
101 return; 101 return;
102 102
103 h = (struct commit_header *)(bh->b_data); 103 h = (struct commit_header *)(bh->b_data);
@@ -313,11 +313,11 @@ static __u32 jbd2_checksum_data(__u32 crc32_sum, struct buffer_head *bh)
313 return checksum; 313 return checksum;
314} 314}
315 315
316static void write_tag_block(int tag_bytes, journal_block_tag_t *tag, 316static void write_tag_block(journal_t *j, journal_block_tag_t *tag,
317 unsigned long long block) 317 unsigned long long block)
318{ 318{
319 tag->t_blocknr = cpu_to_be32(block & (u32)~0); 319 tag->t_blocknr = cpu_to_be32(block & (u32)~0);
320 if (tag_bytes > JBD2_TAG_SIZE32) 320 if (JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_64BIT))
321 tag->t_blocknr_high = cpu_to_be32((block >> 31) >> 1); 321 tag->t_blocknr_high = cpu_to_be32((block >> 31) >> 1);
322} 322}
323 323
@@ -327,7 +327,7 @@ static void jbd2_descr_block_csum_set(journal_t *j,
327 struct jbd2_journal_block_tail *tail; 327 struct jbd2_journal_block_tail *tail;
328 __u32 csum; 328 __u32 csum;
329 329
330 if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 330 if (!jbd2_journal_has_csum_v2or3(j))
331 return; 331 return;
332 332
333 tail = (struct jbd2_journal_block_tail *)(bh->b_data + j->j_blocksize - 333 tail = (struct jbd2_journal_block_tail *)(bh->b_data + j->j_blocksize -
@@ -340,12 +340,13 @@ static void jbd2_descr_block_csum_set(journal_t *j,
340static void jbd2_block_tag_csum_set(journal_t *j, journal_block_tag_t *tag, 340static void jbd2_block_tag_csum_set(journal_t *j, journal_block_tag_t *tag,
341 struct buffer_head *bh, __u32 sequence) 341 struct buffer_head *bh, __u32 sequence)
342{ 342{
343 journal_block_tag3_t *tag3 = (journal_block_tag3_t *)tag;
343 struct page *page = bh->b_page; 344 struct page *page = bh->b_page;
344 __u8 *addr; 345 __u8 *addr;
345 __u32 csum32; 346 __u32 csum32;
346 __be32 seq; 347 __be32 seq;
347 348
348 if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 349 if (!jbd2_journal_has_csum_v2or3(j))
349 return; 350 return;
350 351
351 seq = cpu_to_be32(sequence); 352 seq = cpu_to_be32(sequence);
@@ -355,8 +356,10 @@ static void jbd2_block_tag_csum_set(journal_t *j, journal_block_tag_t *tag,
355 bh->b_size); 356 bh->b_size);
356 kunmap_atomic(addr); 357 kunmap_atomic(addr);
357 358
358 /* We only have space to store the lower 16 bits of the crc32c. */ 359 if (JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V3))
359 tag->t_checksum = cpu_to_be16(csum32); 360 tag3->t_checksum = cpu_to_be32(csum32);
361 else
362 tag->t_checksum = cpu_to_be16(csum32);
360} 363}
361/* 364/*
362 * jbd2_journal_commit_transaction 365 * jbd2_journal_commit_transaction
@@ -396,7 +399,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
396 LIST_HEAD(io_bufs); 399 LIST_HEAD(io_bufs);
397 LIST_HEAD(log_bufs); 400 LIST_HEAD(log_bufs);
398 401
399 if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 402 if (jbd2_journal_has_csum_v2or3(journal))
400 csum_size = sizeof(struct jbd2_journal_block_tail); 403 csum_size = sizeof(struct jbd2_journal_block_tail);
401 404
402 /* 405 /*
@@ -690,7 +693,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
690 tag_flag |= JBD2_FLAG_SAME_UUID; 693 tag_flag |= JBD2_FLAG_SAME_UUID;
691 694
692 tag = (journal_block_tag_t *) tagp; 695 tag = (journal_block_tag_t *) tagp;
693 write_tag_block(tag_bytes, tag, jh2bh(jh)->b_blocknr); 696 write_tag_block(journal, tag, jh2bh(jh)->b_blocknr);
694 tag->t_flags = cpu_to_be16(tag_flag); 697 tag->t_flags = cpu_to_be16(tag_flag);
695 jbd2_block_tag_csum_set(journal, tag, wbuf[bufs], 698 jbd2_block_tag_csum_set(journal, tag, wbuf[bufs],
696 commit_transaction->t_tid); 699 commit_transaction->t_tid);
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 67b8e303946c..19d74d86d99c 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -124,7 +124,7 @@ EXPORT_SYMBOL(__jbd2_debug);
124/* Checksumming functions */ 124/* Checksumming functions */
125static int jbd2_verify_csum_type(journal_t *j, journal_superblock_t *sb) 125static int jbd2_verify_csum_type(journal_t *j, journal_superblock_t *sb)
126{ 126{
127 if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 127 if (!jbd2_journal_has_csum_v2or3(j))
128 return 1; 128 return 1;
129 129
130 return sb->s_checksum_type == JBD2_CRC32C_CHKSUM; 130 return sb->s_checksum_type == JBD2_CRC32C_CHKSUM;
@@ -145,7 +145,7 @@ static __be32 jbd2_superblock_csum(journal_t *j, journal_superblock_t *sb)
145 145
146static int jbd2_superblock_csum_verify(journal_t *j, journal_superblock_t *sb) 146static int jbd2_superblock_csum_verify(journal_t *j, journal_superblock_t *sb)
147{ 147{
148 if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 148 if (!jbd2_journal_has_csum_v2or3(j))
149 return 1; 149 return 1;
150 150
151 return sb->s_checksum == jbd2_superblock_csum(j, sb); 151 return sb->s_checksum == jbd2_superblock_csum(j, sb);
@@ -153,7 +153,7 @@ static int jbd2_superblock_csum_verify(journal_t *j, journal_superblock_t *sb)
153 153
154static void jbd2_superblock_csum_set(journal_t *j, journal_superblock_t *sb) 154static void jbd2_superblock_csum_set(journal_t *j, journal_superblock_t *sb)
155{ 155{
156 if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 156 if (!jbd2_journal_has_csum_v2or3(j))
157 return; 157 return;
158 158
159 sb->s_checksum = jbd2_superblock_csum(j, sb); 159 sb->s_checksum = jbd2_superblock_csum(j, sb);
@@ -1522,21 +1522,29 @@ static int journal_get_superblock(journal_t *journal)
1522 goto out; 1522 goto out;
1523 } 1523 }
1524 1524
1525 if (JBD2_HAS_COMPAT_FEATURE(journal, JBD2_FEATURE_COMPAT_CHECKSUM) && 1525 if (jbd2_journal_has_csum_v2or3(journal) &&
1526 JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) { 1526 JBD2_HAS_COMPAT_FEATURE(journal, JBD2_FEATURE_COMPAT_CHECKSUM)) {
1527 /* Can't have checksum v1 and v2 on at the same time! */ 1527 /* Can't have checksum v1 and v2 on at the same time! */
1528 printk(KERN_ERR "JBD2: Can't enable checksumming v1 and v2 " 1528 printk(KERN_ERR "JBD2: Can't enable checksumming v1 and v2 "
1529 "at the same time!\n"); 1529 "at the same time!\n");
1530 goto out; 1530 goto out;
1531 } 1531 }
1532 1532
1533 if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2) &&
1534 JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V3)) {
1535 /* Can't have checksum v2 and v3 at the same time! */
1536 printk(KERN_ERR "JBD2: Can't enable checksumming v2 and v3 "
1537 "at the same time!\n");
1538 goto out;
1539 }
1540
1533 if (!jbd2_verify_csum_type(journal, sb)) { 1541 if (!jbd2_verify_csum_type(journal, sb)) {
1534 printk(KERN_ERR "JBD2: Unknown checksum type\n"); 1542 printk(KERN_ERR "JBD2: Unknown checksum type\n");
1535 goto out; 1543 goto out;
1536 } 1544 }
1537 1545
1538 /* Load the checksum driver */ 1546 /* Load the checksum driver */
1539 if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) { 1547 if (jbd2_journal_has_csum_v2or3(journal)) {
1540 journal->j_chksum_driver = crypto_alloc_shash("crc32c", 0, 0); 1548 journal->j_chksum_driver = crypto_alloc_shash("crc32c", 0, 0);
1541 if (IS_ERR(journal->j_chksum_driver)) { 1549 if (IS_ERR(journal->j_chksum_driver)) {
1542 printk(KERN_ERR "JBD2: Cannot load crc32c driver.\n"); 1550 printk(KERN_ERR "JBD2: Cannot load crc32c driver.\n");
@@ -1553,7 +1561,7 @@ static int journal_get_superblock(journal_t *journal)
1553 } 1561 }
1554 1562
1555 /* Precompute checksum seed for all metadata */ 1563 /* Precompute checksum seed for all metadata */
1556 if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 1564 if (jbd2_journal_has_csum_v2or3(journal))
1557 journal->j_csum_seed = jbd2_chksum(journal, ~0, sb->s_uuid, 1565 journal->j_csum_seed = jbd2_chksum(journal, ~0, sb->s_uuid,
1558 sizeof(sb->s_uuid)); 1566 sizeof(sb->s_uuid));
1559 1567
@@ -1813,8 +1821,14 @@ int jbd2_journal_set_features (journal_t *journal, unsigned long compat,
1813 if (!jbd2_journal_check_available_features(journal, compat, ro, incompat)) 1821 if (!jbd2_journal_check_available_features(journal, compat, ro, incompat))
1814 return 0; 1822 return 0;
1815 1823
1816 /* Asking for checksumming v2 and v1? Only give them v2. */ 1824 /* If enabling v2 checksums, turn on v3 instead */
1817 if (incompat & JBD2_FEATURE_INCOMPAT_CSUM_V2 && 1825 if (incompat & JBD2_FEATURE_INCOMPAT_CSUM_V2) {
1826 incompat &= ~JBD2_FEATURE_INCOMPAT_CSUM_V2;
1827 incompat |= JBD2_FEATURE_INCOMPAT_CSUM_V3;
1828 }
1829
1830 /* Asking for checksumming v3 and v1? Only give them v3. */
1831 if (incompat & JBD2_FEATURE_INCOMPAT_CSUM_V3 &&
1818 compat & JBD2_FEATURE_COMPAT_CHECKSUM) 1832 compat & JBD2_FEATURE_COMPAT_CHECKSUM)
1819 compat &= ~JBD2_FEATURE_COMPAT_CHECKSUM; 1833 compat &= ~JBD2_FEATURE_COMPAT_CHECKSUM;
1820 1834
@@ -1823,8 +1837,8 @@ int jbd2_journal_set_features (journal_t *journal, unsigned long compat,
1823 1837
1824 sb = journal->j_superblock; 1838 sb = journal->j_superblock;
1825 1839
1826 /* If enabling v2 checksums, update superblock */ 1840 /* If enabling v3 checksums, update superblock */
1827 if (INCOMPAT_FEATURE_ON(JBD2_FEATURE_INCOMPAT_CSUM_V2)) { 1841 if (INCOMPAT_FEATURE_ON(JBD2_FEATURE_INCOMPAT_CSUM_V3)) {
1828 sb->s_checksum_type = JBD2_CRC32C_CHKSUM; 1842 sb->s_checksum_type = JBD2_CRC32C_CHKSUM;
1829 sb->s_feature_compat &= 1843 sb->s_feature_compat &=
1830 ~cpu_to_be32(JBD2_FEATURE_COMPAT_CHECKSUM); 1844 ~cpu_to_be32(JBD2_FEATURE_COMPAT_CHECKSUM);
@@ -1842,8 +1856,7 @@ int jbd2_journal_set_features (journal_t *journal, unsigned long compat,
1842 } 1856 }
1843 1857
1844 /* Precompute checksum seed for all metadata */ 1858 /* Precompute checksum seed for all metadata */
1845 if (JBD2_HAS_INCOMPAT_FEATURE(journal, 1859 if (jbd2_journal_has_csum_v2or3(journal))
1846 JBD2_FEATURE_INCOMPAT_CSUM_V2))
1847 journal->j_csum_seed = jbd2_chksum(journal, ~0, 1860 journal->j_csum_seed = jbd2_chksum(journal, ~0,
1848 sb->s_uuid, 1861 sb->s_uuid,
1849 sizeof(sb->s_uuid)); 1862 sizeof(sb->s_uuid));
@@ -1852,7 +1865,8 @@ int jbd2_journal_set_features (journal_t *journal, unsigned long compat,
1852 /* If enabling v1 checksums, downgrade superblock */ 1865 /* If enabling v1 checksums, downgrade superblock */
1853 if (COMPAT_FEATURE_ON(JBD2_FEATURE_COMPAT_CHECKSUM)) 1866 if (COMPAT_FEATURE_ON(JBD2_FEATURE_COMPAT_CHECKSUM))
1854 sb->s_feature_incompat &= 1867 sb->s_feature_incompat &=
1855 ~cpu_to_be32(JBD2_FEATURE_INCOMPAT_CSUM_V2); 1868 ~cpu_to_be32(JBD2_FEATURE_INCOMPAT_CSUM_V2 |
1869 JBD2_FEATURE_INCOMPAT_CSUM_V3);
1856 1870
1857 sb->s_feature_compat |= cpu_to_be32(compat); 1871 sb->s_feature_compat |= cpu_to_be32(compat);
1858 sb->s_feature_ro_compat |= cpu_to_be32(ro); 1872 sb->s_feature_ro_compat |= cpu_to_be32(ro);
@@ -2165,16 +2179,20 @@ int jbd2_journal_blocks_per_page(struct inode *inode)
2165 */ 2179 */
2166size_t journal_tag_bytes(journal_t *journal) 2180size_t journal_tag_bytes(journal_t *journal)
2167{ 2181{
2168 journal_block_tag_t tag; 2182 size_t sz;
2169 size_t x = 0; 2183
2184 if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V3))
2185 return sizeof(journal_block_tag3_t);
2186
2187 sz = sizeof(journal_block_tag_t);
2170 2188
2171 if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 2189 if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2))
2172 x += sizeof(tag.t_checksum); 2190 sz += sizeof(__u16);
2173 2191
2174 if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT)) 2192 if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT))
2175 return x + JBD2_TAG_SIZE64; 2193 return sz;
2176 else 2194 else
2177 return x + JBD2_TAG_SIZE32; 2195 return sz - sizeof(__u32);
2178} 2196}
2179 2197
2180/* 2198/*
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
index 3b6bb19d60b1..9b329b55ffe3 100644
--- a/fs/jbd2/recovery.c
+++ b/fs/jbd2/recovery.c
@@ -181,7 +181,7 @@ static int jbd2_descr_block_csum_verify(journal_t *j,
181 __be32 provided; 181 __be32 provided;
182 __u32 calculated; 182 __u32 calculated;
183 183
184 if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 184 if (!jbd2_journal_has_csum_v2or3(j))
185 return 1; 185 return 1;
186 186
187 tail = (struct jbd2_journal_block_tail *)(buf + j->j_blocksize - 187 tail = (struct jbd2_journal_block_tail *)(buf + j->j_blocksize -
@@ -205,7 +205,7 @@ static int count_tags(journal_t *journal, struct buffer_head *bh)
205 int nr = 0, size = journal->j_blocksize; 205 int nr = 0, size = journal->j_blocksize;
206 int tag_bytes = journal_tag_bytes(journal); 206 int tag_bytes = journal_tag_bytes(journal);
207 207
208 if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 208 if (jbd2_journal_has_csum_v2or3(journal))
209 size -= sizeof(struct jbd2_journal_block_tail); 209 size -= sizeof(struct jbd2_journal_block_tail);
210 210
211 tagp = &bh->b_data[sizeof(journal_header_t)]; 211 tagp = &bh->b_data[sizeof(journal_header_t)];
@@ -338,10 +338,11 @@ int jbd2_journal_skip_recovery(journal_t *journal)
338 return err; 338 return err;
339} 339}
340 340
341static inline unsigned long long read_tag_block(int tag_bytes, journal_block_tag_t *tag) 341static inline unsigned long long read_tag_block(journal_t *journal,
342 journal_block_tag_t *tag)
342{ 343{
343 unsigned long long block = be32_to_cpu(tag->t_blocknr); 344 unsigned long long block = be32_to_cpu(tag->t_blocknr);
344 if (tag_bytes > JBD2_TAG_SIZE32) 345 if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT))
345 block |= (u64)be32_to_cpu(tag->t_blocknr_high) << 32; 346 block |= (u64)be32_to_cpu(tag->t_blocknr_high) << 32;
346 return block; 347 return block;
347} 348}
@@ -384,7 +385,7 @@ static int jbd2_commit_block_csum_verify(journal_t *j, void *buf)
384 __be32 provided; 385 __be32 provided;
385 __u32 calculated; 386 __u32 calculated;
386 387
387 if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 388 if (!jbd2_journal_has_csum_v2or3(j))
388 return 1; 389 return 1;
389 390
390 h = buf; 391 h = buf;
@@ -399,17 +400,21 @@ static int jbd2_commit_block_csum_verify(journal_t *j, void *buf)
399static int jbd2_block_tag_csum_verify(journal_t *j, journal_block_tag_t *tag, 400static int jbd2_block_tag_csum_verify(journal_t *j, journal_block_tag_t *tag,
400 void *buf, __u32 sequence) 401 void *buf, __u32 sequence)
401{ 402{
403 journal_block_tag3_t *tag3 = (journal_block_tag3_t *)tag;
402 __u32 csum32; 404 __u32 csum32;
403 __be32 seq; 405 __be32 seq;
404 406
405 if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 407 if (!jbd2_journal_has_csum_v2or3(j))
406 return 1; 408 return 1;
407 409
408 seq = cpu_to_be32(sequence); 410 seq = cpu_to_be32(sequence);
409 csum32 = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&seq, sizeof(seq)); 411 csum32 = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&seq, sizeof(seq));
410 csum32 = jbd2_chksum(j, csum32, buf, j->j_blocksize); 412 csum32 = jbd2_chksum(j, csum32, buf, j->j_blocksize);
411 413
412 return tag->t_checksum == cpu_to_be16(csum32); 414 if (JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V3))
415 return tag3->t_checksum == cpu_to_be32(csum32);
416 else
417 return tag->t_checksum == cpu_to_be16(csum32);
413} 418}
414 419
415static int do_one_pass(journal_t *journal, 420static int do_one_pass(journal_t *journal,
@@ -426,6 +431,7 @@ static int do_one_pass(journal_t *journal,
426 int tag_bytes = journal_tag_bytes(journal); 431 int tag_bytes = journal_tag_bytes(journal);
427 __u32 crc32_sum = ~0; /* Transactional Checksums */ 432 __u32 crc32_sum = ~0; /* Transactional Checksums */
428 int descr_csum_size = 0; 433 int descr_csum_size = 0;
434 int block_error = 0;
429 435
430 /* 436 /*
431 * First thing is to establish what we expect to find in the log 437 * First thing is to establish what we expect to find in the log
@@ -512,8 +518,7 @@ static int do_one_pass(journal_t *journal,
512 switch(blocktype) { 518 switch(blocktype) {
513 case JBD2_DESCRIPTOR_BLOCK: 519 case JBD2_DESCRIPTOR_BLOCK:
514 /* Verify checksum first */ 520 /* Verify checksum first */
515 if (JBD2_HAS_INCOMPAT_FEATURE(journal, 521 if (jbd2_journal_has_csum_v2or3(journal))
516 JBD2_FEATURE_INCOMPAT_CSUM_V2))
517 descr_csum_size = 522 descr_csum_size =
518 sizeof(struct jbd2_journal_block_tail); 523 sizeof(struct jbd2_journal_block_tail);
519 if (descr_csum_size > 0 && 524 if (descr_csum_size > 0 &&
@@ -574,7 +579,7 @@ static int do_one_pass(journal_t *journal,
574 unsigned long long blocknr; 579 unsigned long long blocknr;
575 580
576 J_ASSERT(obh != NULL); 581 J_ASSERT(obh != NULL);
577 blocknr = read_tag_block(tag_bytes, 582 blocknr = read_tag_block(journal,
578 tag); 583 tag);
579 584
580 /* If the block has been 585 /* If the block has been
@@ -598,7 +603,8 @@ static int do_one_pass(journal_t *journal,
598 "checksum recovering " 603 "checksum recovering "
599 "block %llu in log\n", 604 "block %llu in log\n",
600 blocknr); 605 blocknr);
601 continue; 606 block_error = 1;
607 goto skip_write;
602 } 608 }
603 609
604 /* Find a buffer for the new 610 /* Find a buffer for the new
@@ -797,7 +803,8 @@ static int do_one_pass(journal_t *journal,
797 success = -EIO; 803 success = -EIO;
798 } 804 }
799 } 805 }
800 806 if (block_error && success == 0)
807 success = -EIO;
801 return success; 808 return success;
802 809
803 failed: 810 failed:
@@ -811,7 +818,7 @@ static int jbd2_revoke_block_csum_verify(journal_t *j,
811 __be32 provided; 818 __be32 provided;
812 __u32 calculated; 819 __u32 calculated;
813 820
814 if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 821 if (!jbd2_journal_has_csum_v2or3(j))
815 return 1; 822 return 1;
816 823
817 tail = (struct jbd2_journal_revoke_tail *)(buf + j->j_blocksize - 824 tail = (struct jbd2_journal_revoke_tail *)(buf + j->j_blocksize -
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c
index 198c9c10276d..d5e95a175c92 100644
--- a/fs/jbd2/revoke.c
+++ b/fs/jbd2/revoke.c
@@ -91,8 +91,8 @@
91#include <linux/list.h> 91#include <linux/list.h>
92#include <linux/init.h> 92#include <linux/init.h>
93#include <linux/bio.h> 93#include <linux/bio.h>
94#endif
95#include <linux/log2.h> 94#include <linux/log2.h>
95#endif
96 96
97static struct kmem_cache *jbd2_revoke_record_cache; 97static struct kmem_cache *jbd2_revoke_record_cache;
98static struct kmem_cache *jbd2_revoke_table_cache; 98static struct kmem_cache *jbd2_revoke_table_cache;
@@ -597,7 +597,7 @@ static void write_one_revoke_record(journal_t *journal,
597 offset = *offsetp; 597 offset = *offsetp;
598 598
599 /* Do we need to leave space at the end for a checksum? */ 599 /* Do we need to leave space at the end for a checksum? */
600 if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 600 if (jbd2_journal_has_csum_v2or3(journal))
601 csum_size = sizeof(struct jbd2_journal_revoke_tail); 601 csum_size = sizeof(struct jbd2_journal_revoke_tail);
602 602
603 /* Make sure we have a descriptor with space left for the record */ 603 /* Make sure we have a descriptor with space left for the record */
@@ -644,7 +644,7 @@ static void jbd2_revoke_csum_set(journal_t *j, struct buffer_head *bh)
644 struct jbd2_journal_revoke_tail *tail; 644 struct jbd2_journal_revoke_tail *tail;
645 __u32 csum; 645 __u32 csum;
646 646
647 if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 647 if (!jbd2_journal_has_csum_v2or3(j))
648 return; 648 return;
649 649
650 tail = (struct jbd2_journal_revoke_tail *)(bh->b_data + j->j_blocksize - 650 tail = (struct jbd2_journal_revoke_tail *)(bh->b_data + j->j_blocksize -
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 8f27c93f8d2e..ec9e082f9ecd 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -253,13 +253,11 @@ static int lockd_up_net(struct svc_serv *serv, struct net *net)
253 253
254 error = make_socks(serv, net); 254 error = make_socks(serv, net);
255 if (error < 0) 255 if (error < 0)
256 goto err_socks; 256 goto err_bind;
257 set_grace_period(net); 257 set_grace_period(net);
258 dprintk("lockd_up_net: per-net data created; net=%p\n", net); 258 dprintk("lockd_up_net: per-net data created; net=%p\n", net);
259 return 0; 259 return 0;
260 260
261err_socks:
262 svc_rpcb_cleanup(serv, net);
263err_bind: 261err_bind:
264 ln->nlmsvc_users--; 262 ln->nlmsvc_users--;
265 return error; 263 return error;
diff --git a/fs/locks.c b/fs/locks.c
index cb66fb05ad4a..bb08857f90b5 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1619,7 +1619,7 @@ static int generic_add_lease(struct file *filp, long arg, struct file_lock **flp
1619 smp_mb(); 1619 smp_mb();
1620 error = check_conflicting_open(dentry, arg); 1620 error = check_conflicting_open(dentry, arg);
1621 if (error) 1621 if (error)
1622 locks_unlink_lock(flp); 1622 locks_unlink_lock(before);
1623out: 1623out:
1624 if (is_deleg) 1624 if (is_deleg)
1625 mutex_unlock(&inode->i_mutex); 1625 mutex_unlock(&inode->i_mutex);
diff --git a/fs/namei.c b/fs/namei.c
index a996bb48dfab..a7b05bf82d31 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -34,6 +34,7 @@
34#include <linux/device_cgroup.h> 34#include <linux/device_cgroup.h>
35#include <linux/fs_struct.h> 35#include <linux/fs_struct.h>
36#include <linux/posix_acl.h> 36#include <linux/posix_acl.h>
37#include <linux/hash.h>
37#include <asm/uaccess.h> 38#include <asm/uaccess.h>
38 39
39#include "internal.h" 40#include "internal.h"
@@ -643,24 +644,22 @@ static int complete_walk(struct nameidata *nd)
643 644
644static __always_inline void set_root(struct nameidata *nd) 645static __always_inline void set_root(struct nameidata *nd)
645{ 646{
646 if (!nd->root.mnt) 647 get_fs_root(current->fs, &nd->root);
647 get_fs_root(current->fs, &nd->root);
648} 648}
649 649
650static int link_path_walk(const char *, struct nameidata *); 650static int link_path_walk(const char *, struct nameidata *);
651 651
652static __always_inline void set_root_rcu(struct nameidata *nd) 652static __always_inline unsigned set_root_rcu(struct nameidata *nd)
653{ 653{
654 if (!nd->root.mnt) { 654 struct fs_struct *fs = current->fs;
655 struct fs_struct *fs = current->fs; 655 unsigned seq, res;
656 unsigned seq;
657 656
658 do { 657 do {
659 seq = read_seqcount_begin(&fs->seq); 658 seq = read_seqcount_begin(&fs->seq);
660 nd->root = fs->root; 659 nd->root = fs->root;
661 nd->seq = __read_seqcount_begin(&nd->root.dentry->d_seq); 660 res = __read_seqcount_begin(&nd->root.dentry->d_seq);
662 } while (read_seqcount_retry(&fs->seq, seq)); 661 } while (read_seqcount_retry(&fs->seq, seq));
663 } 662 return res;
664} 663}
665 664
666static void path_put_conditional(struct path *path, struct nameidata *nd) 665static void path_put_conditional(struct path *path, struct nameidata *nd)
@@ -860,7 +859,8 @@ follow_link(struct path *link, struct nameidata *nd, void **p)
860 return PTR_ERR(s); 859 return PTR_ERR(s);
861 } 860 }
862 if (*s == '/') { 861 if (*s == '/') {
863 set_root(nd); 862 if (!nd->root.mnt)
863 set_root(nd);
864 path_put(&nd->path); 864 path_put(&nd->path);
865 nd->path = nd->root; 865 nd->path = nd->root;
866 path_get(&nd->root); 866 path_get(&nd->root);
@@ -1137,13 +1137,15 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
1137 */ 1137 */
1138 *inode = path->dentry->d_inode; 1138 *inode = path->dentry->d_inode;
1139 } 1139 }
1140 return read_seqretry(&mount_lock, nd->m_seq) && 1140 return !read_seqretry(&mount_lock, nd->m_seq) &&
1141 !(path->dentry->d_flags & DCACHE_NEED_AUTOMOUNT); 1141 !(path->dentry->d_flags & DCACHE_NEED_AUTOMOUNT);
1142} 1142}
1143 1143
1144static int follow_dotdot_rcu(struct nameidata *nd) 1144static int follow_dotdot_rcu(struct nameidata *nd)
1145{ 1145{
1146 set_root_rcu(nd); 1146 struct inode *inode = nd->inode;
1147 if (!nd->root.mnt)
1148 set_root_rcu(nd);
1147 1149
1148 while (1) { 1150 while (1) {
1149 if (nd->path.dentry == nd->root.dentry && 1151 if (nd->path.dentry == nd->root.dentry &&
@@ -1155,6 +1157,7 @@ static int follow_dotdot_rcu(struct nameidata *nd)
1155 struct dentry *parent = old->d_parent; 1157 struct dentry *parent = old->d_parent;
1156 unsigned seq; 1158 unsigned seq;
1157 1159
1160 inode = parent->d_inode;
1158 seq = read_seqcount_begin(&parent->d_seq); 1161 seq = read_seqcount_begin(&parent->d_seq);
1159 if (read_seqcount_retry(&old->d_seq, nd->seq)) 1162 if (read_seqcount_retry(&old->d_seq, nd->seq))
1160 goto failed; 1163 goto failed;
@@ -1164,6 +1167,7 @@ static int follow_dotdot_rcu(struct nameidata *nd)
1164 } 1167 }
1165 if (!follow_up_rcu(&nd->path)) 1168 if (!follow_up_rcu(&nd->path))
1166 break; 1169 break;
1170 inode = nd->path.dentry->d_inode;
1167 nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq); 1171 nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq);
1168 } 1172 }
1169 while (d_mountpoint(nd->path.dentry)) { 1173 while (d_mountpoint(nd->path.dentry)) {
@@ -1173,11 +1177,12 @@ static int follow_dotdot_rcu(struct nameidata *nd)
1173 break; 1177 break;
1174 nd->path.mnt = &mounted->mnt; 1178 nd->path.mnt = &mounted->mnt;
1175 nd->path.dentry = mounted->mnt.mnt_root; 1179 nd->path.dentry = mounted->mnt.mnt_root;
1180 inode = nd->path.dentry->d_inode;
1176 nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq); 1181 nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq);
1177 if (!read_seqretry(&mount_lock, nd->m_seq)) 1182 if (read_seqretry(&mount_lock, nd->m_seq))
1178 goto failed; 1183 goto failed;
1179 } 1184 }
1180 nd->inode = nd->path.dentry->d_inode; 1185 nd->inode = inode;
1181 return 0; 1186 return 0;
1182 1187
1183failed: 1188failed:
@@ -1256,7 +1261,8 @@ static void follow_mount(struct path *path)
1256 1261
1257static void follow_dotdot(struct nameidata *nd) 1262static void follow_dotdot(struct nameidata *nd)
1258{ 1263{
1259 set_root(nd); 1264 if (!nd->root.mnt)
1265 set_root(nd);
1260 1266
1261 while(1) { 1267 while(1) {
1262 struct dentry *old = nd->path.dentry; 1268 struct dentry *old = nd->path.dentry;
@@ -1634,8 +1640,7 @@ static inline int nested_symlink(struct path *path, struct nameidata *nd)
1634 1640
1635static inline unsigned int fold_hash(unsigned long hash) 1641static inline unsigned int fold_hash(unsigned long hash)
1636{ 1642{
1637 hash += hash >> (8*sizeof(int)); 1643 return hash_64(hash, 32);
1638 return hash;
1639} 1644}
1640 1645
1641#else /* 32-bit case */ 1646#else /* 32-bit case */
@@ -1669,9 +1674,9 @@ EXPORT_SYMBOL(full_name_hash);
1669 1674
1670/* 1675/*
1671 * Calculate the length and hash of the path component, and 1676 * Calculate the length and hash of the path component, and
1672 * return the length of the component; 1677 * return the "hash_len" as the result.
1673 */ 1678 */
1674static inline unsigned long hash_name(const char *name, unsigned int *hashp) 1679static inline u64 hash_name(const char *name)
1675{ 1680{
1676 unsigned long a, b, adata, bdata, mask, hash, len; 1681 unsigned long a, b, adata, bdata, mask, hash, len;
1677 const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS; 1682 const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS;
@@ -1691,9 +1696,8 @@ static inline unsigned long hash_name(const char *name, unsigned int *hashp)
1691 mask = create_zero_mask(adata | bdata); 1696 mask = create_zero_mask(adata | bdata);
1692 1697
1693 hash += a & zero_bytemask(mask); 1698 hash += a & zero_bytemask(mask);
1694 *hashp = fold_hash(hash); 1699 len += find_zero(mask);
1695 1700 return hashlen_create(fold_hash(hash), len);
1696 return len + find_zero(mask);
1697} 1701}
1698 1702
1699#else 1703#else
@@ -1711,7 +1715,7 @@ EXPORT_SYMBOL(full_name_hash);
1711 * We know there's a real path component here of at least 1715 * We know there's a real path component here of at least
1712 * one character. 1716 * one character.
1713 */ 1717 */
1714static inline unsigned long hash_name(const char *name, unsigned int *hashp) 1718static inline u64 hash_name(const char *name)
1715{ 1719{
1716 unsigned long hash = init_name_hash(); 1720 unsigned long hash = init_name_hash();
1717 unsigned long len = 0, c; 1721 unsigned long len = 0, c;
@@ -1722,8 +1726,7 @@ static inline unsigned long hash_name(const char *name, unsigned int *hashp)
1722 hash = partial_name_hash(c, hash); 1726 hash = partial_name_hash(c, hash);
1723 c = (unsigned char)name[len]; 1727 c = (unsigned char)name[len];
1724 } while (c && c != '/'); 1728 } while (c && c != '/');
1725 *hashp = end_name_hash(hash); 1729 return hashlen_create(end_name_hash(hash), len);
1726 return len;
1727} 1730}
1728 1731
1729#endif 1732#endif
@@ -1748,20 +1751,17 @@ static int link_path_walk(const char *name, struct nameidata *nd)
1748 1751
1749 /* At this point we know we have a real path component. */ 1752 /* At this point we know we have a real path component. */
1750 for(;;) { 1753 for(;;) {
1751 struct qstr this; 1754 u64 hash_len;
1752 long len;
1753 int type; 1755 int type;
1754 1756
1755 err = may_lookup(nd); 1757 err = may_lookup(nd);
1756 if (err) 1758 if (err)
1757 break; 1759 break;
1758 1760
1759 len = hash_name(name, &this.hash); 1761 hash_len = hash_name(name);
1760 this.name = name;
1761 this.len = len;
1762 1762
1763 type = LAST_NORM; 1763 type = LAST_NORM;
1764 if (name[0] == '.') switch (len) { 1764 if (name[0] == '.') switch (hashlen_len(hash_len)) {
1765 case 2: 1765 case 2:
1766 if (name[1] == '.') { 1766 if (name[1] == '.') {
1767 type = LAST_DOTDOT; 1767 type = LAST_DOTDOT;
@@ -1775,29 +1775,32 @@ static int link_path_walk(const char *name, struct nameidata *nd)
1775 struct dentry *parent = nd->path.dentry; 1775 struct dentry *parent = nd->path.dentry;
1776 nd->flags &= ~LOOKUP_JUMPED; 1776 nd->flags &= ~LOOKUP_JUMPED;
1777 if (unlikely(parent->d_flags & DCACHE_OP_HASH)) { 1777 if (unlikely(parent->d_flags & DCACHE_OP_HASH)) {
1778 struct qstr this = { { .hash_len = hash_len }, .name = name };
1778 err = parent->d_op->d_hash(parent, &this); 1779 err = parent->d_op->d_hash(parent, &this);
1779 if (err < 0) 1780 if (err < 0)
1780 break; 1781 break;
1782 hash_len = this.hash_len;
1783 name = this.name;
1781 } 1784 }
1782 } 1785 }
1783 1786
1784 nd->last = this; 1787 nd->last.hash_len = hash_len;
1788 nd->last.name = name;
1785 nd->last_type = type; 1789 nd->last_type = type;
1786 1790
1787 if (!name[len]) 1791 name += hashlen_len(hash_len);
1792 if (!*name)
1788 return 0; 1793 return 0;
1789 /* 1794 /*
1790 * If it wasn't NUL, we know it was '/'. Skip that 1795 * If it wasn't NUL, we know it was '/'. Skip that
1791 * slash, and continue until no more slashes. 1796 * slash, and continue until no more slashes.
1792 */ 1797 */
1793 do { 1798 do {
1794 len++; 1799 name++;
1795 } while (unlikely(name[len] == '/')); 1800 } while (unlikely(*name == '/'));
1796 if (!name[len]) 1801 if (!*name)
1797 return 0; 1802 return 0;
1798 1803
1799 name += len;
1800
1801 err = walk_component(nd, &next, LOOKUP_FOLLOW); 1804 err = walk_component(nd, &next, LOOKUP_FOLLOW);
1802 if (err < 0) 1805 if (err < 0)
1803 return err; 1806 return err;
@@ -1852,7 +1855,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
1852 if (*name=='/') { 1855 if (*name=='/') {
1853 if (flags & LOOKUP_RCU) { 1856 if (flags & LOOKUP_RCU) {
1854 rcu_read_lock(); 1857 rcu_read_lock();
1855 set_root_rcu(nd); 1858 nd->seq = set_root_rcu(nd);
1856 } else { 1859 } else {
1857 set_root(nd); 1860 set_root(nd);
1858 path_get(&nd->root); 1861 path_get(&nd->root);
@@ -1903,7 +1906,14 @@ static int path_init(int dfd, const char *name, unsigned int flags,
1903 } 1906 }
1904 1907
1905 nd->inode = nd->path.dentry->d_inode; 1908 nd->inode = nd->path.dentry->d_inode;
1906 return 0; 1909 if (!(flags & LOOKUP_RCU))
1910 return 0;
1911 if (likely(!read_seqcount_retry(&nd->path.dentry->d_seq, nd->seq)))
1912 return 0;
1913 if (!(nd->flags & LOOKUP_ROOT))
1914 nd->root.mnt = NULL;
1915 rcu_read_unlock();
1916 return -ECHILD;
1907} 1917}
1908 1918
1909static inline int lookup_last(struct nameidata *nd, struct path *path) 1919static inline int lookup_last(struct nameidata *nd, struct path *path)
diff --git a/fs/namespace.c b/fs/namespace.c
index a01c7730e9af..ef42d9bee212 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1217,6 +1217,11 @@ static void namespace_unlock(void)
1217 head.first->pprev = &head.first; 1217 head.first->pprev = &head.first;
1218 INIT_HLIST_HEAD(&unmounted); 1218 INIT_HLIST_HEAD(&unmounted);
1219 1219
1220 /* undo decrements we'd done in umount_tree() */
1221 hlist_for_each_entry(mnt, &head, mnt_hash)
1222 if (mnt->mnt_ex_mountpoint.mnt)
1223 mntget(mnt->mnt_ex_mountpoint.mnt);
1224
1220 up_write(&namespace_sem); 1225 up_write(&namespace_sem);
1221 1226
1222 synchronize_rcu(); 1227 synchronize_rcu();
@@ -1253,6 +1258,9 @@ void umount_tree(struct mount *mnt, int how)
1253 hlist_add_head(&p->mnt_hash, &tmp_list); 1258 hlist_add_head(&p->mnt_hash, &tmp_list);
1254 } 1259 }
1255 1260
1261 hlist_for_each_entry(p, &tmp_list, mnt_hash)
1262 list_del_init(&p->mnt_child);
1263
1256 if (how) 1264 if (how)
1257 propagate_umount(&tmp_list); 1265 propagate_umount(&tmp_list);
1258 1266
@@ -1263,9 +1271,9 @@ void umount_tree(struct mount *mnt, int how)
1263 p->mnt_ns = NULL; 1271 p->mnt_ns = NULL;
1264 if (how < 2) 1272 if (how < 2)
1265 p->mnt.mnt_flags |= MNT_SYNC_UMOUNT; 1273 p->mnt.mnt_flags |= MNT_SYNC_UMOUNT;
1266 list_del_init(&p->mnt_child);
1267 if (mnt_has_parent(p)) { 1274 if (mnt_has_parent(p)) {
1268 put_mountpoint(p->mnt_mp); 1275 put_mountpoint(p->mnt_mp);
1276 mnt_add_count(p->mnt_parent, -1);
1269 /* move the reference to mountpoint into ->mnt_ex_mountpoint */ 1277 /* move the reference to mountpoint into ->mnt_ex_mountpoint */
1270 p->mnt_ex_mountpoint.dentry = p->mnt_mountpoint; 1278 p->mnt_ex_mountpoint.dentry = p->mnt_mountpoint;
1271 p->mnt_ex_mountpoint.mnt = &p->mnt_parent->mnt; 1279 p->mnt_ex_mountpoint.mnt = &p->mnt_parent->mnt;
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 1c5ff6d58385..6a4f3666e273 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -1412,24 +1412,18 @@ int nfs_fs_proc_net_init(struct net *net)
1412 p = proc_create("volumes", S_IFREG|S_IRUGO, 1412 p = proc_create("volumes", S_IFREG|S_IRUGO,
1413 nn->proc_nfsfs, &nfs_volume_list_fops); 1413 nn->proc_nfsfs, &nfs_volume_list_fops);
1414 if (!p) 1414 if (!p)
1415 goto error_2; 1415 goto error_1;
1416 return 0; 1416 return 0;
1417 1417
1418error_2:
1419 remove_proc_entry("servers", nn->proc_nfsfs);
1420error_1: 1418error_1:
1421 remove_proc_entry("fs/nfsfs", NULL); 1419 remove_proc_subtree("nfsfs", net->proc_net);
1422error_0: 1420error_0:
1423 return -ENOMEM; 1421 return -ENOMEM;
1424} 1422}
1425 1423
1426void nfs_fs_proc_net_exit(struct net *net) 1424void nfs_fs_proc_net_exit(struct net *net)
1427{ 1425{
1428 struct nfs_net *nn = net_generic(net, nfs_net_id); 1426 remove_proc_subtree("nfsfs", net->proc_net);
1429
1430 remove_proc_entry("volumes", nn->proc_nfsfs);
1431 remove_proc_entry("servers", nn->proc_nfsfs);
1432 remove_proc_entry("fs/nfsfs", NULL);
1433} 1427}
1434 1428
1435/* 1429/*
diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c
index 1359c4a27393..90978075f730 100644
--- a/fs/nfs/filelayout/filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c
@@ -1269,11 +1269,12 @@ filelayout_search_commit_reqs(struct nfs_commit_info *cinfo, struct page *page)
1269static void filelayout_retry_commit(struct nfs_commit_info *cinfo, int idx) 1269static void filelayout_retry_commit(struct nfs_commit_info *cinfo, int idx)
1270{ 1270{
1271 struct pnfs_ds_commit_info *fl_cinfo = cinfo->ds; 1271 struct pnfs_ds_commit_info *fl_cinfo = cinfo->ds;
1272 struct pnfs_commit_bucket *bucket = fl_cinfo->buckets; 1272 struct pnfs_commit_bucket *bucket;
1273 struct pnfs_layout_segment *freeme; 1273 struct pnfs_layout_segment *freeme;
1274 int i; 1274 int i;
1275 1275
1276 for (i = idx; i < fl_cinfo->nbuckets; i++, bucket++) { 1276 for (i = idx; i < fl_cinfo->nbuckets; i++) {
1277 bucket = &fl_cinfo->buckets[i];
1277 if (list_empty(&bucket->committing)) 1278 if (list_empty(&bucket->committing))
1278 continue; 1279 continue;
1279 nfs_retry_commit(&bucket->committing, bucket->clseg, cinfo); 1280 nfs_retry_commit(&bucket->committing, bucket->clseg, cinfo);
diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c
index d0fec260132a..24c6898159cc 100644
--- a/fs/nfs/nfs3acl.c
+++ b/fs/nfs/nfs3acl.c
@@ -129,7 +129,10 @@ static int __nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
129 .rpc_argp = &args, 129 .rpc_argp = &args,
130 .rpc_resp = &fattr, 130 .rpc_resp = &fattr,
131 }; 131 };
132 int status; 132 int status = 0;
133
134 if (acl == NULL && (!S_ISDIR(inode->i_mode) || dfacl == NULL))
135 goto out;
133 136
134 status = -EOPNOTSUPP; 137 status = -EOPNOTSUPP;
135 if (!nfs_server_capable(inode, NFS_CAP_ACLS)) 138 if (!nfs_server_capable(inode, NFS_CAP_ACLS))
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 92193eddb41d..a8b855ab4e22 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -130,16 +130,15 @@ enum {
130 */ 130 */
131 131
132struct nfs4_lock_state { 132struct nfs4_lock_state {
133 struct list_head ls_locks; /* Other lock stateids */ 133 struct list_head ls_locks; /* Other lock stateids */
134 struct nfs4_state * ls_state; /* Pointer to open state */ 134 struct nfs4_state * ls_state; /* Pointer to open state */
135#define NFS_LOCK_INITIALIZED 0 135#define NFS_LOCK_INITIALIZED 0
136#define NFS_LOCK_LOST 1 136#define NFS_LOCK_LOST 1
137 unsigned long ls_flags; 137 unsigned long ls_flags;
138 struct nfs_seqid_counter ls_seqid; 138 struct nfs_seqid_counter ls_seqid;
139 nfs4_stateid ls_stateid; 139 nfs4_stateid ls_stateid;
140 atomic_t ls_count; 140 atomic_t ls_count;
141 fl_owner_t ls_owner; 141 fl_owner_t ls_owner;
142 struct work_struct ls_release;
143}; 142};
144 143
145/* bits for nfs4_state->flags */ 144/* bits for nfs4_state->flags */
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 53e435a95260..ffdb28d86cf8 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -482,6 +482,16 @@ int nfs40_walk_client_list(struct nfs_client *new,
482 482
483 spin_lock(&nn->nfs_client_lock); 483 spin_lock(&nn->nfs_client_lock);
484 list_for_each_entry(pos, &nn->nfs_client_list, cl_share_link) { 484 list_for_each_entry(pos, &nn->nfs_client_list, cl_share_link) {
485
486 if (pos->rpc_ops != new->rpc_ops)
487 continue;
488
489 if (pos->cl_proto != new->cl_proto)
490 continue;
491
492 if (pos->cl_minorversion != new->cl_minorversion)
493 continue;
494
485 /* If "pos" isn't marked ready, we can't trust the 495 /* If "pos" isn't marked ready, we can't trust the
486 * remaining fields in "pos" */ 496 * remaining fields in "pos" */
487 if (pos->cl_cons_state > NFS_CS_READY) { 497 if (pos->cl_cons_state > NFS_CS_READY) {
@@ -501,15 +511,6 @@ int nfs40_walk_client_list(struct nfs_client *new,
501 if (pos->cl_cons_state != NFS_CS_READY) 511 if (pos->cl_cons_state != NFS_CS_READY)
502 continue; 512 continue;
503 513
504 if (pos->rpc_ops != new->rpc_ops)
505 continue;
506
507 if (pos->cl_proto != new->cl_proto)
508 continue;
509
510 if (pos->cl_minorversion != new->cl_minorversion)
511 continue;
512
513 if (pos->cl_clientid != new->cl_clientid) 514 if (pos->cl_clientid != new->cl_clientid)
514 continue; 515 continue;
515 516
@@ -622,6 +623,16 @@ int nfs41_walk_client_list(struct nfs_client *new,
622 623
623 spin_lock(&nn->nfs_client_lock); 624 spin_lock(&nn->nfs_client_lock);
624 list_for_each_entry(pos, &nn->nfs_client_list, cl_share_link) { 625 list_for_each_entry(pos, &nn->nfs_client_list, cl_share_link) {
626
627 if (pos->rpc_ops != new->rpc_ops)
628 continue;
629
630 if (pos->cl_proto != new->cl_proto)
631 continue;
632
633 if (pos->cl_minorversion != new->cl_minorversion)
634 continue;
635
625 /* If "pos" isn't marked ready, we can't trust the 636 /* If "pos" isn't marked ready, we can't trust the
626 * remaining fields in "pos", especially the client 637 * remaining fields in "pos", especially the client
627 * ID and serverowner fields. Wait for CREATE_SESSION 638 * ID and serverowner fields. Wait for CREATE_SESSION
@@ -647,15 +658,6 @@ int nfs41_walk_client_list(struct nfs_client *new,
647 if (pos->cl_cons_state != NFS_CS_READY) 658 if (pos->cl_cons_state != NFS_CS_READY)
648 continue; 659 continue;
649 660
650 if (pos->rpc_ops != new->rpc_ops)
651 continue;
652
653 if (pos->cl_proto != new->cl_proto)
654 continue;
655
656 if (pos->cl_minorversion != new->cl_minorversion)
657 continue;
658
659 if (!nfs4_match_clientids(pos, new)) 661 if (!nfs4_match_clientids(pos, new))
660 continue; 662 continue;
661 663
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 75ae8d22f067..6ca0c8e7a945 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2226,9 +2226,13 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata,
2226 ret = _nfs4_proc_open(opendata); 2226 ret = _nfs4_proc_open(opendata);
2227 if (ret != 0) { 2227 if (ret != 0) {
2228 if (ret == -ENOENT) { 2228 if (ret == -ENOENT) {
2229 d_drop(opendata->dentry); 2229 dentry = opendata->dentry;
2230 d_add(opendata->dentry, NULL); 2230 if (dentry->d_inode)
2231 nfs_set_verifier(opendata->dentry, 2231 d_delete(dentry);
2232 else if (d_unhashed(dentry))
2233 d_add(dentry, NULL);
2234
2235 nfs_set_verifier(dentry,
2232 nfs_save_change_attribute(opendata->dir->d_inode)); 2236 nfs_save_change_attribute(opendata->dir->d_inode));
2233 } 2237 }
2234 goto out; 2238 goto out;
@@ -2560,6 +2564,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
2560 struct nfs4_closedata *calldata = data; 2564 struct nfs4_closedata *calldata = data;
2561 struct nfs4_state *state = calldata->state; 2565 struct nfs4_state *state = calldata->state;
2562 struct nfs_server *server = NFS_SERVER(calldata->inode); 2566 struct nfs_server *server = NFS_SERVER(calldata->inode);
2567 nfs4_stateid *res_stateid = NULL;
2563 2568
2564 dprintk("%s: begin!\n", __func__); 2569 dprintk("%s: begin!\n", __func__);
2565 if (!nfs4_sequence_done(task, &calldata->res.seq_res)) 2570 if (!nfs4_sequence_done(task, &calldata->res.seq_res))
@@ -2570,12 +2575,12 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
2570 */ 2575 */
2571 switch (task->tk_status) { 2576 switch (task->tk_status) {
2572 case 0: 2577 case 0:
2573 if (calldata->roc) 2578 res_stateid = &calldata->res.stateid;
2579 if (calldata->arg.fmode == 0 && calldata->roc)
2574 pnfs_roc_set_barrier(state->inode, 2580 pnfs_roc_set_barrier(state->inode,
2575 calldata->roc_barrier); 2581 calldata->roc_barrier);
2576 nfs_clear_open_stateid(state, &calldata->res.stateid, 0);
2577 renew_lease(server, calldata->timestamp); 2582 renew_lease(server, calldata->timestamp);
2578 goto out_release; 2583 break;
2579 case -NFS4ERR_ADMIN_REVOKED: 2584 case -NFS4ERR_ADMIN_REVOKED:
2580 case -NFS4ERR_STALE_STATEID: 2585 case -NFS4ERR_STALE_STATEID:
2581 case -NFS4ERR_OLD_STATEID: 2586 case -NFS4ERR_OLD_STATEID:
@@ -2589,7 +2594,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
2589 goto out_release; 2594 goto out_release;
2590 } 2595 }
2591 } 2596 }
2592 nfs_clear_open_stateid(state, NULL, calldata->arg.fmode); 2597 nfs_clear_open_stateid(state, res_stateid, calldata->arg.fmode);
2593out_release: 2598out_release:
2594 nfs_release_seqid(calldata->arg.seqid); 2599 nfs_release_seqid(calldata->arg.seqid);
2595 nfs_refresh_inode(calldata->inode, calldata->res.fattr); 2600 nfs_refresh_inode(calldata->inode, calldata->res.fattr);
@@ -2601,6 +2606,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
2601 struct nfs4_closedata *calldata = data; 2606 struct nfs4_closedata *calldata = data;
2602 struct nfs4_state *state = calldata->state; 2607 struct nfs4_state *state = calldata->state;
2603 struct inode *inode = calldata->inode; 2608 struct inode *inode = calldata->inode;
2609 bool is_rdonly, is_wronly, is_rdwr;
2604 int call_close = 0; 2610 int call_close = 0;
2605 2611
2606 dprintk("%s: begin!\n", __func__); 2612 dprintk("%s: begin!\n", __func__);
@@ -2608,21 +2614,27 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
2608 goto out_wait; 2614 goto out_wait;
2609 2615
2610 task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE]; 2616 task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE];
2611 calldata->arg.fmode = FMODE_READ|FMODE_WRITE;
2612 spin_lock(&state->owner->so_lock); 2617 spin_lock(&state->owner->so_lock);
2618 is_rdwr = test_bit(NFS_O_RDWR_STATE, &state->flags);
2619 is_rdonly = test_bit(NFS_O_RDONLY_STATE, &state->flags);
2620 is_wronly = test_bit(NFS_O_WRONLY_STATE, &state->flags);
2613 /* Calculate the change in open mode */ 2621 /* Calculate the change in open mode */
2622 calldata->arg.fmode = 0;
2614 if (state->n_rdwr == 0) { 2623 if (state->n_rdwr == 0) {
2615 if (state->n_rdonly == 0) { 2624 if (state->n_rdonly == 0)
2616 call_close |= test_bit(NFS_O_RDONLY_STATE, &state->flags); 2625 call_close |= is_rdonly;
2617 call_close |= test_bit(NFS_O_RDWR_STATE, &state->flags); 2626 else if (is_rdonly)
2618 calldata->arg.fmode &= ~FMODE_READ; 2627 calldata->arg.fmode |= FMODE_READ;
2619 } 2628 if (state->n_wronly == 0)
2620 if (state->n_wronly == 0) { 2629 call_close |= is_wronly;
2621 call_close |= test_bit(NFS_O_WRONLY_STATE, &state->flags); 2630 else if (is_wronly)
2622 call_close |= test_bit(NFS_O_RDWR_STATE, &state->flags); 2631 calldata->arg.fmode |= FMODE_WRITE;
2623 calldata->arg.fmode &= ~FMODE_WRITE; 2632 } else if (is_rdwr)
2624 } 2633 calldata->arg.fmode |= FMODE_READ|FMODE_WRITE;
2625 } 2634
2635 if (calldata->arg.fmode == 0)
2636 call_close |= is_rdwr;
2637
2626 if (!nfs4_valid_open_stateid(state)) 2638 if (!nfs4_valid_open_stateid(state))
2627 call_close = 0; 2639 call_close = 0;
2628 spin_unlock(&state->owner->so_lock); 2640 spin_unlock(&state->owner->so_lock);
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index a043f618cd5a..22fe35104c0c 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -799,18 +799,6 @@ __nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner)
799 return NULL; 799 return NULL;
800} 800}
801 801
802static void
803free_lock_state_work(struct work_struct *work)
804{
805 struct nfs4_lock_state *lsp = container_of(work,
806 struct nfs4_lock_state, ls_release);
807 struct nfs4_state *state = lsp->ls_state;
808 struct nfs_server *server = state->owner->so_server;
809 struct nfs_client *clp = server->nfs_client;
810
811 clp->cl_mvops->free_lock_state(server, lsp);
812}
813
814/* 802/*
815 * Return a compatible lock_state. If no initialized lock_state structure 803 * Return a compatible lock_state. If no initialized lock_state structure
816 * exists, return an uninitialized one. 804 * exists, return an uninitialized one.
@@ -832,7 +820,6 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f
832 if (lsp->ls_seqid.owner_id < 0) 820 if (lsp->ls_seqid.owner_id < 0)
833 goto out_free; 821 goto out_free;
834 INIT_LIST_HEAD(&lsp->ls_locks); 822 INIT_LIST_HEAD(&lsp->ls_locks);
835 INIT_WORK(&lsp->ls_release, free_lock_state_work);
836 return lsp; 823 return lsp;
837out_free: 824out_free:
838 kfree(lsp); 825 kfree(lsp);
@@ -896,12 +883,13 @@ void nfs4_put_lock_state(struct nfs4_lock_state *lsp)
896 if (list_empty(&state->lock_states)) 883 if (list_empty(&state->lock_states))
897 clear_bit(LK_STATE_IN_USE, &state->flags); 884 clear_bit(LK_STATE_IN_USE, &state->flags);
898 spin_unlock(&state->state_lock); 885 spin_unlock(&state->state_lock);
899 if (test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags)) 886 server = state->owner->so_server;
900 queue_work(nfsiod_workqueue, &lsp->ls_release); 887 if (test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags)) {
901 else { 888 struct nfs_client *clp = server->nfs_client;
902 server = state->owner->so_server; 889
890 clp->cl_mvops->free_lock_state(server, lsp);
891 } else
903 nfs4_free_lock_state(server, lsp); 892 nfs4_free_lock_state(server, lsp);
904 }
905} 893}
906 894
907static void nfs4_fl_copy_lock(struct file_lock *dst, struct file_lock *src) 895static void nfs4_fl_copy_lock(struct file_lock *dst, struct file_lock *src)
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index ba491926df5f..be7cbce6e4c7 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -116,7 +116,7 @@ __nfs_iocounter_wait(struct nfs_io_counter *c)
116 if (atomic_read(&c->io_count) == 0) 116 if (atomic_read(&c->io_count) == 0)
117 break; 117 break;
118 ret = nfs_wait_bit_killable(&q.key); 118 ret = nfs_wait_bit_killable(&q.key);
119 } while (atomic_read(&c->io_count) != 0); 119 } while (atomic_read(&c->io_count) != 0 && !ret);
120 finish_wait(wq, &q.wait); 120 finish_wait(wq, &q.wait);
121 return ret; 121 return ret;
122} 122}
@@ -139,26 +139,49 @@ nfs_iocounter_wait(struct nfs_io_counter *c)
139/* 139/*
140 * nfs_page_group_lock - lock the head of the page group 140 * nfs_page_group_lock - lock the head of the page group
141 * @req - request in group that is to be locked 141 * @req - request in group that is to be locked
142 * @nonblock - if true don't block waiting for lock
142 * 143 *
143 * this lock must be held if modifying the page group list 144 * this lock must be held if modifying the page group list
144 * 145 *
145 * returns result from wait_on_bit_lock: 0 on success, < 0 on error 146 * return 0 on success, < 0 on error: -EAGAIN if nonblocking or the
147 * result from wait_on_bit_lock
148 *
149 * NOTE: calling with nonblock=false should always have set the
150 * lock bit (see fs/buffer.c and other uses of wait_on_bit_lock
151 * with TASK_UNINTERRUPTIBLE), so there is no need to check the result.
146 */ 152 */
147int 153int
148nfs_page_group_lock(struct nfs_page *req, bool wait) 154nfs_page_group_lock(struct nfs_page *req, bool nonblock)
149{ 155{
150 struct nfs_page *head = req->wb_head; 156 struct nfs_page *head = req->wb_head;
151 int ret;
152 157
153 WARN_ON_ONCE(head != head->wb_head); 158 WARN_ON_ONCE(head != head->wb_head);
154 159
155 do { 160 if (!test_and_set_bit(PG_HEADLOCK, &head->wb_flags))
156 ret = wait_on_bit_lock(&head->wb_flags, PG_HEADLOCK, 161 return 0;
157 TASK_UNINTERRUPTIBLE);
158 } while (wait && ret != 0);
159 162
160 WARN_ON_ONCE(ret > 0); 163 if (!nonblock)
161 return ret; 164 return wait_on_bit_lock(&head->wb_flags, PG_HEADLOCK,
165 TASK_UNINTERRUPTIBLE);
166
167 return -EAGAIN;
168}
169
170/*
171 * nfs_page_group_lock_wait - wait for the lock to clear, but don't grab it
172 * @req - a request in the group
173 *
174 * This is a blocking call to wait for the group lock to be cleared.
175 */
176void
177nfs_page_group_lock_wait(struct nfs_page *req)
178{
179 struct nfs_page *head = req->wb_head;
180
181 WARN_ON_ONCE(head != head->wb_head);
182
183 wait_on_bit(&head->wb_flags, PG_HEADLOCK,
184 TASK_UNINTERRUPTIBLE);
162} 185}
163 186
164/* 187/*
@@ -219,7 +242,7 @@ bool nfs_page_group_sync_on_bit(struct nfs_page *req, unsigned int bit)
219{ 242{
220 bool ret; 243 bool ret;
221 244
222 nfs_page_group_lock(req, true); 245 nfs_page_group_lock(req, false);
223 ret = nfs_page_group_sync_on_bit_locked(req, bit); 246 ret = nfs_page_group_sync_on_bit_locked(req, bit);
224 nfs_page_group_unlock(req); 247 nfs_page_group_unlock(req);
225 248
@@ -701,10 +724,11 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc,
701 struct nfs_pgio_header *hdr) 724 struct nfs_pgio_header *hdr)
702{ 725{
703 struct nfs_page *req; 726 struct nfs_page *req;
704 struct page **pages; 727 struct page **pages,
728 *last_page;
705 struct list_head *head = &desc->pg_list; 729 struct list_head *head = &desc->pg_list;
706 struct nfs_commit_info cinfo; 730 struct nfs_commit_info cinfo;
707 unsigned int pagecount; 731 unsigned int pagecount, pageused;
708 732
709 pagecount = nfs_page_array_len(desc->pg_base, desc->pg_count); 733 pagecount = nfs_page_array_len(desc->pg_base, desc->pg_count);
710 if (!nfs_pgarray_set(&hdr->page_array, pagecount)) 734 if (!nfs_pgarray_set(&hdr->page_array, pagecount))
@@ -712,12 +736,23 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc,
712 736
713 nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq); 737 nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq);
714 pages = hdr->page_array.pagevec; 738 pages = hdr->page_array.pagevec;
739 last_page = NULL;
740 pageused = 0;
715 while (!list_empty(head)) { 741 while (!list_empty(head)) {
716 req = nfs_list_entry(head->next); 742 req = nfs_list_entry(head->next);
717 nfs_list_remove_request(req); 743 nfs_list_remove_request(req);
718 nfs_list_add_request(req, &hdr->pages); 744 nfs_list_add_request(req, &hdr->pages);
719 *pages++ = req->wb_page; 745
746 if (WARN_ON_ONCE(pageused >= pagecount))
747 return nfs_pgio_error(desc, hdr);
748
749 if (!last_page || last_page != req->wb_page) {
750 *pages++ = last_page = req->wb_page;
751 pageused++;
752 }
720 } 753 }
754 if (WARN_ON_ONCE(pageused != pagecount))
755 return nfs_pgio_error(desc, hdr);
721 756
722 if ((desc->pg_ioflags & FLUSH_COND_STABLE) && 757 if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
723 (desc->pg_moreio || nfs_reqs_to_commit(&cinfo))) 758 (desc->pg_moreio || nfs_reqs_to_commit(&cinfo)))
@@ -788,6 +823,14 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev,
788 return false; 823 return false;
789 if (req_offset(req) != req_offset(prev) + prev->wb_bytes) 824 if (req_offset(req) != req_offset(prev) + prev->wb_bytes)
790 return false; 825 return false;
826 if (req->wb_page == prev->wb_page) {
827 if (req->wb_pgbase != prev->wb_pgbase + prev->wb_bytes)
828 return false;
829 } else {
830 if (req->wb_pgbase != 0 ||
831 prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE)
832 return false;
833 }
791 } 834 }
792 size = pgio->pg_ops->pg_test(pgio, prev, req); 835 size = pgio->pg_ops->pg_test(pgio, prev, req);
793 WARN_ON_ONCE(size > req->wb_bytes); 836 WARN_ON_ONCE(size > req->wb_bytes);
@@ -858,13 +901,8 @@ static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
858 struct nfs_page *subreq; 901 struct nfs_page *subreq;
859 unsigned int bytes_left = 0; 902 unsigned int bytes_left = 0;
860 unsigned int offset, pgbase; 903 unsigned int offset, pgbase;
861 int ret;
862 904
863 ret = nfs_page_group_lock(req, false); 905 nfs_page_group_lock(req, false);
864 if (ret < 0) {
865 desc->pg_error = ret;
866 return 0;
867 }
868 906
869 subreq = req; 907 subreq = req;
870 bytes_left = subreq->wb_bytes; 908 bytes_left = subreq->wb_bytes;
@@ -886,11 +924,7 @@ static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
886 if (desc->pg_recoalesce) 924 if (desc->pg_recoalesce)
887 return 0; 925 return 0;
888 /* retry add_request for this subreq */ 926 /* retry add_request for this subreq */
889 ret = nfs_page_group_lock(req, false); 927 nfs_page_group_lock(req, false);
890 if (ret < 0) {
891 desc->pg_error = ret;
892 return 0;
893 }
894 continue; 928 continue;
895 } 929 }
896 930
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index e3b5cf28bdc5..175d5d073ccf 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -241,7 +241,7 @@ static bool nfs_page_group_covers_page(struct nfs_page *req)
241 unsigned int pos = 0; 241 unsigned int pos = 0;
242 unsigned int len = nfs_page_length(req->wb_page); 242 unsigned int len = nfs_page_length(req->wb_page);
243 243
244 nfs_page_group_lock(req, true); 244 nfs_page_group_lock(req, false);
245 245
246 do { 246 do {
247 tmp = nfs_page_group_search_locked(req->wb_head, pos); 247 tmp = nfs_page_group_search_locked(req->wb_head, pos);
@@ -478,10 +478,23 @@ try_again:
478 return NULL; 478 return NULL;
479 } 479 }
480 480
481 /* lock each request in the page group */ 481 /* holding inode lock, so always make a non-blocking call to try the
482 ret = nfs_page_group_lock(head, false); 482 * page group lock */
483 if (ret < 0) 483 ret = nfs_page_group_lock(head, true);
484 if (ret < 0) {
485 spin_unlock(&inode->i_lock);
486
487 if (!nonblock && ret == -EAGAIN) {
488 nfs_page_group_lock_wait(head);
489 nfs_release_request(head);
490 goto try_again;
491 }
492
493 nfs_release_request(head);
484 return ERR_PTR(ret); 494 return ERR_PTR(ret);
495 }
496
497 /* lock each request in the page group */
485 subreq = head; 498 subreq = head;
486 do { 499 do {
487 /* 500 /*
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index f9821ce6658a..e94457c33ad6 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -2657,6 +2657,7 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen,
2657 struct xdr_stream *xdr = cd->xdr; 2657 struct xdr_stream *xdr = cd->xdr;
2658 int start_offset = xdr->buf->len; 2658 int start_offset = xdr->buf->len;
2659 int cookie_offset; 2659 int cookie_offset;
2660 u32 name_and_cookie;
2660 int entry_bytes; 2661 int entry_bytes;
2661 __be32 nfserr = nfserr_toosmall; 2662 __be32 nfserr = nfserr_toosmall;
2662 __be64 wire_offset; 2663 __be64 wire_offset;
@@ -2718,7 +2719,14 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen,
2718 cd->rd_maxcount -= entry_bytes; 2719 cd->rd_maxcount -= entry_bytes;
2719 if (!cd->rd_dircount) 2720 if (!cd->rd_dircount)
2720 goto fail; 2721 goto fail;
2721 cd->rd_dircount--; 2722 /*
2723 * RFC 3530 14.2.24 describes rd_dircount as only a "hint", so
2724 * let's always let through the first entry, at least:
2725 */
2726 name_and_cookie = 4 * XDR_QUADLEN(namlen) + 8;
2727 if (name_and_cookie > cd->rd_dircount && cd->cookie_offset)
2728 goto fail;
2729 cd->rd_dircount -= min(cd->rd_dircount, name_and_cookie);
2722 cd->cookie_offset = cookie_offset; 2730 cd->cookie_offset = cookie_offset;
2723skip_entry: 2731skip_entry:
2724 cd->common.err = nfs_ok; 2732 cd->common.err = nfs_ok;
@@ -3321,6 +3329,10 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4
3321 } 3329 }
3322 maxcount = min_t(int, maxcount-16, bytes_left); 3330 maxcount = min_t(int, maxcount-16, bytes_left);
3323 3331
3332 /* RFC 3530 14.2.24 allows us to ignore dircount when it's 0: */
3333 if (!readdir->rd_dircount)
3334 readdir->rd_dircount = INT_MAX;
3335
3324 readdir->xdr = xdr; 3336 readdir->xdr = xdr;
3325 readdir->rd_maxcount = maxcount; 3337 readdir->rd_maxcount = maxcount;
3326 readdir->common.err = 0; 3338 readdir->common.err = 0;
diff --git a/fs/notify/fdinfo.c b/fs/notify/fdinfo.c
index 238a5930cb3c..9d7e2b9659cb 100644
--- a/fs/notify/fdinfo.c
+++ b/fs/notify/fdinfo.c
@@ -42,7 +42,7 @@ static int show_mark_fhandle(struct seq_file *m, struct inode *inode)
42{ 42{
43 struct { 43 struct {
44 struct file_handle handle; 44 struct file_handle handle;
45 u8 pad[64]; 45 u8 pad[MAX_HANDLE_SZ];
46 } f; 46 } f;
47 int size, ret, i; 47 int size, ret, i;
48 48
@@ -50,7 +50,7 @@ static int show_mark_fhandle(struct seq_file *m, struct inode *inode)
50 size = f.handle.handle_bytes >> 2; 50 size = f.handle.handle_bytes >> 2;
51 51
52 ret = exportfs_encode_inode_fh(inode, (struct fid *)f.handle.f_handle, &size, 0); 52 ret = exportfs_encode_inode_fh(inode, (struct fid *)f.handle.f_handle, &size, 0);
53 if ((ret == 255) || (ret == -ENOSPC)) { 53 if ((ret == FILEID_INVALID) || (ret < 0)) {
54 WARN_ONCE(1, "Can't encode file handler for inotify: %d\n", ret); 54 WARN_ONCE(1, "Can't encode file handler for inotify: %d\n", ret);
55 return 0; 55 return 0;
56 } 56 }
diff --git a/fs/ocfs2/cluster/quorum.c b/fs/ocfs2/cluster/quorum.c
index 1ec141e758d7..62e8ec619b4c 100644
--- a/fs/ocfs2/cluster/quorum.c
+++ b/fs/ocfs2/cluster/quorum.c
@@ -160,9 +160,18 @@ static void o2quo_make_decision(struct work_struct *work)
160 } 160 }
161 161
162out: 162out:
163 spin_unlock(&qs->qs_lock); 163 if (fence) {
164 if (fence) 164 spin_unlock(&qs->qs_lock);
165 o2quo_fence_self(); 165 o2quo_fence_self();
166 } else {
167 mlog(ML_NOTICE, "not fencing this node, heartbeating: %d, "
168 "connected: %d, lowest: %d (%sreachable)\n",
169 qs->qs_heartbeating, qs->qs_connected, lowest_hb,
170 lowest_reachable ? "" : "un");
171 spin_unlock(&qs->qs_lock);
172
173 }
174
166} 175}
167 176
168static void o2quo_set_hold(struct o2quo_state *qs, u8 node) 177static void o2quo_set_hold(struct o2quo_state *qs, u8 node)
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index 681691bc233a..ea34952f9496 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -1480,6 +1480,14 @@ static int o2net_set_nodelay(struct socket *sock)
1480 return ret; 1480 return ret;
1481} 1481}
1482 1482
1483static int o2net_set_usertimeout(struct socket *sock)
1484{
1485 int user_timeout = O2NET_TCP_USER_TIMEOUT;
1486
1487 return kernel_setsockopt(sock, SOL_TCP, TCP_USER_TIMEOUT,
1488 (char *)&user_timeout, sizeof(user_timeout));
1489}
1490
1483static void o2net_initialize_handshake(void) 1491static void o2net_initialize_handshake(void)
1484{ 1492{
1485 o2net_hand->o2hb_heartbeat_timeout_ms = cpu_to_be32( 1493 o2net_hand->o2hb_heartbeat_timeout_ms = cpu_to_be32(
@@ -1536,16 +1544,20 @@ static void o2net_idle_timer(unsigned long data)
1536#endif 1544#endif
1537 1545
1538 printk(KERN_NOTICE "o2net: Connection to " SC_NODEF_FMT " has been " 1546 printk(KERN_NOTICE "o2net: Connection to " SC_NODEF_FMT " has been "
1539 "idle for %lu.%lu secs, shutting it down.\n", SC_NODEF_ARGS(sc), 1547 "idle for %lu.%lu secs.\n",
1540 msecs / 1000, msecs % 1000); 1548 SC_NODEF_ARGS(sc), msecs / 1000, msecs % 1000);
1541 1549
1542 /* 1550 /* idle timeout happened, don't shut down the connection, but
1543 * Initialize the nn_timeout so that the next connection attempt 1551 * make a fence decision. Maybe the connection can recover before
1544 * will continue in o2net_start_connect. 1552 * the decision is made.
1545 */ 1553 */
1546 atomic_set(&nn->nn_timeout, 1); 1554 atomic_set(&nn->nn_timeout, 1);
1555 o2quo_conn_err(o2net_num_from_nn(nn));
1556 queue_delayed_work(o2net_wq, &nn->nn_still_up,
1557 msecs_to_jiffies(O2NET_QUORUM_DELAY_MS));
1558
1559 o2net_sc_reset_idle_timer(sc);
1547 1560
1548 o2net_sc_queue_work(sc, &sc->sc_shutdown_work);
1549} 1561}
1550 1562
1551static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc) 1563static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc)
@@ -1560,6 +1572,15 @@ static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc)
1560 1572
1561static void o2net_sc_postpone_idle(struct o2net_sock_container *sc) 1573static void o2net_sc_postpone_idle(struct o2net_sock_container *sc)
1562{ 1574{
1575 struct o2net_node *nn = o2net_nn_from_num(sc->sc_node->nd_num);
1576
1577 /* clear fence decision since the connection recovered from timeout */
1578 if (atomic_read(&nn->nn_timeout)) {
1579 o2quo_conn_up(o2net_num_from_nn(nn));
1580 cancel_delayed_work(&nn->nn_still_up);
1581 atomic_set(&nn->nn_timeout, 0);
1582 }
1583
1563 /* Only push out an existing timer */ 1584 /* Only push out an existing timer */
1564 if (timer_pending(&sc->sc_idle_timeout)) 1585 if (timer_pending(&sc->sc_idle_timeout))
1565 o2net_sc_reset_idle_timer(sc); 1586 o2net_sc_reset_idle_timer(sc);
@@ -1650,6 +1671,12 @@ static void o2net_start_connect(struct work_struct *work)
1650 goto out; 1671 goto out;
1651 } 1672 }
1652 1673
1674 ret = o2net_set_usertimeout(sock);
1675 if (ret) {
1676 mlog(ML_ERROR, "set TCP_USER_TIMEOUT failed with %d\n", ret);
1677 goto out;
1678 }
1679
1653 o2net_register_callbacks(sc->sc_sock->sk, sc); 1680 o2net_register_callbacks(sc->sc_sock->sk, sc);
1654 1681
1655 spin_lock(&nn->nn_lock); 1682 spin_lock(&nn->nn_lock);
@@ -1831,6 +1858,12 @@ static int o2net_accept_one(struct socket *sock, int *more)
1831 goto out; 1858 goto out;
1832 } 1859 }
1833 1860
1861 ret = o2net_set_usertimeout(new_sock);
1862 if (ret) {
1863 mlog(ML_ERROR, "set TCP_USER_TIMEOUT failed with %d\n", ret);
1864 goto out;
1865 }
1866
1834 slen = sizeof(sin); 1867 slen = sizeof(sin);
1835 ret = new_sock->ops->getname(new_sock, (struct sockaddr *) &sin, 1868 ret = new_sock->ops->getname(new_sock, (struct sockaddr *) &sin,
1836 &slen, 1); 1869 &slen, 1);
diff --git a/fs/ocfs2/cluster/tcp.h b/fs/ocfs2/cluster/tcp.h
index 5bada2a69b50..c571e849fda4 100644
--- a/fs/ocfs2/cluster/tcp.h
+++ b/fs/ocfs2/cluster/tcp.h
@@ -63,6 +63,7 @@ typedef void (o2net_post_msg_handler_func)(int status, void *data,
63#define O2NET_KEEPALIVE_DELAY_MS_DEFAULT 2000 63#define O2NET_KEEPALIVE_DELAY_MS_DEFAULT 2000
64#define O2NET_IDLE_TIMEOUT_MS_DEFAULT 30000 64#define O2NET_IDLE_TIMEOUT_MS_DEFAULT 30000
65 65
66#define O2NET_TCP_USER_TIMEOUT 0x7fffffff
66 67
67/* TODO: figure this out.... */ 68/* TODO: figure this out.... */
68static inline int o2net_link_down(int err, struct socket *sock) 69static inline int o2net_link_down(int err, struct socket *sock)
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c
index 6f66b3751ace..53e6c40ed4c6 100644
--- a/fs/ocfs2/ioctl.c
+++ b/fs/ocfs2/ioctl.c
@@ -35,9 +35,8 @@
35 copy_to_user((typeof(a) __user *)b, &(a), sizeof(a)) 35 copy_to_user((typeof(a) __user *)b, &(a), sizeof(a))
36 36
37/* 37/*
38 * This call is void because we are already reporting an error that may 38 * This is just a best-effort to tell userspace that this request
39 * be -EFAULT. The error will be returned from the ioctl(2) call. It's 39 * caused the error.
40 * just a best-effort to tell userspace that this request caused the error.
41 */ 40 */
42static inline void o2info_set_request_error(struct ocfs2_info_request *kreq, 41static inline void o2info_set_request_error(struct ocfs2_info_request *kreq,
43 struct ocfs2_info_request __user *req) 42 struct ocfs2_info_request __user *req)
@@ -146,136 +145,105 @@ bail:
146static int ocfs2_info_handle_blocksize(struct inode *inode, 145static int ocfs2_info_handle_blocksize(struct inode *inode,
147 struct ocfs2_info_request __user *req) 146 struct ocfs2_info_request __user *req)
148{ 147{
149 int status = -EFAULT;
150 struct ocfs2_info_blocksize oib; 148 struct ocfs2_info_blocksize oib;
151 149
152 if (o2info_from_user(oib, req)) 150 if (o2info_from_user(oib, req))
153 goto bail; 151 return -EFAULT;
154 152
155 oib.ib_blocksize = inode->i_sb->s_blocksize; 153 oib.ib_blocksize = inode->i_sb->s_blocksize;
156 154
157 o2info_set_request_filled(&oib.ib_req); 155 o2info_set_request_filled(&oib.ib_req);
158 156
159 if (o2info_to_user(oib, req)) 157 if (o2info_to_user(oib, req))
160 goto bail; 158 return -EFAULT;
161
162 status = 0;
163bail:
164 if (status)
165 o2info_set_request_error(&oib.ib_req, req);
166 159
167 return status; 160 return 0;
168} 161}
169 162
170static int ocfs2_info_handle_clustersize(struct inode *inode, 163static int ocfs2_info_handle_clustersize(struct inode *inode,
171 struct ocfs2_info_request __user *req) 164 struct ocfs2_info_request __user *req)
172{ 165{
173 int status = -EFAULT;
174 struct ocfs2_info_clustersize oic; 166 struct ocfs2_info_clustersize oic;
175 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 167 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
176 168
177 if (o2info_from_user(oic, req)) 169 if (o2info_from_user(oic, req))
178 goto bail; 170 return -EFAULT;
179 171
180 oic.ic_clustersize = osb->s_clustersize; 172 oic.ic_clustersize = osb->s_clustersize;
181 173
182 o2info_set_request_filled(&oic.ic_req); 174 o2info_set_request_filled(&oic.ic_req);
183 175
184 if (o2info_to_user(oic, req)) 176 if (o2info_to_user(oic, req))
185 goto bail; 177 return -EFAULT;
186
187 status = 0;
188bail:
189 if (status)
190 o2info_set_request_error(&oic.ic_req, req);
191 178
192 return status; 179 return 0;
193} 180}
194 181
195static int ocfs2_info_handle_maxslots(struct inode *inode, 182static int ocfs2_info_handle_maxslots(struct inode *inode,
196 struct ocfs2_info_request __user *req) 183 struct ocfs2_info_request __user *req)
197{ 184{
198 int status = -EFAULT;
199 struct ocfs2_info_maxslots oim; 185 struct ocfs2_info_maxslots oim;
200 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 186 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
201 187
202 if (o2info_from_user(oim, req)) 188 if (o2info_from_user(oim, req))
203 goto bail; 189 return -EFAULT;
204 190
205 oim.im_max_slots = osb->max_slots; 191 oim.im_max_slots = osb->max_slots;
206 192
207 o2info_set_request_filled(&oim.im_req); 193 o2info_set_request_filled(&oim.im_req);
208 194
209 if (o2info_to_user(oim, req)) 195 if (o2info_to_user(oim, req))
210 goto bail; 196 return -EFAULT;
211 197
212 status = 0; 198 return 0;
213bail:
214 if (status)
215 o2info_set_request_error(&oim.im_req, req);
216
217 return status;
218} 199}
219 200
220static int ocfs2_info_handle_label(struct inode *inode, 201static int ocfs2_info_handle_label(struct inode *inode,
221 struct ocfs2_info_request __user *req) 202 struct ocfs2_info_request __user *req)
222{ 203{
223 int status = -EFAULT;
224 struct ocfs2_info_label oil; 204 struct ocfs2_info_label oil;
225 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 205 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
226 206
227 if (o2info_from_user(oil, req)) 207 if (o2info_from_user(oil, req))
228 goto bail; 208 return -EFAULT;
229 209
230 memcpy(oil.il_label, osb->vol_label, OCFS2_MAX_VOL_LABEL_LEN); 210 memcpy(oil.il_label, osb->vol_label, OCFS2_MAX_VOL_LABEL_LEN);
231 211
232 o2info_set_request_filled(&oil.il_req); 212 o2info_set_request_filled(&oil.il_req);
233 213
234 if (o2info_to_user(oil, req)) 214 if (o2info_to_user(oil, req))
235 goto bail; 215 return -EFAULT;
236 216
237 status = 0; 217 return 0;
238bail:
239 if (status)
240 o2info_set_request_error(&oil.il_req, req);
241
242 return status;
243} 218}
244 219
245static int ocfs2_info_handle_uuid(struct inode *inode, 220static int ocfs2_info_handle_uuid(struct inode *inode,
246 struct ocfs2_info_request __user *req) 221 struct ocfs2_info_request __user *req)
247{ 222{
248 int status = -EFAULT;
249 struct ocfs2_info_uuid oiu; 223 struct ocfs2_info_uuid oiu;
250 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 224 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
251 225
252 if (o2info_from_user(oiu, req)) 226 if (o2info_from_user(oiu, req))
253 goto bail; 227 return -EFAULT;
254 228
255 memcpy(oiu.iu_uuid_str, osb->uuid_str, OCFS2_TEXT_UUID_LEN + 1); 229 memcpy(oiu.iu_uuid_str, osb->uuid_str, OCFS2_TEXT_UUID_LEN + 1);
256 230
257 o2info_set_request_filled(&oiu.iu_req); 231 o2info_set_request_filled(&oiu.iu_req);
258 232
259 if (o2info_to_user(oiu, req)) 233 if (o2info_to_user(oiu, req))
260 goto bail; 234 return -EFAULT;
261
262 status = 0;
263bail:
264 if (status)
265 o2info_set_request_error(&oiu.iu_req, req);
266 235
267 return status; 236 return 0;
268} 237}
269 238
270static int ocfs2_info_handle_fs_features(struct inode *inode, 239static int ocfs2_info_handle_fs_features(struct inode *inode,
271 struct ocfs2_info_request __user *req) 240 struct ocfs2_info_request __user *req)
272{ 241{
273 int status = -EFAULT;
274 struct ocfs2_info_fs_features oif; 242 struct ocfs2_info_fs_features oif;
275 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 243 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
276 244
277 if (o2info_from_user(oif, req)) 245 if (o2info_from_user(oif, req))
278 goto bail; 246 return -EFAULT;
279 247
280 oif.if_compat_features = osb->s_feature_compat; 248 oif.if_compat_features = osb->s_feature_compat;
281 oif.if_incompat_features = osb->s_feature_incompat; 249 oif.if_incompat_features = osb->s_feature_incompat;
@@ -284,39 +252,28 @@ static int ocfs2_info_handle_fs_features(struct inode *inode,
284 o2info_set_request_filled(&oif.if_req); 252 o2info_set_request_filled(&oif.if_req);
285 253
286 if (o2info_to_user(oif, req)) 254 if (o2info_to_user(oif, req))
287 goto bail; 255 return -EFAULT;
288 256
289 status = 0; 257 return 0;
290bail:
291 if (status)
292 o2info_set_request_error(&oif.if_req, req);
293
294 return status;
295} 258}
296 259
297static int ocfs2_info_handle_journal_size(struct inode *inode, 260static int ocfs2_info_handle_journal_size(struct inode *inode,
298 struct ocfs2_info_request __user *req) 261 struct ocfs2_info_request __user *req)
299{ 262{
300 int status = -EFAULT;
301 struct ocfs2_info_journal_size oij; 263 struct ocfs2_info_journal_size oij;
302 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 264 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
303 265
304 if (o2info_from_user(oij, req)) 266 if (o2info_from_user(oij, req))
305 goto bail; 267 return -EFAULT;
306 268
307 oij.ij_journal_size = i_size_read(osb->journal->j_inode); 269 oij.ij_journal_size = i_size_read(osb->journal->j_inode);
308 270
309 o2info_set_request_filled(&oij.ij_req); 271 o2info_set_request_filled(&oij.ij_req);
310 272
311 if (o2info_to_user(oij, req)) 273 if (o2info_to_user(oij, req))
312 goto bail; 274 return -EFAULT;
313 275
314 status = 0; 276 return 0;
315bail:
316 if (status)
317 o2info_set_request_error(&oij.ij_req, req);
318
319 return status;
320} 277}
321 278
322static int ocfs2_info_scan_inode_alloc(struct ocfs2_super *osb, 279static int ocfs2_info_scan_inode_alloc(struct ocfs2_super *osb,
@@ -373,7 +330,7 @@ static int ocfs2_info_handle_freeinode(struct inode *inode,
373 u32 i; 330 u32 i;
374 u64 blkno = -1; 331 u64 blkno = -1;
375 char namebuf[40]; 332 char namebuf[40];
376 int status = -EFAULT, type = INODE_ALLOC_SYSTEM_INODE; 333 int status, type = INODE_ALLOC_SYSTEM_INODE;
377 struct ocfs2_info_freeinode *oifi = NULL; 334 struct ocfs2_info_freeinode *oifi = NULL;
378 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 335 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
379 struct inode *inode_alloc = NULL; 336 struct inode *inode_alloc = NULL;
@@ -385,8 +342,10 @@ static int ocfs2_info_handle_freeinode(struct inode *inode,
385 goto out_err; 342 goto out_err;
386 } 343 }
387 344
388 if (o2info_from_user(*oifi, req)) 345 if (o2info_from_user(*oifi, req)) {
389 goto bail; 346 status = -EFAULT;
347 goto out_free;
348 }
390 349
391 oifi->ifi_slotnum = osb->max_slots; 350 oifi->ifi_slotnum = osb->max_slots;
392 351
@@ -424,14 +383,16 @@ static int ocfs2_info_handle_freeinode(struct inode *inode,
424 383
425 o2info_set_request_filled(&oifi->ifi_req); 384 o2info_set_request_filled(&oifi->ifi_req);
426 385
427 if (o2info_to_user(*oifi, req)) 386 if (o2info_to_user(*oifi, req)) {
428 goto bail; 387 status = -EFAULT;
388 goto out_free;
389 }
429 390
430 status = 0; 391 status = 0;
431bail: 392bail:
432 if (status) 393 if (status)
433 o2info_set_request_error(&oifi->ifi_req, req); 394 o2info_set_request_error(&oifi->ifi_req, req);
434 395out_free:
435 kfree(oifi); 396 kfree(oifi);
436out_err: 397out_err:
437 return status; 398 return status;
@@ -658,7 +619,7 @@ static int ocfs2_info_handle_freefrag(struct inode *inode,
658{ 619{
659 u64 blkno = -1; 620 u64 blkno = -1;
660 char namebuf[40]; 621 char namebuf[40];
661 int status = -EFAULT, type = GLOBAL_BITMAP_SYSTEM_INODE; 622 int status, type = GLOBAL_BITMAP_SYSTEM_INODE;
662 623
663 struct ocfs2_info_freefrag *oiff; 624 struct ocfs2_info_freefrag *oiff;
664 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 625 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
@@ -671,8 +632,10 @@ static int ocfs2_info_handle_freefrag(struct inode *inode,
671 goto out_err; 632 goto out_err;
672 } 633 }
673 634
674 if (o2info_from_user(*oiff, req)) 635 if (o2info_from_user(*oiff, req)) {
675 goto bail; 636 status = -EFAULT;
637 goto out_free;
638 }
676 /* 639 /*
677 * chunksize from userspace should be power of 2. 640 * chunksize from userspace should be power of 2.
678 */ 641 */
@@ -711,14 +674,14 @@ static int ocfs2_info_handle_freefrag(struct inode *inode,
711 674
712 if (o2info_to_user(*oiff, req)) { 675 if (o2info_to_user(*oiff, req)) {
713 status = -EFAULT; 676 status = -EFAULT;
714 goto bail; 677 goto out_free;
715 } 678 }
716 679
717 status = 0; 680 status = 0;
718bail: 681bail:
719 if (status) 682 if (status)
720 o2info_set_request_error(&oiff->iff_req, req); 683 o2info_set_request_error(&oiff->iff_req, req);
721 684out_free:
722 kfree(oiff); 685 kfree(oiff);
723out_err: 686out_err:
724 return status; 687 return status;
@@ -727,23 +690,17 @@ out_err:
727static int ocfs2_info_handle_unknown(struct inode *inode, 690static int ocfs2_info_handle_unknown(struct inode *inode,
728 struct ocfs2_info_request __user *req) 691 struct ocfs2_info_request __user *req)
729{ 692{
730 int status = -EFAULT;
731 struct ocfs2_info_request oir; 693 struct ocfs2_info_request oir;
732 694
733 if (o2info_from_user(oir, req)) 695 if (o2info_from_user(oir, req))
734 goto bail; 696 return -EFAULT;
735 697
736 o2info_clear_request_filled(&oir); 698 o2info_clear_request_filled(&oir);
737 699
738 if (o2info_to_user(oir, req)) 700 if (o2info_to_user(oir, req))
739 goto bail; 701 return -EFAULT;
740 702
741 status = 0; 703 return 0;
742bail:
743 if (status)
744 o2info_set_request_error(&oir, req);
745
746 return status;
747} 704}
748 705
749/* 706/*
diff --git a/fs/pnode.c b/fs/pnode.c
index 302bf22c4a30..aae331a5d03b 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -381,6 +381,7 @@ static void __propagate_umount(struct mount *mnt)
381 * other children 381 * other children
382 */ 382 */
383 if (child && list_empty(&child->mnt_mounts)) { 383 if (child && list_empty(&child->mnt_mounts)) {
384 list_del_init(&child->mnt_child);
384 hlist_del_init_rcu(&child->mnt_hash); 385 hlist_del_init_rcu(&child->mnt_hash);
385 hlist_add_before_rcu(&child->mnt_hash, &mnt->mnt_hash); 386 hlist_add_before_rcu(&child->mnt_hash, &mnt->mnt_hash);
386 } 387 }
diff --git a/fs/sync.c b/fs/sync.c
index b28d1dd10e8b..bdc729d80e5e 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -65,7 +65,7 @@ int sync_filesystem(struct super_block *sb)
65 return ret; 65 return ret;
66 return __sync_filesystem(sb, 1); 66 return __sync_filesystem(sb, 1);
67} 67}
68EXPORT_SYMBOL_GPL(sync_filesystem); 68EXPORT_SYMBOL(sync_filesystem);
69 69
70static void sync_inodes_one_sb(struct super_block *sb, void *arg) 70static void sync_inodes_one_sb(struct super_block *sb, void *arg)
71{ 71{
diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c
index 6eaf5edf1ea1..e77db621ec89 100644
--- a/fs/udf/ialloc.c
+++ b/fs/udf/ialloc.c
@@ -45,7 +45,7 @@ void udf_free_inode(struct inode *inode)
45 udf_free_blocks(sb, NULL, &UDF_I(inode)->i_location, 0, 1); 45 udf_free_blocks(sb, NULL, &UDF_I(inode)->i_location, 0, 1);
46} 46}
47 47
48struct inode *udf_new_inode(struct inode *dir, umode_t mode, int *err) 48struct inode *udf_new_inode(struct inode *dir, umode_t mode)
49{ 49{
50 struct super_block *sb = dir->i_sb; 50 struct super_block *sb = dir->i_sb;
51 struct udf_sb_info *sbi = UDF_SB(sb); 51 struct udf_sb_info *sbi = UDF_SB(sb);
@@ -55,14 +55,12 @@ struct inode *udf_new_inode(struct inode *dir, umode_t mode, int *err)
55 struct udf_inode_info *iinfo; 55 struct udf_inode_info *iinfo;
56 struct udf_inode_info *dinfo = UDF_I(dir); 56 struct udf_inode_info *dinfo = UDF_I(dir);
57 struct logicalVolIntegrityDescImpUse *lvidiu; 57 struct logicalVolIntegrityDescImpUse *lvidiu;
58 int err;
58 59
59 inode = new_inode(sb); 60 inode = new_inode(sb);
60 61
61 if (!inode) { 62 if (!inode)
62 *err = -ENOMEM; 63 return ERR_PTR(-ENOMEM);
63 return NULL;
64 }
65 *err = -ENOSPC;
66 64
67 iinfo = UDF_I(inode); 65 iinfo = UDF_I(inode);
68 if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_EXTENDED_FE)) { 66 if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_EXTENDED_FE)) {
@@ -80,21 +78,22 @@ struct inode *udf_new_inode(struct inode *dir, umode_t mode, int *err)
80 } 78 }
81 if (!iinfo->i_ext.i_data) { 79 if (!iinfo->i_ext.i_data) {
82 iput(inode); 80 iput(inode);
83 *err = -ENOMEM; 81 return ERR_PTR(-ENOMEM);
84 return NULL;
85 } 82 }
86 83
84 err = -ENOSPC;
87 block = udf_new_block(dir->i_sb, NULL, 85 block = udf_new_block(dir->i_sb, NULL,
88 dinfo->i_location.partitionReferenceNum, 86 dinfo->i_location.partitionReferenceNum,
89 start, err); 87 start, &err);
90 if (*err) { 88 if (err) {
91 iput(inode); 89 iput(inode);
92 return NULL; 90 return ERR_PTR(err);
93 } 91 }
94 92
95 lvidiu = udf_sb_lvidiu(sb); 93 lvidiu = udf_sb_lvidiu(sb);
96 if (lvidiu) { 94 if (lvidiu) {
97 iinfo->i_unique = lvid_get_unique_id(sb); 95 iinfo->i_unique = lvid_get_unique_id(sb);
96 inode->i_generation = iinfo->i_unique;
98 mutex_lock(&sbi->s_alloc_mutex); 97 mutex_lock(&sbi->s_alloc_mutex);
99 if (S_ISDIR(mode)) 98 if (S_ISDIR(mode))
100 le32_add_cpu(&lvidiu->numDirs, 1); 99 le32_add_cpu(&lvidiu->numDirs, 1);
@@ -123,9 +122,12 @@ struct inode *udf_new_inode(struct inode *dir, umode_t mode, int *err)
123 iinfo->i_alloc_type = ICBTAG_FLAG_AD_LONG; 122 iinfo->i_alloc_type = ICBTAG_FLAG_AD_LONG;
124 inode->i_mtime = inode->i_atime = inode->i_ctime = 123 inode->i_mtime = inode->i_atime = inode->i_ctime =
125 iinfo->i_crtime = current_fs_time(inode->i_sb); 124 iinfo->i_crtime = current_fs_time(inode->i_sb);
126 insert_inode_hash(inode); 125 if (unlikely(insert_inode_locked(inode) < 0)) {
126 make_bad_inode(inode);
127 iput(inode);
128 return ERR_PTR(-EIO);
129 }
127 mark_inode_dirty(inode); 130 mark_inode_dirty(inode);
128 131
129 *err = 0;
130 return inode; 132 return inode;
131} 133}
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 236cd48184c2..08598843288f 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -51,7 +51,6 @@ MODULE_LICENSE("GPL");
51 51
52static umode_t udf_convert_permissions(struct fileEntry *); 52static umode_t udf_convert_permissions(struct fileEntry *);
53static int udf_update_inode(struct inode *, int); 53static int udf_update_inode(struct inode *, int);
54static void udf_fill_inode(struct inode *, struct buffer_head *);
55static int udf_sync_inode(struct inode *inode); 54static int udf_sync_inode(struct inode *inode);
56static int udf_alloc_i_data(struct inode *inode, size_t size); 55static int udf_alloc_i_data(struct inode *inode, size_t size);
57static sector_t inode_getblk(struct inode *, sector_t, int *, int *); 56static sector_t inode_getblk(struct inode *, sector_t, int *, int *);
@@ -1271,12 +1270,33 @@ update_time:
1271 return 0; 1270 return 0;
1272} 1271}
1273 1272
1274static void __udf_read_inode(struct inode *inode) 1273/*
1274 * Maximum length of linked list formed by ICB hierarchy. The chosen number is
1275 * arbitrary - just that we hopefully don't limit any real use of rewritten
1276 * inode on write-once media but avoid looping for too long on corrupted media.
1277 */
1278#define UDF_MAX_ICB_NESTING 1024
1279
1280static int udf_read_inode(struct inode *inode)
1275{ 1281{
1276 struct buffer_head *bh = NULL; 1282 struct buffer_head *bh = NULL;
1277 struct fileEntry *fe; 1283 struct fileEntry *fe;
1284 struct extendedFileEntry *efe;
1278 uint16_t ident; 1285 uint16_t ident;
1279 struct udf_inode_info *iinfo = UDF_I(inode); 1286 struct udf_inode_info *iinfo = UDF_I(inode);
1287 struct udf_sb_info *sbi = UDF_SB(inode->i_sb);
1288 struct kernel_lb_addr *iloc = &iinfo->i_location;
1289 unsigned int link_count;
1290 unsigned int indirections = 0;
1291 int ret = -EIO;
1292
1293reread:
1294 if (iloc->logicalBlockNum >=
1295 sbi->s_partmaps[iloc->partitionReferenceNum].s_partition_len) {
1296 udf_debug("block=%d, partition=%d out of range\n",
1297 iloc->logicalBlockNum, iloc->partitionReferenceNum);
1298 return -EIO;
1299 }
1280 1300
1281 /* 1301 /*
1282 * Set defaults, but the inode is still incomplete! 1302 * Set defaults, but the inode is still incomplete!
@@ -1290,78 +1310,54 @@ static void __udf_read_inode(struct inode *inode)
1290 * i_nlink = 1 1310 * i_nlink = 1
1291 * i_op = NULL; 1311 * i_op = NULL;
1292 */ 1312 */
1293 bh = udf_read_ptagged(inode->i_sb, &iinfo->i_location, 0, &ident); 1313 bh = udf_read_ptagged(inode->i_sb, iloc, 0, &ident);
1294 if (!bh) { 1314 if (!bh) {
1295 udf_err(inode->i_sb, "(ino %ld) failed !bh\n", inode->i_ino); 1315 udf_err(inode->i_sb, "(ino %ld) failed !bh\n", inode->i_ino);
1296 make_bad_inode(inode); 1316 return -EIO;
1297 return;
1298 } 1317 }
1299 1318
1300 if (ident != TAG_IDENT_FE && ident != TAG_IDENT_EFE && 1319 if (ident != TAG_IDENT_FE && ident != TAG_IDENT_EFE &&
1301 ident != TAG_IDENT_USE) { 1320 ident != TAG_IDENT_USE) {
1302 udf_err(inode->i_sb, "(ino %ld) failed ident=%d\n", 1321 udf_err(inode->i_sb, "(ino %ld) failed ident=%d\n",
1303 inode->i_ino, ident); 1322 inode->i_ino, ident);
1304 brelse(bh); 1323 goto out;
1305 make_bad_inode(inode);
1306 return;
1307 } 1324 }
1308 1325
1309 fe = (struct fileEntry *)bh->b_data; 1326 fe = (struct fileEntry *)bh->b_data;
1327 efe = (struct extendedFileEntry *)bh->b_data;
1310 1328
1311 if (fe->icbTag.strategyType == cpu_to_le16(4096)) { 1329 if (fe->icbTag.strategyType == cpu_to_le16(4096)) {
1312 struct buffer_head *ibh; 1330 struct buffer_head *ibh;
1313 1331
1314 ibh = udf_read_ptagged(inode->i_sb, &iinfo->i_location, 1, 1332 ibh = udf_read_ptagged(inode->i_sb, iloc, 1, &ident);
1315 &ident);
1316 if (ident == TAG_IDENT_IE && ibh) { 1333 if (ident == TAG_IDENT_IE && ibh) {
1317 struct buffer_head *nbh = NULL;
1318 struct kernel_lb_addr loc; 1334 struct kernel_lb_addr loc;
1319 struct indirectEntry *ie; 1335 struct indirectEntry *ie;
1320 1336
1321 ie = (struct indirectEntry *)ibh->b_data; 1337 ie = (struct indirectEntry *)ibh->b_data;
1322 loc = lelb_to_cpu(ie->indirectICB.extLocation); 1338 loc = lelb_to_cpu(ie->indirectICB.extLocation);
1323 1339
1324 if (ie->indirectICB.extLength && 1340 if (ie->indirectICB.extLength) {
1325 (nbh = udf_read_ptagged(inode->i_sb, &loc, 0, 1341 brelse(ibh);
1326 &ident))) { 1342 memcpy(&iinfo->i_location, &loc,
1327 if (ident == TAG_IDENT_FE || 1343 sizeof(struct kernel_lb_addr));
1328 ident == TAG_IDENT_EFE) { 1344 if (++indirections > UDF_MAX_ICB_NESTING) {
1329 memcpy(&iinfo->i_location, 1345 udf_err(inode->i_sb,
1330 &loc, 1346 "too many ICBs in ICB hierarchy"
1331 sizeof(struct kernel_lb_addr)); 1347 " (max %d supported)\n",
1332 brelse(bh); 1348 UDF_MAX_ICB_NESTING);
1333 brelse(ibh); 1349 goto out;
1334 brelse(nbh);
1335 __udf_read_inode(inode);
1336 return;
1337 } 1350 }
1338 brelse(nbh); 1351 brelse(bh);
1352 goto reread;
1339 } 1353 }
1340 } 1354 }
1341 brelse(ibh); 1355 brelse(ibh);
1342 } else if (fe->icbTag.strategyType != cpu_to_le16(4)) { 1356 } else if (fe->icbTag.strategyType != cpu_to_le16(4)) {
1343 udf_err(inode->i_sb, "unsupported strategy type: %d\n", 1357 udf_err(inode->i_sb, "unsupported strategy type: %d\n",
1344 le16_to_cpu(fe->icbTag.strategyType)); 1358 le16_to_cpu(fe->icbTag.strategyType));
1345 brelse(bh); 1359 goto out;
1346 make_bad_inode(inode);
1347 return;
1348 } 1360 }
1349 udf_fill_inode(inode, bh);
1350
1351 brelse(bh);
1352}
1353
1354static void udf_fill_inode(struct inode *inode, struct buffer_head *bh)
1355{
1356 struct fileEntry *fe;
1357 struct extendedFileEntry *efe;
1358 struct udf_sb_info *sbi = UDF_SB(inode->i_sb);
1359 struct udf_inode_info *iinfo = UDF_I(inode);
1360 unsigned int link_count;
1361
1362 fe = (struct fileEntry *)bh->b_data;
1363 efe = (struct extendedFileEntry *)bh->b_data;
1364
1365 if (fe->icbTag.strategyType == cpu_to_le16(4)) 1361 if (fe->icbTag.strategyType == cpu_to_le16(4))
1366 iinfo->i_strat4096 = 0; 1362 iinfo->i_strat4096 = 0;
1367 else /* if (fe->icbTag.strategyType == cpu_to_le16(4096)) */ 1363 else /* if (fe->icbTag.strategyType == cpu_to_le16(4096)) */
@@ -1378,11 +1374,10 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh)
1378 if (fe->descTag.tagIdent == cpu_to_le16(TAG_IDENT_EFE)) { 1374 if (fe->descTag.tagIdent == cpu_to_le16(TAG_IDENT_EFE)) {
1379 iinfo->i_efe = 1; 1375 iinfo->i_efe = 1;
1380 iinfo->i_use = 0; 1376 iinfo->i_use = 0;
1381 if (udf_alloc_i_data(inode, inode->i_sb->s_blocksize - 1377 ret = udf_alloc_i_data(inode, inode->i_sb->s_blocksize -
1382 sizeof(struct extendedFileEntry))) { 1378 sizeof(struct extendedFileEntry));
1383 make_bad_inode(inode); 1379 if (ret)
1384 return; 1380 goto out;
1385 }
1386 memcpy(iinfo->i_ext.i_data, 1381 memcpy(iinfo->i_ext.i_data,
1387 bh->b_data + sizeof(struct extendedFileEntry), 1382 bh->b_data + sizeof(struct extendedFileEntry),
1388 inode->i_sb->s_blocksize - 1383 inode->i_sb->s_blocksize -
@@ -1390,11 +1385,10 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh)
1390 } else if (fe->descTag.tagIdent == cpu_to_le16(TAG_IDENT_FE)) { 1385 } else if (fe->descTag.tagIdent == cpu_to_le16(TAG_IDENT_FE)) {
1391 iinfo->i_efe = 0; 1386 iinfo->i_efe = 0;
1392 iinfo->i_use = 0; 1387 iinfo->i_use = 0;
1393 if (udf_alloc_i_data(inode, inode->i_sb->s_blocksize - 1388 ret = udf_alloc_i_data(inode, inode->i_sb->s_blocksize -
1394 sizeof(struct fileEntry))) { 1389 sizeof(struct fileEntry));
1395 make_bad_inode(inode); 1390 if (ret)
1396 return; 1391 goto out;
1397 }
1398 memcpy(iinfo->i_ext.i_data, 1392 memcpy(iinfo->i_ext.i_data,
1399 bh->b_data + sizeof(struct fileEntry), 1393 bh->b_data + sizeof(struct fileEntry),
1400 inode->i_sb->s_blocksize - sizeof(struct fileEntry)); 1394 inode->i_sb->s_blocksize - sizeof(struct fileEntry));
@@ -1404,18 +1398,18 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh)
1404 iinfo->i_lenAlloc = le32_to_cpu( 1398 iinfo->i_lenAlloc = le32_to_cpu(
1405 ((struct unallocSpaceEntry *)bh->b_data)-> 1399 ((struct unallocSpaceEntry *)bh->b_data)->
1406 lengthAllocDescs); 1400 lengthAllocDescs);
1407 if (udf_alloc_i_data(inode, inode->i_sb->s_blocksize - 1401 ret = udf_alloc_i_data(inode, inode->i_sb->s_blocksize -
1408 sizeof(struct unallocSpaceEntry))) { 1402 sizeof(struct unallocSpaceEntry));
1409 make_bad_inode(inode); 1403 if (ret)
1410 return; 1404 goto out;
1411 }
1412 memcpy(iinfo->i_ext.i_data, 1405 memcpy(iinfo->i_ext.i_data,
1413 bh->b_data + sizeof(struct unallocSpaceEntry), 1406 bh->b_data + sizeof(struct unallocSpaceEntry),
1414 inode->i_sb->s_blocksize - 1407 inode->i_sb->s_blocksize -
1415 sizeof(struct unallocSpaceEntry)); 1408 sizeof(struct unallocSpaceEntry));
1416 return; 1409 return 0;
1417 } 1410 }
1418 1411
1412 ret = -EIO;
1419 read_lock(&sbi->s_cred_lock); 1413 read_lock(&sbi->s_cred_lock);
1420 i_uid_write(inode, le32_to_cpu(fe->uid)); 1414 i_uid_write(inode, le32_to_cpu(fe->uid));
1421 if (!uid_valid(inode->i_uid) || 1415 if (!uid_valid(inode->i_uid) ||
@@ -1441,8 +1435,10 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh)
1441 read_unlock(&sbi->s_cred_lock); 1435 read_unlock(&sbi->s_cred_lock);
1442 1436
1443 link_count = le16_to_cpu(fe->fileLinkCount); 1437 link_count = le16_to_cpu(fe->fileLinkCount);
1444 if (!link_count) 1438 if (!link_count) {
1445 link_count = 1; 1439 ret = -ESTALE;
1440 goto out;
1441 }
1446 set_nlink(inode, link_count); 1442 set_nlink(inode, link_count);
1447 1443
1448 inode->i_size = le64_to_cpu(fe->informationLength); 1444 inode->i_size = le64_to_cpu(fe->informationLength);
@@ -1488,6 +1484,7 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh)
1488 iinfo->i_lenAlloc = le32_to_cpu(efe->lengthAllocDescs); 1484 iinfo->i_lenAlloc = le32_to_cpu(efe->lengthAllocDescs);
1489 iinfo->i_checkpoint = le32_to_cpu(efe->checkpoint); 1485 iinfo->i_checkpoint = le32_to_cpu(efe->checkpoint);
1490 } 1486 }
1487 inode->i_generation = iinfo->i_unique;
1491 1488
1492 switch (fe->icbTag.fileType) { 1489 switch (fe->icbTag.fileType) {
1493 case ICBTAG_FILE_TYPE_DIRECTORY: 1490 case ICBTAG_FILE_TYPE_DIRECTORY:
@@ -1537,8 +1534,7 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh)
1537 default: 1534 default:
1538 udf_err(inode->i_sb, "(ino %ld) failed unknown file type=%d\n", 1535 udf_err(inode->i_sb, "(ino %ld) failed unknown file type=%d\n",
1539 inode->i_ino, fe->icbTag.fileType); 1536 inode->i_ino, fe->icbTag.fileType);
1540 make_bad_inode(inode); 1537 goto out;
1541 return;
1542 } 1538 }
1543 if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) { 1539 if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) {
1544 struct deviceSpec *dsea = 1540 struct deviceSpec *dsea =
@@ -1549,8 +1545,12 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh)
1549 le32_to_cpu(dsea->minorDeviceIdent))); 1545 le32_to_cpu(dsea->minorDeviceIdent)));
1550 /* Developer ID ??? */ 1546 /* Developer ID ??? */
1551 } else 1547 } else
1552 make_bad_inode(inode); 1548 goto out;
1553 } 1549 }
1550 ret = 0;
1551out:
1552 brelse(bh);
1553 return ret;
1554} 1554}
1555 1555
1556static int udf_alloc_i_data(struct inode *inode, size_t size) 1556static int udf_alloc_i_data(struct inode *inode, size_t size)
@@ -1664,7 +1664,7 @@ static int udf_update_inode(struct inode *inode, int do_sync)
1664 FE_PERM_U_DELETE | FE_PERM_U_CHATTR)); 1664 FE_PERM_U_DELETE | FE_PERM_U_CHATTR));
1665 fe->permissions = cpu_to_le32(udfperms); 1665 fe->permissions = cpu_to_le32(udfperms);
1666 1666
1667 if (S_ISDIR(inode->i_mode)) 1667 if (S_ISDIR(inode->i_mode) && inode->i_nlink > 0)
1668 fe->fileLinkCount = cpu_to_le16(inode->i_nlink - 1); 1668 fe->fileLinkCount = cpu_to_le16(inode->i_nlink - 1);
1669 else 1669 else
1670 fe->fileLinkCount = cpu_to_le16(inode->i_nlink); 1670 fe->fileLinkCount = cpu_to_le16(inode->i_nlink);
@@ -1830,32 +1830,23 @@ struct inode *udf_iget(struct super_block *sb, struct kernel_lb_addr *ino)
1830{ 1830{
1831 unsigned long block = udf_get_lb_pblock(sb, ino, 0); 1831 unsigned long block = udf_get_lb_pblock(sb, ino, 0);
1832 struct inode *inode = iget_locked(sb, block); 1832 struct inode *inode = iget_locked(sb, block);
1833 int err;
1833 1834
1834 if (!inode) 1835 if (!inode)
1835 return NULL; 1836 return ERR_PTR(-ENOMEM);
1836
1837 if (inode->i_state & I_NEW) {
1838 memcpy(&UDF_I(inode)->i_location, ino, sizeof(struct kernel_lb_addr));
1839 __udf_read_inode(inode);
1840 unlock_new_inode(inode);
1841 }
1842 1837
1843 if (is_bad_inode(inode)) 1838 if (!(inode->i_state & I_NEW))
1844 goto out_iput; 1839 return inode;
1845 1840
1846 if (ino->logicalBlockNum >= UDF_SB(sb)-> 1841 memcpy(&UDF_I(inode)->i_location, ino, sizeof(struct kernel_lb_addr));
1847 s_partmaps[ino->partitionReferenceNum].s_partition_len) { 1842 err = udf_read_inode(inode);
1848 udf_debug("block=%d, partition=%d out of range\n", 1843 if (err < 0) {
1849 ino->logicalBlockNum, ino->partitionReferenceNum); 1844 iget_failed(inode);
1850 make_bad_inode(inode); 1845 return ERR_PTR(err);
1851 goto out_iput;
1852 } 1846 }
1847 unlock_new_inode(inode);
1853 1848
1854 return inode; 1849 return inode;
1855
1856 out_iput:
1857 iput(inode);
1858 return NULL;
1859} 1850}
1860 1851
1861int udf_add_aext(struct inode *inode, struct extent_position *epos, 1852int udf_add_aext(struct inode *inode, struct extent_position *epos,
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index 9737cba1357d..c12e260fd6c4 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -270,9 +270,8 @@ static struct dentry *udf_lookup(struct inode *dir, struct dentry *dentry,
270 NULL, 0), 270 NULL, 0),
271 }; 271 };
272 inode = udf_iget(dir->i_sb, lb); 272 inode = udf_iget(dir->i_sb, lb);
273 if (!inode) { 273 if (IS_ERR(inode))
274 return ERR_PTR(-EACCES); 274 return inode;
275 }
276 } else 275 } else
277#endif /* UDF_RECOVERY */ 276#endif /* UDF_RECOVERY */
278 277
@@ -285,9 +284,8 @@ static struct dentry *udf_lookup(struct inode *dir, struct dentry *dentry,
285 284
286 loc = lelb_to_cpu(cfi.icb.extLocation); 285 loc = lelb_to_cpu(cfi.icb.extLocation);
287 inode = udf_iget(dir->i_sb, &loc); 286 inode = udf_iget(dir->i_sb, &loc);
288 if (!inode) { 287 if (IS_ERR(inode))
289 return ERR_PTR(-EACCES); 288 return ERR_CAST(inode);
290 }
291 } 289 }
292 290
293 return d_splice_alias(inode, dentry); 291 return d_splice_alias(inode, dentry);
@@ -550,32 +548,18 @@ static int udf_delete_entry(struct inode *inode, struct fileIdentDesc *fi,
550 return udf_write_fi(inode, cfi, fi, fibh, NULL, NULL); 548 return udf_write_fi(inode, cfi, fi, fibh, NULL, NULL);
551} 549}
552 550
553static int udf_create(struct inode *dir, struct dentry *dentry, umode_t mode, 551static int udf_add_nondir(struct dentry *dentry, struct inode *inode)
554 bool excl)
555{ 552{
553 struct udf_inode_info *iinfo = UDF_I(inode);
554 struct inode *dir = dentry->d_parent->d_inode;
556 struct udf_fileident_bh fibh; 555 struct udf_fileident_bh fibh;
557 struct inode *inode;
558 struct fileIdentDesc cfi, *fi; 556 struct fileIdentDesc cfi, *fi;
559 int err; 557 int err;
560 struct udf_inode_info *iinfo;
561
562 inode = udf_new_inode(dir, mode, &err);
563 if (!inode) {
564 return err;
565 }
566
567 iinfo = UDF_I(inode);
568 if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB)
569 inode->i_data.a_ops = &udf_adinicb_aops;
570 else
571 inode->i_data.a_ops = &udf_aops;
572 inode->i_op = &udf_file_inode_operations;
573 inode->i_fop = &udf_file_operations;
574 mark_inode_dirty(inode);
575 558
576 fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err); 559 fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err);
577 if (!fi) { 560 if (unlikely(!fi)) {
578 inode_dec_link_count(inode); 561 inode_dec_link_count(inode);
562 unlock_new_inode(inode);
579 iput(inode); 563 iput(inode);
580 return err; 564 return err;
581 } 565 }
@@ -589,23 +573,21 @@ static int udf_create(struct inode *dir, struct dentry *dentry, umode_t mode,
589 if (fibh.sbh != fibh.ebh) 573 if (fibh.sbh != fibh.ebh)
590 brelse(fibh.ebh); 574 brelse(fibh.ebh);
591 brelse(fibh.sbh); 575 brelse(fibh.sbh);
576 unlock_new_inode(inode);
592 d_instantiate(dentry, inode); 577 d_instantiate(dentry, inode);
593 578
594 return 0; 579 return 0;
595} 580}
596 581
597static int udf_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) 582static int udf_create(struct inode *dir, struct dentry *dentry, umode_t mode,
583 bool excl)
598{ 584{
599 struct inode *inode; 585 struct inode *inode = udf_new_inode(dir, mode);
600 struct udf_inode_info *iinfo;
601 int err;
602 586
603 inode = udf_new_inode(dir, mode, &err); 587 if (IS_ERR(inode))
604 if (!inode) 588 return PTR_ERR(inode);
605 return err;
606 589
607 iinfo = UDF_I(inode); 590 if (UDF_I(inode)->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB)
608 if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB)
609 inode->i_data.a_ops = &udf_adinicb_aops; 591 inode->i_data.a_ops = &udf_adinicb_aops;
610 else 592 else
611 inode->i_data.a_ops = &udf_aops; 593 inode->i_data.a_ops = &udf_aops;
@@ -613,7 +595,25 @@ static int udf_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
613 inode->i_fop = &udf_file_operations; 595 inode->i_fop = &udf_file_operations;
614 mark_inode_dirty(inode); 596 mark_inode_dirty(inode);
615 597
598 return udf_add_nondir(dentry, inode);
599}
600
601static int udf_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
602{
603 struct inode *inode = udf_new_inode(dir, mode);
604
605 if (IS_ERR(inode))
606 return PTR_ERR(inode);
607
608 if (UDF_I(inode)->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB)
609 inode->i_data.a_ops = &udf_adinicb_aops;
610 else
611 inode->i_data.a_ops = &udf_aops;
612 inode->i_op = &udf_file_inode_operations;
613 inode->i_fop = &udf_file_operations;
614 mark_inode_dirty(inode);
616 d_tmpfile(dentry, inode); 615 d_tmpfile(dentry, inode);
616 unlock_new_inode(inode);
617 return 0; 617 return 0;
618} 618}
619 619
@@ -621,44 +621,16 @@ static int udf_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
621 dev_t rdev) 621 dev_t rdev)
622{ 622{
623 struct inode *inode; 623 struct inode *inode;
624 struct udf_fileident_bh fibh;
625 struct fileIdentDesc cfi, *fi;
626 int err;
627 struct udf_inode_info *iinfo;
628 624
629 if (!old_valid_dev(rdev)) 625 if (!old_valid_dev(rdev))
630 return -EINVAL; 626 return -EINVAL;
631 627
632 err = -EIO; 628 inode = udf_new_inode(dir, mode);
633 inode = udf_new_inode(dir, mode, &err); 629 if (IS_ERR(inode))
634 if (!inode) 630 return PTR_ERR(inode);
635 goto out;
636 631
637 iinfo = UDF_I(inode);
638 init_special_inode(inode, mode, rdev); 632 init_special_inode(inode, mode, rdev);
639 fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err); 633 return udf_add_nondir(dentry, inode);
640 if (!fi) {
641 inode_dec_link_count(inode);
642 iput(inode);
643 return err;
644 }
645 cfi.icb.extLength = cpu_to_le32(inode->i_sb->s_blocksize);
646 cfi.icb.extLocation = cpu_to_lelb(iinfo->i_location);
647 *(__le32 *)((struct allocDescImpUse *)cfi.icb.impUse)->impUse =
648 cpu_to_le32(iinfo->i_unique & 0x00000000FFFFFFFFUL);
649 udf_write_fi(dir, &cfi, fi, &fibh, NULL, NULL);
650 if (UDF_I(dir)->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB)
651 mark_inode_dirty(dir);
652 mark_inode_dirty(inode);
653
654 if (fibh.sbh != fibh.ebh)
655 brelse(fibh.ebh);
656 brelse(fibh.sbh);
657 d_instantiate(dentry, inode);
658 err = 0;
659
660out:
661 return err;
662} 634}
663 635
664static int udf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) 636static int udf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
@@ -670,10 +642,9 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
670 struct udf_inode_info *dinfo = UDF_I(dir); 642 struct udf_inode_info *dinfo = UDF_I(dir);
671 struct udf_inode_info *iinfo; 643 struct udf_inode_info *iinfo;
672 644
673 err = -EIO; 645 inode = udf_new_inode(dir, S_IFDIR | mode);
674 inode = udf_new_inode(dir, S_IFDIR | mode, &err); 646 if (IS_ERR(inode))
675 if (!inode) 647 return PTR_ERR(inode);
676 goto out;
677 648
678 iinfo = UDF_I(inode); 649 iinfo = UDF_I(inode);
679 inode->i_op = &udf_dir_inode_operations; 650 inode->i_op = &udf_dir_inode_operations;
@@ -681,6 +652,7 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
681 fi = udf_add_entry(inode, NULL, &fibh, &cfi, &err); 652 fi = udf_add_entry(inode, NULL, &fibh, &cfi, &err);
682 if (!fi) { 653 if (!fi) {
683 inode_dec_link_count(inode); 654 inode_dec_link_count(inode);
655 unlock_new_inode(inode);
684 iput(inode); 656 iput(inode);
685 goto out; 657 goto out;
686 } 658 }
@@ -699,6 +671,7 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
699 if (!fi) { 671 if (!fi) {
700 clear_nlink(inode); 672 clear_nlink(inode);
701 mark_inode_dirty(inode); 673 mark_inode_dirty(inode);
674 unlock_new_inode(inode);
702 iput(inode); 675 iput(inode);
703 goto out; 676 goto out;
704 } 677 }
@@ -710,6 +683,7 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
710 udf_write_fi(dir, &cfi, fi, &fibh, NULL, NULL); 683 udf_write_fi(dir, &cfi, fi, &fibh, NULL, NULL);
711 inc_nlink(dir); 684 inc_nlink(dir);
712 mark_inode_dirty(dir); 685 mark_inode_dirty(dir);
686 unlock_new_inode(inode);
713 d_instantiate(dentry, inode); 687 d_instantiate(dentry, inode);
714 if (fibh.sbh != fibh.ebh) 688 if (fibh.sbh != fibh.ebh)
715 brelse(fibh.ebh); 689 brelse(fibh.ebh);
@@ -876,14 +850,11 @@ out:
876static int udf_symlink(struct inode *dir, struct dentry *dentry, 850static int udf_symlink(struct inode *dir, struct dentry *dentry,
877 const char *symname) 851 const char *symname)
878{ 852{
879 struct inode *inode; 853 struct inode *inode = udf_new_inode(dir, S_IFLNK | S_IRWXUGO);
880 struct pathComponent *pc; 854 struct pathComponent *pc;
881 const char *compstart; 855 const char *compstart;
882 struct udf_fileident_bh fibh;
883 struct extent_position epos = {}; 856 struct extent_position epos = {};
884 int eoffset, elen = 0; 857 int eoffset, elen = 0;
885 struct fileIdentDesc *fi;
886 struct fileIdentDesc cfi;
887 uint8_t *ea; 858 uint8_t *ea;
888 int err; 859 int err;
889 int block; 860 int block;
@@ -892,9 +863,8 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry,
892 struct udf_inode_info *iinfo; 863 struct udf_inode_info *iinfo;
893 struct super_block *sb = dir->i_sb; 864 struct super_block *sb = dir->i_sb;
894 865
895 inode = udf_new_inode(dir, S_IFLNK | S_IRWXUGO, &err); 866 if (IS_ERR(inode))
896 if (!inode) 867 return PTR_ERR(inode);
897 goto out;
898 868
899 iinfo = UDF_I(inode); 869 iinfo = UDF_I(inode);
900 down_write(&iinfo->i_data_sem); 870 down_write(&iinfo->i_data_sem);
@@ -1012,24 +982,7 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry,
1012 mark_inode_dirty(inode); 982 mark_inode_dirty(inode);
1013 up_write(&iinfo->i_data_sem); 983 up_write(&iinfo->i_data_sem);
1014 984
1015 fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err); 985 err = udf_add_nondir(dentry, inode);
1016 if (!fi)
1017 goto out_no_entry;
1018 cfi.icb.extLength = cpu_to_le32(sb->s_blocksize);
1019 cfi.icb.extLocation = cpu_to_lelb(iinfo->i_location);
1020 if (UDF_SB(inode->i_sb)->s_lvid_bh) {
1021 *(__le32 *)((struct allocDescImpUse *)cfi.icb.impUse)->impUse =
1022 cpu_to_le32(lvid_get_unique_id(sb));
1023 }
1024 udf_write_fi(dir, &cfi, fi, &fibh, NULL, NULL);
1025 if (UDF_I(dir)->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB)
1026 mark_inode_dirty(dir);
1027 if (fibh.sbh != fibh.ebh)
1028 brelse(fibh.ebh);
1029 brelse(fibh.sbh);
1030 d_instantiate(dentry, inode);
1031 err = 0;
1032
1033out: 986out:
1034 kfree(name); 987 kfree(name);
1035 return err; 988 return err;
@@ -1037,6 +990,7 @@ out:
1037out_no_entry: 990out_no_entry:
1038 up_write(&iinfo->i_data_sem); 991 up_write(&iinfo->i_data_sem);
1039 inode_dec_link_count(inode); 992 inode_dec_link_count(inode);
993 unlock_new_inode(inode);
1040 iput(inode); 994 iput(inode);
1041 goto out; 995 goto out;
1042} 996}
@@ -1221,7 +1175,7 @@ static struct dentry *udf_get_parent(struct dentry *child)
1221 struct udf_fileident_bh fibh; 1175 struct udf_fileident_bh fibh;
1222 1176
1223 if (!udf_find_entry(child->d_inode, &dotdot, &fibh, &cfi)) 1177 if (!udf_find_entry(child->d_inode, &dotdot, &fibh, &cfi))
1224 goto out_unlock; 1178 return ERR_PTR(-EACCES);
1225 1179
1226 if (fibh.sbh != fibh.ebh) 1180 if (fibh.sbh != fibh.ebh)
1227 brelse(fibh.ebh); 1181 brelse(fibh.ebh);
@@ -1229,12 +1183,10 @@ static struct dentry *udf_get_parent(struct dentry *child)
1229 1183
1230 tloc = lelb_to_cpu(cfi.icb.extLocation); 1184 tloc = lelb_to_cpu(cfi.icb.extLocation);
1231 inode = udf_iget(child->d_inode->i_sb, &tloc); 1185 inode = udf_iget(child->d_inode->i_sb, &tloc);
1232 if (!inode) 1186 if (IS_ERR(inode))
1233 goto out_unlock; 1187 return ERR_CAST(inode);
1234 1188
1235 return d_obtain_alias(inode); 1189 return d_obtain_alias(inode);
1236out_unlock:
1237 return ERR_PTR(-EACCES);
1238} 1190}
1239 1191
1240 1192
@@ -1251,8 +1203,8 @@ static struct dentry *udf_nfs_get_inode(struct super_block *sb, u32 block,
1251 loc.partitionReferenceNum = partref; 1203 loc.partitionReferenceNum = partref;
1252 inode = udf_iget(sb, &loc); 1204 inode = udf_iget(sb, &loc);
1253 1205
1254 if (inode == NULL) 1206 if (IS_ERR(inode))
1255 return ERR_PTR(-ENOMEM); 1207 return ERR_CAST(inode);
1256 1208
1257 if (generation && inode->i_generation != generation) { 1209 if (generation && inode->i_generation != generation) {
1258 iput(inode); 1210 iput(inode);
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 813da94d447b..5401fc33f5cc 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -961,12 +961,14 @@ struct inode *udf_find_metadata_inode_efe(struct super_block *sb,
961 961
962 metadata_fe = udf_iget(sb, &addr); 962 metadata_fe = udf_iget(sb, &addr);
963 963
964 if (metadata_fe == NULL) 964 if (IS_ERR(metadata_fe)) {
965 udf_warn(sb, "metadata inode efe not found\n"); 965 udf_warn(sb, "metadata inode efe not found\n");
966 else if (UDF_I(metadata_fe)->i_alloc_type != ICBTAG_FLAG_AD_SHORT) { 966 return metadata_fe;
967 }
968 if (UDF_I(metadata_fe)->i_alloc_type != ICBTAG_FLAG_AD_SHORT) {
967 udf_warn(sb, "metadata inode efe does not have short allocation descriptors!\n"); 969 udf_warn(sb, "metadata inode efe does not have short allocation descriptors!\n");
968 iput(metadata_fe); 970 iput(metadata_fe);
969 metadata_fe = NULL; 971 return ERR_PTR(-EIO);
970 } 972 }
971 973
972 return metadata_fe; 974 return metadata_fe;
@@ -978,6 +980,7 @@ static int udf_load_metadata_files(struct super_block *sb, int partition)
978 struct udf_part_map *map; 980 struct udf_part_map *map;
979 struct udf_meta_data *mdata; 981 struct udf_meta_data *mdata;
980 struct kernel_lb_addr addr; 982 struct kernel_lb_addr addr;
983 struct inode *fe;
981 984
982 map = &sbi->s_partmaps[partition]; 985 map = &sbi->s_partmaps[partition];
983 mdata = &map->s_type_specific.s_metadata; 986 mdata = &map->s_type_specific.s_metadata;
@@ -986,22 +989,24 @@ static int udf_load_metadata_files(struct super_block *sb, int partition)
986 udf_debug("Metadata file location: block = %d part = %d\n", 989 udf_debug("Metadata file location: block = %d part = %d\n",
987 mdata->s_meta_file_loc, map->s_partition_num); 990 mdata->s_meta_file_loc, map->s_partition_num);
988 991
989 mdata->s_metadata_fe = udf_find_metadata_inode_efe(sb, 992 fe = udf_find_metadata_inode_efe(sb, mdata->s_meta_file_loc,
990 mdata->s_meta_file_loc, map->s_partition_num); 993 map->s_partition_num);
991 994 if (IS_ERR(fe)) {
992 if (mdata->s_metadata_fe == NULL) {
993 /* mirror file entry */ 995 /* mirror file entry */
994 udf_debug("Mirror metadata file location: block = %d part = %d\n", 996 udf_debug("Mirror metadata file location: block = %d part = %d\n",
995 mdata->s_mirror_file_loc, map->s_partition_num); 997 mdata->s_mirror_file_loc, map->s_partition_num);
996 998
997 mdata->s_mirror_fe = udf_find_metadata_inode_efe(sb, 999 fe = udf_find_metadata_inode_efe(sb, mdata->s_mirror_file_loc,
998 mdata->s_mirror_file_loc, map->s_partition_num); 1000 map->s_partition_num);
999 1001
1000 if (mdata->s_mirror_fe == NULL) { 1002 if (IS_ERR(fe)) {
1001 udf_err(sb, "Both metadata and mirror metadata inode efe can not found\n"); 1003 udf_err(sb, "Both metadata and mirror metadata inode efe can not found\n");
1002 return -EIO; 1004 return PTR_ERR(fe);
1003 } 1005 }
1004 } 1006 mdata->s_mirror_fe = fe;
1007 } else
1008 mdata->s_metadata_fe = fe;
1009
1005 1010
1006 /* 1011 /*
1007 * bitmap file entry 1012 * bitmap file entry
@@ -1015,15 +1020,16 @@ static int udf_load_metadata_files(struct super_block *sb, int partition)
1015 udf_debug("Bitmap file location: block = %d part = %d\n", 1020 udf_debug("Bitmap file location: block = %d part = %d\n",
1016 addr.logicalBlockNum, addr.partitionReferenceNum); 1021 addr.logicalBlockNum, addr.partitionReferenceNum);
1017 1022
1018 mdata->s_bitmap_fe = udf_iget(sb, &addr); 1023 fe = udf_iget(sb, &addr);
1019 if (mdata->s_bitmap_fe == NULL) { 1024 if (IS_ERR(fe)) {
1020 if (sb->s_flags & MS_RDONLY) 1025 if (sb->s_flags & MS_RDONLY)
1021 udf_warn(sb, "bitmap inode efe not found but it's ok since the disc is mounted read-only\n"); 1026 udf_warn(sb, "bitmap inode efe not found but it's ok since the disc is mounted read-only\n");
1022 else { 1027 else {
1023 udf_err(sb, "bitmap inode efe not found and attempted read-write mount\n"); 1028 udf_err(sb, "bitmap inode efe not found and attempted read-write mount\n");
1024 return -EIO; 1029 return PTR_ERR(fe);
1025 } 1030 }
1026 } 1031 } else
1032 mdata->s_bitmap_fe = fe;
1027 } 1033 }
1028 1034
1029 udf_debug("udf_load_metadata_files Ok\n"); 1035 udf_debug("udf_load_metadata_files Ok\n");
@@ -1111,13 +1117,15 @@ static int udf_fill_partdesc_info(struct super_block *sb,
1111 phd->unallocSpaceTable.extPosition), 1117 phd->unallocSpaceTable.extPosition),
1112 .partitionReferenceNum = p_index, 1118 .partitionReferenceNum = p_index,
1113 }; 1119 };
1120 struct inode *inode;
1114 1121
1115 map->s_uspace.s_table = udf_iget(sb, &loc); 1122 inode = udf_iget(sb, &loc);
1116 if (!map->s_uspace.s_table) { 1123 if (IS_ERR(inode)) {
1117 udf_debug("cannot load unallocSpaceTable (part %d)\n", 1124 udf_debug("cannot load unallocSpaceTable (part %d)\n",
1118 p_index); 1125 p_index);
1119 return -EIO; 1126 return PTR_ERR(inode);
1120 } 1127 }
1128 map->s_uspace.s_table = inode;
1121 map->s_partition_flags |= UDF_PART_FLAG_UNALLOC_TABLE; 1129 map->s_partition_flags |= UDF_PART_FLAG_UNALLOC_TABLE;
1122 udf_debug("unallocSpaceTable (part %d) @ %ld\n", 1130 udf_debug("unallocSpaceTable (part %d) @ %ld\n",
1123 p_index, map->s_uspace.s_table->i_ino); 1131 p_index, map->s_uspace.s_table->i_ino);
@@ -1144,14 +1152,15 @@ static int udf_fill_partdesc_info(struct super_block *sb,
1144 phd->freedSpaceTable.extPosition), 1152 phd->freedSpaceTable.extPosition),
1145 .partitionReferenceNum = p_index, 1153 .partitionReferenceNum = p_index,
1146 }; 1154 };
1155 struct inode *inode;
1147 1156
1148 map->s_fspace.s_table = udf_iget(sb, &loc); 1157 inode = udf_iget(sb, &loc);
1149 if (!map->s_fspace.s_table) { 1158 if (IS_ERR(inode)) {
1150 udf_debug("cannot load freedSpaceTable (part %d)\n", 1159 udf_debug("cannot load freedSpaceTable (part %d)\n",
1151 p_index); 1160 p_index);
1152 return -EIO; 1161 return PTR_ERR(inode);
1153 } 1162 }
1154 1163 map->s_fspace.s_table = inode;
1155 map->s_partition_flags |= UDF_PART_FLAG_FREED_TABLE; 1164 map->s_partition_flags |= UDF_PART_FLAG_FREED_TABLE;
1156 udf_debug("freedSpaceTable (part %d) @ %ld\n", 1165 udf_debug("freedSpaceTable (part %d) @ %ld\n",
1157 p_index, map->s_fspace.s_table->i_ino); 1166 p_index, map->s_fspace.s_table->i_ino);
@@ -1178,6 +1187,7 @@ static void udf_find_vat_block(struct super_block *sb, int p_index,
1178 struct udf_part_map *map = &sbi->s_partmaps[p_index]; 1187 struct udf_part_map *map = &sbi->s_partmaps[p_index];
1179 sector_t vat_block; 1188 sector_t vat_block;
1180 struct kernel_lb_addr ino; 1189 struct kernel_lb_addr ino;
1190 struct inode *inode;
1181 1191
1182 /* 1192 /*
1183 * VAT file entry is in the last recorded block. Some broken disks have 1193 * VAT file entry is in the last recorded block. Some broken disks have
@@ -1186,10 +1196,13 @@ static void udf_find_vat_block(struct super_block *sb, int p_index,
1186 ino.partitionReferenceNum = type1_index; 1196 ino.partitionReferenceNum = type1_index;
1187 for (vat_block = start_block; 1197 for (vat_block = start_block;
1188 vat_block >= map->s_partition_root && 1198 vat_block >= map->s_partition_root &&
1189 vat_block >= start_block - 3 && 1199 vat_block >= start_block - 3; vat_block--) {
1190 !sbi->s_vat_inode; vat_block--) {
1191 ino.logicalBlockNum = vat_block - map->s_partition_root; 1200 ino.logicalBlockNum = vat_block - map->s_partition_root;
1192 sbi->s_vat_inode = udf_iget(sb, &ino); 1201 inode = udf_iget(sb, &ino);
1202 if (!IS_ERR(inode)) {
1203 sbi->s_vat_inode = inode;
1204 break;
1205 }
1193 } 1206 }
1194} 1207}
1195 1208
@@ -2205,10 +2218,10 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
2205 /* assign inodes by physical block number */ 2218 /* assign inodes by physical block number */
2206 /* perhaps it's not extensible enough, but for now ... */ 2219 /* perhaps it's not extensible enough, but for now ... */
2207 inode = udf_iget(sb, &rootdir); 2220 inode = udf_iget(sb, &rootdir);
2208 if (!inode) { 2221 if (IS_ERR(inode)) {
2209 udf_err(sb, "Error in udf_iget, block=%d, partition=%d\n", 2222 udf_err(sb, "Error in udf_iget, block=%d, partition=%d\n",
2210 rootdir.logicalBlockNum, rootdir.partitionReferenceNum); 2223 rootdir.logicalBlockNum, rootdir.partitionReferenceNum);
2211 ret = -EIO; 2224 ret = PTR_ERR(inode);
2212 goto error_out; 2225 goto error_out;
2213 } 2226 }
2214 2227
diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h
index be7dabbbcb49..742557be9936 100644
--- a/fs/udf/udfdecl.h
+++ b/fs/udf/udfdecl.h
@@ -143,7 +143,6 @@ extern int udf_expand_file_adinicb(struct inode *);
143extern struct buffer_head *udf_expand_dir_adinicb(struct inode *, int *, int *); 143extern struct buffer_head *udf_expand_dir_adinicb(struct inode *, int *, int *);
144extern struct buffer_head *udf_bread(struct inode *, int, int, int *); 144extern struct buffer_head *udf_bread(struct inode *, int, int, int *);
145extern int udf_setsize(struct inode *, loff_t); 145extern int udf_setsize(struct inode *, loff_t);
146extern void udf_read_inode(struct inode *);
147extern void udf_evict_inode(struct inode *); 146extern void udf_evict_inode(struct inode *);
148extern int udf_write_inode(struct inode *, struct writeback_control *wbc); 147extern int udf_write_inode(struct inode *, struct writeback_control *wbc);
149extern long udf_block_map(struct inode *, sector_t); 148extern long udf_block_map(struct inode *, sector_t);
@@ -209,7 +208,7 @@ extern int udf_CS0toUTF8(struct ustr *, const struct ustr *);
209 208
210/* ialloc.c */ 209/* ialloc.c */
211extern void udf_free_inode(struct inode *); 210extern void udf_free_inode(struct inode *);
212extern struct inode *udf_new_inode(struct inode *, umode_t, int *); 211extern struct inode *udf_new_inode(struct inode *, umode_t);
213 212
214/* truncate.c */ 213/* truncate.c */
215extern void udf_truncate_tail_extent(struct inode *); 214extern void udf_truncate_tail_extent(struct inode *);
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index 7c580c97990e..be7d42c7d938 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -902,9 +902,6 @@ void ufs_evict_inode(struct inode * inode)
902 invalidate_inode_buffers(inode); 902 invalidate_inode_buffers(inode);
903 clear_inode(inode); 903 clear_inode(inode);
904 904
905 if (want_delete) { 905 if (want_delete)
906 lock_ufs(inode->i_sb); 906 ufs_free_inode(inode);
907 ufs_free_inode (inode);
908 unlock_ufs(inode->i_sb);
909 }
910} 907}
diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c
index 90d74b8f8eba..2df62a73f20c 100644
--- a/fs/ufs/namei.c
+++ b/fs/ufs/namei.c
@@ -126,12 +126,12 @@ static int ufs_symlink (struct inode * dir, struct dentry * dentry,
126 if (l > sb->s_blocksize) 126 if (l > sb->s_blocksize)
127 goto out_notlocked; 127 goto out_notlocked;
128 128
129 lock_ufs(dir->i_sb);
130 inode = ufs_new_inode(dir, S_IFLNK | S_IRWXUGO); 129 inode = ufs_new_inode(dir, S_IFLNK | S_IRWXUGO);
131 err = PTR_ERR(inode); 130 err = PTR_ERR(inode);
132 if (IS_ERR(inode)) 131 if (IS_ERR(inode))
133 goto out; 132 goto out_notlocked;
134 133
134 lock_ufs(dir->i_sb);
135 if (l > UFS_SB(sb)->s_uspi->s_maxsymlinklen) { 135 if (l > UFS_SB(sb)->s_uspi->s_maxsymlinklen) {
136 /* slow symlink */ 136 /* slow symlink */
137 inode->i_op = &ufs_symlink_inode_operations; 137 inode->i_op = &ufs_symlink_inode_operations;
@@ -181,13 +181,9 @@ static int ufs_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode)
181 struct inode * inode; 181 struct inode * inode;
182 int err; 182 int err;
183 183
184 lock_ufs(dir->i_sb);
185 inode_inc_link_count(dir);
186
187 inode = ufs_new_inode(dir, S_IFDIR|mode); 184 inode = ufs_new_inode(dir, S_IFDIR|mode);
188 err = PTR_ERR(inode);
189 if (IS_ERR(inode)) 185 if (IS_ERR(inode))
190 goto out_dir; 186 return PTR_ERR(inode);
191 187
192 inode->i_op = &ufs_dir_inode_operations; 188 inode->i_op = &ufs_dir_inode_operations;
193 inode->i_fop = &ufs_dir_operations; 189 inode->i_fop = &ufs_dir_operations;
@@ -195,6 +191,9 @@ static int ufs_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode)
195 191
196 inode_inc_link_count(inode); 192 inode_inc_link_count(inode);
197 193
194 lock_ufs(dir->i_sb);
195 inode_inc_link_count(dir);
196
198 err = ufs_make_empty(inode, dir); 197 err = ufs_make_empty(inode, dir);
199 if (err) 198 if (err)
200 goto out_fail; 199 goto out_fail;
@@ -212,7 +211,6 @@ out_fail:
212 inode_dec_link_count(inode); 211 inode_dec_link_count(inode);
213 inode_dec_link_count(inode); 212 inode_dec_link_count(inode);
214 iput (inode); 213 iput (inode);
215out_dir:
216 inode_dec_link_count(dir); 214 inode_dec_link_count(dir);
217 unlock_ufs(dir->i_sb); 215 unlock_ufs(dir->i_sb);
218 goto out; 216 goto out;
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index de2d26d32844..86df952d3e24 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -5424,7 +5424,7 @@ xfs_bmap_shift_extents(
5424 struct xfs_bmap_free *flist, 5424 struct xfs_bmap_free *flist,
5425 int num_exts) 5425 int num_exts)
5426{ 5426{
5427 struct xfs_btree_cur *cur; 5427 struct xfs_btree_cur *cur = NULL;
5428 struct xfs_bmbt_rec_host *gotp; 5428 struct xfs_bmbt_rec_host *gotp;
5429 struct xfs_bmbt_irec got; 5429 struct xfs_bmbt_irec got;
5430 struct xfs_bmbt_irec left; 5430 struct xfs_bmbt_irec left;
@@ -5435,7 +5435,7 @@ xfs_bmap_shift_extents(
5435 int error = 0; 5435 int error = 0;
5436 int i; 5436 int i;
5437 int whichfork = XFS_DATA_FORK; 5437 int whichfork = XFS_DATA_FORK;
5438 int logflags; 5438 int logflags = 0;
5439 xfs_filblks_t blockcount = 0; 5439 xfs_filblks_t blockcount = 0;
5440 int total_extents; 5440 int total_extents;
5441 5441
@@ -5478,16 +5478,11 @@ xfs_bmap_shift_extents(
5478 } 5478 }
5479 } 5479 }
5480 5480
5481 /* We are going to change core inode */
5482 logflags = XFS_ILOG_CORE;
5483 if (ifp->if_flags & XFS_IFBROOT) { 5481 if (ifp->if_flags & XFS_IFBROOT) {
5484 cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork); 5482 cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
5485 cur->bc_private.b.firstblock = *firstblock; 5483 cur->bc_private.b.firstblock = *firstblock;
5486 cur->bc_private.b.flist = flist; 5484 cur->bc_private.b.flist = flist;
5487 cur->bc_private.b.flags = 0; 5485 cur->bc_private.b.flags = 0;
5488 } else {
5489 cur = NULL;
5490 logflags |= XFS_ILOG_DEXT;
5491 } 5486 }
5492 5487
5493 /* 5488 /*
@@ -5545,11 +5540,14 @@ xfs_bmap_shift_extents(
5545 blockcount = left.br_blockcount + 5540 blockcount = left.br_blockcount +
5546 got.br_blockcount; 5541 got.br_blockcount;
5547 xfs_iext_remove(ip, *current_ext, 1, 0); 5542 xfs_iext_remove(ip, *current_ext, 1, 0);
5543 logflags |= XFS_ILOG_CORE;
5548 if (cur) { 5544 if (cur) {
5549 error = xfs_btree_delete(cur, &i); 5545 error = xfs_btree_delete(cur, &i);
5550 if (error) 5546 if (error)
5551 goto del_cursor; 5547 goto del_cursor;
5552 XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor); 5548 XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor);
5549 } else {
5550 logflags |= XFS_ILOG_DEXT;
5553 } 5551 }
5554 XFS_IFORK_NEXT_SET(ip, whichfork, 5552 XFS_IFORK_NEXT_SET(ip, whichfork,
5555 XFS_IFORK_NEXTENTS(ip, whichfork) - 1); 5553 XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
@@ -5575,6 +5573,7 @@ xfs_bmap_shift_extents(
5575 got.br_startoff = startoff; 5573 got.br_startoff = startoff;
5576 } 5574 }
5577 5575
5576 logflags |= XFS_ILOG_CORE;
5578 if (cur) { 5577 if (cur) {
5579 error = xfs_bmbt_update(cur, got.br_startoff, 5578 error = xfs_bmbt_update(cur, got.br_startoff,
5580 got.br_startblock, 5579 got.br_startblock,
@@ -5582,6 +5581,8 @@ xfs_bmap_shift_extents(
5582 got.br_state); 5581 got.br_state);
5583 if (error) 5582 if (error)
5584 goto del_cursor; 5583 goto del_cursor;
5584 } else {
5585 logflags |= XFS_ILOG_DEXT;
5585 } 5586 }
5586 5587
5587 (*current_ext)++; 5588 (*current_ext)++;
@@ -5597,6 +5598,7 @@ del_cursor:
5597 xfs_btree_del_cursor(cur, 5598 xfs_btree_del_cursor(cur,
5598 error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); 5599 error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
5599 5600
5600 xfs_trans_log_inode(tp, ip, logflags); 5601 if (logflags)
5602 xfs_trans_log_inode(tp, ip, logflags);
5601 return error; 5603 return error;
5602} 5604}
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 11e9b4caa54f..b984647c24db 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -1753,11 +1753,72 @@ xfs_vm_readpages(
1753 return mpage_readpages(mapping, pages, nr_pages, xfs_get_blocks); 1753 return mpage_readpages(mapping, pages, nr_pages, xfs_get_blocks);
1754} 1754}
1755 1755
1756/*
1757 * This is basically a copy of __set_page_dirty_buffers() with one
1758 * small tweak: buffers beyond EOF do not get marked dirty. If we mark them
1759 * dirty, we'll never be able to clean them because we don't write buffers
1760 * beyond EOF, and that means we can't invalidate pages that span EOF
1761 * that have been marked dirty. Further, the dirty state can leak into
1762 * the file interior if the file is extended, resulting in all sorts of
1763 * bad things happening as the state does not match the underlying data.
1764 *
1765 * XXX: this really indicates that bufferheads in XFS need to die. Warts like
1766 * this only exist because of bufferheads and how the generic code manages them.
1767 */
1768STATIC int
1769xfs_vm_set_page_dirty(
1770 struct page *page)
1771{
1772 struct address_space *mapping = page->mapping;
1773 struct inode *inode = mapping->host;
1774 loff_t end_offset;
1775 loff_t offset;
1776 int newly_dirty;
1777
1778 if (unlikely(!mapping))
1779 return !TestSetPageDirty(page);
1780
1781 end_offset = i_size_read(inode);
1782 offset = page_offset(page);
1783
1784 spin_lock(&mapping->private_lock);
1785 if (page_has_buffers(page)) {
1786 struct buffer_head *head = page_buffers(page);
1787 struct buffer_head *bh = head;
1788
1789 do {
1790 if (offset < end_offset)
1791 set_buffer_dirty(bh);
1792 bh = bh->b_this_page;
1793 offset += 1 << inode->i_blkbits;
1794 } while (bh != head);
1795 }
1796 newly_dirty = !TestSetPageDirty(page);
1797 spin_unlock(&mapping->private_lock);
1798
1799 if (newly_dirty) {
1800 /* sigh - __set_page_dirty() is static, so copy it here, too */
1801 unsigned long flags;
1802
1803 spin_lock_irqsave(&mapping->tree_lock, flags);
1804 if (page->mapping) { /* Race with truncate? */
1805 WARN_ON_ONCE(!PageUptodate(page));
1806 account_page_dirtied(page, mapping);
1807 radix_tree_tag_set(&mapping->page_tree,
1808 page_index(page), PAGECACHE_TAG_DIRTY);
1809 }
1810 spin_unlock_irqrestore(&mapping->tree_lock, flags);
1811 __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
1812 }
1813 return newly_dirty;
1814}
1815
1756const struct address_space_operations xfs_address_space_operations = { 1816const struct address_space_operations xfs_address_space_operations = {
1757 .readpage = xfs_vm_readpage, 1817 .readpage = xfs_vm_readpage,
1758 .readpages = xfs_vm_readpages, 1818 .readpages = xfs_vm_readpages,
1759 .writepage = xfs_vm_writepage, 1819 .writepage = xfs_vm_writepage,
1760 .writepages = xfs_vm_writepages, 1820 .writepages = xfs_vm_writepages,
1821 .set_page_dirty = xfs_vm_set_page_dirty,
1761 .releasepage = xfs_vm_releasepage, 1822 .releasepage = xfs_vm_releasepage,
1762 .invalidatepage = xfs_vm_invalidatepage, 1823 .invalidatepage = xfs_vm_invalidatepage,
1763 .write_begin = xfs_vm_write_begin, 1824 .write_begin = xfs_vm_write_begin,
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 2f1e30d39a35..1707980f9a4b 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1470,6 +1470,26 @@ xfs_collapse_file_space(
1470 start_fsb = XFS_B_TO_FSB(mp, offset + len); 1470 start_fsb = XFS_B_TO_FSB(mp, offset + len);
1471 shift_fsb = XFS_B_TO_FSB(mp, len); 1471 shift_fsb = XFS_B_TO_FSB(mp, len);
1472 1472
1473 /*
1474 * Writeback the entire file and force remove any post-eof blocks. The
1475 * writeback prevents changes to the extent list via concurrent
1476 * writeback and the eofblocks trim prevents the extent shift algorithm
1477 * from running into a post-eof delalloc extent.
1478 *
1479 * XXX: This is a temporary fix until the extent shift loop below is
1480 * converted to use offsets and lookups within the ILOCK rather than
1481 * carrying around the index into the extent list for the next
1482 * iteration.
1483 */
1484 error = filemap_write_and_wait(VFS_I(ip)->i_mapping);
1485 if (error)
1486 return error;
1487 if (xfs_can_free_eofblocks(ip, true)) {
1488 error = xfs_free_eofblocks(mp, ip, false);
1489 if (error)
1490 return error;
1491 }
1492
1473 error = xfs_free_file_space(ip, offset, len); 1493 error = xfs_free_file_space(ip, offset, len);
1474 if (error) 1494 if (error)
1475 return error; 1495 return error;
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 076b1708d134..de5368c803f9 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -291,12 +291,22 @@ xfs_file_read_iter(
291 if (inode->i_mapping->nrpages) { 291 if (inode->i_mapping->nrpages) {
292 ret = filemap_write_and_wait_range( 292 ret = filemap_write_and_wait_range(
293 VFS_I(ip)->i_mapping, 293 VFS_I(ip)->i_mapping,
294 pos, -1); 294 pos, pos + size - 1);
295 if (ret) { 295 if (ret) {
296 xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL); 296 xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL);
297 return ret; 297 return ret;
298 } 298 }
299 truncate_pagecache_range(VFS_I(ip), pos, -1); 299
300 /*
301 * Invalidate whole pages. This can return an error if
302 * we fail to invalidate a page, but this should never
303 * happen on XFS. Warn if it does fail.
304 */
305 ret = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping,
306 pos >> PAGE_CACHE_SHIFT,
307 (pos + size - 1) >> PAGE_CACHE_SHIFT);
308 WARN_ON_ONCE(ret);
309 ret = 0;
300 } 310 }
301 xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL); 311 xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
302 } 312 }
@@ -632,10 +642,19 @@ xfs_file_dio_aio_write(
632 642
633 if (mapping->nrpages) { 643 if (mapping->nrpages) {
634 ret = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, 644 ret = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
635 pos, -1); 645 pos, pos + count - 1);
636 if (ret) 646 if (ret)
637 goto out; 647 goto out;
638 truncate_pagecache_range(VFS_I(ip), pos, -1); 648 /*
649 * Invalidate whole pages. This can return an error if
650 * we fail to invalidate a page, but this should never
651 * happen on XFS. Warn if it does fail.
652 */
653 ret = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping,
654 pos >> PAGE_CACHE_SHIFT,
655 (pos + count - 1) >> PAGE_CACHE_SHIFT);
656 WARN_ON_ONCE(ret);
657 ret = 0;
639 } 658 }
640 659
641 /* 660 /*