Diffstat (limited to 'fs'): 264 files changed, 10943 insertions(+), 7595 deletions(-)
diff --git a/fs/Kconfig b/fs/Kconfig
index 312393f32948..db5dc1598716 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -233,9 +233,13 @@ if NETWORK_FILESYSTEMS
 source "fs/nfs/Kconfig"
 source "fs/nfsd/Kconfig"
 
+config GRACE_PERIOD
+	tristate
+
 config LOCKD
 	tristate
 	depends on FILE_LOCKING
+	select GRACE_PERIOD
 
 config LOCKD_V4
 	bool
@@ -249,7 +253,7 @@ config NFS_ACL_SUPPORT
 
 config NFS_COMMON
 	bool
-	depends on NFSD || NFS_FS
+	depends on NFSD || NFS_FS || LOCKD
 	default y
 
 source "net/sunrpc/Kconfig"
diff --git a/fs/aio.c b/fs/aio.c
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -141,6 +141,7 @@ struct kioctx {
 
 	struct {
 		unsigned	tail;
+		unsigned	completed_events;
 		spinlock_t	completion_lock;
 	} ____cacheline_aligned_in_smp;
 
@@ -660,10 +661,10 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
 
 	INIT_LIST_HEAD(&ctx->active_reqs);
 
-	if (percpu_ref_init(&ctx->users, free_ioctx_users))
+	if (percpu_ref_init(&ctx->users, free_ioctx_users, 0, GFP_KERNEL))
 		goto err;
 
-	if (percpu_ref_init(&ctx->reqs, free_ioctx_reqs))
+	if (percpu_ref_init(&ctx->reqs, free_ioctx_reqs, 0, GFP_KERNEL))
 		goto err;
 
 	ctx->cpu = alloc_percpu(struct kioctx_cpu);
@@ -792,6 +793,8 @@ void exit_aio(struct mm_struct *mm)
 
 	for (i = 0; i < table->nr; ++i) {
 		struct kioctx *ctx = table->table[i];
+		struct completion requests_done =
+			COMPLETION_INITIALIZER_ONSTACK(requests_done);
 
 		if (!ctx)
 			continue;
@@ -803,7 +806,10 @@ void exit_aio(struct mm_struct *mm)
 		 * that it needs to unmap the area, just set it to 0.
 		 */
 		ctx->mmap_size = 0;
-		kill_ioctx(mm, ctx, NULL);
+		kill_ioctx(mm, ctx, &requests_done);
+
+		/* Wait until all IO for the context are done. */
+		wait_for_completion(&requests_done);
 	}
 
 	RCU_INIT_POINTER(mm->ioctx_table, NULL);
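Annotation (not part of the patch): exit_aio() now hands kill_ioctx() an on-stack completion and blocks until every outstanding request on the context has finished, so teardown no longer races with in-flight IO. Below is a user-space analogue of the completion pattern using pthreads; everything suffixed _demo is a hypothetical name, not kernel code.

#include <pthread.h>
#include <stdio.h>

/* a user-space stand-in for the kernel's struct completion */
struct completion_demo {
	pthread_mutex_t lock;
	pthread_cond_t cond;
	int done;
};

static void complete_demo(struct completion_demo *c)
{
	pthread_mutex_lock(&c->lock);
	c->done = 1;
	pthread_cond_signal(&c->cond);
	pthread_mutex_unlock(&c->lock);
}

static void wait_for_completion_demo(struct completion_demo *c)
{
	pthread_mutex_lock(&c->lock);
	while (!c->done)
		pthread_cond_wait(&c->cond, &c->lock);
	pthread_mutex_unlock(&c->lock);
}

static void *worker(void *arg)
{
	complete_demo(arg);	/* stands in for the last request's end_io */
	return NULL;
}

int main(void)
{
	/* on-stack, like requests_done in exit_aio() */
	struct completion_demo done = {
		PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER, 0 };
	pthread_t t;

	pthread_create(&t, NULL, worker, &done);
	wait_for_completion_demo(&done);
	pthread_join(&t, NULL);
	puts("all requests done");
	return 0;
}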
@@ -857,6 +863,68 @@ out:
 	return ret;
 }
 
+/* refill_reqs_available
+ *	Updates the reqs_available reference counts used for tracking the
+ *	number of free slots in the completion ring. This can be called
+ *	from aio_complete() (to optimistically update reqs_available) or
+ *	from aio_get_req() (the we're out of events case). It must be
+ *	called holding ctx->completion_lock.
+ */
+static void refill_reqs_available(struct kioctx *ctx, unsigned head,
+				  unsigned tail)
+{
+	unsigned events_in_ring, completed;
+
+	/* Clamp head since userland can write to it. */
+	head %= ctx->nr_events;
+	if (head <= tail)
+		events_in_ring = tail - head;
+	else
+		events_in_ring = ctx->nr_events - (head - tail);
+
+	completed = ctx->completed_events;
+	if (events_in_ring < completed)
+		completed -= events_in_ring;
+	else
+		completed = 0;
+
+	if (!completed)
+		return;
+
+	ctx->completed_events -= completed;
+	put_reqs_available(ctx, completed);
+}
+
+/* user_refill_reqs_available
+ *	Called to refill reqs_available when aio_get_req() encounters an
+ *	out of space in the completion ring.
+ */
+static void user_refill_reqs_available(struct kioctx *ctx)
+{
+	spin_lock_irq(&ctx->completion_lock);
+	if (ctx->completed_events) {
+		struct aio_ring *ring;
+		unsigned head;
+
+		/* Access of ring->head may race with aio_read_events_ring()
+		 * here, but that's okay since whether we read the old version
+		 * or the new version, and either will be valid. The important
+		 * part is that head cannot pass tail since we prevent
+		 * aio_complete() from updating tail by holding
+		 * ctx->completion_lock.  Even if head is invalid, the check
+		 * against ctx->completed_events below will make sure we do the
+		 * safe/right thing.
+		 */
+		ring = kmap_atomic(ctx->ring_pages[0]);
+		head = ring->head;
+		kunmap_atomic(ring);
+
+		refill_reqs_available(ctx, head, ctx->tail);
+	}
+
+	spin_unlock_irq(&ctx->completion_lock);
+}
+
 /* aio_get_req
 *	Allocate a slot for an aio request.
 *	Returns NULL if no requests are free.
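Annotation (not part of the patch): refill_reqs_available() treats the completion ring as a circular buffer whose head may be scribbled on by userspace, hence the clamp before the occupancy calculation. A stand-alone check of the same modular head/tail arithmetic; events_in_ring() is a hypothetical name used only in this sketch.

#include <assert.h>

static unsigned events_in_ring(unsigned head, unsigned tail, unsigned nr_events)
{
	head %= nr_events;		   /* clamp: userland can write head */
	if (head <= tail)
		return tail - head;	   /* no wrap-around */
	return nr_events - (head - tail); /* tail wrapped past the end */
}

int main(void)
{
	assert(events_in_ring(0, 3, 8) == 3);	/* simple case */
	assert(events_in_ring(6, 2, 8) == 4);	/* wrapped tail */
	assert(events_in_ring(5, 5, 8) == 0);	/* empty ring */
	assert(events_in_ring(13, 2, 8) == 5);	/* bogus head gets clamped */
	return 0;
}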
@@ -865,8 +933,11 @@ static inline struct kiocb *aio_get_req(struct kioctx *ctx)
 {
 	struct kiocb *req;
 
-	if (!get_reqs_available(ctx))
-		return NULL;
+	if (!get_reqs_available(ctx)) {
+		user_refill_reqs_available(ctx);
+		if (!get_reqs_available(ctx))
+			return NULL;
+	}
 
 	req = kmem_cache_alloc(kiocb_cachep, GFP_KERNEL|__GFP_ZERO);
 	if (unlikely(!req))
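Annotation (not part of the patch): allocation is now try/refill/retry. Only when the lockless fast path fails does aio_get_req() take completion_lock (via user_refill_reqs_available()) to move completed-but-unaccounted events back into reqs_available, then it tries exactly once more. Schematically, with plain ints standing in for the percpu counters and all _demo names hypothetical:

static int try_get_slot_demo(int *available)
{
	if (*available > 0) {	/* fast path, no lock */
		(*available)--;
		return 1;
	}
	return 0;
}

static int get_slot_demo(int *available, int *completed_unaccounted)
{
	if (try_get_slot_demo(available))
		return 1;
	/* slow path: reclaim what completions have freed, then retry once */
	*available += *completed_unaccounted;
	*completed_unaccounted = 0;
	return try_get_slot_demo(available);
}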
@@ -925,8 +996,8 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
 	struct kioctx	*ctx = iocb->ki_ctx;
 	struct aio_ring	*ring;
 	struct io_event	*ev_page, *event;
+	unsigned tail, pos, head;
 	unsigned long	flags;
-	unsigned tail, pos;
 
 	/*
 	 * Special case handling for sync iocbs:
@@ -987,10 +1058,14 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
 	ctx->tail = tail;
 
 	ring = kmap_atomic(ctx->ring_pages[0]);
+	head = ring->head;
 	ring->tail = tail;
 	kunmap_atomic(ring);
 	flush_dcache_page(ctx->ring_pages[0]);
 
+	ctx->completed_events++;
+	if (ctx->completed_events > 1)
+		refill_reqs_available(ctx, head, tail);
 	spin_unlock_irqrestore(&ctx->completion_lock, flags);
 
 	pr_debug("added to ring %p at [%u]\n", iocb, tail);
@@ -1005,7 +1080,6 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
 
 	/* everything turned out well, dispose of the aiocb. */
 	kiocb_free(iocb);
-	put_reqs_available(ctx, 1);
 
 	/*
 	 * We have to order our ring_info tail store above and test
@@ -1042,6 +1116,12 @@ static long aio_read_events_ring(struct kioctx *ctx,
 	tail = ring->tail;
 	kunmap_atomic(ring);
 
+	/*
+	 * Ensure that once we've read the current tail pointer, that
+	 * we also see the events that were stored up to the tail.
+	 */
+	smp_rmb();
+
 	pr_debug("h%u t%u m%u\n", head, tail, ctx->nr_events);
 
 	if (head == tail)
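Annotation (not part of the patch): the smp_rmb() added to aio_read_events_ring() pairs with the write-side ordering in aio_complete(), which publishes the events before updating ring->tail; without the read barrier a consumer could observe the new tail yet read stale event slots. A user-space analogue of the same publish/consume pairing with C11 release/acquire atomics, purely illustrative (ring_demo is a made-up type):

#include <stdatomic.h>

struct ring_demo { int events[64]; atomic_uint tail; };

/* producer: store the event, then release the new tail */
static void produce(struct ring_demo *r, unsigned t, int ev)
{
	r->events[t % 64] = ev;
	atomic_store_explicit(&r->tail, t + 1, memory_order_release);
}

/* consumer: acquire tail; events up to it are then guaranteed visible */
static int consume(struct ring_demo *r, unsigned head)
{
	unsigned t = atomic_load_explicit(&r->tail, memory_order_acquire);
	return (head == t) ? -1 : r->events[head % 64];
}

int main(void)
{
	struct ring_demo r = { .tail = 0 };

	produce(&r, 0, 42);
	return consume(&r, 0) == 42 ? 0 : 1;
}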
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 6d7274619bf9..e2f3ad0879ce 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -304,6 +304,12 @@ static int blkdev_readpage(struct file * file, struct page * page)
 	return block_read_full_page(page, blkdev_get_block);
 }
 
+static int blkdev_readpages(struct file *file, struct address_space *mapping,
+			struct list_head *pages, unsigned nr_pages)
+{
+	return mpage_readpages(mapping, pages, nr_pages, blkdev_get_block);
+}
+
 static int blkdev_write_begin(struct file *file, struct address_space *mapping,
 			loff_t pos, unsigned len, unsigned flags,
 			struct page **pagep, void **fsdata)
@@ -1622,6 +1628,7 @@ static int blkdev_releasepage(struct page *page, gfp_t wait)
 
 static const struct address_space_operations def_blk_aops = {
 	.readpage	= blkdev_readpage,
+	.readpages	= blkdev_readpages,
 	.writepage	= blkdev_writepage,
 	.write_begin	= blkdev_write_begin,
 	.write_end	= blkdev_write_end,
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index 5a201d81049c..4dabeb893b7c 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -22,7 +22,6 @@
 #include <linux/list.h>
 #include <linux/spinlock.h>
 #include <linux/freezer.h>
-#include <linux/workqueue.h>
 #include "async-thread.h"
 #include "ctree.h"
 
@@ -55,13 +54,45 @@ struct btrfs_workqueue {
 	struct __btrfs_workqueue *high;
 };
 
-static inline struct __btrfs_workqueue
-*__btrfs_alloc_workqueue(const char *name, int flags, int max_active,
+static void normal_work_helper(struct btrfs_work *work);
+
+#define BTRFS_WORK_HELPER(name)					\
+void btrfs_##name(struct work_struct *arg)				\
+{									\
+	struct btrfs_work *work = container_of(arg, struct btrfs_work,	\
+					       normal_work);		\
+	normal_work_helper(work);					\
+}
+
+BTRFS_WORK_HELPER(worker_helper);
+BTRFS_WORK_HELPER(delalloc_helper);
+BTRFS_WORK_HELPER(flush_delalloc_helper);
+BTRFS_WORK_HELPER(cache_helper);
+BTRFS_WORK_HELPER(submit_helper);
+BTRFS_WORK_HELPER(fixup_helper);
+BTRFS_WORK_HELPER(endio_helper);
+BTRFS_WORK_HELPER(endio_meta_helper);
+BTRFS_WORK_HELPER(endio_meta_write_helper);
+BTRFS_WORK_HELPER(endio_raid56_helper);
+BTRFS_WORK_HELPER(endio_repair_helper);
+BTRFS_WORK_HELPER(rmw_helper);
+BTRFS_WORK_HELPER(endio_write_helper);
+BTRFS_WORK_HELPER(freespace_write_helper);
+BTRFS_WORK_HELPER(delayed_meta_helper);
+BTRFS_WORK_HELPER(readahead_helper);
+BTRFS_WORK_HELPER(qgroup_rescan_helper);
+BTRFS_WORK_HELPER(extent_refs_helper);
+BTRFS_WORK_HELPER(scrub_helper);
+BTRFS_WORK_HELPER(scrubwrc_helper);
+BTRFS_WORK_HELPER(scrubnc_helper);
+
+static struct __btrfs_workqueue *
+__btrfs_alloc_workqueue(const char *name, int flags, int max_active,
			 int thresh)
 {
 	struct __btrfs_workqueue *ret = kzalloc(sizeof(*ret), GFP_NOFS);
 
-	if (unlikely(!ret))
+	if (!ret)
 		return NULL;
 
 	ret->max_active = max_active;
@@ -85,7 +116,7 @@ static inline struct __btrfs_workqueue
 	ret->normal_wq = alloc_workqueue("%s-%s", flags,
 					 ret->max_active, "btrfs",
 					 name);
-	if (unlikely(!ret->normal_wq)) {
+	if (!ret->normal_wq) {
 		kfree(ret);
 		return NULL;
 	}
@@ -107,12 +138,12 @@ struct btrfs_workqueue *btrfs_alloc_workqueue(const char *name,
 {
 	struct btrfs_workqueue *ret = kzalloc(sizeof(*ret), GFP_NOFS);
 
-	if (unlikely(!ret))
+	if (!ret)
 		return NULL;
 
 	ret->normal = __btrfs_alloc_workqueue(name, flags & ~WQ_HIGHPRI,
 					      max_active, thresh);
-	if (unlikely(!ret->normal)) {
+	if (!ret->normal) {
 		kfree(ret);
 		return NULL;
 	}
@@ -120,7 +151,7 @@ struct btrfs_workqueue *btrfs_alloc_workqueue(const char *name,
 	if (flags & WQ_HIGHPRI) {
 		ret->high = __btrfs_alloc_workqueue(name, flags, max_active,
 						    thresh);
-		if (unlikely(!ret->high)) {
+		if (!ret->high) {
 			__btrfs_destroy_workqueue(ret->normal);
 			kfree(ret);
 			return NULL;
@@ -232,13 +263,11 @@ static void run_ordered_work(struct __btrfs_workqueue *wq)
 	spin_unlock_irqrestore(lock, flags);
 }
 
-static void normal_work_helper(struct work_struct *arg)
+static void normal_work_helper(struct btrfs_work *work)
 {
-	struct btrfs_work *work;
 	struct __btrfs_workqueue *wq;
 	int need_order = 0;
 
-	work = container_of(arg, struct btrfs_work, normal_work);
 	/*
	 * We should not touch things inside work in the following cases:
	 * 1) after work->func() if it has no ordered_free
@@ -262,7 +291,7 @@ static void normal_work_helper(struct work_struct *arg)
 	trace_btrfs_all_work_done(work);
 }
 
-void btrfs_init_work(struct btrfs_work *work,
+void btrfs_init_work(struct btrfs_work *work, btrfs_work_func_t uniq_func,
		     btrfs_func_t func,
		     btrfs_func_t ordered_func,
		     btrfs_func_t ordered_free)
@@ -270,7 +299,7 @@ void btrfs_init_work(struct btrfs_work *work,
 	work->func = func;
 	work->ordered_func = ordered_func;
 	work->ordered_free = ordered_free;
-	INIT_WORK(&work->normal_work, normal_work_helper);
+	INIT_WORK(&work->normal_work, uniq_func);
 	INIT_LIST_HEAD(&work->ordered_list);
 	work->flags = 0;
 }
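Annotation (not part of the patch): the BTRFS_WORK_HELPER macro stamps out one wrapper function per btrfs work type, so each queued item carries a distinct function address; the generic workqueue core identifies running work partly by its function pointer, so unique helpers keep a recycled btrfs_work from being mistaken for unrelated work. A stand-alone illustration of the container_of pattern the wrappers rely on, with demo-only names:

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct work_struct_demo { int pending; };

struct btrfs_work_demo {
	struct work_struct_demo normal_work;	/* embedded, as in btrfs_work */
	const char *name;
};

static void demo_helper(struct work_struct_demo *arg)
{
	/* recover the enclosing structure from the embedded member */
	struct btrfs_work_demo *work =
		container_of(arg, struct btrfs_work_demo, normal_work);

	printf("running work: %s\n", work->name);
}

int main(void)
{
	struct btrfs_work_demo w = { .normal_work = { 0 }, .name = "endio" };

	demo_helper(&w.normal_work);
	return 0;
}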
diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h
index 9c6b66d15fb0..e386c29ef1f6 100644
--- a/fs/btrfs/async-thread.h
+++ b/fs/btrfs/async-thread.h
@@ -19,12 +19,14 @@
 
 #ifndef __BTRFS_ASYNC_THREAD_
 #define __BTRFS_ASYNC_THREAD_
+#include <linux/workqueue.h>
 
 struct btrfs_workqueue;
 /* Internal use only */
 struct __btrfs_workqueue;
 struct btrfs_work;
 typedef void (*btrfs_func_t)(struct btrfs_work *arg);
+typedef void (*btrfs_work_func_t)(struct work_struct *arg);
 
 struct btrfs_work {
 	btrfs_func_t func;
@@ -38,11 +40,36 @@ struct btrfs_work {
 	unsigned long flags;
 };
 
+#define BTRFS_WORK_HELPER_PROTO(name)					\
+void btrfs_##name(struct work_struct *arg)
+
+BTRFS_WORK_HELPER_PROTO(worker_helper);
+BTRFS_WORK_HELPER_PROTO(delalloc_helper);
+BTRFS_WORK_HELPER_PROTO(flush_delalloc_helper);
+BTRFS_WORK_HELPER_PROTO(cache_helper);
+BTRFS_WORK_HELPER_PROTO(submit_helper);
+BTRFS_WORK_HELPER_PROTO(fixup_helper);
+BTRFS_WORK_HELPER_PROTO(endio_helper);
+BTRFS_WORK_HELPER_PROTO(endio_meta_helper);
+BTRFS_WORK_HELPER_PROTO(endio_meta_write_helper);
+BTRFS_WORK_HELPER_PROTO(endio_raid56_helper);
+BTRFS_WORK_HELPER_PROTO(endio_repair_helper);
+BTRFS_WORK_HELPER_PROTO(rmw_helper);
+BTRFS_WORK_HELPER_PROTO(endio_write_helper);
+BTRFS_WORK_HELPER_PROTO(freespace_write_helper);
+BTRFS_WORK_HELPER_PROTO(delayed_meta_helper);
+BTRFS_WORK_HELPER_PROTO(readahead_helper);
+BTRFS_WORK_HELPER_PROTO(qgroup_rescan_helper);
+BTRFS_WORK_HELPER_PROTO(extent_refs_helper);
+BTRFS_WORK_HELPER_PROTO(scrub_helper);
+BTRFS_WORK_HELPER_PROTO(scrubwrc_helper);
+BTRFS_WORK_HELPER_PROTO(scrubnc_helper);
+
 struct btrfs_workqueue *btrfs_alloc_workqueue(const char *name,
					      int flags,
					      int max_active,
					      int thresh);
-void btrfs_init_work(struct btrfs_work *work,
+void btrfs_init_work(struct btrfs_work *work, btrfs_work_func_t helper,
		     btrfs_func_t func,
		     btrfs_func_t ordered_func,
		     btrfs_func_t ordered_free);
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 54a201dac7f9..2d3e32ebfd15 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -25,6 +25,9 @@
 #include "delayed-ref.h"
 #include "locking.h"
 
+/* Just an arbitrary number so we can be sure this happened */
+#define BACKREF_FOUND_SHARED 6
+
 struct extent_inode_elem {
 	u64 inum;
 	u64 offset;
@@ -377,7 +380,8 @@ out:
 static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info,
				   struct btrfs_path *path, u64 time_seq,
				   struct list_head *head,
-				   const u64 *extent_item_pos, u64 total_refs)
+				   const u64 *extent_item_pos, u64 total_refs,
+				   u64 root_objectid)
 {
 	int err;
 	int ret = 0;
@@ -402,6 +406,10 @@ static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info,
			continue;
		if (ref->count == 0)
			continue;
+		if (root_objectid && ref->root_id != root_objectid) {
+			ret = BACKREF_FOUND_SHARED;
+			goto out;
+		}
		err = __resolve_indirect_ref(fs_info, path, time_seq, ref,
					     parents, extent_item_pos,
					     total_refs);
@@ -482,7 +490,7 @@ static int __add_missing_keys(struct btrfs_fs_info *fs_info,
			continue;
		BUG_ON(!ref->wanted_disk_byte);
		eb = read_tree_block(fs_info->tree_root, ref->wanted_disk_byte,
-				     fs_info->tree_root->leafsize, 0);
+				     0);
		if (!eb || !extent_buffer_uptodate(eb)) {
			free_extent_buffer(eb);
			return -EIO;
@@ -561,7 +569,8 @@ static void __merge_refs(struct list_head *head, int mode)
 * smaller or equal that seq to the list
 */
 static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
-			      struct list_head *prefs, u64 *total_refs)
+			      struct list_head *prefs, u64 *total_refs,
+			      u64 inum)
 {
 	struct btrfs_delayed_extent_op *extent_op = head->extent_op;
 	struct rb_node *n = &head->node.rb_node;
@@ -625,6 +634,16 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
			key.objectid = ref->objectid;
			key.type = BTRFS_EXTENT_DATA_KEY;
			key.offset = ref->offset;
+
+			/*
+			 * Found a inum that doesn't match our known inum, we
+			 * know it's shared.
+			 */
+			if (inum && ref->objectid != inum) {
+				ret = BACKREF_FOUND_SHARED;
+				break;
+			}
+
			ret = __add_prelim_ref(prefs, ref->root, &key, 0, 0,
					       node->bytenr,
					       node->ref_mod * sgn, GFP_ATOMIC);
@@ -659,7 +678,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
 static int __add_inline_refs(struct btrfs_fs_info *fs_info,
			     struct btrfs_path *path, u64 bytenr,
			     int *info_level, struct list_head *prefs,
-			     u64 *total_refs)
+			     u64 *total_refs, u64 inum)
 {
 	int ret = 0;
 	int slot;
@@ -744,6 +763,12 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info,
							      dref);
			key.type = BTRFS_EXTENT_DATA_KEY;
			key.offset = btrfs_extent_data_ref_offset(leaf, dref);
+
+			if (inum && key.objectid != inum) {
+				ret = BACKREF_FOUND_SHARED;
+				break;
+			}
+
			root = btrfs_extent_data_ref_root(leaf, dref);
			ret = __add_prelim_ref(prefs, root, &key, 0, 0,
					       bytenr, count, GFP_NOFS);
@@ -765,7 +790,7 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info,
 */
 static int __add_keyed_refs(struct btrfs_fs_info *fs_info,
			    struct btrfs_path *path, u64 bytenr,
-			    int info_level, struct list_head *prefs)
+			    int info_level, struct list_head *prefs, u64 inum)
 {
 	struct btrfs_root *extent_root = fs_info->extent_root;
 	int ret;
@@ -827,6 +852,12 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info,
							      dref);
			key.type = BTRFS_EXTENT_DATA_KEY;
			key.offset = btrfs_extent_data_ref_offset(leaf, dref);
+
+			if (inum && key.objectid != inum) {
+				ret = BACKREF_FOUND_SHARED;
+				break;
+			}
+
			root = btrfs_extent_data_ref_root(leaf, dref);
			ret = __add_prelim_ref(prefs, root, &key, 0, 0,
					       bytenr, count, GFP_NOFS);
@@ -854,7 +885,8 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info,
 static int find_parent_nodes(struct btrfs_trans_handle *trans,
			     struct btrfs_fs_info *fs_info, u64 bytenr,
			     u64 time_seq, struct ulist *refs,
-			     struct ulist *roots, const u64 *extent_item_pos)
+			     struct ulist *roots, const u64 *extent_item_pos,
+			     u64 root_objectid, u64 inum)
 {
 	struct btrfs_key key;
 	struct btrfs_path *path;
@@ -929,7 +961,8 @@ again:
		}
		spin_unlock(&delayed_refs->lock);
		ret = __add_delayed_refs(head, time_seq,
-					 &prefs_delayed, &total_refs);
+					 &prefs_delayed, &total_refs,
+					 inum);
		mutex_unlock(&head->mutex);
		if (ret)
			goto out;
@@ -951,11 +984,11 @@ again:
	    key.type == BTRFS_METADATA_ITEM_KEY)) {
		ret = __add_inline_refs(fs_info, path, bytenr,
					&info_level, &prefs,
-					&total_refs);
+					&total_refs, inum);
		if (ret)
			goto out;
		ret = __add_keyed_refs(fs_info, path, bytenr,
-				       info_level, &prefs);
+				       info_level, &prefs, inum);
		if (ret)
			goto out;
	}
@@ -971,7 +1004,8 @@ again:
	__merge_refs(&prefs, 1);
 
	ret = __resolve_indirect_refs(fs_info, path, time_seq, &prefs,
-				      extent_item_pos, total_refs);
+				      extent_item_pos, total_refs,
+				      root_objectid);
	if (ret)
		goto out;
 
@@ -981,6 +1015,11 @@ again:
		ref = list_first_entry(&prefs, struct __prelim_ref, list);
		WARN_ON(ref->count < 0);
		if (roots && ref->count && ref->root_id && ref->parent == 0) {
+			if (root_objectid && ref->root_id != root_objectid) {
+				ret = BACKREF_FOUND_SHARED;
+				goto out;
+			}
+
			/* no parent == root of tree */
			ret = ulist_add(roots, ref->root_id, 0, GFP_NOFS);
			if (ret < 0)
@@ -989,12 +1028,10 @@ again:
		if (ref->count && ref->parent) {
			if (extent_item_pos && !ref->inode_list &&
			    ref->level == 0) {
-				u32 bsz;
				struct extent_buffer *eb;
-				bsz = btrfs_level_size(fs_info->extent_root,
-						       ref->level);
+
				eb = read_tree_block(fs_info->extent_root,
-						     ref->parent, bsz, 0);
+						     ref->parent, 0);
				if (!eb || !extent_buffer_uptodate(eb)) {
					free_extent_buffer(eb);
					ret = -EIO;
@@ -1087,7 +1124,7 @@ static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans,
		return -ENOMEM;
 
	ret = find_parent_nodes(trans, fs_info, bytenr,
-				time_seq, *leafs, NULL, extent_item_pos);
+				time_seq, *leafs, NULL, extent_item_pos, 0, 0);
	if (ret < 0 && ret != -ENOENT) {
		free_leaf_list(*leafs);
		return ret;
@@ -1130,7 +1167,7 @@ static int __btrfs_find_all_roots(struct btrfs_trans_handle *trans,
	ULIST_ITER_INIT(&uiter);
	while (1) {
		ret = find_parent_nodes(trans, fs_info, bytenr,
-					time_seq, tmp, *roots, NULL);
+					time_seq, tmp, *roots, NULL, 0, 0);
		if (ret < 0 && ret != -ENOENT) {
			ulist_free(tmp);
			ulist_free(*roots);
@@ -1161,6 +1198,54 @@ int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
	return ret;
 }
 
+int btrfs_check_shared(struct btrfs_trans_handle *trans,
+		       struct btrfs_fs_info *fs_info, u64 root_objectid,
+		       u64 inum, u64 bytenr)
+{
+	struct ulist *tmp = NULL;
+	struct ulist *roots = NULL;
+	struct ulist_iterator uiter;
+	struct ulist_node *node;
+	struct seq_list elem = {};
+	int ret = 0;
+
+	tmp = ulist_alloc(GFP_NOFS);
+	roots = ulist_alloc(GFP_NOFS);
+	if (!tmp || !roots) {
+		ulist_free(tmp);
+		ulist_free(roots);
+		return -ENOMEM;
+	}
+
+	if (trans)
+		btrfs_get_tree_mod_seq(fs_info, &elem);
+	else
+		down_read(&fs_info->commit_root_sem);
+	ULIST_ITER_INIT(&uiter);
+	while (1) {
+		ret = find_parent_nodes(trans, fs_info, bytenr, elem.seq, tmp,
+					roots, NULL, root_objectid, inum);
+		if (ret == BACKREF_FOUND_SHARED) {
+			ret = 1;
+			break;
+		}
+		if (ret < 0 && ret != -ENOENT)
+			break;
+		node = ulist_next(tmp, &uiter);
+		if (!node)
+			break;
+		bytenr = node->val;
+		cond_resched();
+	}
+	if (trans)
+		btrfs_put_tree_mod_seq(fs_info, &elem);
+	else
+		up_read(&fs_info->commit_root_sem);
+	ulist_free(tmp);
+	ulist_free(roots);
+	return ret;
+}
+
 /*
 * this makes the path point to (inum INODE_ITEM ioff)
 */
@@ -1193,7 +1278,7 @@ int btrfs_find_one_extref(struct btrfs_root *root, u64 inode_objectid,
	unsigned long ptr;
 
	key.objectid = inode_objectid;
-	btrfs_set_key_type(&key, BTRFS_INODE_EXTREF_KEY);
+	key.type = BTRFS_INODE_EXTREF_KEY;
	key.offset = start_off;
 
	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
@@ -1233,7 +1318,7 @@ int btrfs_find_one_extref(struct btrfs_root *root, u64 inode_objectid,
		ret = -ENOENT;
		if (found_key.objectid != inode_objectid)
			break;
-		if (btrfs_key_type(&found_key) != BTRFS_INODE_EXTREF_KEY)
+		if (found_key.type != BTRFS_INODE_EXTREF_KEY)
			break;
 
		ret = 0;
@@ -1366,7 +1451,7 @@ int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical,
	}
	btrfs_item_key_to_cpu(path->nodes[0], found_key, path->slots[0]);
	if (found_key->type == BTRFS_METADATA_ITEM_KEY)
-		size = fs_info->extent_root->leafsize;
+		size = fs_info->extent_root->nodesize;
	else if (found_key->type == BTRFS_EXTENT_ITEM_KEY)
		size = found_key->offset;
 
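Annotation (not part of the patch): btrfs_check_shared() serves callers (such as fiemap deciding whether to report FIEMAP_EXTENT_SHARED) that only need a yes/no answer. Instead of collecting every referencing root, find_parent_nodes() is told the asking root/inode and bails out with BACKREF_FOUND_SHARED the moment any other referent turns up. A toy version of that early-exit shape, with entirely hypothetical types:

struct ref_demo { unsigned long long root_id, inum; };

/* return 1 as soon as any reference falls outside the asking root/inode */
static int check_shared_demo(const struct ref_demo *refs, int nr,
			     unsigned long long root_objectid,
			     unsigned long long inum)
{
	for (int i = 0; i < nr; i++)
		if (refs[i].root_id != root_objectid || refs[i].inum != inum)
			return 1;	/* shared with someone else */
	return 0;			/* referenced only by the asker */
}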
diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h
index 86fc20fec282..2a1ac6bfc724 100644
--- a/fs/btrfs/backref.h
+++ b/fs/btrfs/backref.h
@@ -71,6 +71,9 @@ int btrfs_find_one_extref(struct btrfs_root *root, u64 inode_objectid,
			  u64 start_off, struct btrfs_path *path,
			  struct btrfs_inode_extref **ret_extref,
			  u64 *found_off);
+int btrfs_check_shared(struct btrfs_trans_handle *trans,
+		       struct btrfs_fs_info *fs_info, u64 root_objectid,
+		       u64 inum, u64 bytenr);
 
 int __init btrfs_prelim_ref_init(void);
 void btrfs_prelim_ref_exit(void);
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 43527fd78825..4aadadcfab20 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -44,6 +44,17 @@
 #define BTRFS_INODE_IN_DELALLOC_LIST		9
 #define BTRFS_INODE_READDIO_NEED_LOCK		10
 #define BTRFS_INODE_HAS_PROPS			11
+/*
+ * The following 3 bits are meant only for the btree inode.
+ * When any of them is set, it means an error happened while writing an
+ * extent buffer belonging to:
+ * 1) a non-log btree
+ * 2) a log btree and first log sub-transaction
+ * 3) a log btree and second log sub-transaction
+ */
+#define BTRFS_INODE_BTREE_ERR			12
+#define BTRFS_INODE_BTREE_LOG1_ERR		13
+#define BTRFS_INODE_BTREE_LOG2_ERR		14
 
 /* in memory btrfs inode */
 struct btrfs_inode {
@@ -121,6 +132,12 @@ struct btrfs_inode {
 	u64 delalloc_bytes;
 
 	/*
+	 * total number of bytes pending defrag, used by stat to check whether
+	 * it needs COW.
+	 */
+	u64 defrag_bytes;
+
+	/*
	 * the size of the file stored in the metadata on disk. data=ordered
	 * means the in-memory i_size might be larger than the size on disk
	 * because not all the blocks are written yet.
@@ -234,13 +251,25 @@ static inline int btrfs_inode_in_log(struct inode *inode, u64 generation)
	    BTRFS_I(inode)->last_sub_trans <=
	    BTRFS_I(inode)->last_log_commit &&
	    BTRFS_I(inode)->last_sub_trans <=
-	    BTRFS_I(inode)->root->last_log_commit)
-		return 1;
+	    BTRFS_I(inode)->root->last_log_commit) {
+		/*
+		 * After a ranged fsync we might have left some extent maps
+		 * (that fall outside the fsync's range). So return false
+		 * here if the list isn't empty, to make sure btrfs_log_inode()
+		 * will be called and process those extent maps.
+		 */
+		smp_mb();
+		if (list_empty(&BTRFS_I(inode)->extent_tree.modified_extents))
+			return 1;
+	}
	return 0;
 }
 
+#define BTRFS_DIO_ORIG_BIO_SUBMITTED	0x1
+
 struct btrfs_dio_private {
	struct inode *inode;
+	unsigned long flags;
	u64 logical_offset;
	u64 disk_bytenr;
	u64 bytes;
@@ -257,7 +286,12 @@ struct btrfs_dio_private {
 
	/* dio_bio came from fs/direct-io.c */
	struct bio *dio_bio;
-	u8 csum[0];
+
+	/*
+	 * The original bio may be splited to several sub-bios, this is
+	 * done during endio of sub-bios
+	 */
+	int (*subio_endio)(struct inode *, struct btrfs_io_bio *, int);
 };
 
 /*
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
index ce92ae30250f..cb7f3fe9c9f6 100644
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c
@@ -807,7 +807,7 @@ static int btrfsic_process_superblock_dev_mirror(
 
	/* super block bytenr is always the unmapped device bytenr */
	dev_bytenr = btrfs_sb_offset(superblock_mirror_num);
-	if (dev_bytenr + BTRFS_SUPER_INFO_SIZE > device->total_bytes)
+	if (dev_bytenr + BTRFS_SUPER_INFO_SIZE > device->commit_total_bytes)
		return -1;
	bh = __bread(superblock_bdev, dev_bytenr / 4096,
		     BTRFS_SUPER_INFO_SIZE);
@@ -820,7 +820,6 @@ static int btrfsic_process_superblock_dev_mirror(
	    btrfs_super_magic(super_tmp) != BTRFS_MAGIC ||
	    memcmp(device->uuid, super_tmp->dev_item.uuid, BTRFS_UUID_SIZE) ||
	    btrfs_super_nodesize(super_tmp) != state->metablock_size ||
-	    btrfs_super_leafsize(super_tmp) != state->metablock_size ||
	    btrfs_super_sectorsize(super_tmp) != state->datablock_size) {
		brelse(bh);
		return 0;
@@ -1252,8 +1251,7 @@ static void btrfsic_read_from_block_data(
 
	while (len > 0) {
		cur = min(len, ((size_t)PAGE_CACHE_SIZE - offset_in_page));
-		BUG_ON(i >= (block_ctx->len + PAGE_CACHE_SIZE - 1) >>
-		       PAGE_CACHE_SHIFT);
+		BUG_ON(i >= DIV_ROUND_UP(block_ctx->len, PAGE_CACHE_SIZE));
		kaddr = block_ctx->datav[i];
		memcpy(dst, kaddr + offset_in_page, cur);
 
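Annotation (not part of the patch): DIV_ROUND_UP(n, d) is defined in include/linux/kernel.h as (((n) + (d) - 1) / (d)), i.e. exactly the open-coded ceiling divisions being replaced here and in fs/btrfs/compression.c below. A quick stand-alone check:

#include <assert.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
	assert(DIV_ROUND_UP(4096, 4096) == 1);	/* exact multiple */
	assert(DIV_ROUND_UP(4097, 4096) == 2);	/* one byte over rounds up */
	assert(DIV_ROUND_UP(1, 4096) == 1);
	return 0;
}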
@@ -3120,24 +3118,12 @@ int btrfsic_mount(struct btrfs_root *root,
	struct list_head *dev_head = &fs_devices->devices;
	struct btrfs_device *device;
 
-	if (root->nodesize != root->leafsize) {
-		printk(KERN_INFO
-		       "btrfsic: cannot handle nodesize %d != leafsize %d!\n",
-		       root->nodesize, root->leafsize);
-		return -1;
-	}
	if (root->nodesize & ((u64)PAGE_CACHE_SIZE - 1)) {
		printk(KERN_INFO
		       "btrfsic: cannot handle nodesize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n",
		       root->nodesize, PAGE_CACHE_SIZE);
		return -1;
	}
-	if (root->leafsize & ((u64)PAGE_CACHE_SIZE - 1)) {
-		printk(KERN_INFO
-		       "btrfsic: cannot handle leafsize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n",
-		       root->leafsize, PAGE_CACHE_SIZE);
-		return -1;
-	}
	if (root->sectorsize & ((u64)PAGE_CACHE_SIZE - 1)) {
		printk(KERN_INFO
		       "btrfsic: cannot handle sectorsize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n",
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 1daea0b47187..d3220d31d3cb 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -91,8 +91,7 @@ static inline int compressed_bio_size(struct btrfs_root *root,
	u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
 
	return sizeof(struct compressed_bio) +
-		((disk_size + root->sectorsize - 1) / root->sectorsize) *
-		csum_size;
+		(DIV_ROUND_UP(disk_size, root->sectorsize)) * csum_size;
 }
 
 static struct bio *compressed_bio_alloc(struct block_device *bdev,
@@ -389,7 +388,8 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
		 * freed before we're done setting it up
		 */
		atomic_inc(&cb->pending_bios);
-		ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
+		ret = btrfs_bio_wq_end_io(root->fs_info, bio,
+					  BTRFS_WQ_ENDIO_DATA);
		BUG_ON(ret); /* -ENOMEM */
 
		if (!skip_sum) {
@@ -420,7 +420,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
	}
	bio_get(bio);
 
-	ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
+	ret = btrfs_bio_wq_end_io(root->fs_info, bio, BTRFS_WQ_ENDIO_DATA);
	BUG_ON(ret); /* -ENOMEM */
 
	if (!skip_sum) {
@@ -615,8 +615,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
	cb->compress_type = extent_compress_type(bio_flags);
	cb->orig_bio = bio;
 
-	nr_pages = (compressed_len + PAGE_CACHE_SIZE - 1) /
-				 PAGE_CACHE_SIZE;
+	nr_pages = DIV_ROUND_UP(compressed_len, PAGE_CACHE_SIZE);
	cb->compressed_pages = kzalloc(sizeof(struct page *) * nr_pages,
				       GFP_NOFS);
	if (!cb->compressed_pages)
@@ -670,7 +669,8 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
		    PAGE_CACHE_SIZE) {
			bio_get(comp_bio);
 
-			ret = btrfs_bio_wq_end_io(root->fs_info, comp_bio, 0);
+			ret = btrfs_bio_wq_end_io(root->fs_info, comp_bio,
+						  BTRFS_WQ_ENDIO_DATA);
			BUG_ON(ret); /* -ENOMEM */
 
			/*
@@ -686,8 +686,8 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
							comp_bio, sums);
				BUG_ON(ret); /* -ENOMEM */
			}
-			sums += (comp_bio->bi_iter.bi_size +
-				 root->sectorsize - 1) / root->sectorsize;
+			sums += DIV_ROUND_UP(comp_bio->bi_iter.bi_size,
+					     root->sectorsize);
 
			ret = btrfs_map_bio(root, READ, comp_bio,
					    mirror_num, 0);
@@ -708,7 +708,8 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
	}
	bio_get(comp_bio);
 
-	ret = btrfs_bio_wq_end_io(root->fs_info, comp_bio, 0);
+	ret = btrfs_bio_wq_end_io(root->fs_info, comp_bio,
+				  BTRFS_WQ_ENDIO_DATA);
	BUG_ON(ret); /* -ENOMEM */
 
	if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 44ee5d2e52a4..19bc6162fb8e 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c | |||
@@ -258,9 +258,8 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans, | |||
258 | else | 258 | else |
259 | btrfs_node_key(buf, &disk_key, 0); | 259 | btrfs_node_key(buf, &disk_key, 0); |
260 | 260 | ||
261 | cow = btrfs_alloc_free_block(trans, root, buf->len, 0, | 261 | cow = btrfs_alloc_tree_block(trans, root, 0, new_root_objectid, |
262 | new_root_objectid, &disk_key, level, | 262 | &disk_key, level, buf->start, 0); |
263 | buf->start, 0); | ||
264 | if (IS_ERR(cow)) | 263 | if (IS_ERR(cow)) |
265 | return PTR_ERR(cow); | 264 | return PTR_ERR(cow); |
266 | 265 | ||
@@ -1133,9 +1132,9 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, | |||
1133 | } else | 1132 | } else |
1134 | parent_start = 0; | 1133 | parent_start = 0; |
1135 | 1134 | ||
1136 | cow = btrfs_alloc_free_block(trans, root, buf->len, parent_start, | 1135 | cow = btrfs_alloc_tree_block(trans, root, parent_start, |
1137 | root->root_key.objectid, &disk_key, | 1136 | root->root_key.objectid, &disk_key, level, |
1138 | level, search_start, empty_size); | 1137 | search_start, empty_size); |
1139 | if (IS_ERR(cow)) | 1138 | if (IS_ERR(cow)) |
1140 | return PTR_ERR(cow); | 1139 | return PTR_ERR(cow); |
1141 | 1140 | ||
@@ -1425,7 +1424,6 @@ get_old_root(struct btrfs_root *root, u64 time_seq) | |||
1425 | struct tree_mod_root *old_root = NULL; | 1424 | struct tree_mod_root *old_root = NULL; |
1426 | u64 old_generation = 0; | 1425 | u64 old_generation = 0; |
1427 | u64 logical; | 1426 | u64 logical; |
1428 | u32 blocksize; | ||
1429 | 1427 | ||
1430 | eb_root = btrfs_read_lock_root_node(root); | 1428 | eb_root = btrfs_read_lock_root_node(root); |
1431 | tm = __tree_mod_log_oldest_root(root->fs_info, eb_root, time_seq); | 1429 | tm = __tree_mod_log_oldest_root(root->fs_info, eb_root, time_seq); |
@@ -1444,8 +1442,7 @@ get_old_root(struct btrfs_root *root, u64 time_seq) | |||
1444 | if (old_root && tm && tm->op != MOD_LOG_KEY_REMOVE_WHILE_FREEING) { | 1442 | if (old_root && tm && tm->op != MOD_LOG_KEY_REMOVE_WHILE_FREEING) { |
1445 | btrfs_tree_read_unlock(eb_root); | 1443 | btrfs_tree_read_unlock(eb_root); |
1446 | free_extent_buffer(eb_root); | 1444 | free_extent_buffer(eb_root); |
1447 | blocksize = btrfs_level_size(root, old_root->level); | 1445 | old = read_tree_block(root, logical, 0); |
1448 | old = read_tree_block(root, logical, blocksize, 0); | ||
1449 | if (WARN_ON(!old || !extent_buffer_uptodate(old))) { | 1446 | if (WARN_ON(!old || !extent_buffer_uptodate(old))) { |
1450 | free_extent_buffer(old); | 1447 | free_extent_buffer(old); |
1451 | btrfs_warn(root->fs_info, | 1448 | btrfs_warn(root->fs_info, |
@@ -1506,10 +1503,9 @@ static inline int should_cow_block(struct btrfs_trans_handle *trans, | |||
1506 | struct btrfs_root *root, | 1503 | struct btrfs_root *root, |
1507 | struct extent_buffer *buf) | 1504 | struct extent_buffer *buf) |
1508 | { | 1505 | { |
1509 | #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS | 1506 | if (btrfs_test_is_dummy_root(root)) |
1510 | if (unlikely(test_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state))) | ||
1511 | return 0; | 1507 | return 0; |
1512 | #endif | 1508 | |
1513 | /* ensure we can see the force_cow */ | 1509 | /* ensure we can see the force_cow */ |
1514 | smp_rmb(); | 1510 | smp_rmb(); |
1515 | 1511 | ||
@@ -1651,7 +1647,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, | |||
1651 | WARN_ON(trans->transid != root->fs_info->generation); | 1647 | WARN_ON(trans->transid != root->fs_info->generation); |
1652 | 1648 | ||
1653 | parent_nritems = btrfs_header_nritems(parent); | 1649 | parent_nritems = btrfs_header_nritems(parent); |
1654 | blocksize = btrfs_level_size(root, parent_level - 1); | 1650 | blocksize = root->nodesize; |
1655 | end_slot = parent_nritems; | 1651 | end_slot = parent_nritems; |
1656 | 1652 | ||
1657 | if (parent_nritems == 1) | 1653 | if (parent_nritems == 1) |
@@ -1685,15 +1681,14 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, | |||
1685 | continue; | 1681 | continue; |
1686 | } | 1682 | } |
1687 | 1683 | ||
1688 | cur = btrfs_find_tree_block(root, blocknr, blocksize); | 1684 | cur = btrfs_find_tree_block(root, blocknr); |
1689 | if (cur) | 1685 | if (cur) |
1690 | uptodate = btrfs_buffer_uptodate(cur, gen, 0); | 1686 | uptodate = btrfs_buffer_uptodate(cur, gen, 0); |
1691 | else | 1687 | else |
1692 | uptodate = 0; | 1688 | uptodate = 0; |
1693 | if (!cur || !uptodate) { | 1689 | if (!cur || !uptodate) { |
1694 | if (!cur) { | 1690 | if (!cur) { |
1695 | cur = read_tree_block(root, blocknr, | 1691 | cur = read_tree_block(root, blocknr, gen); |
1696 | blocksize, gen); | ||
1697 | if (!cur || !extent_buffer_uptodate(cur)) { | 1692 | if (!cur || !extent_buffer_uptodate(cur)) { |
1698 | free_extent_buffer(cur); | 1693 | free_extent_buffer(cur); |
1699 | return -EIO; | 1694 | return -EIO; |
@@ -1872,7 +1867,6 @@ static noinline struct extent_buffer *read_node_slot(struct btrfs_root *root, | |||
1872 | BUG_ON(level == 0); | 1867 | BUG_ON(level == 0); |
1873 | 1868 | ||
1874 | eb = read_tree_block(root, btrfs_node_blockptr(parent, slot), | 1869 | eb = read_tree_block(root, btrfs_node_blockptr(parent, slot), |
1875 | btrfs_level_size(root, level - 1), | ||
1876 | btrfs_node_ptr_generation(parent, slot)); | 1870 | btrfs_node_ptr_generation(parent, slot)); |
1877 | if (eb && !extent_buffer_uptodate(eb)) { | 1871 | if (eb && !extent_buffer_uptodate(eb)) { |
1878 | free_extent_buffer(eb); | 1872 | free_extent_buffer(eb); |
@@ -2267,8 +2261,8 @@ static void reada_for_search(struct btrfs_root *root, | |||
2267 | node = path->nodes[level]; | 2261 | node = path->nodes[level]; |
2268 | 2262 | ||
2269 | search = btrfs_node_blockptr(node, slot); | 2263 | search = btrfs_node_blockptr(node, slot); |
2270 | blocksize = btrfs_level_size(root, level - 1); | 2264 | blocksize = root->nodesize; |
2271 | eb = btrfs_find_tree_block(root, search, blocksize); | 2265 | eb = btrfs_find_tree_block(root, search); |
2272 | if (eb) { | 2266 | if (eb) { |
2273 | free_extent_buffer(eb); | 2267 | free_extent_buffer(eb); |
2274 | return; | 2268 | return; |
@@ -2298,7 +2292,7 @@ static void reada_for_search(struct btrfs_root *root, | |||
2298 | if ((search <= target && target - search <= 65536) || | 2292 | if ((search <= target && target - search <= 65536) || |
2299 | (search > target && search - target <= 65536)) { | 2293 | (search > target && search - target <= 65536)) { |
2300 | gen = btrfs_node_ptr_generation(node, nr); | 2294 | gen = btrfs_node_ptr_generation(node, nr); |
2301 | readahead_tree_block(root, search, blocksize, gen); | 2295 | readahead_tree_block(root, search, blocksize); |
2302 | nread += blocksize; | 2296 | nread += blocksize; |
2303 | } | 2297 | } |
2304 | nscan++; | 2298 | nscan++; |
@@ -2325,12 +2319,12 @@ static noinline void reada_for_balance(struct btrfs_root *root, | |||
2325 | 2319 | ||
2326 | nritems = btrfs_header_nritems(parent); | 2320 | nritems = btrfs_header_nritems(parent); |
2327 | slot = path->slots[level + 1]; | 2321 | slot = path->slots[level + 1]; |
2328 | blocksize = btrfs_level_size(root, level); | 2322 | blocksize = root->nodesize; |
2329 | 2323 | ||
2330 | if (slot > 0) { | 2324 | if (slot > 0) { |
2331 | block1 = btrfs_node_blockptr(parent, slot - 1); | 2325 | block1 = btrfs_node_blockptr(parent, slot - 1); |
2332 | gen = btrfs_node_ptr_generation(parent, slot - 1); | 2326 | gen = btrfs_node_ptr_generation(parent, slot - 1); |
2333 | eb = btrfs_find_tree_block(root, block1, blocksize); | 2327 | eb = btrfs_find_tree_block(root, block1); |
2334 | /* | 2328 | /* |
2335 | * if we get -eagain from btrfs_buffer_uptodate, we | 2329 | * if we get -eagain from btrfs_buffer_uptodate, we |
2336 | * don't want to return eagain here. That will loop | 2330 | * don't want to return eagain here. That will loop |
@@ -2343,16 +2337,16 @@ static noinline void reada_for_balance(struct btrfs_root *root, | |||
2343 | if (slot + 1 < nritems) { | 2337 | if (slot + 1 < nritems) { |
2344 | block2 = btrfs_node_blockptr(parent, slot + 1); | 2338 | block2 = btrfs_node_blockptr(parent, slot + 1); |
2345 | gen = btrfs_node_ptr_generation(parent, slot + 1); | 2339 | gen = btrfs_node_ptr_generation(parent, slot + 1); |
2346 | eb = btrfs_find_tree_block(root, block2, blocksize); | 2340 | eb = btrfs_find_tree_block(root, block2); |
2347 | if (eb && btrfs_buffer_uptodate(eb, gen, 1) != 0) | 2341 | if (eb && btrfs_buffer_uptodate(eb, gen, 1) != 0) |
2348 | block2 = 0; | 2342 | block2 = 0; |
2349 | free_extent_buffer(eb); | 2343 | free_extent_buffer(eb); |
2350 | } | 2344 | } |
2351 | 2345 | ||
2352 | if (block1) | 2346 | if (block1) |
2353 | readahead_tree_block(root, block1, blocksize, 0); | 2347 | readahead_tree_block(root, block1, blocksize); |
2354 | if (block2) | 2348 | if (block2) |
2355 | readahead_tree_block(root, block2, blocksize, 0); | 2349 | readahead_tree_block(root, block2, blocksize); |
2356 | } | 2350 | } |
2357 | 2351 | ||
2358 | 2352 | ||
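For context, reada_for_balance() keeps its shape here and only loses the level-dependent size: both siblings of the slot being balanced are prefetched, now always at nodesize granularity. A hedged sketch of that neighbour-prefetch pattern (the array and the prefetch stub stand in for the node's block pointers and readahead_tree_block()):

#include <stdint.h>
#include <stdio.h>

static void prefetch(uint64_t bytenr)
{
	printf("readahead block at %llu\n", (unsigned long long)bytenr);
}

/* Prefetch the two siblings that a balance at 'slot' may touch. */
static void reada_neighbours(const uint64_t *blockptr, int nritems, int slot)
{
	if (slot > 0)
		prefetch(blockptr[slot - 1]);
	if (slot + 1 < nritems)
		prefetch(blockptr[slot + 1]);
}

int main(void)
{
	uint64_t ptrs[] = { 4096, 8192, 12288 };

	reada_neighbours(ptrs, 3, 1);
	return 0;
}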
@@ -2454,16 +2448,14 @@ read_block_for_search(struct btrfs_trans_handle *trans, | |||
2454 | { | 2448 | { |
2455 | u64 blocknr; | 2449 | u64 blocknr; |
2456 | u64 gen; | 2450 | u64 gen; |
2457 | u32 blocksize; | ||
2458 | struct extent_buffer *b = *eb_ret; | 2451 | struct extent_buffer *b = *eb_ret; |
2459 | struct extent_buffer *tmp; | 2452 | struct extent_buffer *tmp; |
2460 | int ret; | 2453 | int ret; |
2461 | 2454 | ||
2462 | blocknr = btrfs_node_blockptr(b, slot); | 2455 | blocknr = btrfs_node_blockptr(b, slot); |
2463 | gen = btrfs_node_ptr_generation(b, slot); | 2456 | gen = btrfs_node_ptr_generation(b, slot); |
2464 | blocksize = btrfs_level_size(root, level - 1); | ||
2465 | 2457 | ||
2466 | tmp = btrfs_find_tree_block(root, blocknr, blocksize); | 2458 | tmp = btrfs_find_tree_block(root, blocknr); |
2467 | if (tmp) { | 2459 | if (tmp) { |
2468 | /* first we do an atomic uptodate check */ | 2460 | /* first we do an atomic uptodate check */ |
2469 | if (btrfs_buffer_uptodate(tmp, gen, 1) > 0) { | 2461 | if (btrfs_buffer_uptodate(tmp, gen, 1) > 0) { |
@@ -2507,7 +2499,7 @@ read_block_for_search(struct btrfs_trans_handle *trans, | |||
2507 | btrfs_release_path(p); | 2499 | btrfs_release_path(p); |
2508 | 2500 | ||
2509 | ret = -EAGAIN; | 2501 | ret = -EAGAIN; |
2510 | tmp = read_tree_block(root, blocknr, blocksize, 0); | 2502 | tmp = read_tree_block(root, blocknr, 0); |
2511 | if (tmp) { | 2503 | if (tmp) { |
2512 | /* | 2504 | /* |
2513 | * If the read above didn't mark this buffer up to date, | 2505 | * If the read above didn't mark this buffer up to date, |
@@ -2792,8 +2784,6 @@ again: | |||
2792 | if (!should_cow_block(trans, root, b)) | 2784 | if (!should_cow_block(trans, root, b)) |
2793 | goto cow_done; | 2785 | goto cow_done; |
2794 | 2786 | ||
2795 | btrfs_set_path_blocking(p); | ||
2796 | |||
2797 | /* | 2787 | /* |
2798 | * must have write locks on this node and the | 2788 | * must have write locks on this node and the |
2799 | * parent | 2789 | * parent |
@@ -2807,6 +2797,7 @@ again: | |||
2807 | goto again; | 2797 | goto again; |
2808 | } | 2798 | } |
2809 | 2799 | ||
2800 | btrfs_set_path_blocking(p); | ||
2810 | err = btrfs_cow_block(trans, root, b, | 2801 | err = btrfs_cow_block(trans, root, b, |
2811 | p->nodes[level + 1], | 2802 | p->nodes[level + 1], |
2812 | p->slots[level + 1], &b); | 2803 | p->slots[level + 1], &b); |
@@ -3362,9 +3353,8 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans, | |||
3362 | else | 3353 | else |
3363 | btrfs_node_key(lower, &lower_key, 0); | 3354 | btrfs_node_key(lower, &lower_key, 0); |
3364 | 3355 | ||
3365 | c = btrfs_alloc_free_block(trans, root, root->nodesize, 0, | 3356 | c = btrfs_alloc_tree_block(trans, root, 0, root->root_key.objectid, |
3366 | root->root_key.objectid, &lower_key, | 3357 | &lower_key, level, root->node->start, 0); |
3367 | level, root->node->start, 0); | ||
3368 | if (IS_ERR(c)) | 3358 | if (IS_ERR(c)) |
3369 | return PTR_ERR(c); | 3359 | return PTR_ERR(c); |
3370 | 3360 | ||
@@ -3502,9 +3492,8 @@ static noinline int split_node(struct btrfs_trans_handle *trans, | |||
3502 | mid = (c_nritems + 1) / 2; | 3492 | mid = (c_nritems + 1) / 2; |
3503 | btrfs_node_key(c, &disk_key, mid); | 3493 | btrfs_node_key(c, &disk_key, mid); |
3504 | 3494 | ||
3505 | split = btrfs_alloc_free_block(trans, root, root->nodesize, 0, | 3495 | split = btrfs_alloc_tree_block(trans, root, 0, root->root_key.objectid, |
3506 | root->root_key.objectid, | 3496 | &disk_key, level, c->start, 0); |
3507 | &disk_key, level, c->start, 0); | ||
3508 | if (IS_ERR(split)) | 3497 | if (IS_ERR(split)) |
3509 | return PTR_ERR(split); | 3498 | return PTR_ERR(split); |
3510 | 3499 | ||
@@ -4282,13 +4271,12 @@ again: | |||
4282 | else | 4271 | else |
4283 | btrfs_item_key(l, &disk_key, mid); | 4272 | btrfs_item_key(l, &disk_key, mid); |
4284 | 4273 | ||
4285 | right = btrfs_alloc_free_block(trans, root, root->leafsize, 0, | 4274 | right = btrfs_alloc_tree_block(trans, root, 0, root->root_key.objectid, |
4286 | root->root_key.objectid, | 4275 | &disk_key, 0, l->start, 0); |
4287 | &disk_key, 0, l->start, 0); | ||
4288 | if (IS_ERR(right)) | 4276 | if (IS_ERR(right)) |
4289 | return PTR_ERR(right); | 4277 | return PTR_ERR(right); |
4290 | 4278 | ||
4291 | root_add_used(root, root->leafsize); | 4279 | root_add_used(root, root->nodesize); |
4292 | 4280 | ||
4293 | memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header)); | 4281 | memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header)); |
4294 | btrfs_set_header_bytenr(right, right->start); | 4282 | btrfs_set_header_bytenr(right, right->start); |
@@ -4626,8 +4614,7 @@ void btrfs_truncate_item(struct btrfs_root *root, struct btrfs_path *path, | |||
4626 | ptr = btrfs_item_ptr_offset(leaf, slot); | 4614 | ptr = btrfs_item_ptr_offset(leaf, slot); |
4627 | memmove_extent_buffer(leaf, ptr, | 4615 | memmove_extent_buffer(leaf, ptr, |
4628 | (unsigned long)fi, | 4616 | (unsigned long)fi, |
4629 | offsetof(struct btrfs_file_extent_item, | 4617 | BTRFS_FILE_EXTENT_INLINE_DATA_START); |
4630 | disk_bytenr)); | ||
4631 | } | 4618 | } |
4632 | } | 4619 | } |
4633 | 4620 | ||
@@ -4738,6 +4725,12 @@ void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path, | |||
4738 | int slot; | 4725 | int slot; |
4739 | struct btrfs_map_token token; | 4726 | struct btrfs_map_token token; |
4740 | 4727 | ||
4728 | if (path->slots[0] == 0) { | ||
4729 | btrfs_cpu_key_to_disk(&disk_key, cpu_key); | ||
4730 | fixup_low_keys(root, path, &disk_key, 1); | ||
4731 | } | ||
4732 | btrfs_unlock_up_safe(path, 1); | ||
4733 | |||
4741 | btrfs_init_map_token(&token); | 4734 | btrfs_init_map_token(&token); |
4742 | 4735 | ||
4743 | leaf = path->nodes[0]; | 4736 | leaf = path->nodes[0]; |
@@ -4798,12 +4791,6 @@ void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path, | |||
4798 | } | 4791 | } |
4799 | 4792 | ||
4800 | btrfs_set_header_nritems(leaf, nritems + nr); | 4793 | btrfs_set_header_nritems(leaf, nritems + nr); |
4801 | |||
4802 | if (slot == 0) { | ||
4803 | btrfs_cpu_key_to_disk(&disk_key, cpu_key); | ||
4804 | fixup_low_keys(root, path, &disk_key, 1); | ||
4805 | } | ||
4806 | btrfs_unlock_up_safe(path, 1); | ||
4807 | btrfs_mark_buffer_dirty(leaf); | 4794 | btrfs_mark_buffer_dirty(leaf); |
4808 | 4795 | ||
4809 | if (btrfs_leaf_free_space(root, leaf) < 0) { | 4796 | if (btrfs_leaf_free_space(root, leaf) < 0) { |
@@ -5145,8 +5132,9 @@ int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key, | |||
5145 | u32 nritems; | 5132 | u32 nritems; |
5146 | int level; | 5133 | int level; |
5147 | int ret = 1; | 5134 | int ret = 1; |
5135 | int keep_locks = path->keep_locks; | ||
5148 | 5136 | ||
5149 | WARN_ON(!path->keep_locks); | 5137 | path->keep_locks = 1; |
5150 | again: | 5138 | again: |
5151 | cur = btrfs_read_lock_root_node(root); | 5139 | cur = btrfs_read_lock_root_node(root); |
5152 | level = btrfs_header_level(cur); | 5140 | level = btrfs_header_level(cur); |
@@ -5210,7 +5198,6 @@ find_next_key: | |||
5210 | path->slots[level] = slot; | 5198 | path->slots[level] = slot; |
5211 | if (level == path->lowest_level) { | 5199 | if (level == path->lowest_level) { |
5212 | ret = 0; | 5200 | ret = 0; |
5213 | unlock_up(path, level, 1, 0, NULL); | ||
5214 | goto out; | 5201 | goto out; |
5215 | } | 5202 | } |
5216 | btrfs_set_path_blocking(path); | 5203 | btrfs_set_path_blocking(path); |
@@ -5225,9 +5212,12 @@ find_next_key: | |||
5225 | btrfs_clear_path_blocking(path, NULL, 0); | 5212 | btrfs_clear_path_blocking(path, NULL, 0); |
5226 | } | 5213 | } |
5227 | out: | 5214 | out: |
5228 | if (ret == 0) | 5215 | path->keep_locks = keep_locks; |
5216 | if (ret == 0) { | ||
5217 | btrfs_unlock_up_safe(path, path->lowest_level + 1); | ||
5218 | btrfs_set_path_blocking(path); | ||
5229 | memcpy(min_key, &found_key, sizeof(found_key)); | 5219 | memcpy(min_key, &found_key, sizeof(found_key)); |
5230 | btrfs_set_path_blocking(path); | 5220 | } |
5231 | return ret; | 5221 | return ret; |
5232 | } | 5222 | } |
5233 | 5223 | ||
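With this change btrfs_search_forward() manages path->keep_locks itself: it saves the caller's setting, forces it on for the walk, and restores it before returning, replacing the old WARN_ON that required callers to set it up. A plain-C sketch of that save/force/restore shape (struct and names are stand-ins, not the kernel's):

#include <assert.h>

struct path { int keep_locks; };

static int search_forward(struct path *p)
{
	int keep_locks = p->keep_locks;	/* save the caller's setting */

	p->keep_locks = 1;		/* force it for the duration of the walk */
	/* ... tree walk that relies on keep_locks ... */
	p->keep_locks = keep_locks;	/* restore before returning */
	return 0;
}

int main(void)
{
	struct path p = { .keep_locks = 0 };

	search_forward(&p);
	assert(p.keep_locks == 0);	/* the caller's value survives */
	return 0;
}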
@@ -5375,7 +5365,7 @@ int btrfs_compare_trees(struct btrfs_root *left_root, | |||
5375 | goto out; | 5365 | goto out; |
5376 | } | 5366 | } |
5377 | 5367 | ||
5378 | tmp_buf = kmalloc(left_root->leafsize, GFP_NOFS); | 5368 | tmp_buf = kmalloc(left_root->nodesize, GFP_NOFS); |
5379 | if (!tmp_buf) { | 5369 | if (!tmp_buf) { |
5380 | ret = -ENOMEM; | 5370 | ret = -ENOMEM; |
5381 | goto out; | 5371 | goto out; |
@@ -5520,18 +5510,18 @@ int btrfs_compare_trees(struct btrfs_root *left_root, | |||
5520 | goto out; | 5510 | goto out; |
5521 | advance_right = ADVANCE; | 5511 | advance_right = ADVANCE; |
5522 | } else { | 5512 | } else { |
5523 | enum btrfs_compare_tree_result cmp; | 5513 | enum btrfs_compare_tree_result result; |
5524 | 5514 | ||
5525 | WARN_ON(!extent_buffer_uptodate(left_path->nodes[0])); | 5515 | WARN_ON(!extent_buffer_uptodate(left_path->nodes[0])); |
5526 | ret = tree_compare_item(left_root, left_path, | 5516 | ret = tree_compare_item(left_root, left_path, |
5527 | right_path, tmp_buf); | 5517 | right_path, tmp_buf); |
5528 | if (ret) | 5518 | if (ret) |
5529 | cmp = BTRFS_COMPARE_TREE_CHANGED; | 5519 | result = BTRFS_COMPARE_TREE_CHANGED; |
5530 | else | 5520 | else |
5531 | cmp = BTRFS_COMPARE_TREE_SAME; | 5521 | result = BTRFS_COMPARE_TREE_SAME; |
5532 | ret = changed_cb(left_root, right_root, | 5522 | ret = changed_cb(left_root, right_root, |
5533 | left_path, right_path, | 5523 | left_path, right_path, |
5534 | &left_key, cmp, ctx); | 5524 | &left_key, result, ctx); |
5535 | if (ret < 0) | 5525 | if (ret < 0) |
5536 | goto out; | 5526 | goto out; |
5537 | advance_left = ADVANCE; | 5527 | advance_left = ADVANCE; |
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 8e29b614fe93..d557264ee974 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
@@ -34,6 +34,7 @@ | |||
34 | #include <linux/pagemap.h> | 34 | #include <linux/pagemap.h> |
35 | #include <linux/btrfs.h> | 35 | #include <linux/btrfs.h> |
36 | #include <linux/workqueue.h> | 36 | #include <linux/workqueue.h> |
37 | #include <linux/security.h> | ||
37 | #include "extent_io.h" | 38 | #include "extent_io.h" |
38 | #include "extent_map.h" | 39 | #include "extent_map.h" |
39 | #include "async-thread.h" | 40 | #include "async-thread.h" |
@@ -62,13 +63,6 @@ struct btrfs_ordered_sum; | |||
62 | 63 | ||
63 | #define BTRFS_COMPAT_EXTENT_TREE_V0 | 64 | #define BTRFS_COMPAT_EXTENT_TREE_V0 |
64 | 65 | ||
65 | /* | ||
66 | * files bigger than this get some pre-flushing when they are added | ||
67 | * to the ordered operations list. That way we limit the total | ||
68 | * work done by the commit | ||
69 | */ | ||
70 | #define BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT (8 * 1024 * 1024) | ||
71 | |||
72 | /* holds pointers to all of the tree roots */ | 66 | /* holds pointers to all of the tree roots */ |
73 | #define BTRFS_ROOT_TREE_OBJECTID 1ULL | 67 | #define BTRFS_ROOT_TREE_OBJECTID 1ULL |
74 | 68 | ||
@@ -391,10 +385,12 @@ struct btrfs_header { | |||
391 | sizeof(struct btrfs_header)) / \ | 385 | sizeof(struct btrfs_header)) / \ |
392 | sizeof(struct btrfs_key_ptr)) | 386 | sizeof(struct btrfs_key_ptr)) |
393 | #define __BTRFS_LEAF_DATA_SIZE(bs) ((bs) - sizeof(struct btrfs_header)) | 387 | #define __BTRFS_LEAF_DATA_SIZE(bs) ((bs) - sizeof(struct btrfs_header)) |
394 | #define BTRFS_LEAF_DATA_SIZE(r) (__BTRFS_LEAF_DATA_SIZE(r->leafsize)) | 388 | #define BTRFS_LEAF_DATA_SIZE(r) (__BTRFS_LEAF_DATA_SIZE(r->nodesize)) |
389 | #define BTRFS_FILE_EXTENT_INLINE_DATA_START \ | ||
390 | (offsetof(struct btrfs_file_extent_item, disk_bytenr)) | ||
395 | #define BTRFS_MAX_INLINE_DATA_SIZE(r) (BTRFS_LEAF_DATA_SIZE(r) - \ | 391 | #define BTRFS_MAX_INLINE_DATA_SIZE(r) (BTRFS_LEAF_DATA_SIZE(r) - \ |
396 | sizeof(struct btrfs_item) - \ | 392 | sizeof(struct btrfs_item) - \ |
397 | sizeof(struct btrfs_file_extent_item)) | 393 | BTRFS_FILE_EXTENT_INLINE_DATA_START) |
398 | #define BTRFS_MAX_XATTR_SIZE(r) (BTRFS_LEAF_DATA_SIZE(r) - \ | 394 | #define BTRFS_MAX_XATTR_SIZE(r) (BTRFS_LEAF_DATA_SIZE(r) - \ |
399 | sizeof(struct btrfs_item) -\ | 395 | sizeof(struct btrfs_item) -\ |
400 | sizeof(struct btrfs_dir_item)) | 396 | sizeof(struct btrfs_dir_item)) |
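The new BTRFS_FILE_EXTENT_INLINE_DATA_START macro names the offset that several helpers previously open-coded as offsetof(..., disk_bytenr). A compilable userspace sketch of the arithmetic behind these macros, with the on-disk structs mirrored by hand — the field layout is abridged from this header, so treat the concrete numbers (101-byte header, 21-byte inline-data offset) as my reading of the format rather than authoritative:

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

struct btrfs_header {			/* on-disk tree block header */
	uint8_t		csum[32];
	uint8_t		fsid[16];
	uint64_t	bytenr;
	uint64_t	flags;
	uint8_t		chunk_tree_uuid[16];
	uint64_t	generation;
	uint64_t	owner;
	uint32_t	nritems;
	uint8_t		level;
} __attribute__((packed));

struct btrfs_file_extent_item {		/* fields before disk_bytenr only */
	uint64_t	generation;
	uint64_t	ram_bytes;
	uint8_t		compression;
	uint8_t		encryption;
	uint16_t	other_encoding;
	uint8_t		type;
	uint64_t	disk_bytenr;	/* inline data begins here */
} __attribute__((packed));

int main(void)
{
	size_t nodesize = 16384;
	size_t leaf_data = nodesize - sizeof(struct btrfs_header);
	size_t inline_start = offsetof(struct btrfs_file_extent_item,
				       disk_bytenr);

	printf("leaf data size:    %zu\n", leaf_data);		/* 16283 */
	printf("inline data start: %zu\n", inline_start);	/* 21 */
	return 0;
}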
@@ -474,7 +470,7 @@ struct btrfs_super_block { | |||
474 | __le64 num_devices; | 470 | __le64 num_devices; |
475 | __le32 sectorsize; | 471 | __le32 sectorsize; |
476 | __le32 nodesize; | 472 | __le32 nodesize; |
477 | __le32 leafsize; | 473 | __le32 __unused_leafsize; |
478 | __le32 stripesize; | 474 | __le32 stripesize; |
479 | __le32 sys_chunk_array_size; | 475 | __le32 sys_chunk_array_size; |
480 | __le64 chunk_root_generation; | 476 | __le64 chunk_root_generation; |
@@ -903,6 +899,8 @@ struct btrfs_file_extent_item { | |||
903 | /* | 899 | /* |
904 | * disk space consumed by the extent, checksum blocks are included | 900 | * disk space consumed by the extent, checksum blocks are included |
905 | * in these numbers | 901 | * in these numbers |
902 | * | ||
903 | * At this offset in the structure, the inline extent data starts. | ||
906 | */ | 904 | */ |
907 | __le64 disk_bytenr; | 905 | __le64 disk_bytenr; |
908 | __le64 disk_num_bytes; | 906 | __le64 disk_num_bytes; |
@@ -1305,8 +1303,8 @@ struct btrfs_block_group_cache { | |||
1305 | */ | 1303 | */ |
1306 | struct list_head cluster_list; | 1304 | struct list_head cluster_list; |
1307 | 1305 | ||
1308 | /* For delayed block group creation */ | 1306 | /* For delayed block group creation or deletion of empty block groups */ |
1309 | struct list_head new_bg_list; | 1307 | struct list_head bg_list; |
1310 | }; | 1308 | }; |
1311 | 1309 | ||
1312 | /* delayed seq elem */ | 1310 | /* delayed seq elem */ |
@@ -1545,6 +1543,7 @@ struct btrfs_fs_info { | |||
1545 | struct btrfs_workqueue *endio_workers; | 1543 | struct btrfs_workqueue *endio_workers; |
1546 | struct btrfs_workqueue *endio_meta_workers; | 1544 | struct btrfs_workqueue *endio_meta_workers; |
1547 | struct btrfs_workqueue *endio_raid56_workers; | 1545 | struct btrfs_workqueue *endio_raid56_workers; |
1546 | struct btrfs_workqueue *endio_repair_workers; | ||
1548 | struct btrfs_workqueue *rmw_workers; | 1547 | struct btrfs_workqueue *rmw_workers; |
1549 | struct btrfs_workqueue *endio_meta_write_workers; | 1548 | struct btrfs_workqueue *endio_meta_write_workers; |
1550 | struct btrfs_workqueue *endio_write_workers; | 1549 | struct btrfs_workqueue *endio_write_workers; |
@@ -1574,6 +1573,7 @@ struct btrfs_fs_info { | |||
1574 | int do_barriers; | 1573 | int do_barriers; |
1575 | int closing; | 1574 | int closing; |
1576 | int log_root_recovering; | 1575 | int log_root_recovering; |
1576 | int open; | ||
1577 | 1577 | ||
1578 | u64 total_pinned; | 1578 | u64 total_pinned; |
1579 | 1579 | ||
@@ -1723,6 +1723,12 @@ struct btrfs_fs_info { | |||
1723 | 1723 | ||
1724 | /* Used to reclaim the metadata space in the background. */ | 1724 | /* Used to reclaim the metadata space in the background. */ |
1725 | struct work_struct async_reclaim_work; | 1725 | struct work_struct async_reclaim_work; |
1726 | |||
1727 | spinlock_t unused_bgs_lock; | ||
1728 | struct list_head unused_bgs; | ||
1729 | |||
1730 | /* For btrfs to record security options */ | ||
1731 | struct security_mnt_opts security_opts; | ||
1726 | }; | 1732 | }; |
1727 | 1733 | ||
1728 | struct btrfs_subvolume_writers { | 1734 | struct btrfs_subvolume_writers { |
@@ -1776,12 +1782,12 @@ struct btrfs_root { | |||
1776 | 1782 | ||
1777 | /* free ino cache stuff */ | 1783 | /* free ino cache stuff */ |
1778 | struct btrfs_free_space_ctl *free_ino_ctl; | 1784 | struct btrfs_free_space_ctl *free_ino_ctl; |
1779 | enum btrfs_caching_type cached; | 1785 | enum btrfs_caching_type ino_cache_state; |
1780 | spinlock_t cache_lock; | 1786 | spinlock_t ino_cache_lock; |
1781 | wait_queue_head_t cache_wait; | 1787 | wait_queue_head_t ino_cache_wait; |
1782 | struct btrfs_free_space_ctl *free_ino_pinned; | 1788 | struct btrfs_free_space_ctl *free_ino_pinned; |
1783 | u64 cache_progress; | 1789 | u64 ino_cache_progress; |
1784 | struct inode *cache_inode; | 1790 | struct inode *ino_cache_inode; |
1785 | 1791 | ||
1786 | struct mutex log_mutex; | 1792 | struct mutex log_mutex; |
1787 | wait_queue_head_t log_writer_wait; | 1793 | wait_queue_head_t log_writer_wait; |
@@ -1806,18 +1812,14 @@ struct btrfs_root { | |||
1806 | /* node allocations are done in nodesize units */ | 1812 | /* node allocations are done in nodesize units */ |
1807 | u32 nodesize; | 1813 | u32 nodesize; |
1808 | 1814 | ||
1809 | /* leaf allocations are done in leafsize units */ | ||
1810 | u32 leafsize; | ||
1811 | |||
1812 | u32 stripesize; | 1815 | u32 stripesize; |
1813 | 1816 | ||
1814 | u32 type; | 1817 | u32 type; |
1815 | 1818 | ||
1816 | u64 highest_objectid; | 1819 | u64 highest_objectid; |
1817 | 1820 | ||
1818 | #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS | 1821 | /* only used when CONFIG_BTRFS_FS_RUN_SANITY_TESTS is enabled */ |
1819 | u64 alloc_bytenr; | 1822 | u64 alloc_bytenr; |
1820 | #endif | ||
1821 | 1823 | ||
1822 | u64 defrag_trans_start; | 1824 | u64 defrag_trans_start; |
1823 | struct btrfs_key defrag_progress; | 1825 | struct btrfs_key defrag_progress; |
@@ -2094,6 +2096,7 @@ struct btrfs_ioctl_defrag_range_args { | |||
2094 | #define BTRFS_MOUNT_CHANGE_INODE_CACHE (1 << 24) | 2096 | #define BTRFS_MOUNT_CHANGE_INODE_CACHE (1 << 24) |
2095 | 2097 | ||
2096 | #define BTRFS_DEFAULT_COMMIT_INTERVAL (30) | 2098 | #define BTRFS_DEFAULT_COMMIT_INTERVAL (30) |
2099 | #define BTRFS_DEFAULT_MAX_INLINE (8192) | ||
2097 | 2100 | ||
2098 | #define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) | 2101 | #define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) |
2099 | #define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) | 2102 | #define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) |
@@ -2995,8 +2998,6 @@ BTRFS_SETGET_STACK_FUNCS(super_sectorsize, struct btrfs_super_block, | |||
2995 | sectorsize, 32); | 2998 | sectorsize, 32); |
2996 | BTRFS_SETGET_STACK_FUNCS(super_nodesize, struct btrfs_super_block, | 2999 | BTRFS_SETGET_STACK_FUNCS(super_nodesize, struct btrfs_super_block, |
2997 | nodesize, 32); | 3000 | nodesize, 32); |
2998 | BTRFS_SETGET_STACK_FUNCS(super_leafsize, struct btrfs_super_block, | ||
2999 | leafsize, 32); | ||
3000 | BTRFS_SETGET_STACK_FUNCS(super_stripesize, struct btrfs_super_block, | 3001 | BTRFS_SETGET_STACK_FUNCS(super_stripesize, struct btrfs_super_block, |
3001 | stripesize, 32); | 3002 | stripesize, 32); |
3002 | BTRFS_SETGET_STACK_FUNCS(super_root_dir, struct btrfs_super_block, | 3003 | BTRFS_SETGET_STACK_FUNCS(super_root_dir, struct btrfs_super_block, |
@@ -3049,14 +3050,12 @@ BTRFS_SETGET_STACK_FUNCS(stack_file_extent_compression, | |||
3049 | static inline unsigned long | 3050 | static inline unsigned long |
3050 | btrfs_file_extent_inline_start(struct btrfs_file_extent_item *e) | 3051 | btrfs_file_extent_inline_start(struct btrfs_file_extent_item *e) |
3051 | { | 3052 | { |
3052 | unsigned long offset = (unsigned long)e; | 3053 | return (unsigned long)e + BTRFS_FILE_EXTENT_INLINE_DATA_START; |
3053 | offset += offsetof(struct btrfs_file_extent_item, disk_bytenr); | ||
3054 | return offset; | ||
3055 | } | 3054 | } |
3056 | 3055 | ||
3057 | static inline u32 btrfs_file_extent_calc_inline_size(u32 datasize) | 3056 | static inline u32 btrfs_file_extent_calc_inline_size(u32 datasize) |
3058 | { | 3057 | { |
3059 | return offsetof(struct btrfs_file_extent_item, disk_bytenr) + datasize; | 3058 | return BTRFS_FILE_EXTENT_INLINE_DATA_START + datasize; |
3060 | } | 3059 | } |
3061 | 3060 | ||
3062 | BTRFS_SETGET_FUNCS(file_extent_disk_bytenr, struct btrfs_file_extent_item, | 3061 | BTRFS_SETGET_FUNCS(file_extent_disk_bytenr, struct btrfs_file_extent_item, |
@@ -3086,9 +3085,7 @@ BTRFS_SETGET_FUNCS(file_extent_other_encoding, struct btrfs_file_extent_item, | |||
3086 | static inline u32 btrfs_file_extent_inline_item_len(struct extent_buffer *eb, | 3085 | static inline u32 btrfs_file_extent_inline_item_len(struct extent_buffer *eb, |
3087 | struct btrfs_item *e) | 3086 | struct btrfs_item *e) |
3088 | { | 3087 | { |
3089 | unsigned long offset; | 3088 | return btrfs_item_size(eb, e) - BTRFS_FILE_EXTENT_INLINE_DATA_START; |
3090 | offset = offsetof(struct btrfs_file_extent_item, disk_bytenr); | ||
3091 | return btrfs_item_size(eb, e) - offset; | ||
3092 | } | 3089 | } |
3093 | 3090 | ||
3094 | /* this returns the number of file bytes represented by the inline item. | 3091 | /* this returns the number of file bytes represented by the inline item. |
@@ -3232,13 +3229,6 @@ static inline struct btrfs_fs_info *btrfs_sb(struct super_block *sb) | |||
3232 | return sb->s_fs_info; | 3229 | return sb->s_fs_info; |
3233 | } | 3230 | } |
3234 | 3231 | ||
3235 | static inline u32 btrfs_level_size(struct btrfs_root *root, int level) | ||
3236 | { | ||
3237 | if (level == 0) | ||
3238 | return root->leafsize; | ||
3239 | return root->nodesize; | ||
3240 | } | ||
3241 | |||
3242 | /* helper function to cast into the data area of the leaf. */ | 3232 | /* helper function to cast into the data area of the leaf. */ |
3243 | #define btrfs_item_ptr(leaf, slot, type) \ | 3233 | #define btrfs_item_ptr(leaf, slot, type) \ |
3244 | ((type *)(btrfs_leaf_data(leaf) + \ | 3234 | ((type *)(btrfs_leaf_data(leaf) + \ |
@@ -3263,7 +3253,7 @@ static inline gfp_t btrfs_alloc_write_mask(struct address_space *mapping) | |||
3263 | static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_root *root, | 3253 | static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_root *root, |
3264 | unsigned num_items) | 3254 | unsigned num_items) |
3265 | { | 3255 | { |
3266 | return (root->leafsize + root->nodesize * (BTRFS_MAX_LEVEL - 1)) * | 3256 | return (root->nodesize + root->nodesize * (BTRFS_MAX_LEVEL - 1)) * |
3267 | 2 * num_items; | 3257 | 2 * num_items; |
3268 | } | 3258 | } |
3269 | 3259 | ||
@@ -3274,8 +3264,7 @@ static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_root *root, | |||
3274 | static inline u64 btrfs_calc_trunc_metadata_size(struct btrfs_root *root, | 3264 | static inline u64 btrfs_calc_trunc_metadata_size(struct btrfs_root *root, |
3275 | unsigned num_items) | 3265 | unsigned num_items) |
3276 | { | 3266 | { |
3277 | return (root->leafsize + root->nodesize * (BTRFS_MAX_LEVEL - 1)) * | 3267 | return root->nodesize * BTRFS_MAX_LEVEL * num_items; |
3278 | num_items; | ||
3279 | } | 3268 | } |
3280 | 3269 | ||
3281 | int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans, | 3270 | int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans, |
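The rewrites of btrfs_calc_trans_metadata_size() and btrfs_calc_trunc_metadata_size() above are pure simplifications: once leafsize is always nodesize, (leafsize + nodesize * (BTRFS_MAX_LEVEL - 1)) collapses to nodesize * BTRFS_MAX_LEVEL. A tiny check of the equivalence (BTRFS_MAX_LEVEL is 8 in this tree):

#include <assert.h>

enum { BTRFS_MAX_LEVEL = 8 };

static unsigned long old_calc(unsigned long leafsize, unsigned long nodesize,
			      unsigned int num_items)
{
	return (leafsize + nodesize * (BTRFS_MAX_LEVEL - 1)) * num_items;
}

static unsigned long new_calc(unsigned long nodesize, unsigned int num_items)
{
	return nodesize * BTRFS_MAX_LEVEL * num_items;
}

int main(void)
{
	unsigned long nodesize = 16384;

	/* identical results once leafsize is always nodesize */
	assert(old_calc(nodesize, nodesize, 4) == new_calc(nodesize, 4));
	return 0;
}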
@@ -3305,9 +3294,9 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group( | |||
3305 | u64 bytenr); | 3294 | u64 bytenr); |
3306 | void btrfs_put_block_group(struct btrfs_block_group_cache *cache); | 3295 | void btrfs_put_block_group(struct btrfs_block_group_cache *cache); |
3307 | int get_block_group_index(struct btrfs_block_group_cache *cache); | 3296 | int get_block_group_index(struct btrfs_block_group_cache *cache); |
3308 | struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, | 3297 | struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans, |
3309 | struct btrfs_root *root, u32 blocksize, | 3298 | struct btrfs_root *root, u64 parent, |
3310 | u64 parent, u64 root_objectid, | 3299 | u64 root_objectid, |
3311 | struct btrfs_disk_key *key, int level, | 3300 | struct btrfs_disk_key *key, int level, |
3312 | u64 hint, u64 empty_size); | 3301 | u64 hint, u64 empty_size); |
3313 | void btrfs_free_tree_block(struct btrfs_trans_handle *trans, | 3302 | void btrfs_free_tree_block(struct btrfs_trans_handle *trans, |
@@ -3363,6 +3352,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, | |||
3363 | u64 size); | 3352 | u64 size); |
3364 | int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | 3353 | int btrfs_remove_block_group(struct btrfs_trans_handle *trans, |
3365 | struct btrfs_root *root, u64 group_start); | 3354 | struct btrfs_root *root, u64 group_start); |
3355 | void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info); | ||
3366 | void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans, | 3356 | void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans, |
3367 | struct btrfs_root *root); | 3357 | struct btrfs_root *root); |
3368 | u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data); | 3358 | u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data); |
@@ -3604,6 +3594,7 @@ static inline void free_fs_info(struct btrfs_fs_info *fs_info) | |||
3604 | kfree(fs_info->uuid_root); | 3594 | kfree(fs_info->uuid_root); |
3605 | kfree(fs_info->super_copy); | 3595 | kfree(fs_info->super_copy); |
3606 | kfree(fs_info->super_for_commit); | 3596 | kfree(fs_info->super_for_commit); |
3597 | security_free_mnt_opts(&fs_info->security_opts); | ||
3607 | kfree(fs_info); | 3598 | kfree(fs_info); |
3608 | } | 3599 | } |
3609 | 3600 | ||
@@ -3739,8 +3730,7 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans, | |||
3739 | int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, | 3730 | int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, |
3740 | struct bio *bio, u32 *dst); | 3731 | struct bio *bio, u32 *dst); |
3741 | int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode, | 3732 | int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode, |
3742 | struct btrfs_dio_private *dip, struct bio *bio, | 3733 | struct bio *bio, u64 logical_offset); |
3743 | u64 logical_offset); | ||
3744 | int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, | 3734 | int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, |
3745 | struct btrfs_root *root, | 3735 | struct btrfs_root *root, |
3746 | u64 objectid, u64 pos, | 3736 | u64 objectid, u64 pos, |
@@ -4141,8 +4131,15 @@ static inline int btrfs_defrag_cancelled(struct btrfs_fs_info *fs_info) | |||
4141 | /* Sanity test specific functions */ | 4131 | /* Sanity test specific functions */ |
4142 | #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS | 4132 | #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS |
4143 | void btrfs_test_destroy_inode(struct inode *inode); | 4133 | void btrfs_test_destroy_inode(struct inode *inode); |
4144 | int btrfs_verify_qgroup_counts(struct btrfs_fs_info *fs_info, u64 qgroupid, | ||
4145 | u64 rfer, u64 excl); | ||
4146 | #endif | 4134 | #endif |
4147 | 4135 | ||
4136 | static inline int btrfs_test_is_dummy_root(struct btrfs_root *root) | ||
4137 | { | ||
4138 | #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS | ||
4139 | if (unlikely(test_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state))) | ||
4140 | return 1; | ||
4141 | #endif | ||
4142 | return 0; | ||
4143 | } | ||
4144 | |||
4148 | #endif | 4145 | #endif |
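btrfs_test_is_dummy_root() above follows the usual pattern for test-only predicates: the body compiles down to a constant 0 unless the sanity-test config is set, so callers need no #ifdef of their own. A userspace sketch of that shape — the bit test is a stand-in for test_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state):

#include <stdio.h>

/* #define CONFIG_BTRFS_FS_RUN_SANITY_TESTS */

static inline int is_dummy_root(unsigned long state)
{
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
	if (state & 1UL)	/* stand-in for the DUMMY_ROOT state bit */
		return 1;
#endif
	return 0;
}

int main(void)
{
	printf("dummy? %d\n", is_dummy_root(1UL));	/* 0 unless the config is on */
	return 0;
}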
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index da775bfdebc9..054577bddaf2 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c | |||
@@ -1042,7 +1042,7 @@ static int __btrfs_update_delayed_inode(struct btrfs_trans_handle *trans, | |||
1042 | int ret; | 1042 | int ret; |
1043 | 1043 | ||
1044 | key.objectid = node->inode_id; | 1044 | key.objectid = node->inode_id; |
1045 | btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); | 1045 | key.type = BTRFS_INODE_ITEM_KEY; |
1046 | key.offset = 0; | 1046 | key.offset = 0; |
1047 | 1047 | ||
1048 | if (test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &node->flags)) | 1048 | if (test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &node->flags)) |
@@ -1099,7 +1099,7 @@ err_out: | |||
1099 | search: | 1099 | search: |
1100 | btrfs_release_path(path); | 1100 | btrfs_release_path(path); |
1101 | 1101 | ||
1102 | btrfs_set_key_type(&key, BTRFS_INODE_EXTREF_KEY); | 1102 | key.type = BTRFS_INODE_EXTREF_KEY; |
1103 | key.offset = -1; | 1103 | key.offset = -1; |
1104 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); | 1104 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); |
1105 | if (ret < 0) | 1105 | if (ret < 0) |
@@ -1395,8 +1395,8 @@ static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root, | |||
1395 | return -ENOMEM; | 1395 | return -ENOMEM; |
1396 | 1396 | ||
1397 | async_work->delayed_root = delayed_root; | 1397 | async_work->delayed_root = delayed_root; |
1398 | btrfs_init_work(&async_work->work, btrfs_async_run_delayed_root, | 1398 | btrfs_init_work(&async_work->work, btrfs_delayed_meta_helper, |
1399 | NULL, NULL); | 1399 | btrfs_async_run_delayed_root, NULL, NULL); |
1400 | async_work->nr = nr; | 1400 | async_work->nr = nr; |
1401 | 1401 | ||
1402 | btrfs_queue_work(root->fs_info->delayed_workers, &async_work->work); | 1402 | btrfs_queue_work(root->fs_info->delayed_workers, &async_work->work); |
@@ -1473,7 +1473,7 @@ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans, | |||
1473 | } | 1473 | } |
1474 | 1474 | ||
1475 | delayed_item->key.objectid = btrfs_ino(dir); | 1475 | delayed_item->key.objectid = btrfs_ino(dir); |
1476 | btrfs_set_key_type(&delayed_item->key, BTRFS_DIR_INDEX_KEY); | 1476 | delayed_item->key.type = BTRFS_DIR_INDEX_KEY; |
1477 | delayed_item->key.offset = index; | 1477 | delayed_item->key.offset = index; |
1478 | 1478 | ||
1479 | dir_item = (struct btrfs_dir_item *)delayed_item->data; | 1479 | dir_item = (struct btrfs_dir_item *)delayed_item->data; |
@@ -1542,7 +1542,7 @@ int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans, | |||
1542 | return PTR_ERR(node); | 1542 | return PTR_ERR(node); |
1543 | 1543 | ||
1544 | item_key.objectid = btrfs_ino(dir); | 1544 | item_key.objectid = btrfs_ino(dir); |
1545 | btrfs_set_key_type(&item_key, BTRFS_DIR_INDEX_KEY); | 1545 | item_key.type = BTRFS_DIR_INDEX_KEY; |
1546 | item_key.offset = index; | 1546 | item_key.offset = index; |
1547 | 1547 | ||
1548 | ret = btrfs_delete_delayed_insertion_item(root, node, &item_key); | 1548 | ret = btrfs_delete_delayed_insertion_item(root, node, &item_key); |
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index eea26e1b2fda..6f662b34ba0e 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c | |||
@@ -168,8 +168,12 @@ no_valid_dev_replace_entry_found: | |||
168 | dev_replace->srcdev->total_bytes; | 168 | dev_replace->srcdev->total_bytes; |
169 | dev_replace->tgtdev->disk_total_bytes = | 169 | dev_replace->tgtdev->disk_total_bytes = |
170 | dev_replace->srcdev->disk_total_bytes; | 170 | dev_replace->srcdev->disk_total_bytes; |
171 | dev_replace->tgtdev->commit_total_bytes = | ||
172 | dev_replace->srcdev->commit_total_bytes; | ||
171 | dev_replace->tgtdev->bytes_used = | 173 | dev_replace->tgtdev->bytes_used = |
172 | dev_replace->srcdev->bytes_used; | 174 | dev_replace->srcdev->bytes_used; |
175 | dev_replace->tgtdev->commit_bytes_used = | ||
176 | dev_replace->srcdev->commit_bytes_used; | ||
173 | } | 177 | } |
174 | dev_replace->tgtdev->is_tgtdev_for_dev_replace = 1; | 178 | dev_replace->tgtdev->is_tgtdev_for_dev_replace = 1; |
175 | btrfs_init_dev_replace_tgtdev_for_resume(fs_info, | 179 | btrfs_init_dev_replace_tgtdev_for_resume(fs_info, |
@@ -329,30 +333,34 @@ int btrfs_dev_replace_start(struct btrfs_root *root, | |||
329 | args->start.tgtdev_name[0] == '\0') | 333 | args->start.tgtdev_name[0] == '\0') |
330 | return -EINVAL; | 334 | return -EINVAL; |
331 | 335 | ||
332 | mutex_lock(&fs_info->volume_mutex); | 336 | /* |
333 | ret = btrfs_init_dev_replace_tgtdev(root, args->start.tgtdev_name, | 337 | * Here we commit the transaction to make sure commit_total_bytes |
334 | &tgt_device); | 338 | * of all the devices are updated. |
335 | if (ret) { | 339 | */ |
336 | btrfs_err(fs_info, "target device %s is invalid!", | 340 | trans = btrfs_attach_transaction(root); |
337 | args->start.tgtdev_name); | 341 | if (!IS_ERR(trans)) { |
338 | mutex_unlock(&fs_info->volume_mutex); | 342 | ret = btrfs_commit_transaction(trans, root); |
339 | return -EINVAL; | 343 | if (ret) |
344 | return ret; | ||
345 | } else if (PTR_ERR(trans) != -ENOENT) { | ||
346 | return PTR_ERR(trans); | ||
340 | } | 347 | } |
341 | 348 | ||
349 | /* the disk copy procedure reuses the scrub code */ | ||
350 | mutex_lock(&fs_info->volume_mutex); | ||
342 | ret = btrfs_dev_replace_find_srcdev(root, args->start.srcdevid, | 351 | ret = btrfs_dev_replace_find_srcdev(root, args->start.srcdevid, |
343 | args->start.srcdev_name, | 352 | args->start.srcdev_name, |
344 | &src_device); | 353 | &src_device); |
345 | mutex_unlock(&fs_info->volume_mutex); | ||
346 | if (ret) { | 354 | if (ret) { |
347 | ret = -EINVAL; | 355 | mutex_unlock(&fs_info->volume_mutex); |
348 | goto leave_no_lock; | 356 | return ret; |
349 | } | 357 | } |
350 | 358 | ||
351 | if (tgt_device->total_bytes < src_device->total_bytes) { | 359 | ret = btrfs_init_dev_replace_tgtdev(root, args->start.tgtdev_name, |
352 | btrfs_err(fs_info, "target device is smaller than source device!"); | 360 | src_device, &tgt_device); |
353 | ret = -EINVAL; | 361 | mutex_unlock(&fs_info->volume_mutex); |
354 | goto leave_no_lock; | 362 | if (ret) |
355 | } | 363 | return ret; |
356 | 364 | ||
357 | btrfs_dev_replace_lock(dev_replace); | 365 | btrfs_dev_replace_lock(dev_replace); |
358 | switch (dev_replace->replace_state) { | 366 | switch (dev_replace->replace_state) { |
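The reworked start path first tries to commit any running transaction so that every device's commit_total_bytes is settled before the target device is sized from the source; btrfs_attach_transaction() returning -ENOENT means nothing is running, which is treated as "nothing to flush". A userspace sketch of the kernel's ERR_PTR idiom this error handling leans on (the helpers below mimic, not reuse, the kernel's):

#include <errno.h>
#include <stdio.h>

#define MAX_ERRNO 4095

static void *ERR_PTR(long error)	{ return (void *)error; }
static long PTR_ERR(const void *ptr)	{ return (long)ptr; }
static int IS_ERR(const void *ptr)
{
	return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

/* Pretend attach: a transaction handle, or -ENOENT if none is running. */
static void *attach_transaction(int have_transaction)
{
	static int dummy_handle;

	return have_transaction ? (void *)&dummy_handle : ERR_PTR(-ENOENT);
}

int main(void)
{
	void *trans = attach_transaction(0);

	if (!IS_ERR(trans))
		puts("commit the running transaction first");
	else if (PTR_ERR(trans) != -ENOENT)
		return 1;	/* a real error: bail out */
	puts("no transaction running: nothing to flush");
	return 0;
}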
@@ -380,10 +388,6 @@ int btrfs_dev_replace_start(struct btrfs_root *root, | |||
380 | src_device->devid, | 388 | src_device->devid, |
381 | rcu_str_deref(tgt_device->name)); | 389 | rcu_str_deref(tgt_device->name)); |
382 | 390 | ||
383 | tgt_device->total_bytes = src_device->total_bytes; | ||
384 | tgt_device->disk_total_bytes = src_device->disk_total_bytes; | ||
385 | tgt_device->bytes_used = src_device->bytes_used; | ||
386 | |||
387 | /* | 391 | /* |
388 | * from now on, the writes to the srcdev are all duplicated to | 392 | * from now on, the writes to the srcdev are all duplicated to |
389 | * go to the tgtdev as well (refer to btrfs_map_block()). | 393 | * go to the tgtdev as well (refer to btrfs_map_block()). |
@@ -414,7 +418,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root, | |||
414 | 418 | ||
415 | /* the disk copy procedure reuses the scrub code */ | 419 | /* the disk copy procedure reuses the scrub code */ |
416 | ret = btrfs_scrub_dev(fs_info, src_device->devid, 0, | 420 | ret = btrfs_scrub_dev(fs_info, src_device->devid, 0, |
417 | src_device->total_bytes, | 421 | btrfs_device_get_total_bytes(src_device), |
418 | &dev_replace->scrub_progress, 0, 1); | 422 | &dev_replace->scrub_progress, 0, 1); |
419 | 423 | ||
420 | ret = btrfs_dev_replace_finishing(root->fs_info, ret); | 424 | ret = btrfs_dev_replace_finishing(root->fs_info, ret); |
@@ -426,9 +430,7 @@ leave: | |||
426 | dev_replace->srcdev = NULL; | 430 | dev_replace->srcdev = NULL; |
427 | dev_replace->tgtdev = NULL; | 431 | dev_replace->tgtdev = NULL; |
428 | btrfs_dev_replace_unlock(dev_replace); | 432 | btrfs_dev_replace_unlock(dev_replace); |
429 | leave_no_lock: | 433 | btrfs_destroy_dev_replace_tgtdev(fs_info, tgt_device); |
430 | if (tgt_device) | ||
431 | btrfs_destroy_dev_replace_tgtdev(fs_info, tgt_device); | ||
432 | return ret; | 434 | return ret; |
433 | } | 435 | } |
434 | 436 | ||
@@ -507,9 +509,10 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, | |||
507 | ret = btrfs_commit_transaction(trans, root); | 509 | ret = btrfs_commit_transaction(trans, root); |
508 | WARN_ON(ret); | 510 | WARN_ON(ret); |
509 | 511 | ||
512 | mutex_lock(&uuid_mutex); | ||
510 | /* keep away write_all_supers() during the finishing procedure */ | 513 | /* keep away write_all_supers() during the finishing procedure */ |
511 | mutex_lock(&root->fs_info->chunk_mutex); | ||
512 | mutex_lock(&root->fs_info->fs_devices->device_list_mutex); | 514 | mutex_lock(&root->fs_info->fs_devices->device_list_mutex); |
515 | mutex_lock(&root->fs_info->chunk_mutex); | ||
513 | btrfs_dev_replace_lock(dev_replace); | 516 | btrfs_dev_replace_lock(dev_replace); |
514 | dev_replace->replace_state = | 517 | dev_replace->replace_state = |
515 | scrub_ret ? BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED | 518 | scrub_ret ? BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED |
@@ -532,8 +535,9 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, | |||
532 | src_device->devid, | 535 | src_device->devid, |
533 | rcu_str_deref(tgt_device->name), scrub_ret); | 536 | rcu_str_deref(tgt_device->name), scrub_ret); |
534 | btrfs_dev_replace_unlock(dev_replace); | 537 | btrfs_dev_replace_unlock(dev_replace); |
535 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); | ||
536 | mutex_unlock(&root->fs_info->chunk_mutex); | 538 | mutex_unlock(&root->fs_info->chunk_mutex); |
539 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); | ||
540 | mutex_unlock(&uuid_mutex); | ||
537 | if (tgt_device) | 541 | if (tgt_device) |
538 | btrfs_destroy_dev_replace_tgtdev(fs_info, tgt_device); | 542 | btrfs_destroy_dev_replace_tgtdev(fs_info, tgt_device); |
539 | mutex_unlock(&dev_replace->lock_finishing_cancel_unmount); | 543 | mutex_unlock(&dev_replace->lock_finishing_cancel_unmount); |
@@ -542,7 +546,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, | |||
542 | } | 546 | } |
543 | 547 | ||
544 | printk_in_rcu(KERN_INFO | 548 | printk_in_rcu(KERN_INFO |
545 | "BTRFS: dev_replace from %s (devid %llu) to %s) finished\n", | 549 | "BTRFS: dev_replace from %s (devid %llu) to %s finished\n", |
546 | src_device->missing ? "<missing disk>" : | 550 | src_device->missing ? "<missing disk>" : |
547 | rcu_str_deref(src_device->name), | 551 | rcu_str_deref(src_device->name), |
548 | src_device->devid, | 552 | src_device->devid, |
@@ -550,23 +554,29 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, | |||
550 | tgt_device->is_tgtdev_for_dev_replace = 0; | 554 | tgt_device->is_tgtdev_for_dev_replace = 0; |
551 | tgt_device->devid = src_device->devid; | 555 | tgt_device->devid = src_device->devid; |
552 | src_device->devid = BTRFS_DEV_REPLACE_DEVID; | 556 | src_device->devid = BTRFS_DEV_REPLACE_DEVID; |
553 | tgt_device->bytes_used = src_device->bytes_used; | ||
554 | memcpy(uuid_tmp, tgt_device->uuid, sizeof(uuid_tmp)); | 557 | memcpy(uuid_tmp, tgt_device->uuid, sizeof(uuid_tmp)); |
555 | memcpy(tgt_device->uuid, src_device->uuid, sizeof(tgt_device->uuid)); | 558 | memcpy(tgt_device->uuid, src_device->uuid, sizeof(tgt_device->uuid)); |
556 | memcpy(src_device->uuid, uuid_tmp, sizeof(src_device->uuid)); | 559 | memcpy(src_device->uuid, uuid_tmp, sizeof(src_device->uuid)); |
557 | tgt_device->total_bytes = src_device->total_bytes; | 560 | btrfs_device_set_total_bytes(tgt_device, src_device->total_bytes); |
558 | tgt_device->disk_total_bytes = src_device->disk_total_bytes; | 561 | btrfs_device_set_disk_total_bytes(tgt_device, |
559 | tgt_device->bytes_used = src_device->bytes_used; | 562 | src_device->disk_total_bytes); |
563 | btrfs_device_set_bytes_used(tgt_device, src_device->bytes_used); | ||
564 | ASSERT(list_empty(&src_device->resized_list)); | ||
565 | tgt_device->commit_total_bytes = src_device->commit_total_bytes; | ||
566 | tgt_device->commit_bytes_used = src_device->bytes_used; | ||
560 | if (fs_info->sb->s_bdev == src_device->bdev) | 567 | if (fs_info->sb->s_bdev == src_device->bdev) |
561 | fs_info->sb->s_bdev = tgt_device->bdev; | 568 | fs_info->sb->s_bdev = tgt_device->bdev; |
562 | if (fs_info->fs_devices->latest_bdev == src_device->bdev) | 569 | if (fs_info->fs_devices->latest_bdev == src_device->bdev) |
563 | fs_info->fs_devices->latest_bdev = tgt_device->bdev; | 570 | fs_info->fs_devices->latest_bdev = tgt_device->bdev; |
564 | list_add(&tgt_device->dev_alloc_list, &fs_info->fs_devices->alloc_list); | 571 | list_add(&tgt_device->dev_alloc_list, &fs_info->fs_devices->alloc_list); |
572 | fs_info->fs_devices->rw_devices++; | ||
565 | 573 | ||
566 | /* replace the sysfs entry */ | 574 | /* replace the sysfs entry */ |
567 | btrfs_kobj_rm_device(fs_info, src_device); | 575 | btrfs_kobj_rm_device(fs_info, src_device); |
568 | btrfs_kobj_add_device(fs_info, tgt_device); | 576 | btrfs_kobj_add_device(fs_info, tgt_device); |
569 | 577 | ||
578 | btrfs_dev_replace_unlock(dev_replace); | ||
579 | |||
570 | btrfs_rm_dev_replace_blocked(fs_info); | 580 | btrfs_rm_dev_replace_blocked(fs_info); |
571 | 581 | ||
572 | btrfs_rm_dev_replace_srcdev(fs_info, src_device); | 582 | btrfs_rm_dev_replace_srcdev(fs_info, src_device); |
@@ -580,9 +590,9 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, | |||
580 | * superblock is scratched out so that it is no longer marked to | 590 | * superblock is scratched out so that it is no longer marked to |
581 | * belong to this filesystem. | 591 | * belong to this filesystem. |
582 | */ | 592 | */ |
583 | btrfs_dev_replace_unlock(dev_replace); | ||
584 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); | ||
585 | mutex_unlock(&root->fs_info->chunk_mutex); | 593 | mutex_unlock(&root->fs_info->chunk_mutex); |
594 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); | ||
595 | mutex_unlock(&uuid_mutex); | ||
586 | 596 | ||
587 | /* write back the superblocks */ | 597 | /* write back the superblocks */ |
588 | trans = btrfs_start_transaction(root, 0); | 598 | trans = btrfs_start_transaction(root, 0); |
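Two of the hunks above flip the finishing path's locking so that uuid_mutex is outermost, device_list_mutex next, and chunk_mutex innermost, with releases in exact reverse order. A pthread sketch of the discipline the patch establishes (mutex names echo the hunk; the critical section is elided):

#include <pthread.h>

static pthread_mutex_t uuid_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t device_list_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t chunk_mutex = PTHREAD_MUTEX_INITIALIZER;

int main(void)
{
	/* acquire outermost to innermost ... */
	pthread_mutex_lock(&uuid_mutex);
	pthread_mutex_lock(&device_list_mutex);
	pthread_mutex_lock(&chunk_mutex);

	/* ... swap source/target device state here ... */

	/* ... release innermost to outermost */
	pthread_mutex_unlock(&chunk_mutex);
	pthread_mutex_unlock(&device_list_mutex);
	pthread_mutex_unlock(&uuid_mutex);
	return 0;
}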
@@ -643,6 +653,7 @@ void btrfs_dev_replace_status(struct btrfs_fs_info *fs_info, | |||
643 | struct btrfs_ioctl_dev_replace_args *args) | 653 | struct btrfs_ioctl_dev_replace_args *args) |
644 | { | 654 | { |
645 | struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; | 655 | struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; |
656 | struct btrfs_device *srcdev; | ||
646 | 657 | ||
647 | btrfs_dev_replace_lock(dev_replace); | 658 | btrfs_dev_replace_lock(dev_replace); |
648 | /* even if !dev_replace_is_valid, the values are good enough for | 659 | /* even if !dev_replace_is_valid, the values are good enough for |
@@ -665,8 +676,9 @@ void btrfs_dev_replace_status(struct btrfs_fs_info *fs_info, | |||
665 | break; | 676 | break; |
666 | case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED: | 677 | case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED: |
667 | case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED: | 678 | case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED: |
679 | srcdev = dev_replace->srcdev; | ||
668 | args->status.progress_1000 = div64_u64(dev_replace->cursor_left, | 680 | args->status.progress_1000 = div64_u64(dev_replace->cursor_left, |
669 | div64_u64(dev_replace->srcdev->total_bytes, 1000)); | 681 | div64_u64(btrfs_device_get_total_bytes(srcdev), 1000)); |
670 | break; | 682 | break; |
671 | } | 683 | } |
672 | btrfs_dev_replace_unlock(dev_replace); | 684 | btrfs_dev_replace_unlock(dev_replace); |
@@ -825,7 +837,7 @@ static int btrfs_dev_replace_continue_on_mount(struct btrfs_fs_info *fs_info) | |||
825 | 837 | ||
826 | ret = btrfs_scrub_dev(fs_info, dev_replace->srcdev->devid, | 838 | ret = btrfs_scrub_dev(fs_info, dev_replace->srcdev->devid, |
827 | dev_replace->committed_cursor_left, | 839 | dev_replace->committed_cursor_left, |
828 | dev_replace->srcdev->total_bytes, | 840 | btrfs_device_get_total_bytes(dev_replace->srcdev), |
829 | &dev_replace->scrub_progress, 0, 1); | 841 | &dev_replace->scrub_progress, 0, 1); |
830 | ret = btrfs_dev_replace_finishing(fs_info, ret); | 842 | ret = btrfs_dev_replace_finishing(fs_info, ret); |
831 | WARN_ON(ret); | 843 | WARN_ON(ret); |
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index a0691df5dcea..fc8df866e919 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c | |||
@@ -86,7 +86,7 @@ int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans, | |||
86 | BUG_ON(name_len + data_len > BTRFS_MAX_XATTR_SIZE(root)); | 86 | BUG_ON(name_len + data_len > BTRFS_MAX_XATTR_SIZE(root)); |
87 | 87 | ||
88 | key.objectid = objectid; | 88 | key.objectid = objectid; |
89 | btrfs_set_key_type(&key, BTRFS_XATTR_ITEM_KEY); | 89 | key.type = BTRFS_XATTR_ITEM_KEY; |
90 | key.offset = btrfs_name_hash(name, name_len); | 90 | key.offset = btrfs_name_hash(name, name_len); |
91 | 91 | ||
92 | data_size = sizeof(*dir_item) + name_len + data_len; | 92 | data_size = sizeof(*dir_item) + name_len + data_len; |
@@ -137,7 +137,7 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root | |||
137 | u32 data_size; | 137 | u32 data_size; |
138 | 138 | ||
139 | key.objectid = btrfs_ino(dir); | 139 | key.objectid = btrfs_ino(dir); |
140 | btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); | 140 | key.type = BTRFS_DIR_ITEM_KEY; |
141 | key.offset = btrfs_name_hash(name, name_len); | 141 | key.offset = btrfs_name_hash(name, name_len); |
142 | 142 | ||
143 | path = btrfs_alloc_path(); | 143 | path = btrfs_alloc_path(); |
@@ -204,7 +204,7 @@ struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, | |||
204 | int cow = mod != 0; | 204 | int cow = mod != 0; |
205 | 205 | ||
206 | key.objectid = dir; | 206 | key.objectid = dir; |
207 | btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); | 207 | key.type = BTRFS_DIR_ITEM_KEY; |
208 | 208 | ||
209 | key.offset = btrfs_name_hash(name, name_len); | 209 | key.offset = btrfs_name_hash(name, name_len); |
210 | 210 | ||
@@ -234,7 +234,7 @@ int btrfs_check_dir_item_collision(struct btrfs_root *root, u64 dir, | |||
234 | return -ENOMEM; | 234 | return -ENOMEM; |
235 | 235 | ||
236 | key.objectid = dir; | 236 | key.objectid = dir; |
237 | btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); | 237 | key.type = BTRFS_DIR_ITEM_KEY; |
238 | key.offset = btrfs_name_hash(name, name_len); | 238 | key.offset = btrfs_name_hash(name, name_len); |
239 | 239 | ||
240 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | 240 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); |
@@ -297,7 +297,7 @@ btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans, | |||
297 | int cow = mod != 0; | 297 | int cow = mod != 0; |
298 | 298 | ||
299 | key.objectid = dir; | 299 | key.objectid = dir; |
300 | btrfs_set_key_type(&key, BTRFS_DIR_INDEX_KEY); | 300 | key.type = BTRFS_DIR_INDEX_KEY; |
301 | key.offset = objectid; | 301 | key.offset = objectid; |
302 | 302 | ||
303 | ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow); | 303 | ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow); |
@@ -367,7 +367,7 @@ struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans, | |||
367 | int cow = mod != 0; | 367 | int cow = mod != 0; |
368 | 368 | ||
369 | key.objectid = dir; | 369 | key.objectid = dir; |
370 | btrfs_set_key_type(&key, BTRFS_XATTR_ITEM_KEY); | 370 | key.type = BTRFS_XATTR_ITEM_KEY; |
371 | key.offset = btrfs_name_hash(name, name_len); | 371 | key.offset = btrfs_name_hash(name, name_len); |
372 | ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow); | 372 | ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow); |
373 | if (ret < 0) | 373 | if (ret < 0) |
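The btrfs_set_key_type() conversions in the delayed-inode.c and dir-item.c hunks above are mechanical: the removed helper was a trivial setter on the CPU-order key, so assigning key.type directly says the same thing with less indirection. A sketch of the equivalence (the struct mirrors the in-memory btrfs_key as I read it; the key-type constant matches this tree):

#include <assert.h>
#include <stdint.h>

struct btrfs_key {		/* CPU-order key */
	uint64_t objectid;
	uint8_t  type;
	uint64_t offset;
};

#define BTRFS_DIR_ITEM_KEY 84

static inline void btrfs_set_key_type(struct btrfs_key *key, uint8_t type)
{
	key->type = type;	/* all the removed helper ever did */
}

int main(void)
{
	struct btrfs_key a = {0}, b = {0};

	btrfs_set_key_type(&a, BTRFS_DIR_ITEM_KEY);
	b.type = BTRFS_DIR_ITEM_KEY;
	assert(a.type == b.type);
	return 0;
}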
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index d0ed9e664f7d..fa45e3cae40d 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
@@ -39,7 +39,6 @@ | |||
39 | #include "btrfs_inode.h" | 39 | #include "btrfs_inode.h" |
40 | #include "volumes.h" | 40 | #include "volumes.h" |
41 | #include "print-tree.h" | 41 | #include "print-tree.h" |
42 | #include "async-thread.h" | ||
43 | #include "locking.h" | 42 | #include "locking.h" |
44 | #include "tree-log.h" | 43 | #include "tree-log.h" |
45 | #include "free-space-cache.h" | 44 | #include "free-space-cache.h" |
@@ -73,21 +72,41 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root); | |||
73 | static void btrfs_error_commit_super(struct btrfs_root *root); | 72 | static void btrfs_error_commit_super(struct btrfs_root *root); |
74 | 73 | ||
75 | /* | 74 | /* |
76 | * end_io_wq structs are used to do processing in task context when an IO is | 75 | * btrfs_end_io_wq structs are used to do processing in task context when an IO |
77 | * complete. This is used during reads to verify checksums, and it is used | 76 | * is complete. This is used during reads to verify checksums, and it is used |
78 | * by writes to insert metadata for new file extents after IO is complete. | 77 | * by writes to insert metadata for new file extents after IO is complete. |
79 | */ | 78 | */ |
80 | struct end_io_wq { | 79 | struct btrfs_end_io_wq { |
81 | struct bio *bio; | 80 | struct bio *bio; |
82 | bio_end_io_t *end_io; | 81 | bio_end_io_t *end_io; |
83 | void *private; | 82 | void *private; |
84 | struct btrfs_fs_info *info; | 83 | struct btrfs_fs_info *info; |
85 | int error; | 84 | int error; |
86 | int metadata; | 85 | enum btrfs_wq_endio_type metadata; |
87 | struct list_head list; | 86 | struct list_head list; |
88 | struct btrfs_work work; | 87 | struct btrfs_work work; |
89 | }; | 88 | }; |
90 | 89 | ||
90 | static struct kmem_cache *btrfs_end_io_wq_cache; | ||
91 | |||
92 | int __init btrfs_end_io_wq_init(void) | ||
93 | { | ||
94 | btrfs_end_io_wq_cache = kmem_cache_create("btrfs_end_io_wq", | ||
95 | sizeof(struct btrfs_end_io_wq), | ||
96 | 0, | ||
97 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, | ||
98 | NULL); | ||
99 | if (!btrfs_end_io_wq_cache) | ||
100 | return -ENOMEM; | ||
101 | return 0; | ||
102 | } | ||
103 | |||
104 | void btrfs_end_io_wq_exit(void) | ||
105 | { | ||
106 | if (btrfs_end_io_wq_cache) | ||
107 | kmem_cache_destroy(btrfs_end_io_wq_cache); | ||
108 | } | ||
109 | |||
91 | /* | 110 | /* |
92 | * async submit bios are used to offload expensive checksumming | 111 | * async submit bios are used to offload expensive checksumming |
93 | * onto the worker threads. They checksum file and metadata bios | 112 | * onto the worker threads. They checksum file and metadata bios |
@@ -328,8 +347,7 @@ static int verify_parent_transid(struct extent_io_tree *io_tree, | |||
328 | { | 347 | { |
329 | struct extent_state *cached_state = NULL; | 348 | struct extent_state *cached_state = NULL; |
330 | int ret; | 349 | int ret; |
331 | bool need_lock = (current->journal_info == | 350 | bool need_lock = (current->journal_info == BTRFS_SEND_TRANS_STUB); |
332 | (void *)BTRFS_SEND_TRANS_STUB); | ||
333 | 351 | ||
334 | if (!parent_transid || btrfs_header_generation(eb) == parent_transid) | 352 | if (!parent_transid || btrfs_header_generation(eb) == parent_transid) |
335 | return 0; | 353 | return 0; |
@@ -349,9 +367,9 @@ static int verify_parent_transid(struct extent_io_tree *io_tree, | |||
349 | ret = 0; | 367 | ret = 0; |
350 | goto out; | 368 | goto out; |
351 | } | 369 | } |
352 | printk_ratelimited("parent transid verify failed on %llu wanted %llu " | 370 | printk_ratelimited(KERN_INFO "BTRFS (device %s): parent transid verify failed on %llu wanted %llu found %llu\n", |
353 | "found %llu\n", | 371 | eb->fs_info->sb->s_id, eb->start, |
354 | eb->start, parent_transid, btrfs_header_generation(eb)); | 372 | parent_transid, btrfs_header_generation(eb)); |
355 | ret = 1; | 373 | ret = 1; |
356 | 374 | ||
357 | /* | 375 | /* |
@@ -608,22 +626,22 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, | |||
608 | goto err; | 626 | goto err; |
609 | 627 | ||
610 | eb->read_mirror = mirror; | 628 | eb->read_mirror = mirror; |
611 | if (test_bit(EXTENT_BUFFER_IOERR, &eb->bflags)) { | 629 | if (test_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags)) { |
612 | ret = -EIO; | 630 | ret = -EIO; |
613 | goto err; | 631 | goto err; |
614 | } | 632 | } |
615 | 633 | ||
616 | found_start = btrfs_header_bytenr(eb); | 634 | found_start = btrfs_header_bytenr(eb); |
617 | if (found_start != eb->start) { | 635 | if (found_start != eb->start) { |
618 | printk_ratelimited(KERN_INFO "BTRFS: bad tree block start " | 636 | printk_ratelimited(KERN_INFO "BTRFS (device %s): bad tree block start " |
619 | "%llu %llu\n", | 637 | "%llu %llu\n", |
620 | found_start, eb->start); | 638 | eb->fs_info->sb->s_id, found_start, eb->start); |
621 | ret = -EIO; | 639 | ret = -EIO; |
622 | goto err; | 640 | goto err; |
623 | } | 641 | } |
624 | if (check_tree_block_fsid(root, eb)) { | 642 | if (check_tree_block_fsid(root, eb)) { |
625 | printk_ratelimited(KERN_INFO "BTRFS: bad fsid on block %llu\n", | 643 | printk_ratelimited(KERN_INFO "BTRFS (device %s): bad fsid on block %llu\n", |
626 | eb->start); | 644 | eb->fs_info->sb->s_id, eb->start); |
627 | ret = -EIO; | 645 | ret = -EIO; |
628 | goto err; | 646 | goto err; |
629 | } | 647 | } |
@@ -681,7 +699,7 @@ static int btree_io_failed_hook(struct page *page, int failed_mirror) | |||
681 | struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; | 699 | struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; |
682 | 700 | ||
683 | eb = (struct extent_buffer *)page->private; | 701 | eb = (struct extent_buffer *)page->private; |
684 | set_bit(EXTENT_BUFFER_IOERR, &eb->bflags); | 702 | set_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags); |
685 | eb->read_mirror = failed_mirror; | 703 | eb->read_mirror = failed_mirror; |
686 | atomic_dec(&eb->io_pages); | 704 | atomic_dec(&eb->io_pages); |
687 | if (test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) | 705 | if (test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) |
@@ -691,52 +709,55 @@ static int btree_io_failed_hook(struct page *page, int failed_mirror) | |||
691 | 709 | ||
692 | static void end_workqueue_bio(struct bio *bio, int err) | 710 | static void end_workqueue_bio(struct bio *bio, int err) |
693 | { | 711 | { |
694 | struct end_io_wq *end_io_wq = bio->bi_private; | 712 | struct btrfs_end_io_wq *end_io_wq = bio->bi_private; |
695 | struct btrfs_fs_info *fs_info; | 713 | struct btrfs_fs_info *fs_info; |
714 | struct btrfs_workqueue *wq; | ||
715 | btrfs_work_func_t func; | ||
696 | 716 | ||
697 | fs_info = end_io_wq->info; | 717 | fs_info = end_io_wq->info; |
698 | end_io_wq->error = err; | 718 | end_io_wq->error = err; |
699 | btrfs_init_work(&end_io_wq->work, end_workqueue_fn, NULL, NULL); | ||
700 | 719 | ||
701 | if (bio->bi_rw & REQ_WRITE) { | 720 | if (bio->bi_rw & REQ_WRITE) { |
702 | if (end_io_wq->metadata == BTRFS_WQ_ENDIO_METADATA) | 721 | if (end_io_wq->metadata == BTRFS_WQ_ENDIO_METADATA) { |
703 | btrfs_queue_work(fs_info->endio_meta_write_workers, | 722 | wq = fs_info->endio_meta_write_workers; |
704 | &end_io_wq->work); | 723 | func = btrfs_endio_meta_write_helper; |
705 | else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_FREE_SPACE) | 724 | } else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_FREE_SPACE) { |
706 | btrfs_queue_work(fs_info->endio_freespace_worker, | 725 | wq = fs_info->endio_freespace_worker; |
707 | &end_io_wq->work); | 726 | func = btrfs_freespace_write_helper; |
708 | else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56) | 727 | } else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56) { |
709 | btrfs_queue_work(fs_info->endio_raid56_workers, | 728 | wq = fs_info->endio_raid56_workers; |
710 | &end_io_wq->work); | 729 | func = btrfs_endio_raid56_helper; |
711 | else | 730 | } else { |
712 | btrfs_queue_work(fs_info->endio_write_workers, | 731 | wq = fs_info->endio_write_workers; |
713 | &end_io_wq->work); | 732 | func = btrfs_endio_write_helper; |
733 | } | ||
714 | } else { | 734 | } else { |
715 | if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56) | 735 | if (unlikely(end_io_wq->metadata == |
716 | btrfs_queue_work(fs_info->endio_raid56_workers, | 736 | BTRFS_WQ_ENDIO_DIO_REPAIR)) { |
717 | &end_io_wq->work); | 737 | wq = fs_info->endio_repair_workers; |
718 | else if (end_io_wq->metadata) | 738 | func = btrfs_endio_repair_helper; |
719 | btrfs_queue_work(fs_info->endio_meta_workers, | 739 | } else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56) { |
720 | &end_io_wq->work); | 740 | wq = fs_info->endio_raid56_workers; |
721 | else | 741 | func = btrfs_endio_raid56_helper; |
722 | btrfs_queue_work(fs_info->endio_workers, | 742 | } else if (end_io_wq->metadata) { |
723 | &end_io_wq->work); | 743 | wq = fs_info->endio_meta_workers; |
744 | func = btrfs_endio_meta_helper; | ||
745 | } else { | ||
746 | wq = fs_info->endio_workers; | ||
747 | func = btrfs_endio_helper; | ||
748 | } | ||
724 | } | 749 | } |
750 | |||
751 | btrfs_init_work(&end_io_wq->work, func, end_workqueue_fn, NULL, NULL); | ||
752 | btrfs_queue_work(wq, &end_io_wq->work); | ||
725 | } | 753 | } |
726 | 754 | ||
727 | /* | ||
728 | * For the metadata arg you want | ||
729 | * | ||
730 | * 0 - if data | ||
731 | * 1 - if normal metadata | ||
732 | * 2 - if writing to the free space cache area | ||
733 | * 3 - raid parity work | ||
734 | */ | ||
735 | int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, | 755 | int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, |
736 | int metadata) | 756 | enum btrfs_wq_endio_type metadata) |
737 | { | 757 | { |
738 | struct end_io_wq *end_io_wq; | 758 | struct btrfs_end_io_wq *end_io_wq; |
739 | end_io_wq = kmalloc(sizeof(*end_io_wq), GFP_NOFS); | 759 | |
760 | end_io_wq = kmem_cache_alloc(btrfs_end_io_wq_cache, GFP_NOFS); | ||
740 | if (!end_io_wq) | 761 | if (!end_io_wq) |
741 | return -ENOMEM; | 762 | return -ENOMEM; |
742 | 763 | ||
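The end_workqueue_bio() hunk above collapses four nearly identical btrfs_queue_work() call sites into a single dispatch: the workqueue and helper are chosen first in the if/else ladder, then one btrfs_init_work()/btrfs_queue_work() pair runs at the end. A minimal standalone sketch of that select-then-dispatch shape (all names and types here are illustrative, not the kernel's):

```
#include <stdio.h>

enum endio_type { ENDIO_DATA, ENDIO_METADATA };

struct wq { const char *name; };

static void meta_work(void) { puts("meta endio work"); }
static void data_work(void) { puts("data endio work"); }

static void queue_on(struct wq *q, void (*func)(void))
{
	printf("queued on %s\n", q->name);
	func();
}

int main(void)
{
	struct wq meta_wq = { "endio-meta" }, data_wq = { "endio" };
	enum endio_type type = ENDIO_METADATA;
	struct wq *q;		/* selected once in the ladder... */
	void (*func)(void);	/* ...then used in one place */

	if (type == ENDIO_METADATA) {
		q = &meta_wq;
		func = meta_work;
	} else {
		q = &data_wq;
		func = data_work;
	}

	queue_on(q, func);	/* single queue call, as in the patch */
	return 0;
}
```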
@@ -828,7 +849,7 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, | |||
828 | async->submit_bio_start = submit_bio_start; | 849 | async->submit_bio_start = submit_bio_start; |
829 | async->submit_bio_done = submit_bio_done; | 850 | async->submit_bio_done = submit_bio_done; |
830 | 851 | ||
831 | btrfs_init_work(&async->work, run_one_async_start, | 852 | btrfs_init_work(&async->work, btrfs_worker_helper, run_one_async_start, |
832 | run_one_async_done, run_one_async_free); | 853 | run_one_async_done, run_one_async_free); |
833 | 854 | ||
834 | async->bio_flags = bio_flags; | 855 | async->bio_flags = bio_flags; |
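btrfs_init_work() now takes an extra leading argument (btrfs_worker_helper here, the btrfs_endio_*_helper symbols in the previous hunk). Presumably each helper is a thin per-type wrapper around the shared work loop so that every work type appears as a distinct symbol in stack traces; a hedged standalone sketch of that wrapper-generating idiom (the kernel's version is assumed to also hook tracing around the call):

```
#include <stdio.h>

struct work {
	void (*func)(struct work *);
};

/* Generate a named forwarding function per work type (assumed idiom). */
#define DEFINE_WORK_HELPER(name)					\
	static void name(struct work *work) { work->func(work); }

DEFINE_WORK_HELPER(endio_helper)
DEFINE_WORK_HELPER(endio_meta_helper)

static void end_workqueue_fn(struct work *work)
{
	(void)work;
	puts("running end_workqueue_fn");
}

int main(void)
{
	struct work w = { end_workqueue_fn };

	/* a metadata completion and a data completion enter through
	 * different symbols, even though both run the same function */
	endio_meta_helper(&w);
	endio_helper(&w);
	return 0;
}
```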
@@ -920,7 +941,7 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | |||
920 | * can happen in the async kernel threads | 941 | * can happen in the async kernel threads |
921 | */ | 942 | */ |
922 | ret = btrfs_bio_wq_end_io(BTRFS_I(inode)->root->fs_info, | 943 | ret = btrfs_bio_wq_end_io(BTRFS_I(inode)->root->fs_info, |
923 | bio, 1); | 944 | bio, BTRFS_WQ_ENDIO_METADATA); |
924 | if (ret) | 945 | if (ret) |
925 | goto out_w_error; | 946 | goto out_w_error; |
926 | ret = btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, | 947 | ret = btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, |
@@ -1052,20 +1073,17 @@ static const struct address_space_operations btree_aops = { | |||
1052 | .set_page_dirty = btree_set_page_dirty, | 1073 | .set_page_dirty = btree_set_page_dirty, |
1053 | }; | 1074 | }; |
1054 | 1075 | ||
1055 | int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize, | 1076 | void readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize) |
1056 | u64 parent_transid) | ||
1057 | { | 1077 | { |
1058 | struct extent_buffer *buf = NULL; | 1078 | struct extent_buffer *buf = NULL; |
1059 | struct inode *btree_inode = root->fs_info->btree_inode; | 1079 | struct inode *btree_inode = root->fs_info->btree_inode; |
1060 | int ret = 0; | ||
1061 | 1080 | ||
1062 | buf = btrfs_find_create_tree_block(root, bytenr, blocksize); | 1081 | buf = btrfs_find_create_tree_block(root, bytenr, blocksize); |
1063 | if (!buf) | 1082 | if (!buf) |
1064 | return 0; | 1083 | return; |
1065 | read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, | 1084 | read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, |
1066 | buf, 0, WAIT_NONE, btree_get_extent, 0); | 1085 | buf, 0, WAIT_NONE, btree_get_extent, 0); |
1067 | free_extent_buffer(buf); | 1086 | free_extent_buffer(buf); |
1068 | return ret; | ||
1069 | } | 1087 | } |
1070 | 1088 | ||
1071 | int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr, u32 blocksize, | 1089 | int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr, u32 blocksize, |
@@ -1101,7 +1119,7 @@ int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr, u32 blocksize, | |||
1101 | } | 1119 | } |
1102 | 1120 | ||
1103 | struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, | 1121 | struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, |
1104 | u64 bytenr, u32 blocksize) | 1122 | u64 bytenr) |
1105 | { | 1123 | { |
1106 | return find_extent_buffer(root->fs_info, bytenr); | 1124 | return find_extent_buffer(root->fs_info, bytenr); |
1107 | } | 1125 | } |
@@ -1109,11 +1127,9 @@ struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, | |||
1109 | struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, | 1127 | struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, |
1110 | u64 bytenr, u32 blocksize) | 1128 | u64 bytenr, u32 blocksize) |
1111 | { | 1129 | { |
1112 | #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS | 1130 | if (btrfs_test_is_dummy_root(root)) |
1113 | if (unlikely(test_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state))) | ||
1114 | return alloc_test_extent_buffer(root->fs_info, bytenr, | 1131 | return alloc_test_extent_buffer(root->fs_info, bytenr, |
1115 | blocksize); | 1132 | blocksize); |
1116 | #endif | ||
1117 | return alloc_extent_buffer(root->fs_info, bytenr, blocksize); | 1133 | return alloc_extent_buffer(root->fs_info, bytenr, blocksize); |
1118 | } | 1134 | } |
1119 | 1135 | ||
@@ -1131,12 +1147,12 @@ int btrfs_wait_tree_block_writeback(struct extent_buffer *buf) | |||
1131 | } | 1147 | } |
1132 | 1148 | ||
1133 | struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, | 1149 | struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, |
1134 | u32 blocksize, u64 parent_transid) | 1150 | u64 parent_transid) |
1135 | { | 1151 | { |
1136 | struct extent_buffer *buf = NULL; | 1152 | struct extent_buffer *buf = NULL; |
1137 | int ret; | 1153 | int ret; |
1138 | 1154 | ||
1139 | buf = btrfs_find_create_tree_block(root, bytenr, blocksize); | 1155 | buf = btrfs_find_create_tree_block(root, bytenr, root->nodesize); |
1140 | if (!buf) | 1156 | if (!buf) |
1141 | return NULL; | 1157 | return NULL; |
1142 | 1158 | ||
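read_tree_block() loses its blocksize parameter because metadata blocks are uniformly root->nodesize now; callers stop deriving a per-level size. A before/after sketch of the call-site shape, as a fragment rather than standalone code, with the names taken from the hunks in this file:

```
/* before: every caller derived a per-level size and threaded it through */
blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item));
buf = read_tree_block(root, bytenr, blocksize, generation);

/* after: the block size is a property of the root, not of the call */
buf = read_tree_block(root, bytenr, generation);
```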
@@ -1178,7 +1194,7 @@ static struct btrfs_subvolume_writers *btrfs_alloc_subvolume_writers(void) | |||
1178 | if (!writers) | 1194 | if (!writers) |
1179 | return ERR_PTR(-ENOMEM); | 1195 | return ERR_PTR(-ENOMEM); |
1180 | 1196 | ||
1181 | ret = percpu_counter_init(&writers->counter, 0); | 1197 | ret = percpu_counter_init(&writers->counter, 0, GFP_KERNEL); |
1182 | if (ret < 0) { | 1198 | if (ret < 0) { |
1183 | kfree(writers); | 1199 | kfree(writers); |
1184 | return ERR_PTR(ret); | 1200 | return ERR_PTR(ret); |
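This is one of several hunks adapting to the percpu_counter_init()/percpu_ref_init() API change that adds an explicit gfp_t, so callers state their allocation context instead of the library assuming one. A sketch of paired init/teardown under the new signature (kernel context; the helper itself is illustrative, open_ctree() below does the same inline with goto labels):

```
static int init_fs_counters(struct btrfs_fs_info *fs_info)
{
	int ret;

	ret = percpu_counter_init(&fs_info->dirty_metadata_bytes, 0,
				  GFP_KERNEL);
	if (ret)
		return ret;

	ret = percpu_counter_init(&fs_info->delalloc_bytes, 0, GFP_KERNEL);
	if (ret)
		/* unwind the first counter before reporting failure */
		percpu_counter_destroy(&fs_info->dirty_metadata_bytes);

	return ret;
}
```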
@@ -1195,16 +1211,14 @@ btrfs_free_subvolume_writers(struct btrfs_subvolume_writers *writers) | |||
1195 | kfree(writers); | 1211 | kfree(writers); |
1196 | } | 1212 | } |
1197 | 1213 | ||
1198 | static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, | 1214 | static void __setup_root(u32 nodesize, u32 sectorsize, u32 stripesize, |
1199 | u32 stripesize, struct btrfs_root *root, | 1215 | struct btrfs_root *root, struct btrfs_fs_info *fs_info, |
1200 | struct btrfs_fs_info *fs_info, | ||
1201 | u64 objectid) | 1216 | u64 objectid) |
1202 | { | 1217 | { |
1203 | root->node = NULL; | 1218 | root->node = NULL; |
1204 | root->commit_root = NULL; | 1219 | root->commit_root = NULL; |
1205 | root->sectorsize = sectorsize; | 1220 | root->sectorsize = sectorsize; |
1206 | root->nodesize = nodesize; | 1221 | root->nodesize = nodesize; |
1207 | root->leafsize = leafsize; | ||
1208 | root->stripesize = stripesize; | 1222 | root->stripesize = stripesize; |
1209 | root->state = 0; | 1223 | root->state = 0; |
1210 | root->orphan_cleanup_state = 0; | 1224 | root->orphan_cleanup_state = 0; |
@@ -1290,7 +1304,7 @@ struct btrfs_root *btrfs_alloc_dummy_root(void) | |||
1290 | root = btrfs_alloc_root(NULL); | 1304 | root = btrfs_alloc_root(NULL); |
1291 | if (!root) | 1305 | if (!root) |
1292 | return ERR_PTR(-ENOMEM); | 1306 | return ERR_PTR(-ENOMEM); |
1293 | __setup_root(4096, 4096, 4096, 4096, root, NULL, 1); | 1307 | __setup_root(4096, 4096, 4096, root, NULL, 1); |
1294 | set_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state); | 1308 | set_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state); |
1295 | root->alloc_bytenr = 0; | 1309 | root->alloc_bytenr = 0; |
1296 | 1310 | ||
@@ -1313,15 +1327,13 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans, | |||
1313 | if (!root) | 1327 | if (!root) |
1314 | return ERR_PTR(-ENOMEM); | 1328 | return ERR_PTR(-ENOMEM); |
1315 | 1329 | ||
1316 | __setup_root(tree_root->nodesize, tree_root->leafsize, | 1330 | __setup_root(tree_root->nodesize, tree_root->sectorsize, |
1317 | tree_root->sectorsize, tree_root->stripesize, | 1331 | tree_root->stripesize, root, fs_info, objectid); |
1318 | root, fs_info, objectid); | ||
1319 | root->root_key.objectid = objectid; | 1332 | root->root_key.objectid = objectid; |
1320 | root->root_key.type = BTRFS_ROOT_ITEM_KEY; | 1333 | root->root_key.type = BTRFS_ROOT_ITEM_KEY; |
1321 | root->root_key.offset = 0; | 1334 | root->root_key.offset = 0; |
1322 | 1335 | ||
1323 | leaf = btrfs_alloc_free_block(trans, root, root->leafsize, | 1336 | leaf = btrfs_alloc_tree_block(trans, root, 0, objectid, NULL, 0, 0, 0); |
1324 | 0, objectid, NULL, 0, 0, 0); | ||
1325 | if (IS_ERR(leaf)) { | 1337 | if (IS_ERR(leaf)) { |
1326 | ret = PTR_ERR(leaf); | 1338 | ret = PTR_ERR(leaf); |
1327 | leaf = NULL; | 1339 | leaf = NULL; |
@@ -1391,9 +1403,9 @@ static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans, | |||
1391 | if (!root) | 1403 | if (!root) |
1392 | return ERR_PTR(-ENOMEM); | 1404 | return ERR_PTR(-ENOMEM); |
1393 | 1405 | ||
1394 | __setup_root(tree_root->nodesize, tree_root->leafsize, | 1406 | __setup_root(tree_root->nodesize, tree_root->sectorsize, |
1395 | tree_root->sectorsize, tree_root->stripesize, | 1407 | tree_root->stripesize, root, fs_info, |
1396 | root, fs_info, BTRFS_TREE_LOG_OBJECTID); | 1408 | BTRFS_TREE_LOG_OBJECTID); |
1397 | 1409 | ||
1398 | root->root_key.objectid = BTRFS_TREE_LOG_OBJECTID; | 1410 | root->root_key.objectid = BTRFS_TREE_LOG_OBJECTID; |
1399 | root->root_key.type = BTRFS_ROOT_ITEM_KEY; | 1411 | root->root_key.type = BTRFS_ROOT_ITEM_KEY; |
@@ -1408,9 +1420,8 @@ static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans, | |||
1408 | * updated (along with back refs to the log tree). | 1420 | * updated (along with back refs to the log tree). |
1409 | */ | 1421 | */ |
1410 | 1422 | ||
1411 | leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 0, | 1423 | leaf = btrfs_alloc_tree_block(trans, root, 0, BTRFS_TREE_LOG_OBJECTID, |
1412 | BTRFS_TREE_LOG_OBJECTID, NULL, | 1424 | NULL, 0, 0, 0); |
1413 | 0, 0, 0); | ||
1414 | if (IS_ERR(leaf)) { | 1425 | if (IS_ERR(leaf)) { |
1415 | kfree(root); | 1426 | kfree(root); |
1416 | return ERR_CAST(leaf); | 1427 | return ERR_CAST(leaf); |
@@ -1460,7 +1471,7 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans, | |||
1460 | btrfs_set_stack_inode_generation(inode_item, 1); | 1471 | btrfs_set_stack_inode_generation(inode_item, 1); |
1461 | btrfs_set_stack_inode_size(inode_item, 3); | 1472 | btrfs_set_stack_inode_size(inode_item, 3); |
1462 | btrfs_set_stack_inode_nlink(inode_item, 1); | 1473 | btrfs_set_stack_inode_nlink(inode_item, 1); |
1463 | btrfs_set_stack_inode_nbytes(inode_item, root->leafsize); | 1474 | btrfs_set_stack_inode_nbytes(inode_item, root->nodesize); |
1464 | btrfs_set_stack_inode_mode(inode_item, S_IFDIR | 0755); | 1475 | btrfs_set_stack_inode_mode(inode_item, S_IFDIR | 0755); |
1465 | 1476 | ||
1466 | btrfs_set_root_node(&log_root->root_item, log_root->node); | 1477 | btrfs_set_root_node(&log_root->root_item, log_root->node); |
@@ -1480,7 +1491,6 @@ static struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root, | |||
1480 | struct btrfs_fs_info *fs_info = tree_root->fs_info; | 1491 | struct btrfs_fs_info *fs_info = tree_root->fs_info; |
1481 | struct btrfs_path *path; | 1492 | struct btrfs_path *path; |
1482 | u64 generation; | 1493 | u64 generation; |
1483 | u32 blocksize; | ||
1484 | int ret; | 1494 | int ret; |
1485 | 1495 | ||
1486 | path = btrfs_alloc_path(); | 1496 | path = btrfs_alloc_path(); |
@@ -1493,9 +1503,8 @@ static struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root, | |||
1493 | goto alloc_fail; | 1503 | goto alloc_fail; |
1494 | } | 1504 | } |
1495 | 1505 | ||
1496 | __setup_root(tree_root->nodesize, tree_root->leafsize, | 1506 | __setup_root(tree_root->nodesize, tree_root->sectorsize, |
1497 | tree_root->sectorsize, tree_root->stripesize, | 1507 | tree_root->stripesize, root, fs_info, key->objectid); |
1498 | root, fs_info, key->objectid); | ||
1499 | 1508 | ||
1500 | ret = btrfs_find_root(tree_root, key, path, | 1509 | ret = btrfs_find_root(tree_root, key, path, |
1501 | &root->root_item, &root->root_key); | 1510 | &root->root_item, &root->root_key); |
@@ -1506,9 +1515,8 @@ static struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root, | |||
1506 | } | 1515 | } |
1507 | 1516 | ||
1508 | generation = btrfs_root_generation(&root->root_item); | 1517 | generation = btrfs_root_generation(&root->root_item); |
1509 | blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); | ||
1510 | root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), | 1518 | root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), |
1511 | blocksize, generation); | 1519 | generation); |
1512 | if (!root->node) { | 1520 | if (!root->node) { |
1513 | ret = -ENOMEM; | 1521 | ret = -ENOMEM; |
1514 | goto find_fail; | 1522 | goto find_fail; |
@@ -1568,8 +1576,8 @@ int btrfs_init_fs_root(struct btrfs_root *root) | |||
1568 | root->subv_writers = writers; | 1576 | root->subv_writers = writers; |
1569 | 1577 | ||
1570 | btrfs_init_free_ino_ctl(root); | 1578 | btrfs_init_free_ino_ctl(root); |
1571 | spin_lock_init(&root->cache_lock); | 1579 | spin_lock_init(&root->ino_cache_lock); |
1572 | init_waitqueue_head(&root->cache_wait); | 1580 | init_waitqueue_head(&root->ino_cache_wait); |
1573 | 1581 | ||
1574 | ret = get_anon_bdev(&root->anon_dev); | 1582 | ret = get_anon_bdev(&root->anon_dev); |
1575 | if (ret) | 1583 | if (ret) |
@@ -1703,10 +1711,6 @@ static int btrfs_congested_fn(void *congested_data, int bdi_bits) | |||
1703 | return ret; | 1711 | return ret; |
1704 | } | 1712 | } |
1705 | 1713 | ||
1706 | /* | ||
1707 | * If this fails, caller must call bdi_destroy() to get rid of the | ||
1708 | * bdi again. | ||
1709 | */ | ||
1710 | static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi) | 1714 | static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi) |
1711 | { | 1715 | { |
1712 | int err; | 1716 | int err; |
@@ -1729,16 +1733,16 @@ static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi) | |||
1729 | static void end_workqueue_fn(struct btrfs_work *work) | 1733 | static void end_workqueue_fn(struct btrfs_work *work) |
1730 | { | 1734 | { |
1731 | struct bio *bio; | 1735 | struct bio *bio; |
1732 | struct end_io_wq *end_io_wq; | 1736 | struct btrfs_end_io_wq *end_io_wq; |
1733 | int error; | 1737 | int error; |
1734 | 1738 | ||
1735 | end_io_wq = container_of(work, struct end_io_wq, work); | 1739 | end_io_wq = container_of(work, struct btrfs_end_io_wq, work); |
1736 | bio = end_io_wq->bio; | 1740 | bio = end_io_wq->bio; |
1737 | 1741 | ||
1738 | error = end_io_wq->error; | 1742 | error = end_io_wq->error; |
1739 | bio->bi_private = end_io_wq->private; | 1743 | bio->bi_private = end_io_wq->private; |
1740 | bio->bi_end_io = end_io_wq->end_io; | 1744 | bio->bi_end_io = end_io_wq->end_io; |
1741 | kfree(end_io_wq); | 1745 | kmem_cache_free(btrfs_end_io_wq_cache, end_io_wq); |
1742 | bio_endio_nodec(bio, error); | 1746 | bio_endio_nodec(bio, error); |
1743 | } | 1747 | } |
1744 | 1748 | ||
@@ -1767,6 +1771,7 @@ static int cleaner_kthread(void *arg) | |||
1767 | } | 1771 | } |
1768 | 1772 | ||
1769 | btrfs_run_delayed_iputs(root); | 1773 | btrfs_run_delayed_iputs(root); |
1774 | btrfs_delete_unused_bgs(root->fs_info); | ||
1770 | again = btrfs_clean_one_deleted_snapshot(root); | 1775 | again = btrfs_clean_one_deleted_snapshot(root); |
1771 | mutex_unlock(&root->fs_info->cleaner_mutex); | 1776 | mutex_unlock(&root->fs_info->cleaner_mutex); |
1772 | 1777 | ||
@@ -2058,6 +2063,7 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info) | |||
2058 | btrfs_destroy_workqueue(fs_info->endio_workers); | 2063 | btrfs_destroy_workqueue(fs_info->endio_workers); |
2059 | btrfs_destroy_workqueue(fs_info->endio_meta_workers); | 2064 | btrfs_destroy_workqueue(fs_info->endio_meta_workers); |
2060 | btrfs_destroy_workqueue(fs_info->endio_raid56_workers); | 2065 | btrfs_destroy_workqueue(fs_info->endio_raid56_workers); |
2066 | btrfs_destroy_workqueue(fs_info->endio_repair_workers); | ||
2061 | btrfs_destroy_workqueue(fs_info->rmw_workers); | 2067 | btrfs_destroy_workqueue(fs_info->rmw_workers); |
2062 | btrfs_destroy_workqueue(fs_info->endio_meta_write_workers); | 2068 | btrfs_destroy_workqueue(fs_info->endio_meta_write_workers); |
2063 | btrfs_destroy_workqueue(fs_info->endio_write_workers); | 2069 | btrfs_destroy_workqueue(fs_info->endio_write_workers); |
@@ -2138,8 +2144,6 @@ int open_ctree(struct super_block *sb, | |||
2138 | { | 2144 | { |
2139 | u32 sectorsize; | 2145 | u32 sectorsize; |
2140 | u32 nodesize; | 2146 | u32 nodesize; |
2141 | u32 leafsize; | ||
2142 | u32 blocksize; | ||
2143 | u32 stripesize; | 2147 | u32 stripesize; |
2144 | u64 generation; | 2148 | u64 generation; |
2145 | u64 features; | 2149 | u64 features; |
@@ -2183,7 +2187,7 @@ int open_ctree(struct super_block *sb, | |||
2183 | goto fail_srcu; | 2187 | goto fail_srcu; |
2184 | } | 2188 | } |
2185 | 2189 | ||
2186 | ret = percpu_counter_init(&fs_info->dirty_metadata_bytes, 0); | 2190 | ret = percpu_counter_init(&fs_info->dirty_metadata_bytes, 0, GFP_KERNEL); |
2187 | if (ret) { | 2191 | if (ret) { |
2188 | err = ret; | 2192 | err = ret; |
2189 | goto fail_bdi; | 2193 | goto fail_bdi; |
@@ -2191,13 +2195,13 @@ int open_ctree(struct super_block *sb, | |||
2191 | fs_info->dirty_metadata_batch = PAGE_CACHE_SIZE * | 2195 | fs_info->dirty_metadata_batch = PAGE_CACHE_SIZE * |
2192 | (1 + ilog2(nr_cpu_ids)); | 2196 | (1 + ilog2(nr_cpu_ids)); |
2193 | 2197 | ||
2194 | ret = percpu_counter_init(&fs_info->delalloc_bytes, 0); | 2198 | ret = percpu_counter_init(&fs_info->delalloc_bytes, 0, GFP_KERNEL); |
2195 | if (ret) { | 2199 | if (ret) { |
2196 | err = ret; | 2200 | err = ret; |
2197 | goto fail_dirty_metadata_bytes; | 2201 | goto fail_dirty_metadata_bytes; |
2198 | } | 2202 | } |
2199 | 2203 | ||
2200 | ret = percpu_counter_init(&fs_info->bio_counter, 0); | 2204 | ret = percpu_counter_init(&fs_info->bio_counter, 0, GFP_KERNEL); |
2201 | if (ret) { | 2205 | if (ret) { |
2202 | err = ret; | 2206 | err = ret; |
2203 | goto fail_delalloc_bytes; | 2207 | goto fail_delalloc_bytes; |
@@ -2228,6 +2232,7 @@ int open_ctree(struct super_block *sb, | |||
2228 | spin_lock_init(&fs_info->super_lock); | 2232 | spin_lock_init(&fs_info->super_lock); |
2229 | spin_lock_init(&fs_info->qgroup_op_lock); | 2233 | spin_lock_init(&fs_info->qgroup_op_lock); |
2230 | spin_lock_init(&fs_info->buffer_lock); | 2234 | spin_lock_init(&fs_info->buffer_lock); |
2235 | spin_lock_init(&fs_info->unused_bgs_lock); | ||
2231 | rwlock_init(&fs_info->tree_mod_log_lock); | 2236 | rwlock_init(&fs_info->tree_mod_log_lock); |
2232 | mutex_init(&fs_info->reloc_mutex); | 2237 | mutex_init(&fs_info->reloc_mutex); |
2233 | mutex_init(&fs_info->delalloc_root_mutex); | 2238 | mutex_init(&fs_info->delalloc_root_mutex); |
@@ -2237,6 +2242,7 @@ int open_ctree(struct super_block *sb, | |||
2237 | INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots); | 2242 | INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots); |
2238 | INIT_LIST_HEAD(&fs_info->space_info); | 2243 | INIT_LIST_HEAD(&fs_info->space_info); |
2239 | INIT_LIST_HEAD(&fs_info->tree_mod_seq_list); | 2244 | INIT_LIST_HEAD(&fs_info->tree_mod_seq_list); |
2245 | INIT_LIST_HEAD(&fs_info->unused_bgs); | ||
2240 | btrfs_mapping_init(&fs_info->mapping_tree); | 2246 | btrfs_mapping_init(&fs_info->mapping_tree); |
2241 | btrfs_init_block_rsv(&fs_info->global_block_rsv, | 2247 | btrfs_init_block_rsv(&fs_info->global_block_rsv, |
2242 | BTRFS_BLOCK_RSV_GLOBAL); | 2248 | BTRFS_BLOCK_RSV_GLOBAL); |
@@ -2255,7 +2261,7 @@ int open_ctree(struct super_block *sb, | |||
2255 | atomic_set(&fs_info->qgroup_op_seq, 0); | 2261 | atomic_set(&fs_info->qgroup_op_seq, 0); |
2256 | atomic64_set(&fs_info->tree_mod_seq, 0); | 2262 | atomic64_set(&fs_info->tree_mod_seq, 0); |
2257 | fs_info->sb = sb; | 2263 | fs_info->sb = sb; |
2258 | fs_info->max_inline = 8192 * 1024; | 2264 | fs_info->max_inline = BTRFS_DEFAULT_MAX_INLINE; |
2259 | fs_info->metadata_ratio = 0; | 2265 | fs_info->metadata_ratio = 0; |
2260 | fs_info->defrag_inodes = RB_ROOT; | 2266 | fs_info->defrag_inodes = RB_ROOT; |
2261 | fs_info->free_chunk_space = 0; | 2267 | fs_info->free_chunk_space = 0; |
@@ -2384,7 +2390,7 @@ int open_ctree(struct super_block *sb, | |||
2384 | goto fail_alloc; | 2390 | goto fail_alloc; |
2385 | } | 2391 | } |
2386 | 2392 | ||
2387 | __setup_root(4096, 4096, 4096, 4096, tree_root, | 2393 | __setup_root(4096, 4096, 4096, tree_root, |
2388 | fs_info, BTRFS_ROOT_TREE_OBJECTID); | 2394 | fs_info, BTRFS_ROOT_TREE_OBJECTID); |
2389 | 2395 | ||
2390 | invalidate_bdev(fs_devices->latest_bdev); | 2396 | invalidate_bdev(fs_devices->latest_bdev); |
@@ -2464,19 +2470,22 @@ int open_ctree(struct super_block *sb, | |||
2464 | goto fail_alloc; | 2470 | goto fail_alloc; |
2465 | } | 2471 | } |
2466 | 2472 | ||
2467 | if (btrfs_super_leafsize(disk_super) != | 2473 | /* |
2474 | * Leafsize and nodesize were always equal, this is only a sanity check. | ||
2475 | */ | ||
2476 | if (le32_to_cpu(disk_super->__unused_leafsize) != | ||
2468 | btrfs_super_nodesize(disk_super)) { | 2477 | btrfs_super_nodesize(disk_super)) { |
2469 | printk(KERN_ERR "BTRFS: couldn't mount because metadata " | 2478 | printk(KERN_ERR "BTRFS: couldn't mount because metadata " |
2470 | "blocksizes don't match. node %d leaf %d\n", | 2479 | "blocksizes don't match. node %d leaf %d\n", |
2471 | btrfs_super_nodesize(disk_super), | 2480 | btrfs_super_nodesize(disk_super), |
2472 | btrfs_super_leafsize(disk_super)); | 2481 | le32_to_cpu(disk_super->__unused_leafsize)); |
2473 | err = -EINVAL; | 2482 | err = -EINVAL; |
2474 | goto fail_alloc; | 2483 | goto fail_alloc; |
2475 | } | 2484 | } |
2476 | if (btrfs_super_leafsize(disk_super) > BTRFS_MAX_METADATA_BLOCKSIZE) { | 2485 | if (btrfs_super_nodesize(disk_super) > BTRFS_MAX_METADATA_BLOCKSIZE) { |
2477 | printk(KERN_ERR "BTRFS: couldn't mount because metadata " | 2486 | printk(KERN_ERR "BTRFS: couldn't mount because metadata " |
2478 | "blocksize (%d) was too large\n", | 2487 | "blocksize (%d) was too large\n", |
2479 | btrfs_super_leafsize(disk_super)); | 2488 | btrfs_super_nodesize(disk_super)); |
2480 | err = -EINVAL; | 2489 | err = -EINVAL; |
2481 | goto fail_alloc; | 2490 | goto fail_alloc; |
2482 | } | 2491 | } |
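The retired leafsize field is read through le32_to_cpu() rather than a btrfs_super_*() accessor because the in-memory superblock mirrors the on-disk little-endian layout and no accessor is kept for a dead field. A small standalone illustration of the conversion (field names modeled on the diff, values invented):

```
#include <endian.h>
#include <stdint.h>
#include <stdio.h>

/* two-field excerpt of an on-disk superblock, both little-endian */
struct super_excerpt {
	uint32_t nodesize;
	uint32_t __unused_leafsize;	/* kept on disk for compatibility */
};

int main(void)
{
	struct super_excerpt sb = { htole32(16384), htole32(16384) };

	if (le32toh(sb.__unused_leafsize) != le32toh(sb.nodesize)) {
		fprintf(stderr, "metadata blocksizes don't match\n");
		return 1;
	}
	puts("leafsize/nodesize sanity check passed");
	return 0;
}
```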
@@ -2493,17 +2502,16 @@ int open_ctree(struct super_block *sb, | |||
2493 | * flag our filesystem as having big metadata blocks if | 2502 | * flag our filesystem as having big metadata blocks if |
2494 | * they are bigger than the page size | 2503 | * they are bigger than the page size |
2495 | */ | 2504 | */ |
2496 | if (btrfs_super_leafsize(disk_super) > PAGE_CACHE_SIZE) { | 2505 | if (btrfs_super_nodesize(disk_super) > PAGE_CACHE_SIZE) { |
2497 | if (!(features & BTRFS_FEATURE_INCOMPAT_BIG_METADATA)) | 2506 | if (!(features & BTRFS_FEATURE_INCOMPAT_BIG_METADATA)) |
2498 | printk(KERN_INFO "BTRFS: flagging fs with big metadata feature\n"); | 2507 | printk(KERN_INFO "BTRFS: flagging fs with big metadata feature\n"); |
2499 | features |= BTRFS_FEATURE_INCOMPAT_BIG_METADATA; | 2508 | features |= BTRFS_FEATURE_INCOMPAT_BIG_METADATA; |
2500 | } | 2509 | } |
2501 | 2510 | ||
2502 | nodesize = btrfs_super_nodesize(disk_super); | 2511 | nodesize = btrfs_super_nodesize(disk_super); |
2503 | leafsize = btrfs_super_leafsize(disk_super); | ||
2504 | sectorsize = btrfs_super_sectorsize(disk_super); | 2512 | sectorsize = btrfs_super_sectorsize(disk_super); |
2505 | stripesize = btrfs_super_stripesize(disk_super); | 2513 | stripesize = btrfs_super_stripesize(disk_super); |
2506 | fs_info->dirty_metadata_batch = leafsize * (1 + ilog2(nr_cpu_ids)); | 2514 | fs_info->dirty_metadata_batch = nodesize * (1 + ilog2(nr_cpu_ids)); |
2507 | fs_info->delalloc_batch = sectorsize * 512 * (1 + ilog2(nr_cpu_ids)); | 2515 | fs_info->delalloc_batch = sectorsize * 512 * (1 + ilog2(nr_cpu_ids)); |
2508 | 2516 | ||
2509 | /* | 2517 | /* |
@@ -2511,7 +2519,7 @@ int open_ctree(struct super_block *sb, | |||
2511 | * extent buffers for the same range. It leads to corruptions | 2519 | * extent buffers for the same range. It leads to corruptions |
2512 | */ | 2520 | */ |
2513 | if ((features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) && | 2521 | if ((features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) && |
2514 | (sectorsize != leafsize)) { | 2522 | (sectorsize != nodesize)) { |
2515 | printk(KERN_WARNING "BTRFS: unequal leaf/node/sector sizes " | 2523 | printk(KERN_WARNING "BTRFS: unequal leaf/node/sector sizes " |
2516 | "are not allowed for mixed block groups on %s\n", | 2524 | "are not allowed for mixed block groups on %s\n", |
2517 | sb->s_id); | 2525 | sb->s_id); |
@@ -2574,6 +2582,8 @@ int open_ctree(struct super_block *sb, | |||
2574 | btrfs_alloc_workqueue("endio-meta-write", flags, max_active, 2); | 2582 | btrfs_alloc_workqueue("endio-meta-write", flags, max_active, 2); |
2575 | fs_info->endio_raid56_workers = | 2583 | fs_info->endio_raid56_workers = |
2576 | btrfs_alloc_workqueue("endio-raid56", flags, max_active, 4); | 2584 | btrfs_alloc_workqueue("endio-raid56", flags, max_active, 4); |
2585 | fs_info->endio_repair_workers = | ||
2586 | btrfs_alloc_workqueue("endio-repair", flags, 1, 0); | ||
2577 | fs_info->rmw_workers = | 2587 | fs_info->rmw_workers = |
2578 | btrfs_alloc_workqueue("rmw", flags, max_active, 2); | 2588 | btrfs_alloc_workqueue("rmw", flags, max_active, 2); |
2579 | fs_info->endio_write_workers = | 2589 | fs_info->endio_write_workers = |
@@ -2595,11 +2605,12 @@ int open_ctree(struct super_block *sb, | |||
2595 | fs_info->submit_workers && fs_info->flush_workers && | 2605 | fs_info->submit_workers && fs_info->flush_workers && |
2596 | fs_info->endio_workers && fs_info->endio_meta_workers && | 2606 | fs_info->endio_workers && fs_info->endio_meta_workers && |
2597 | fs_info->endio_meta_write_workers && | 2607 | fs_info->endio_meta_write_workers && |
2608 | fs_info->endio_repair_workers && | ||
2598 | fs_info->endio_write_workers && fs_info->endio_raid56_workers && | 2609 | fs_info->endio_write_workers && fs_info->endio_raid56_workers && |
2599 | fs_info->endio_freespace_worker && fs_info->rmw_workers && | 2610 | fs_info->endio_freespace_worker && fs_info->rmw_workers && |
2600 | fs_info->caching_workers && fs_info->readahead_workers && | 2611 | fs_info->caching_workers && fs_info->readahead_workers && |
2601 | fs_info->fixup_workers && fs_info->delayed_workers && | 2612 | fs_info->fixup_workers && fs_info->delayed_workers && |
2602 | fs_info->fixup_workers && fs_info->extent_workers && | 2613 | fs_info->extent_workers && |
2603 | fs_info->qgroup_rescan_workers)) { | 2614 | fs_info->qgroup_rescan_workers)) { |
2604 | err = -ENOMEM; | 2615 | err = -ENOMEM; |
2605 | goto fail_sb_buffer; | 2616 | goto fail_sb_buffer; |
@@ -2610,7 +2621,6 @@ int open_ctree(struct super_block *sb, | |||
2610 | 4 * 1024 * 1024 / PAGE_CACHE_SIZE); | 2621 | 4 * 1024 * 1024 / PAGE_CACHE_SIZE); |
2611 | 2622 | ||
2612 | tree_root->nodesize = nodesize; | 2623 | tree_root->nodesize = nodesize; |
2613 | tree_root->leafsize = leafsize; | ||
2614 | tree_root->sectorsize = sectorsize; | 2624 | tree_root->sectorsize = sectorsize; |
2615 | tree_root->stripesize = stripesize; | 2625 | tree_root->stripesize = stripesize; |
2616 | 2626 | ||
@@ -2637,16 +2647,14 @@ int open_ctree(struct super_block *sb, | |||
2637 | goto fail_sb_buffer; | 2647 | goto fail_sb_buffer; |
2638 | } | 2648 | } |
2639 | 2649 | ||
2640 | blocksize = btrfs_level_size(tree_root, | ||
2641 | btrfs_super_chunk_root_level(disk_super)); | ||
2642 | generation = btrfs_super_chunk_root_generation(disk_super); | 2650 | generation = btrfs_super_chunk_root_generation(disk_super); |
2643 | 2651 | ||
2644 | __setup_root(nodesize, leafsize, sectorsize, stripesize, | 2652 | __setup_root(nodesize, sectorsize, stripesize, chunk_root, |
2645 | chunk_root, fs_info, BTRFS_CHUNK_TREE_OBJECTID); | 2653 | fs_info, BTRFS_CHUNK_TREE_OBJECTID); |
2646 | 2654 | ||
2647 | chunk_root->node = read_tree_block(chunk_root, | 2655 | chunk_root->node = read_tree_block(chunk_root, |
2648 | btrfs_super_chunk_root(disk_super), | 2656 | btrfs_super_chunk_root(disk_super), |
2649 | blocksize, generation); | 2657 | generation); |
2650 | if (!chunk_root->node || | 2658 | if (!chunk_root->node || |
2651 | !test_bit(EXTENT_BUFFER_UPTODATE, &chunk_root->node->bflags)) { | 2659 | !test_bit(EXTENT_BUFFER_UPTODATE, &chunk_root->node->bflags)) { |
2652 | printk(KERN_WARNING "BTRFS: failed to read chunk root on %s\n", | 2660 | printk(KERN_WARNING "BTRFS: failed to read chunk root on %s\n", |
@@ -2679,13 +2687,11 @@ int open_ctree(struct super_block *sb, | |||
2679 | } | 2687 | } |
2680 | 2688 | ||
2681 | retry_root_backup: | 2689 | retry_root_backup: |
2682 | blocksize = btrfs_level_size(tree_root, | ||
2683 | btrfs_super_root_level(disk_super)); | ||
2684 | generation = btrfs_super_generation(disk_super); | 2690 | generation = btrfs_super_generation(disk_super); |
2685 | 2691 | ||
2686 | tree_root->node = read_tree_block(tree_root, | 2692 | tree_root->node = read_tree_block(tree_root, |
2687 | btrfs_super_root(disk_super), | 2693 | btrfs_super_root(disk_super), |
2688 | blocksize, generation); | 2694 | generation); |
2689 | if (!tree_root->node || | 2695 | if (!tree_root->node || |
2690 | !test_bit(EXTENT_BUFFER_UPTODATE, &tree_root->node->bflags)) { | 2696 | !test_bit(EXTENT_BUFFER_UPTODATE, &tree_root->node->bflags)) { |
2691 | printk(KERN_WARNING "BTRFS: failed to read tree root on %s\n", | 2697 | printk(KERN_WARNING "BTRFS: failed to read tree root on %s\n", |
@@ -2854,9 +2860,6 @@ retry_root_backup: | |||
2854 | err = -EIO; | 2860 | err = -EIO; |
2855 | goto fail_qgroup; | 2861 | goto fail_qgroup; |
2856 | } | 2862 | } |
2857 | blocksize = | ||
2858 | btrfs_level_size(tree_root, | ||
2859 | btrfs_super_log_root_level(disk_super)); | ||
2860 | 2863 | ||
2861 | log_tree_root = btrfs_alloc_root(fs_info); | 2864 | log_tree_root = btrfs_alloc_root(fs_info); |
2862 | if (!log_tree_root) { | 2865 | if (!log_tree_root) { |
@@ -2864,11 +2867,10 @@ retry_root_backup: | |||
2864 | goto fail_qgroup; | 2867 | goto fail_qgroup; |
2865 | } | 2868 | } |
2866 | 2869 | ||
2867 | __setup_root(nodesize, leafsize, sectorsize, stripesize, | 2870 | __setup_root(nodesize, sectorsize, stripesize, |
2868 | log_tree_root, fs_info, BTRFS_TREE_LOG_OBJECTID); | 2871 | log_tree_root, fs_info, BTRFS_TREE_LOG_OBJECTID); |
2869 | 2872 | ||
2870 | log_tree_root->node = read_tree_block(tree_root, bytenr, | 2873 | log_tree_root->node = read_tree_block(tree_root, bytenr, |
2871 | blocksize, | ||
2872 | generation + 1); | 2874 | generation + 1); |
2873 | if (!log_tree_root->node || | 2875 | if (!log_tree_root->node || |
2874 | !extent_buffer_uptodate(log_tree_root->node)) { | 2876 | !extent_buffer_uptodate(log_tree_root->node)) { |
@@ -2975,6 +2977,8 @@ retry_root_backup: | |||
2975 | fs_info->update_uuid_tree_gen = 1; | 2977 | fs_info->update_uuid_tree_gen = 1; |
2976 | } | 2978 | } |
2977 | 2979 | ||
2980 | fs_info->open = 1; | ||
2981 | |||
2978 | return 0; | 2982 | return 0; |
2979 | 2983 | ||
2980 | fail_qgroup: | 2984 | fail_qgroup: |
@@ -3134,7 +3138,8 @@ static int write_dev_supers(struct btrfs_device *device, | |||
3134 | 3138 | ||
3135 | for (i = 0; i < max_mirrors; i++) { | 3139 | for (i = 0; i < max_mirrors; i++) { |
3136 | bytenr = btrfs_sb_offset(i); | 3140 | bytenr = btrfs_sb_offset(i); |
3137 | if (bytenr + BTRFS_SUPER_INFO_SIZE >= device->total_bytes) | 3141 | if (bytenr + BTRFS_SUPER_INFO_SIZE >= |
3142 | device->commit_total_bytes) | ||
3138 | break; | 3143 | break; |
3139 | 3144 | ||
3140 | if (wait) { | 3145 | if (wait) { |
@@ -3450,8 +3455,10 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors) | |||
3450 | btrfs_set_stack_device_generation(dev_item, 0); | 3455 | btrfs_set_stack_device_generation(dev_item, 0); |
3451 | btrfs_set_stack_device_type(dev_item, dev->type); | 3456 | btrfs_set_stack_device_type(dev_item, dev->type); |
3452 | btrfs_set_stack_device_id(dev_item, dev->devid); | 3457 | btrfs_set_stack_device_id(dev_item, dev->devid); |
3453 | btrfs_set_stack_device_total_bytes(dev_item, dev->total_bytes); | 3458 | btrfs_set_stack_device_total_bytes(dev_item, |
3454 | btrfs_set_stack_device_bytes_used(dev_item, dev->bytes_used); | 3459 | dev->commit_total_bytes); |
3460 | btrfs_set_stack_device_bytes_used(dev_item, | ||
3461 | dev->commit_bytes_used); | ||
3455 | btrfs_set_stack_device_io_align(dev_item, dev->io_align); | 3462 | btrfs_set_stack_device_io_align(dev_item, dev->io_align); |
3456 | btrfs_set_stack_device_io_width(dev_item, dev->io_width); | 3463 | btrfs_set_stack_device_io_width(dev_item, dev->io_width); |
3457 | btrfs_set_stack_device_sector_size(dev_item, dev->sector_size); | 3464 | btrfs_set_stack_device_sector_size(dev_item, dev->sector_size); |
@@ -3526,7 +3533,7 @@ void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info, | |||
3526 | 3533 | ||
3527 | static void free_fs_root(struct btrfs_root *root) | 3534 | static void free_fs_root(struct btrfs_root *root) |
3528 | { | 3535 | { |
3529 | iput(root->cache_inode); | 3536 | iput(root->ino_cache_inode); |
3530 | WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree)); | 3537 | WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree)); |
3531 | btrfs_free_block_rsv(root, root->orphan_block_rsv); | 3538 | btrfs_free_block_rsv(root, root->orphan_block_rsv); |
3532 | root->orphan_block_rsv = NULL; | 3539 | root->orphan_block_rsv = NULL; |
@@ -3617,7 +3624,7 @@ int btrfs_commit_super(struct btrfs_root *root) | |||
3617 | return btrfs_commit_transaction(trans, root); | 3624 | return btrfs_commit_transaction(trans, root); |
3618 | } | 3625 | } |
3619 | 3626 | ||
3620 | int close_ctree(struct btrfs_root *root) | 3627 | void close_ctree(struct btrfs_root *root) |
3621 | { | 3628 | { |
3622 | struct btrfs_fs_info *fs_info = root->fs_info; | 3629 | struct btrfs_fs_info *fs_info = root->fs_info; |
3623 | int ret; | 3630 | int ret; |
@@ -3683,6 +3690,7 @@ int close_ctree(struct btrfs_root *root) | |||
3683 | invalidate_inode_pages2(fs_info->btree_inode->i_mapping); | 3690 | invalidate_inode_pages2(fs_info->btree_inode->i_mapping); |
3684 | btrfs_stop_all_workers(fs_info); | 3691 | btrfs_stop_all_workers(fs_info); |
3685 | 3692 | ||
3693 | fs_info->open = 0; | ||
3686 | free_root_pointers(fs_info, 1); | 3694 | free_root_pointers(fs_info, 1); |
3687 | 3695 | ||
3688 | iput(fs_info->btree_inode); | 3696 | iput(fs_info->btree_inode); |
@@ -3705,8 +3713,6 @@ int close_ctree(struct btrfs_root *root) | |||
3705 | 3713 | ||
3706 | btrfs_free_block_rsv(root, root->orphan_block_rsv); | 3714 | btrfs_free_block_rsv(root, root->orphan_block_rsv); |
3707 | root->orphan_block_rsv = NULL; | 3715 | root->orphan_block_rsv = NULL; |
3708 | |||
3709 | return 0; | ||
3710 | } | 3716 | } |
3711 | 3717 | ||
3712 | int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid, | 3718 | int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid, |
@@ -3808,10 +3814,73 @@ int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid) | |||
3808 | static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, | 3814 | static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, |
3809 | int read_only) | 3815 | int read_only) |
3810 | { | 3816 | { |
3817 | struct btrfs_super_block *sb = fs_info->super_copy; | ||
3818 | int ret = 0; | ||
3819 | |||
3820 | if (sb->root_level > BTRFS_MAX_LEVEL) { | ||
3821 | printk(KERN_ERR "BTRFS: tree_root level too big: %d > %d\n", | ||
3822 | sb->root_level, BTRFS_MAX_LEVEL); | ||
3823 | ret = -EINVAL; | ||
3824 | } | ||
3825 | if (sb->chunk_root_level > BTRFS_MAX_LEVEL) { | ||
3826 | printk(KERN_ERR "BTRFS: chunk_root level too big: %d > %d\n", | ||
3827 | sb->chunk_root_level, BTRFS_MAX_LEVEL); | ||
3828 | ret = -EINVAL; | ||
3829 | } | ||
3830 | if (sb->log_root_level > BTRFS_MAX_LEVEL) { | ||
3831 | printk(KERN_ERR "BTRFS: log_root level too big: %d > %d\n", | ||
3832 | sb->log_root_level, BTRFS_MAX_LEVEL); | ||
3833 | ret = -EINVAL; | ||
3834 | } | ||
3835 | |||
3811 | /* | 3836 | /* |
3812 | * Placeholder for checks | 3837 | * The common minimum, we don't know if we can trust the nodesize/sectorsize |
3838 | * items yet, they'll be verified later. Issue just a warning. | ||
3813 | */ | 3839 | */ |
3814 | return 0; | 3840 | if (!IS_ALIGNED(sb->root, 4096)) |
3841 | printk(KERN_WARNING "BTRFS: tree_root block unaligned: %llu\n", | ||
3842 | sb->root); | ||
3843 | if (!IS_ALIGNED(sb->chunk_root, 4096)) | ||
3844 | printk(KERN_WARNING "BTRFS: chunk_root block unaligned: %llu\n", | ||
3845 | sb->chunk_root); | ||
3846 | if (!IS_ALIGNED(sb->log_root, 4096)) | ||
3847 | printk(KERN_WARNING "BTRFS: log_root block unaligned: %llu\n", | ||
3848 | sb->log_root); | ||
3849 | |||
3850 | if (memcmp(fs_info->fsid, sb->dev_item.fsid, BTRFS_UUID_SIZE) != 0) { | ||
3851 | printk(KERN_ERR "BTRFS: dev_item UUID does not match fsid: %pU != %pU\n", | ||
3852 | fs_info->fsid, sb->dev_item.fsid); | ||
3853 | ret = -EINVAL; | ||
3854 | } | ||
3855 | |||
3856 | /* | ||
3857 | * Hint to catch really bogus numbers, bitflips or so, more exact checks are | ||
3858 | * done later | ||
3859 | */ | ||
3860 | if (sb->num_devices > (1UL << 31)) | ||
3861 | printk(KERN_WARNING "BTRFS: suspicious number of devices: %llu\n", | ||
3862 | sb->num_devices); | ||
3863 | |||
3864 | if (sb->bytenr != BTRFS_SUPER_INFO_OFFSET) { | ||
3865 | printk(KERN_ERR "BTRFS: super offset mismatch %llu != %u\n", | ||
3866 | sb->bytenr, BTRFS_SUPER_INFO_OFFSET); | ||
3867 | ret = -EINVAL; | ||
3868 | } | ||
3869 | |||
3870 | /* | ||
3871 | * The generation is a global counter, we'll trust it more than the others | ||
3872 | * but it's still possible that it's the one that's wrong. | ||
3873 | */ | ||
3874 | if (sb->generation < sb->chunk_root_generation) | ||
3875 | printk(KERN_WARNING | ||
3876 | "BTRFS: suspicious: generation < chunk_root_generation: %llu < %llu\n", | ||
3877 | sb->generation, sb->chunk_root_generation); | ||
3878 | if (sb->generation < sb->cache_generation && sb->cache_generation != (u64)-1) | ||
3879 | printk(KERN_WARNING | ||
3880 | "BTRFS: suspicious: generation < cache_generation: %llu < %llu\n", | ||
3881 | sb->generation, sb->cache_generation); | ||
3882 | |||
3883 | return ret; | ||
3815 | } | 3884 | } |
3816 | 3885 | ||
3817 | static void btrfs_error_commit_super(struct btrfs_root *root) | 3886 | static void btrfs_error_commit_super(struct btrfs_root *root) |
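The filled-in btrfs_check_super_valid() applies a two-tier policy: hard failures (impossible tree level, fsid mismatch, wrong super offset) set -EINVAL, while fields that cannot be fully judged before nodesize is trusted only get a warning. A compact standalone analogue of that policy (constants and sample values are illustrative):

```
#include <stdint.h>
#include <stdio.h>

#define MAX_LEVEL	8
#define ALIGNED(x, a)	(((x) & ((uint64_t)(a) - 1)) == 0)

int main(void)
{
	uint8_t  root_level  = 1;		/* invented sample values */
	uint64_t root_bytenr = 30408704;
	int ret = 0;

	/* hard error: a tree deeper than the format allows is corruption */
	if (root_level > MAX_LEVEL) {
		fprintf(stderr, "tree_root level too big: %u\n", root_level);
		ret = -1;
	}

	/* soft warning: alignment is only a hint until nodesize is verified */
	if (!ALIGNED(root_bytenr, 4096))
		fprintf(stderr, "tree_root block unaligned: %llu\n",
			(unsigned long long)root_bytenr);

	return ret;
}
```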
@@ -4003,9 +4072,8 @@ static int btrfs_destroy_marked_extents(struct btrfs_root *root, | |||
4003 | 4072 | ||
4004 | clear_extent_bits(dirty_pages, start, end, mark, GFP_NOFS); | 4073 | clear_extent_bits(dirty_pages, start, end, mark, GFP_NOFS); |
4005 | while (start <= end) { | 4074 | while (start <= end) { |
4006 | eb = btrfs_find_tree_block(root, start, | 4075 | eb = btrfs_find_tree_block(root, start); |
4007 | root->leafsize); | 4076 | start += root->nodesize; |
4008 | start += root->leafsize; | ||
4009 | if (!eb) | 4077 | if (!eb) |
4010 | continue; | 4078 | continue; |
4011 | wait_on_extent_buffer_writeback(eb); | 4079 | wait_on_extent_buffer_writeback(eb); |
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 23ce3ceba0a9..414651821fb3 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h | |||
@@ -25,11 +25,12 @@ | |||
25 | #define BTRFS_SUPER_MIRROR_MAX 3 | 25 | #define BTRFS_SUPER_MIRROR_MAX 3 |
26 | #define BTRFS_SUPER_MIRROR_SHIFT 12 | 26 | #define BTRFS_SUPER_MIRROR_SHIFT 12 |
27 | 27 | ||
28 | enum { | 28 | enum btrfs_wq_endio_type { |
29 | BTRFS_WQ_ENDIO_DATA = 0, | 29 | BTRFS_WQ_ENDIO_DATA = 0, |
30 | BTRFS_WQ_ENDIO_METADATA = 1, | 30 | BTRFS_WQ_ENDIO_METADATA = 1, |
31 | BTRFS_WQ_ENDIO_FREE_SPACE = 2, | 31 | BTRFS_WQ_ENDIO_FREE_SPACE = 2, |
32 | BTRFS_WQ_ENDIO_RAID56 = 3, | 32 | BTRFS_WQ_ENDIO_RAID56 = 3, |
33 | BTRFS_WQ_ENDIO_DIO_REPAIR = 4, | ||
33 | }; | 34 | }; |
34 | 35 | ||
35 | static inline u64 btrfs_sb_offset(int mirror) | 36 | static inline u64 btrfs_sb_offset(int mirror) |
@@ -44,9 +45,8 @@ struct btrfs_device; | |||
44 | struct btrfs_fs_devices; | 45 | struct btrfs_fs_devices; |
45 | 46 | ||
46 | struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, | 47 | struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, |
47 | u32 blocksize, u64 parent_transid); | 48 | u64 parent_transid); |
48 | int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize, | 49 | void readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize); |
49 | u64 parent_transid); | ||
50 | int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr, u32 blocksize, | 50 | int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr, u32 blocksize, |
51 | int mirror_num, struct extent_buffer **eb); | 51 | int mirror_num, struct extent_buffer **eb); |
52 | struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, | 52 | struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, |
@@ -56,13 +56,13 @@ void clean_tree_block(struct btrfs_trans_handle *trans, | |||
56 | int open_ctree(struct super_block *sb, | 56 | int open_ctree(struct super_block *sb, |
57 | struct btrfs_fs_devices *fs_devices, | 57 | struct btrfs_fs_devices *fs_devices, |
58 | char *options); | 58 | char *options); |
59 | int close_ctree(struct btrfs_root *root); | 59 | void close_ctree(struct btrfs_root *root); |
60 | int write_ctree_super(struct btrfs_trans_handle *trans, | 60 | int write_ctree_super(struct btrfs_trans_handle *trans, |
61 | struct btrfs_root *root, int max_mirrors); | 61 | struct btrfs_root *root, int max_mirrors); |
62 | struct buffer_head *btrfs_read_dev_super(struct block_device *bdev); | 62 | struct buffer_head *btrfs_read_dev_super(struct block_device *bdev); |
63 | int btrfs_commit_super(struct btrfs_root *root); | 63 | int btrfs_commit_super(struct btrfs_root *root); |
64 | struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, | 64 | struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, |
65 | u64 bytenr, u32 blocksize); | 65 | u64 bytenr); |
66 | struct btrfs_root *btrfs_read_fs_root(struct btrfs_root *tree_root, | 66 | struct btrfs_root *btrfs_read_fs_root(struct btrfs_root *tree_root, |
67 | struct btrfs_key *location); | 67 | struct btrfs_key *location); |
68 | int btrfs_init_fs_root(struct btrfs_root *root); | 68 | int btrfs_init_fs_root(struct btrfs_root *root); |
@@ -119,7 +119,7 @@ int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid); | |||
119 | u32 btrfs_csum_data(char *data, u32 seed, size_t len); | 119 | u32 btrfs_csum_data(char *data, u32 seed, size_t len); |
120 | void btrfs_csum_final(u32 crc, char *result); | 120 | void btrfs_csum_final(u32 crc, char *result); |
121 | int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, | 121 | int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, |
122 | int metadata); | 122 | enum btrfs_wq_endio_type metadata); |
123 | int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, | 123 | int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, |
124 | int rw, struct bio *bio, int mirror_num, | 124 | int rw, struct bio *bio, int mirror_num, |
125 | unsigned long bio_flags, u64 bio_offset, | 125 | unsigned long bio_flags, u64 bio_offset, |
@@ -141,6 +141,8 @@ int btree_lock_page_hook(struct page *page, void *data, | |||
141 | void (*flush_fn)(void *)); | 141 | void (*flush_fn)(void *)); |
142 | int btrfs_calc_num_tolerated_disk_barrier_failures( | 142 | int btrfs_calc_num_tolerated_disk_barrier_failures( |
143 | struct btrfs_fs_info *fs_info); | 143 | struct btrfs_fs_info *fs_info); |
144 | int __init btrfs_end_io_wq_init(void); | ||
145 | void btrfs_end_io_wq_exit(void); | ||
144 | 146 | ||
145 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | 147 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
146 | void btrfs_init_lockdep(void); | 148 | void btrfs_init_lockdep(void); |
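The new btrfs_end_io_wq_init()/btrfs_end_io_wq_exit() pair declared here manages the btrfs_end_io_wq_cache slab that end_workqueue_bio() and end_workqueue_fn() allocate from in disk-io.c. A hedged sketch of the likely shape (kernel context; the slab flags and exact definition are assumptions, only the names come from this header):

```
static struct kmem_cache *btrfs_end_io_wq_cache;

int __init btrfs_end_io_wq_init(void)
{
	btrfs_end_io_wq_cache = kmem_cache_create("btrfs_end_io_wq",
			sizeof(struct btrfs_end_io_wq), 0,
			SLAB_MEM_SPREAD, NULL);
	if (!btrfs_end_io_wq_cache)
		return -ENOMEM;
	return 0;
}

void btrfs_end_io_wq_exit(void)
{
	kmem_cache_destroy(btrfs_end_io_wq_cache);
}
```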
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c index 41422a3de8ed..37d164540c3a 100644 --- a/fs/btrfs/export.c +++ b/fs/btrfs/export.c | |||
@@ -70,7 +70,7 @@ static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid, | |||
70 | return ERR_PTR(-ESTALE); | 70 | return ERR_PTR(-ESTALE); |
71 | 71 | ||
72 | key.objectid = root_objectid; | 72 | key.objectid = root_objectid; |
73 | btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); | 73 | key.type = BTRFS_ROOT_ITEM_KEY; |
74 | key.offset = (u64)-1; | 74 | key.offset = (u64)-1; |
75 | 75 | ||
76 | index = srcu_read_lock(&fs_info->subvol_srcu); | 76 | index = srcu_read_lock(&fs_info->subvol_srcu); |
@@ -82,7 +82,7 @@ static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid, | |||
82 | } | 82 | } |
83 | 83 | ||
84 | key.objectid = objectid; | 84 | key.objectid = objectid; |
85 | btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); | 85 | key.type = BTRFS_INODE_ITEM_KEY; |
86 | key.offset = 0; | 86 | key.offset = 0; |
87 | 87 | ||
88 | inode = btrfs_iget(sb, &key, root, NULL); | 88 | inode = btrfs_iget(sb, &key, root, NULL); |
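The btrfs_set_key_type()/btrfs_key_type() helpers were one-line wrappers over a plain CPU-order u8 member, so call sites now assign the field directly. A trivial standalone rendering of the new style (the key layout mirrors the CPU-order struct btrfs_key; 132 is the real BTRFS_ROOT_ITEM_KEY value):

```
#include <stdint.h>
#include <stdio.h>

#define BTRFS_ROOT_ITEM_KEY 132

struct btrfs_key {
	uint64_t objectid;
	uint8_t  type;
	uint64_t offset;
};

int main(void)
{
	struct btrfs_key key;

	key.objectid = 5;
	key.type = BTRFS_ROOT_ITEM_KEY;	/* plain store, no accessor */
	key.offset = (uint64_t)-1;

	printf("key (%llu %u %llu)\n", (unsigned long long)key.objectid,
	       key.type, (unsigned long long)key.offset);
	return 0;
}
```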
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 102ed3143976..d56589571012 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -491,7 +491,7 @@ next: | |||
491 | key.objectid); | 491 | key.objectid); |
492 | if (key.type == BTRFS_METADATA_ITEM_KEY) | 492 | if (key.type == BTRFS_METADATA_ITEM_KEY) |
493 | last = key.objectid + | 493 | last = key.objectid + |
494 | fs_info->tree_root->leafsize; | 494 | fs_info->tree_root->nodesize; |
495 | else | 495 | else |
496 | last = key.objectid + key.offset; | 496 | last = key.objectid + key.offset; |
497 | 497 | ||
@@ -552,7 +552,8 @@ static int cache_block_group(struct btrfs_block_group_cache *cache, | |||
552 | caching_ctl->block_group = cache; | 552 | caching_ctl->block_group = cache; |
553 | caching_ctl->progress = cache->key.objectid; | 553 | caching_ctl->progress = cache->key.objectid; |
554 | atomic_set(&caching_ctl->count, 1); | 554 | atomic_set(&caching_ctl->count, 1); |
555 | btrfs_init_work(&caching_ctl->work, caching_thread, NULL, NULL); | 555 | btrfs_init_work(&caching_ctl->work, btrfs_cache_helper, |
556 | caching_thread, NULL, NULL); | ||
556 | 557 | ||
557 | spin_lock(&cache->lock); | 558 | spin_lock(&cache->lock); |
558 | /* | 559 | /* |
@@ -764,7 +765,7 @@ int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, | |||
764 | * different | 765 | * different |
765 | */ | 766 | */ |
766 | if (metadata && !btrfs_fs_incompat(root->fs_info, SKINNY_METADATA)) { | 767 | if (metadata && !btrfs_fs_incompat(root->fs_info, SKINNY_METADATA)) { |
767 | offset = root->leafsize; | 768 | offset = root->nodesize; |
768 | metadata = 0; | 769 | metadata = 0; |
769 | } | 770 | } |
770 | 771 | ||
@@ -798,13 +799,13 @@ again: | |||
798 | path->slots[0]); | 799 | path->slots[0]); |
799 | if (key.objectid == bytenr && | 800 | if (key.objectid == bytenr && |
800 | key.type == BTRFS_EXTENT_ITEM_KEY && | 801 | key.type == BTRFS_EXTENT_ITEM_KEY && |
801 | key.offset == root->leafsize) | 802 | key.offset == root->nodesize) |
802 | ret = 0; | 803 | ret = 0; |
803 | } | 804 | } |
804 | if (ret) { | 805 | if (ret) { |
805 | key.objectid = bytenr; | 806 | key.objectid = bytenr; |
806 | key.type = BTRFS_EXTENT_ITEM_KEY; | 807 | key.type = BTRFS_EXTENT_ITEM_KEY; |
807 | key.offset = root->leafsize; | 808 | key.offset = root->nodesize; |
808 | btrfs_release_path(path); | 809 | btrfs_release_path(path); |
809 | goto again; | 810 | goto again; |
810 | } | 811 | } |
@@ -2650,7 +2651,7 @@ int btrfs_check_space_for_delayed_refs(struct btrfs_trans_handle *trans, | |||
2650 | num_bytes = btrfs_calc_trans_metadata_size(root, 1); | 2651 | num_bytes = btrfs_calc_trans_metadata_size(root, 1); |
2651 | num_heads = heads_to_leaves(root, num_heads); | 2652 | num_heads = heads_to_leaves(root, num_heads); |
2652 | if (num_heads > 1) | 2653 | if (num_heads > 1) |
2653 | num_bytes += (num_heads - 1) * root->leafsize; | 2654 | num_bytes += (num_heads - 1) * root->nodesize; |
2654 | num_bytes <<= 1; | 2655 | num_bytes <<= 1; |
2655 | global_rsv = &root->fs_info->global_block_rsv; | 2656 | global_rsv = &root->fs_info->global_block_rsv; |
2656 | 2657 | ||
@@ -2749,8 +2750,8 @@ int btrfs_async_run_delayed_refs(struct btrfs_root *root, | |||
2749 | async->sync = 0; | 2750 | async->sync = 0; |
2750 | init_completion(&async->wait); | 2751 | init_completion(&async->wait); |
2751 | 2752 | ||
2752 | btrfs_init_work(&async->work, delayed_ref_async_start, | 2753 | btrfs_init_work(&async->work, btrfs_extent_refs_helper, |
2753 | NULL, NULL); | 2754 | delayed_ref_async_start, NULL, NULL); |
2754 | 2755 | ||
2755 | btrfs_queue_work(root->fs_info->extent_workers, &async->work); | 2756 | btrfs_queue_work(root->fs_info->extent_workers, &async->work); |
2756 | 2757 | ||
@@ -3072,10 +3073,10 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, | |||
3072 | int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *, | 3073 | int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *, |
3073 | u64, u64, u64, u64, u64, u64, int); | 3074 | u64, u64, u64, u64, u64, u64, int); |
3074 | 3075 | ||
3075 | #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS | 3076 | |
3076 | if (unlikely(test_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state))) | 3077 | if (btrfs_test_is_dummy_root(root)) |
3077 | return 0; | 3078 | return 0; |
3078 | #endif | 3079 | |
3079 | ref_root = btrfs_header_owner(buf); | 3080 | ref_root = btrfs_header_owner(buf); |
3080 | nritems = btrfs_header_nritems(buf); | 3081 | nritems = btrfs_header_nritems(buf); |
3081 | level = btrfs_header_level(buf); | 3082 | level = btrfs_header_level(buf); |
@@ -3096,7 +3097,7 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, | |||
3096 | for (i = 0; i < nritems; i++) { | 3097 | for (i = 0; i < nritems; i++) { |
3097 | if (level == 0) { | 3098 | if (level == 0) { |
3098 | btrfs_item_key_to_cpu(buf, &key, i); | 3099 | btrfs_item_key_to_cpu(buf, &key, i); |
3099 | if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) | 3100 | if (key.type != BTRFS_EXTENT_DATA_KEY) |
3100 | continue; | 3101 | continue; |
3101 | fi = btrfs_item_ptr(buf, i, | 3102 | fi = btrfs_item_ptr(buf, i, |
3102 | struct btrfs_file_extent_item); | 3103 | struct btrfs_file_extent_item); |
@@ -3116,7 +3117,7 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, | |||
3116 | goto fail; | 3117 | goto fail; |
3117 | } else { | 3118 | } else { |
3118 | bytenr = btrfs_node_blockptr(buf, i); | 3119 | bytenr = btrfs_node_blockptr(buf, i); |
3119 | num_bytes = btrfs_level_size(root, level - 1); | 3120 | num_bytes = root->nodesize; |
3120 | ret = process_func(trans, root, bytenr, num_bytes, | 3121 | ret = process_func(trans, root, bytenr, num_bytes, |
3121 | parent, ref_root, level - 1, 0, | 3122 | parent, ref_root, level - 1, 0, |
3122 | 1); | 3123 | 1); |
@@ -3493,7 +3494,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, | |||
3493 | if (!found) | 3494 | if (!found) |
3494 | return -ENOMEM; | 3495 | return -ENOMEM; |
3495 | 3496 | ||
3496 | ret = percpu_counter_init(&found->total_bytes_pinned, 0); | 3497 | ret = percpu_counter_init(&found->total_bytes_pinned, 0, GFP_KERNEL); |
3497 | if (ret) { | 3498 | if (ret) { |
3498 | kfree(found); | 3499 | kfree(found); |
3499 | return ret; | 3500 | return ret; |
@@ -3586,13 +3587,7 @@ static u64 get_restripe_target(struct btrfs_fs_info *fs_info, u64 flags) | |||
3586 | */ | 3587 | */ |
3587 | static u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags) | 3588 | static u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags) |
3588 | { | 3589 | { |
3589 | /* | 3590 | u64 num_devices = root->fs_info->fs_devices->rw_devices; |
3590 | * we add in the count of missing devices because we want | ||
3591 | * to make sure that any RAID levels on a degraded FS | ||
3592 | * continue to be honored. | ||
3593 | */ | ||
3594 | u64 num_devices = root->fs_info->fs_devices->rw_devices + | ||
3595 | root->fs_info->fs_devices->missing_devices; | ||
3596 | u64 target; | 3591 | u64 target; |
3597 | u64 tmp; | 3592 | u64 tmp; |
3598 | 3593 | ||
@@ -4348,11 +4343,21 @@ static inline int need_do_async_reclaim(struct btrfs_space_info *space_info, | |||
4348 | } | 4343 | } |
4349 | 4344 | ||
4350 | static int btrfs_need_do_async_reclaim(struct btrfs_space_info *space_info, | 4345 | static int btrfs_need_do_async_reclaim(struct btrfs_space_info *space_info, |
4351 | struct btrfs_fs_info *fs_info) | 4346 | struct btrfs_fs_info *fs_info, |
4347 | int flush_state) | ||
4352 | { | 4348 | { |
4353 | u64 used; | 4349 | u64 used; |
4354 | 4350 | ||
4355 | spin_lock(&space_info->lock); | 4351 | spin_lock(&space_info->lock); |
4352 | /* | ||
4353 | * We run out of space and have not got any free space via flush_space, | ||
4354 | * so don't bother doing async reclaim. | ||
4355 | */ | ||
4356 | if (flush_state > COMMIT_TRANS && space_info->full) { | ||
4357 | spin_unlock(&space_info->lock); | ||
4358 | return 0; | ||
4359 | } | ||
4360 | |||
4356 | used = space_info->bytes_used + space_info->bytes_reserved + | 4361 | used = space_info->bytes_used + space_info->bytes_reserved + |
4357 | space_info->bytes_pinned + space_info->bytes_readonly + | 4362 | space_info->bytes_pinned + space_info->bytes_readonly + |
4358 | space_info->bytes_may_use; | 4363 | space_info->bytes_may_use; |
@@ -4385,11 +4390,12 @@ static void btrfs_async_reclaim_metadata_space(struct work_struct *work) | |||
4385 | flush_space(fs_info->fs_root, space_info, to_reclaim, | 4390 | flush_space(fs_info->fs_root, space_info, to_reclaim, |
4386 | to_reclaim, flush_state); | 4391 | to_reclaim, flush_state); |
4387 | flush_state++; | 4392 | flush_state++; |
4388 | if (!btrfs_need_do_async_reclaim(space_info, fs_info)) | 4393 | if (!btrfs_need_do_async_reclaim(space_info, fs_info, |
4394 | flush_state)) | ||
4389 | return; | 4395 | return; |
4390 | } while (flush_state <= COMMIT_TRANS); | 4396 | } while (flush_state <= COMMIT_TRANS); |
4391 | 4397 | ||
4392 | if (btrfs_need_do_async_reclaim(space_info, fs_info)) | 4398 | if (btrfs_need_do_async_reclaim(space_info, fs_info, flush_state)) |
4393 | queue_work(system_unbound_wq, work); | 4399 | queue_work(system_unbound_wq, work); |
4394 | } | 4400 | } |
4395 | 4401 | ||
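Passing flush_state into btrfs_need_do_async_reclaim() lets the async worker stop once it has escalated past COMMIT_TRANS against a space_info that is already full, instead of requeueing itself forever. A simplified standalone model of that escalate-or-give-up loop (state names match the diff, the rest is invented):

```
#include <stdio.h>

enum { FLUSH_DELALLOC = 1, FLUSH_DELALLOC_WAIT, COMMIT_TRANS };

/* give up once every flush state has run and the space is still full */
static int need_reclaim(int flush_state, int space_full)
{
	if (flush_state > COMMIT_TRANS && space_full)
		return 0;
	return 1;	/* the real check also compares usage thresholds */
}

int main(void)
{
	int flush_state = FLUSH_DELALLOC;
	int space_full = 1;

	do {
		printf("flush_space(state=%d)\n", flush_state);
		flush_state++;
		if (!need_reclaim(flush_state, space_full))
			return 0;	/* don't requeue the worker */
	} while (flush_state <= COMMIT_TRANS);

	printf("requeue async reclaim work\n");
	return 0;
}
```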
@@ -4507,7 +4513,13 @@ again: | |||
4507 | space_info->flush = 1; | 4513 | space_info->flush = 1; |
4508 | } else if (!ret && space_info->flags & BTRFS_BLOCK_GROUP_METADATA) { | 4514 | } else if (!ret && space_info->flags & BTRFS_BLOCK_GROUP_METADATA) { |
4509 | used += orig_bytes; | 4515 | used += orig_bytes; |
4510 | if (need_do_async_reclaim(space_info, root->fs_info, used) && | 4516 | /* |
4517 | * We will do the space reservation dance during log replay, | ||
4518 | * which means we won't have fs_info->fs_root set, so don't do | ||
4519 | * the async reclaim as we will panic. | ||
4520 | */ | ||
4521 | if (!root->fs_info->log_root_recovering && | ||
4522 | need_do_async_reclaim(space_info, root->fs_info, used) && | ||
4511 | !work_busy(&root->fs_info->async_reclaim_work)) | 4523 | !work_busy(&root->fs_info->async_reclaim_work)) |
4512 | queue_work(system_unbound_wq, | 4524 | queue_work(system_unbound_wq, |
4513 | &root->fs_info->async_reclaim_work); | 4525 | &root->fs_info->async_reclaim_work); |
@@ -4844,7 +4856,7 @@ static u64 calc_global_metadata_size(struct btrfs_fs_info *fs_info) | |||
4844 | if (num_bytes * 3 > meta_used) | 4856 | if (num_bytes * 3 > meta_used) |
4845 | num_bytes = div64_u64(meta_used, 3); | 4857 | num_bytes = div64_u64(meta_used, 3); |
4846 | 4858 | ||
4847 | return ALIGN(num_bytes, fs_info->extent_root->leafsize << 10); | 4859 | return ALIGN(num_bytes, fs_info->extent_root->nodesize << 10); |
4848 | } | 4860 | } |
4849 | 4861 | ||
4850 | static void update_global_block_rsv(struct btrfs_fs_info *fs_info) | 4862 | static void update_global_block_rsv(struct btrfs_fs_info *fs_info) |
@@ -4993,7 +5005,7 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root, | |||
4993 | 5005 | ||
4994 | if (root->fs_info->quota_enabled) { | 5006 | if (root->fs_info->quota_enabled) { |
4995 | /* One for parent inode, two for dir entries */ | 5007 | /* One for parent inode, two for dir entries */ |
4996 | num_bytes = 3 * root->leafsize; | 5008 | num_bytes = 3 * root->nodesize; |
4997 | ret = btrfs_qgroup_reserve(root, num_bytes); | 5009 | ret = btrfs_qgroup_reserve(root, num_bytes); |
4998 | if (ret) | 5010 | if (ret) |
4999 | return ret; | 5011 | return ret; |
@@ -5181,7 +5193,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) | |||
5181 | 5193 | ||
5182 | if (root->fs_info->quota_enabled) { | 5194 | if (root->fs_info->quota_enabled) { |
5183 | ret = btrfs_qgroup_reserve(root, num_bytes + | 5195 | ret = btrfs_qgroup_reserve(root, num_bytes + |
5184 | nr_extents * root->leafsize); | 5196 | nr_extents * root->nodesize); |
5185 | if (ret) | 5197 | if (ret) |
5186 | goto out_fail; | 5198 | goto out_fail; |
5187 | } | 5199 | } |
@@ -5190,7 +5202,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) | |||
5190 | if (unlikely(ret)) { | 5202 | if (unlikely(ret)) { |
5191 | if (root->fs_info->quota_enabled) | 5203 | if (root->fs_info->quota_enabled) |
5192 | btrfs_qgroup_free(root, num_bytes + | 5204 | btrfs_qgroup_free(root, num_bytes + |
5193 | nr_extents * root->leafsize); | 5205 | nr_extents * root->nodesize); |
5194 | goto out_fail; | 5206 | goto out_fail; |
5195 | } | 5207 | } |
5196 | 5208 | ||
@@ -5306,7 +5318,7 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes) | |||
5306 | btrfs_ino(inode), to_free, 0); | 5318 | btrfs_ino(inode), to_free, 0); |
5307 | if (root->fs_info->quota_enabled) { | 5319 | if (root->fs_info->quota_enabled) { |
5308 | btrfs_qgroup_free(root, num_bytes + | 5320 | btrfs_qgroup_free(root, num_bytes + |
5309 | dropped * root->leafsize); | 5321 | dropped * root->nodesize); |
5310 | } | 5322 | } |
5311 | 5323 | ||
5312 | btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv, | 5324 | btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv, |
@@ -5427,6 +5439,20 @@ static int update_block_group(struct btrfs_root *root, | |||
5427 | spin_unlock(&cache->space_info->lock); | 5439 | spin_unlock(&cache->space_info->lock); |
5428 | } else { | 5440 | } else { |
5429 | old_val -= num_bytes; | 5441 | old_val -= num_bytes; |
5442 | |||
5443 | /* | ||
5444 | * This block group no longer has any used bytes, so queue | ||
5445 | * it for deletion. | ||
5446 | */ | ||
5447 | if (old_val == 0) { | ||
5448 | spin_lock(&info->unused_bgs_lock); | ||
5449 | if (list_empty(&cache->bg_list)) { | ||
5450 | btrfs_get_block_group(cache); | ||
5451 | list_add_tail(&cache->bg_list, | ||
5452 | &info->unused_bgs); | ||
5453 | } | ||
5454 | spin_unlock(&info->unused_bgs_lock); | ||
5455 | } | ||
5430 | btrfs_set_block_group_used(&cache->item, old_val); | 5456 | btrfs_set_block_group_used(&cache->item, old_val); |
5431 | cache->pinned += num_bytes; | 5457 | cache->pinned += num_bytes; |
5432 | cache->space_info->bytes_pinned += num_bytes; | 5458 | cache->space_info->bytes_pinned += num_bytes; |
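update_block_group() now queues a block group on the unused_bgs list the moment its used-byte count reaches zero, taking a reference and relying on list_empty() of the embedded node as the already-queued test. A compact user-space sketch of that queue-once pattern; every name here is illustrative:

#include <pthread.h>
#include <stdbool.h>

struct list_node { struct list_node *prev, *next; };

static bool list_empty(const struct list_node *n) { return n->next == n; }

static void list_init(struct list_node *n) { n->prev = n->next = n; }

static void list_add_tail(struct list_node *n, struct list_node *head)
{
        n->prev = head->prev;
        n->next = head;
        head->prev->next = n;
        head->prev = n;
}

struct block_group {
        int refs;                  /* stands in for btrfs_get_block_group() */
        struct list_node bg_list;  /* node embedded in the object itself */
};

static pthread_mutex_t unused_lock = PTHREAD_MUTEX_INITIALIZER;
static struct list_node unused_bgs = { &unused_bgs, &unused_bgs };

static void queue_if_unused(struct block_group *bg)
{
        pthread_mutex_lock(&unused_lock);
        if (list_empty(&bg->bg_list)) {  /* empty node == not queued yet */
                bg->refs++;              /* the list now holds a reference */
                list_add_tail(&bg->bg_list, &unused_bgs);
        }
        pthread_mutex_unlock(&unused_lock);
}

int main(void)
{
        struct block_group bg = { .refs = 1 };

        list_init(&bg.bg_list);
        queue_if_unused(&bg);
        queue_if_unused(&bg);  /* no-op: already queued, refcount untouched */
        return 0;
}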
@@ -6238,10 +6264,9 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, | |||
6238 | int ret; | 6264 | int ret; |
6239 | struct btrfs_fs_info *fs_info = root->fs_info; | 6265 | struct btrfs_fs_info *fs_info = root->fs_info; |
6240 | 6266 | ||
6241 | #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS | 6267 | if (btrfs_test_is_dummy_root(root)) |
6242 | if (unlikely(test_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state))) | ||
6243 | return 0; | 6268 | return 0; |
6244 | #endif | 6269 | |
6245 | add_pinned_bytes(root->fs_info, num_bytes, owner, root_objectid); | 6270 | add_pinned_bytes(root->fs_info, num_bytes, owner, root_objectid); |
6246 | 6271 | ||
6247 | /* | 6272 | /* |
@@ -6268,14 +6293,6 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, | |||
6268 | return ret; | 6293 | return ret; |
6269 | } | 6294 | } |
6270 | 6295 | ||
6271 | static u64 stripe_align(struct btrfs_root *root, | ||
6272 | struct btrfs_block_group_cache *cache, | ||
6273 | u64 val, u64 num_bytes) | ||
6274 | { | ||
6275 | u64 ret = ALIGN(val, root->stripesize); | ||
6276 | return ret; | ||
6277 | } | ||
6278 | |||
6279 | /* | 6296 | /* |
6280 | * when we wait for progress in the block group caching, it's because | 6297 | * when we wait for progress in the block group caching, it's because |
6281 | * our allocation attempt failed at least once. So, we must sleep | 6298 | * our allocation attempt failed at least once. So, we must sleep |
@@ -6469,7 +6486,7 @@ static noinline int find_free_extent(struct btrfs_root *orig_root, | |||
6469 | bool have_caching_bg = false; | 6486 | bool have_caching_bg = false; |
6470 | 6487 | ||
6471 | WARN_ON(num_bytes < root->sectorsize); | 6488 | WARN_ON(num_bytes < root->sectorsize); |
6472 | btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); | 6489 | ins->type = BTRFS_EXTENT_ITEM_KEY; |
6473 | ins->objectid = 0; | 6490 | ins->objectid = 0; |
6474 | ins->offset = 0; | 6491 | ins->offset = 0; |
6475 | 6492 | ||
@@ -6756,8 +6773,7 @@ unclustered_alloc: | |||
6756 | goto loop; | 6773 | goto loop; |
6757 | } | 6774 | } |
6758 | checks: | 6775 | checks: |
6759 | search_start = stripe_align(root, block_group, | 6776 | search_start = ALIGN(offset, root->stripesize); |
6760 | offset, num_bytes); | ||
6761 | 6777 | ||
6762 | /* move on to the next group */ | 6778 | /* move on to the next group */ |
6763 | if (search_start + num_bytes > | 6779 | if (search_start + num_bytes > |
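With the stripe_align() wrapper removed, the allocator open-codes ALIGN(offset, root->stripesize), which rounds a value up to the next multiple of a power-of-two alignment. A short demonstration of the rounding, assuming a 4 KiB stripe size:

#include <stdint.h>
#include <stdio.h>

/* Round x up to a multiple of a; a must be a power of two, as in the
 * kernel macro this mirrors. */
#define ALIGN_UP(x, a) (((x) + ((a) - 1)) & ~((uint64_t)(a) - 1))

int main(void)
{
        uint64_t stripesize = 4096;     /* assumed stripe size */

        printf("%llu\n", (unsigned long long)ALIGN_UP(5000, stripesize)); /* 8192 */
        printf("%llu\n", (unsigned long long)ALIGN_UP(8192, stripesize)); /* 8192 */
        return 0;
}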
@@ -7082,7 +7098,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, | |||
7082 | path = btrfs_alloc_path(); | 7098 | path = btrfs_alloc_path(); |
7083 | if (!path) { | 7099 | if (!path) { |
7084 | btrfs_free_and_pin_reserved_extent(root, ins->objectid, | 7100 | btrfs_free_and_pin_reserved_extent(root, ins->objectid, |
7085 | root->leafsize); | 7101 | root->nodesize); |
7086 | return -ENOMEM; | 7102 | return -ENOMEM; |
7087 | } | 7103 | } |
7088 | 7104 | ||
@@ -7091,7 +7107,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, | |||
7091 | ins, size); | 7107 | ins, size); |
7092 | if (ret) { | 7108 | if (ret) { |
7093 | btrfs_free_and_pin_reserved_extent(root, ins->objectid, | 7109 | btrfs_free_and_pin_reserved_extent(root, ins->objectid, |
7094 | root->leafsize); | 7110 | root->nodesize); |
7095 | btrfs_free_path(path); | 7111 | btrfs_free_path(path); |
7096 | return ret; | 7112 | return ret; |
7097 | } | 7113 | } |
@@ -7106,7 +7122,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, | |||
7106 | 7122 | ||
7107 | if (skinny_metadata) { | 7123 | if (skinny_metadata) { |
7108 | iref = (struct btrfs_extent_inline_ref *)(extent_item + 1); | 7124 | iref = (struct btrfs_extent_inline_ref *)(extent_item + 1); |
7109 | num_bytes = root->leafsize; | 7125 | num_bytes = root->nodesize; |
7110 | } else { | 7126 | } else { |
7111 | block_info = (struct btrfs_tree_block_info *)(extent_item + 1); | 7127 | block_info = (struct btrfs_tree_block_info *)(extent_item + 1); |
7112 | btrfs_set_tree_block_key(leaf, block_info, key); | 7128 | btrfs_set_tree_block_key(leaf, block_info, key); |
@@ -7136,14 +7152,14 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, | |||
7136 | return ret; | 7152 | return ret; |
7137 | } | 7153 | } |
7138 | 7154 | ||
7139 | ret = update_block_group(root, ins->objectid, root->leafsize, 1); | 7155 | ret = update_block_group(root, ins->objectid, root->nodesize, 1); |
7140 | if (ret) { /* -ENOENT, logic error */ | 7156 | if (ret) { /* -ENOENT, logic error */ |
7141 | btrfs_err(fs_info, "update block group failed for %llu %llu", | 7157 | btrfs_err(fs_info, "update block group failed for %llu %llu", |
7142 | ins->objectid, ins->offset); | 7158 | ins->objectid, ins->offset); |
7143 | BUG(); | 7159 | BUG(); |
7144 | } | 7160 | } |
7145 | 7161 | ||
7146 | trace_btrfs_reserved_extent_alloc(root, ins->objectid, root->leafsize); | 7162 | trace_btrfs_reserved_extent_alloc(root, ins->objectid, root->nodesize); |
7147 | return ret; | 7163 | return ret; |
7148 | } | 7164 | } |
7149 | 7165 | ||
@@ -7218,17 +7234,19 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root, | |||
7218 | btrfs_set_buffer_uptodate(buf); | 7234 | btrfs_set_buffer_uptodate(buf); |
7219 | 7235 | ||
7220 | if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) { | 7236 | if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) { |
7237 | buf->log_index = root->log_transid % 2; | ||
7221 | /* | 7238 | /* |
7222 | * we allow two log transactions at a time, use different | 7239 | * we allow two log transactions at a time, use different |
7223 | * EXTENT bits to differentiate dirty pages. | 7240 | * EXTENT bits to differentiate dirty pages. |
7224 | */ | 7241 | */ |
7225 | if (root->log_transid % 2 == 0) | 7242 | if (buf->log_index == 0) |
7226 | set_extent_dirty(&root->dirty_log_pages, buf->start, | 7243 | set_extent_dirty(&root->dirty_log_pages, buf->start, |
7227 | buf->start + buf->len - 1, GFP_NOFS); | 7244 | buf->start + buf->len - 1, GFP_NOFS); |
7228 | else | 7245 | else |
7229 | set_extent_new(&root->dirty_log_pages, buf->start, | 7246 | set_extent_new(&root->dirty_log_pages, buf->start, |
7230 | buf->start + buf->len - 1, GFP_NOFS); | 7247 | buf->start + buf->len - 1, GFP_NOFS); |
7231 | } else { | 7248 | } else { |
7249 | buf->log_index = -1; | ||
7232 | set_extent_dirty(&trans->transaction->dirty_pages, buf->start, | 7250 | set_extent_dirty(&trans->transaction->dirty_pages, buf->start, |
7233 | buf->start + buf->len - 1, GFP_NOFS); | 7251 | buf->start + buf->len - 1, GFP_NOFS); |
7234 | } | 7252 | } |
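Since two log transactions can be in flight at once, the parity of log_transid, now cached in buf->log_index, selects which extent bit tracks a log buffer's dirty pages. A toy illustration of the parity selection (bit values illustrative):

#include <stdio.h>

enum { EXTENT_DIRTY = 1 << 0, EXTENT_NEW = 1 << 1 };

static int log_bit_for(unsigned long long log_transid)
{
        /* even log transactions use EXTENT_DIRTY, odd ones EXTENT_NEW */
        return (log_transid % 2 == 0) ? EXTENT_DIRTY : EXTENT_NEW;
}

int main(void)
{
        for (unsigned long long t = 10; t < 13; t++)
                printf("log_transid %llu -> bit %d\n", t, log_bit_for(t));
        return 0;
}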
@@ -7305,8 +7323,8 @@ static void unuse_block_rsv(struct btrfs_fs_info *fs_info, | |||
7305 | * | 7323 | * |
7306 | * returns the tree buffer or NULL. | 7324 | * returns the tree buffer or NULL. |
7307 | */ | 7325 | */ |
7308 | struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, | 7326 | struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans, |
7309 | struct btrfs_root *root, u32 blocksize, | 7327 | struct btrfs_root *root, |
7310 | u64 parent, u64 root_objectid, | 7328 | u64 parent, u64 root_objectid, |
7311 | struct btrfs_disk_key *key, int level, | 7329 | struct btrfs_disk_key *key, int level, |
7312 | u64 hint, u64 empty_size) | 7330 | u64 hint, u64 empty_size) |
@@ -7316,18 +7334,18 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, | |||
7316 | struct extent_buffer *buf; | 7334 | struct extent_buffer *buf; |
7317 | u64 flags = 0; | 7335 | u64 flags = 0; |
7318 | int ret; | 7336 | int ret; |
7337 | u32 blocksize = root->nodesize; | ||
7319 | bool skinny_metadata = btrfs_fs_incompat(root->fs_info, | 7338 | bool skinny_metadata = btrfs_fs_incompat(root->fs_info, |
7320 | SKINNY_METADATA); | 7339 | SKINNY_METADATA); |
7321 | 7340 | ||
7322 | #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS | 7341 | if (btrfs_test_is_dummy_root(root)) { |
7323 | if (unlikely(test_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state))) { | ||
7324 | buf = btrfs_init_new_buffer(trans, root, root->alloc_bytenr, | 7342 | buf = btrfs_init_new_buffer(trans, root, root->alloc_bytenr, |
7325 | blocksize, level); | 7343 | blocksize, level); |
7326 | if (!IS_ERR(buf)) | 7344 | if (!IS_ERR(buf)) |
7327 | root->alloc_bytenr += blocksize; | 7345 | root->alloc_bytenr += blocksize; |
7328 | return buf; | 7346 | return buf; |
7329 | } | 7347 | } |
7330 | #endif | 7348 | |
7331 | block_rsv = use_block_rsv(trans, root, blocksize); | 7349 | block_rsv = use_block_rsv(trans, root, blocksize); |
7332 | if (IS_ERR(block_rsv)) | 7350 | if (IS_ERR(block_rsv)) |
7333 | return ERR_CAST(block_rsv); | 7351 | return ERR_CAST(block_rsv); |
@@ -7422,7 +7440,7 @@ static noinline void reada_walk_down(struct btrfs_trans_handle *trans, | |||
7422 | 7440 | ||
7423 | eb = path->nodes[wc->level]; | 7441 | eb = path->nodes[wc->level]; |
7424 | nritems = btrfs_header_nritems(eb); | 7442 | nritems = btrfs_header_nritems(eb); |
7425 | blocksize = btrfs_level_size(root, wc->level - 1); | 7443 | blocksize = root->nodesize; |
7426 | 7444 | ||
7427 | for (slot = path->slots[wc->level]; slot < nritems; slot++) { | 7445 | for (slot = path->slots[wc->level]; slot < nritems; slot++) { |
7428 | if (nread >= wc->reada_count) | 7446 | if (nread >= wc->reada_count) |
@@ -7469,10 +7487,7 @@ static noinline void reada_walk_down(struct btrfs_trans_handle *trans, | |||
7469 | continue; | 7487 | continue; |
7470 | } | 7488 | } |
7471 | reada: | 7489 | reada: |
7472 | ret = readahead_tree_block(root, bytenr, blocksize, | 7490 | readahead_tree_block(root, bytenr, blocksize); |
7473 | generation); | ||
7474 | if (ret) | ||
7475 | break; | ||
7476 | nread++; | 7491 | nread++; |
7477 | } | 7492 | } |
7478 | wc->reada_slot = slot; | 7493 | wc->reada_slot = slot; |
@@ -7631,7 +7646,6 @@ walk_down: | |||
7631 | level = root_level; | 7646 | level = root_level; |
7632 | while (level >= 0) { | 7647 | while (level >= 0) { |
7633 | if (path->nodes[level] == NULL) { | 7648 | if (path->nodes[level] == NULL) { |
7634 | int child_bsize = root->nodesize; | ||
7635 | int parent_slot; | 7649 | int parent_slot; |
7636 | u64 child_gen; | 7650 | u64 child_gen; |
7637 | u64 child_bytenr; | 7651 | u64 child_bytenr; |
@@ -7643,8 +7657,7 @@ walk_down: | |||
7643 | child_bytenr = btrfs_node_blockptr(eb, parent_slot); | 7657 | child_bytenr = btrfs_node_blockptr(eb, parent_slot); |
7644 | child_gen = btrfs_node_ptr_generation(eb, parent_slot); | 7658 | child_gen = btrfs_node_ptr_generation(eb, parent_slot); |
7645 | 7659 | ||
7646 | eb = read_tree_block(root, child_bytenr, child_bsize, | 7660 | eb = read_tree_block(root, child_bytenr, child_gen); |
7647 | child_gen); | ||
7648 | if (!eb || !extent_buffer_uptodate(eb)) { | 7661 | if (!eb || !extent_buffer_uptodate(eb)) { |
7649 | ret = -EIO; | 7662 | ret = -EIO; |
7650 | goto out; | 7663 | goto out; |
@@ -7660,7 +7673,7 @@ walk_down: | |||
7660 | ret = btrfs_qgroup_record_ref(trans, root->fs_info, | 7673 | ret = btrfs_qgroup_record_ref(trans, root->fs_info, |
7661 | root->objectid, | 7674 | root->objectid, |
7662 | child_bytenr, | 7675 | child_bytenr, |
7663 | child_bsize, | 7676 | root->nodesize, |
7664 | BTRFS_QGROUP_OPER_SUB_SUBTREE, | 7677 | BTRFS_QGROUP_OPER_SUB_SUBTREE, |
7665 | 0); | 7678 | 0); |
7666 | if (ret) | 7679 | if (ret) |
@@ -7811,9 +7824,9 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans, | |||
7811 | } | 7824 | } |
7812 | 7825 | ||
7813 | bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]); | 7826 | bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]); |
7814 | blocksize = btrfs_level_size(root, level - 1); | 7827 | blocksize = root->nodesize; |
7815 | 7828 | ||
7816 | next = btrfs_find_tree_block(root, bytenr, blocksize); | 7829 | next = btrfs_find_tree_block(root, bytenr); |
7817 | if (!next) { | 7830 | if (!next) { |
7818 | next = btrfs_find_create_tree_block(root, bytenr, blocksize); | 7831 | next = btrfs_find_create_tree_block(root, bytenr, blocksize); |
7819 | if (!next) | 7832 | if (!next) |
@@ -7875,7 +7888,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans, | |||
7875 | if (!next) { | 7888 | if (!next) { |
7876 | if (reada && level == 1) | 7889 | if (reada && level == 1) |
7877 | reada_walk_down(trans, root, wc, path); | 7890 | reada_walk_down(trans, root, wc, path); |
7878 | next = read_tree_block(root, bytenr, blocksize, generation); | 7891 | next = read_tree_block(root, bytenr, generation); |
7879 | if (!next || !extent_buffer_uptodate(next)) { | 7892 | if (!next || !extent_buffer_uptodate(next)) { |
7880 | free_extent_buffer(next); | 7893 | free_extent_buffer(next); |
7881 | return -EIO; | 7894 | return -EIO; |
@@ -8440,13 +8453,7 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags) | |||
8440 | if (stripped) | 8453 | if (stripped) |
8441 | return extended_to_chunk(stripped); | 8454 | return extended_to_chunk(stripped); |
8442 | 8455 | ||
8443 | /* | 8456 | num_devices = root->fs_info->fs_devices->rw_devices; |
8444 | * we add in the count of missing devices because we want | ||
8445 | * to make sure that any RAID levels on a degraded FS | ||
8446 | * continue to be honored. | ||
8447 | */ | ||
8448 | num_devices = root->fs_info->fs_devices->rw_devices + | ||
8449 | root->fs_info->fs_devices->missing_devices; | ||
8450 | 8457 | ||
8451 | stripped = BTRFS_BLOCK_GROUP_RAID0 | | 8458 | stripped = BTRFS_BLOCK_GROUP_RAID0 | |
8452 | BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6 | | 8459 | BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6 | |
@@ -8864,6 +8871,16 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) | |||
8864 | } | 8871 | } |
8865 | up_write(&info->commit_root_sem); | 8872 | up_write(&info->commit_root_sem); |
8866 | 8873 | ||
8874 | spin_lock(&info->unused_bgs_lock); | ||
8875 | while (!list_empty(&info->unused_bgs)) { | ||
8876 | block_group = list_first_entry(&info->unused_bgs, | ||
8877 | struct btrfs_block_group_cache, | ||
8878 | bg_list); | ||
8879 | list_del_init(&block_group->bg_list); | ||
8880 | btrfs_put_block_group(block_group); | ||
8881 | } | ||
8882 | spin_unlock(&info->unused_bgs_lock); | ||
8883 | |||
8867 | spin_lock(&info->block_group_cache_lock); | 8884 | spin_lock(&info->block_group_cache_lock); |
8868 | while ((n = rb_last(&info->block_group_cache_tree)) != NULL) { | 8885 | while ((n = rb_last(&info->block_group_cache_tree)) != NULL) { |
8869 | block_group = rb_entry(n, struct btrfs_block_group_cache, | 8886 | block_group = rb_entry(n, struct btrfs_block_group_cache, |
@@ -8998,7 +9015,7 @@ btrfs_create_block_group_cache(struct btrfs_root *root, u64 start, u64 size) | |||
8998 | init_rwsem(&cache->data_rwsem); | 9015 | init_rwsem(&cache->data_rwsem); |
8999 | INIT_LIST_HEAD(&cache->list); | 9016 | INIT_LIST_HEAD(&cache->list); |
9000 | INIT_LIST_HEAD(&cache->cluster_list); | 9017 | INIT_LIST_HEAD(&cache->cluster_list); |
9001 | INIT_LIST_HEAD(&cache->new_bg_list); | 9018 | INIT_LIST_HEAD(&cache->bg_list); |
9002 | btrfs_init_free_space_ctl(cache); | 9019 | btrfs_init_free_space_ctl(cache); |
9003 | 9020 | ||
9004 | return cache; | 9021 | return cache; |
@@ -9020,7 +9037,7 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
9020 | root = info->extent_root; | 9037 | root = info->extent_root; |
9021 | key.objectid = 0; | 9038 | key.objectid = 0; |
9022 | key.offset = 0; | 9039 | key.offset = 0; |
9023 | btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY); | 9040 | key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; |
9024 | path = btrfs_alloc_path(); | 9041 | path = btrfs_alloc_path(); |
9025 | if (!path) | 9042 | if (!path) |
9026 | return -ENOMEM; | 9043 | return -ENOMEM; |
@@ -9139,8 +9156,18 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
9139 | __link_block_group(space_info, cache); | 9156 | __link_block_group(space_info, cache); |
9140 | 9157 | ||
9141 | set_avail_alloc_bits(root->fs_info, cache->flags); | 9158 | set_avail_alloc_bits(root->fs_info, cache->flags); |
9142 | if (btrfs_chunk_readonly(root, cache->key.objectid)) | 9159 | if (btrfs_chunk_readonly(root, cache->key.objectid)) { |
9143 | set_block_group_ro(cache, 1); | 9160 | set_block_group_ro(cache, 1); |
9161 | } else if (btrfs_block_group_used(&cache->item) == 0) { | ||
9162 | spin_lock(&info->unused_bgs_lock); | ||
9163 | /* Should always be true but just in case. */ | ||
9164 | if (list_empty(&cache->bg_list)) { | ||
9165 | btrfs_get_block_group(cache); | ||
9166 | list_add_tail(&cache->bg_list, | ||
9167 | &info->unused_bgs); | ||
9168 | } | ||
9169 | spin_unlock(&info->unused_bgs_lock); | ||
9170 | } | ||
9144 | } | 9171 | } |
9145 | 9172 | ||
9146 | list_for_each_entry_rcu(space_info, &root->fs_info->space_info, list) { | 9173 | list_for_each_entry_rcu(space_info, &root->fs_info->space_info, list) { |
@@ -9181,10 +9208,8 @@ void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans, | |||
9181 | struct btrfs_key key; | 9208 | struct btrfs_key key; |
9182 | int ret = 0; | 9209 | int ret = 0; |
9183 | 9210 | ||
9184 | list_for_each_entry_safe(block_group, tmp, &trans->new_bgs, | 9211 | list_for_each_entry_safe(block_group, tmp, &trans->new_bgs, bg_list) { |
9185 | new_bg_list) { | 9212 | list_del_init(&block_group->bg_list); |
9186 | list_del_init(&block_group->new_bg_list); | ||
9187 | |||
9188 | if (ret) | 9213 | if (ret) |
9189 | continue; | 9214 | continue; |
9190 | 9215 | ||
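Note the loop shape kept by the rewritten btrfs_create_pending_block_groups(): after the first failure ret stays set and the per-entry work is skipped, but every entry is still unlinked from new_bgs so nothing is leaked. A small sketch of that latch-and-drain shape (names and values illustrative):

#include <stdio.h>

int process_all(const int *items, int n)
{
        int ret = 0;

        for (int i = 0; i < n; i++) {
                /* every entry is always consumed ("unlinked") ... */
                int item = items[i];

                if (ret)
                        continue;       /* ... but work stops after a failure */
                if (item < 0)
                        ret = -1;       /* first failure latches ret */
                else
                        printf("created block group %d\n", item);
        }
        return ret;
}

int main(void)
{
        int items[] = { 1, 2, -1, 4 };

        return process_all(items, 4) ? 1 : 0;
}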
@@ -9270,7 +9295,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, | |||
9270 | 9295 | ||
9271 | __link_block_group(cache->space_info, cache); | 9296 | __link_block_group(cache->space_info, cache); |
9272 | 9297 | ||
9273 | list_add_tail(&cache->new_bg_list, &trans->new_bgs); | 9298 | list_add_tail(&cache->bg_list, &trans->new_bgs); |
9274 | 9299 | ||
9275 | set_avail_alloc_bits(extent_root->fs_info, type); | 9300 | set_avail_alloc_bits(extent_root->fs_info, type); |
9276 | 9301 | ||
@@ -9424,8 +9449,6 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | |||
9424 | 9449 | ||
9425 | memcpy(&key, &block_group->key, sizeof(key)); | 9450 | memcpy(&key, &block_group->key, sizeof(key)); |
9426 | 9451 | ||
9427 | btrfs_clear_space_info_full(root->fs_info); | ||
9428 | |||
9429 | btrfs_put_block_group(block_group); | 9452 | btrfs_put_block_group(block_group); |
9430 | btrfs_put_block_group(block_group); | 9453 | btrfs_put_block_group(block_group); |
9431 | 9454 | ||
@@ -9441,6 +9464,101 @@ out: | |||
9441 | return ret; | 9464 | return ret; |
9442 | } | 9465 | } |
9443 | 9466 | ||
9467 | /* | ||
9468 | * Process the unused_bgs list and remove any block groups that no longer | ||
9469 | * have any allocated space inside them. | ||
9470 | */ | ||
9471 | void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) | ||
9472 | { | ||
9473 | struct btrfs_block_group_cache *block_group; | ||
9474 | struct btrfs_space_info *space_info; | ||
9475 | struct btrfs_root *root = fs_info->extent_root; | ||
9476 | struct btrfs_trans_handle *trans; | ||
9477 | int ret = 0; | ||
9478 | |||
9479 | if (!fs_info->open) | ||
9480 | return; | ||
9481 | |||
9482 | spin_lock(&fs_info->unused_bgs_lock); | ||
9483 | while (!list_empty(&fs_info->unused_bgs)) { | ||
9484 | u64 start, end; | ||
9485 | |||
9486 | block_group = list_first_entry(&fs_info->unused_bgs, | ||
9487 | struct btrfs_block_group_cache, | ||
9488 | bg_list); | ||
9489 | space_info = block_group->space_info; | ||
9490 | list_del_init(&block_group->bg_list); | ||
9491 | if (ret || btrfs_mixed_space_info(space_info)) { | ||
9492 | btrfs_put_block_group(block_group); | ||
9493 | continue; | ||
9494 | } | ||
9495 | spin_unlock(&fs_info->unused_bgs_lock); | ||
9496 | |||
9497 | /* Don't want to race with allocators so take the groups_sem */ | ||
9498 | down_write(&space_info->groups_sem); | ||
9499 | spin_lock(&block_group->lock); | ||
9500 | if (block_group->reserved || | ||
9501 | btrfs_block_group_used(&block_group->item) || | ||
9502 | block_group->ro) { | ||
9503 | /* | ||
9504 | * We want to bail if we made new allocations or have | ||
9505 | * outstanding allocations in this block group. We do | ||
9506 | * the ro check in case balance is currently acting on | ||
9507 | * this block group. | ||
9508 | */ | ||
9509 | spin_unlock(&block_group->lock); | ||
9510 | up_write(&space_info->groups_sem); | ||
9511 | goto next; | ||
9512 | } | ||
9513 | spin_unlock(&block_group->lock); | ||
9514 | |||
9515 | /* We don't want to force the issue, only flip if it's ok. */ | ||
9516 | ret = set_block_group_ro(block_group, 0); | ||
9517 | up_write(&space_info->groups_sem); | ||
9518 | if (ret < 0) { | ||
9519 | ret = 0; | ||
9520 | goto next; | ||
9521 | } | ||
9522 | |||
9523 | /* | ||
9524 | * We want to do this before we do anything else so we can recover | ||
9525 | * properly if we fail to join the transaction. | ||
9526 | */ | ||
9527 | trans = btrfs_join_transaction(root); | ||
9528 | if (IS_ERR(trans)) { | ||
9529 | btrfs_set_block_group_rw(root, block_group); | ||
9530 | ret = PTR_ERR(trans); | ||
9531 | goto next; | ||
9532 | } | ||
9533 | |||
9534 | /* | ||
9535 | * We could have pending pinned extents for this block group, | ||
9536 | * just delete them, we don't care about them anymore. | ||
9537 | */ | ||
9538 | start = block_group->key.objectid; | ||
9539 | end = start + block_group->key.offset - 1; | ||
9540 | clear_extent_bits(&fs_info->freed_extents[0], start, end, | ||
9541 | EXTENT_DIRTY, GFP_NOFS); | ||
9542 | clear_extent_bits(&fs_info->freed_extents[1], start, end, | ||
9543 | EXTENT_DIRTY, GFP_NOFS); | ||
9544 | |||
9545 | /* Reset pinned so btrfs_put_block_group doesn't complain */ | ||
9546 | block_group->pinned = 0; | ||
9547 | |||
9548 | /* | ||
9549 | * btrfs_remove_chunk() will abort the transaction if things go | ||
9550 | * horribly wrong. | ||
9551 | */ | ||
9552 | ret = btrfs_remove_chunk(trans, root, | ||
9553 | block_group->key.objectid); | ||
9554 | btrfs_end_transaction(trans, root); | ||
9555 | next: | ||
9556 | btrfs_put_block_group(block_group); | ||
9557 | spin_lock(&fs_info->unused_bgs_lock); | ||
9558 | } | ||
9559 | spin_unlock(&fs_info->unused_bgs_lock); | ||
9560 | } | ||
9561 | |||
9444 | int btrfs_init_space_info(struct btrfs_fs_info *fs_info) | 9562 | int btrfs_init_space_info(struct btrfs_fs_info *fs_info) |
9445 | { | 9563 | { |
9446 | struct btrfs_space_info *space_info; | 9564 | struct btrfs_space_info *space_info; |
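btrfs_delete_unused_bgs() above follows a common drain shape: unused_bgs_lock is held only to detach the first entry, dropped across the sleeping work (the read-only flip, the transaction join, btrfs_remove_chunk), and retaken before the list is tested again. A minimal user-space sketch of that shape, assuming only that the expensive step may block:

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct entry { struct entry *next; int id; };

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static struct entry *unused;            /* stack of queued entries */

static void expensive_delete(struct entry *e)
{
        /* stands in for the ro flip, transaction join and chunk removal */
        printf("deleting block group %d\n", e->id);
        free(e);
}

static void drain_unused(void)
{
        pthread_mutex_lock(&lock);
        while (unused) {
                struct entry *e = unused;

                unused = e->next;               /* detach under the lock */
                pthread_mutex_unlock(&lock);

                expensive_delete(e);            /* may block; lock not held */

                pthread_mutex_lock(&lock);      /* retake before re-checking */
        }
        pthread_mutex_unlock(&lock);
}

int main(void)
{
        for (int i = 0; i < 3; i++) {
                struct entry *e = malloc(sizeof(*e));

                e->id = i;
                e->next = unused;
                unused = e;
        }
        drain_unused();
        return 0;
}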
@@ -9572,7 +9690,7 @@ void btrfs_end_nocow_write(struct btrfs_root *root) | |||
9572 | 9690 | ||
9573 | int btrfs_start_nocow_write(struct btrfs_root *root) | 9691 | int btrfs_start_nocow_write(struct btrfs_root *root) |
9574 | { | 9692 | { |
9575 | if (unlikely(atomic_read(&root->will_be_snapshoted))) | 9693 | if (atomic_read(&root->will_be_snapshoted)) |
9576 | return 0; | 9694 | return 0; |
9577 | 9695 | ||
9578 | percpu_counter_inc(&root->subv_writers->counter); | 9696 | percpu_counter_inc(&root->subv_writers->counter); |
@@ -9580,7 +9698,7 @@ int btrfs_start_nocow_write(struct btrfs_root *root) | |||
9580 | * Make sure counter is updated before we check for snapshot creation. | 9698 | * Make sure counter is updated before we check for snapshot creation. |
9581 | */ | 9699 | */ |
9582 | smp_mb(); | 9700 | smp_mb(); |
9583 | if (unlikely(atomic_read(&root->will_be_snapshoted))) { | 9701 | if (atomic_read(&root->will_be_snapshoted)) { |
9584 | btrfs_end_nocow_write(root); | 9702 | btrfs_end_nocow_write(root); |
9585 | return 0; | 9703 | return 0; |
9586 | } | 9704 | } |
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 3e11aab9f391..bf3f424e0013 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
@@ -25,6 +25,11 @@ static struct kmem_cache *extent_state_cache; | |||
25 | static struct kmem_cache *extent_buffer_cache; | 25 | static struct kmem_cache *extent_buffer_cache; |
26 | static struct bio_set *btrfs_bioset; | 26 | static struct bio_set *btrfs_bioset; |
27 | 27 | ||
28 | static inline bool extent_state_in_tree(const struct extent_state *state) | ||
29 | { | ||
30 | return !RB_EMPTY_NODE(&state->rb_node); | ||
31 | } | ||
32 | |||
28 | #ifdef CONFIG_BTRFS_DEBUG | 33 | #ifdef CONFIG_BTRFS_DEBUG |
29 | static LIST_HEAD(buffers); | 34 | static LIST_HEAD(buffers); |
30 | static LIST_HEAD(states); | 35 | static LIST_HEAD(states); |
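The new extent_state_in_tree() helper replaces the old state->tree back-pointer: a cleared rb_node is parented to itself, so the node's own state answers the membership question and the struct loses a field. A simplified sketch of the idiom follows; these definitions are illustrative, not the kernel's rbtree.h.

#include <stdbool.h>

/* Simplified node: the kernel packs parent pointer and color together. */
struct rb_node { unsigned long parent_color; struct rb_node *left, *right; };

static void rb_clear_node(struct rb_node *n)
{
        n->parent_color = (unsigned long)n;  /* self-parented == not in a tree */
}

static bool rb_empty_node(const struct rb_node *n)
{
        return n->parent_color == (unsigned long)n;
}

struct extent_state_demo { struct rb_node rb_node; };

static bool state_in_tree(const struct extent_state_demo *s)
{
        return !rb_empty_node(&s->rb_node);
}

int main(void)
{
        struct extent_state_demo s;

        rb_clear_node(&s.rb_node);         /* as alloc_extent_state() now does */
        return state_in_tree(&s) ? 1 : 0;  /* 0: correctly seen as unlinked */
}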
@@ -59,9 +64,9 @@ void btrfs_leak_debug_check(void) | |||
59 | 64 | ||
60 | while (!list_empty(&states)) { | 65 | while (!list_empty(&states)) { |
61 | state = list_entry(states.next, struct extent_state, leak_list); | 66 | state = list_entry(states.next, struct extent_state, leak_list); |
62 | printk(KERN_ERR "BTRFS: state leak: start %llu end %llu " | 67 | pr_err("BTRFS: state leak: start %llu end %llu state %lu in tree %d refs %d\n", |
63 | "state %lu in tree %p refs %d\n", | 68 | state->start, state->end, state->state, |
64 | state->start, state->end, state->state, state->tree, | 69 | extent_state_in_tree(state), |
65 | atomic_read(&state->refs)); | 70 | atomic_read(&state->refs)); |
66 | list_del(&state->leak_list); | 71 | list_del(&state->leak_list); |
67 | kmem_cache_free(extent_state_cache, state); | 72 | kmem_cache_free(extent_state_cache, state); |
@@ -209,7 +214,7 @@ static struct extent_state *alloc_extent_state(gfp_t mask) | |||
209 | return state; | 214 | return state; |
210 | state->state = 0; | 215 | state->state = 0; |
211 | state->private = 0; | 216 | state->private = 0; |
212 | state->tree = NULL; | 217 | RB_CLEAR_NODE(&state->rb_node); |
213 | btrfs_leak_debug_add(&state->leak_list, &states); | 218 | btrfs_leak_debug_add(&state->leak_list, &states); |
214 | atomic_set(&state->refs, 1); | 219 | atomic_set(&state->refs, 1); |
215 | init_waitqueue_head(&state->wq); | 220 | init_waitqueue_head(&state->wq); |
@@ -222,7 +227,7 @@ void free_extent_state(struct extent_state *state) | |||
222 | if (!state) | 227 | if (!state) |
223 | return; | 228 | return; |
224 | if (atomic_dec_and_test(&state->refs)) { | 229 | if (atomic_dec_and_test(&state->refs)) { |
225 | WARN_ON(state->tree); | 230 | WARN_ON(extent_state_in_tree(state)); |
226 | btrfs_leak_debug_del(&state->leak_list); | 231 | btrfs_leak_debug_del(&state->leak_list); |
227 | trace_free_extent_state(state, _RET_IP_); | 232 | trace_free_extent_state(state, _RET_IP_); |
228 | kmem_cache_free(extent_state_cache, state); | 233 | kmem_cache_free(extent_state_cache, state); |
@@ -371,8 +376,8 @@ static void merge_state(struct extent_io_tree *tree, | |||
371 | other->state == state->state) { | 376 | other->state == state->state) { |
372 | merge_cb(tree, state, other); | 377 | merge_cb(tree, state, other); |
373 | state->start = other->start; | 378 | state->start = other->start; |
374 | other->tree = NULL; | ||
375 | rb_erase(&other->rb_node, &tree->state); | 379 | rb_erase(&other->rb_node, &tree->state); |
380 | RB_CLEAR_NODE(&other->rb_node); | ||
376 | free_extent_state(other); | 381 | free_extent_state(other); |
377 | } | 382 | } |
378 | } | 383 | } |
@@ -383,8 +388,8 @@ static void merge_state(struct extent_io_tree *tree, | |||
383 | other->state == state->state) { | 388 | other->state == state->state) { |
384 | merge_cb(tree, state, other); | 389 | merge_cb(tree, state, other); |
385 | state->end = other->end; | 390 | state->end = other->end; |
386 | other->tree = NULL; | ||
387 | rb_erase(&other->rb_node, &tree->state); | 391 | rb_erase(&other->rb_node, &tree->state); |
392 | RB_CLEAR_NODE(&other->rb_node); | ||
388 | free_extent_state(other); | 393 | free_extent_state(other); |
389 | } | 394 | } |
390 | } | 395 | } |
@@ -442,7 +447,6 @@ static int insert_state(struct extent_io_tree *tree, | |||
442 | found->start, found->end, start, end); | 447 | found->start, found->end, start, end); |
443 | return -EEXIST; | 448 | return -EEXIST; |
444 | } | 449 | } |
445 | state->tree = tree; | ||
446 | merge_state(tree, state); | 450 | merge_state(tree, state); |
447 | return 0; | 451 | return 0; |
448 | } | 452 | } |
@@ -486,7 +490,6 @@ static int split_state(struct extent_io_tree *tree, struct extent_state *orig, | |||
486 | free_extent_state(prealloc); | 490 | free_extent_state(prealloc); |
487 | return -EEXIST; | 491 | return -EEXIST; |
488 | } | 492 | } |
489 | prealloc->tree = tree; | ||
490 | return 0; | 493 | return 0; |
491 | } | 494 | } |
492 | 495 | ||
@@ -524,9 +527,9 @@ static struct extent_state *clear_state_bit(struct extent_io_tree *tree, | |||
524 | wake_up(&state->wq); | 527 | wake_up(&state->wq); |
525 | if (state->state == 0) { | 528 | if (state->state == 0) { |
526 | next = next_state(state); | 529 | next = next_state(state); |
527 | if (state->tree) { | 530 | if (extent_state_in_tree(state)) { |
528 | rb_erase(&state->rb_node, &tree->state); | 531 | rb_erase(&state->rb_node, &tree->state); |
529 | state->tree = NULL; | 532 | RB_CLEAR_NODE(&state->rb_node); |
530 | free_extent_state(state); | 533 | free_extent_state(state); |
531 | } else { | 534 | } else { |
532 | WARN_ON(1); | 535 | WARN_ON(1); |
@@ -606,8 +609,8 @@ again: | |||
606 | cached_state = NULL; | 609 | cached_state = NULL; |
607 | } | 610 | } |
608 | 611 | ||
609 | if (cached && cached->tree && cached->start <= start && | 612 | if (cached && extent_state_in_tree(cached) && |
610 | cached->end > start) { | 613 | cached->start <= start && cached->end > start) { |
611 | if (clear) | 614 | if (clear) |
612 | atomic_dec(&cached->refs); | 615 | atomic_dec(&cached->refs); |
613 | state = cached; | 616 | state = cached; |
@@ -843,7 +846,7 @@ again: | |||
843 | if (cached_state && *cached_state) { | 846 | if (cached_state && *cached_state) { |
844 | state = *cached_state; | 847 | state = *cached_state; |
845 | if (state->start <= start && state->end > start && | 848 | if (state->start <= start && state->end > start && |
846 | state->tree) { | 849 | extent_state_in_tree(state)) { |
847 | node = &state->rb_node; | 850 | node = &state->rb_node; |
848 | goto hit_next; | 851 | goto hit_next; |
849 | } | 852 | } |
@@ -1069,7 +1072,7 @@ again: | |||
1069 | if (cached_state && *cached_state) { | 1072 | if (cached_state && *cached_state) { |
1070 | state = *cached_state; | 1073 | state = *cached_state; |
1071 | if (state->start <= start && state->end > start && | 1074 | if (state->start <= start && state->end > start && |
1072 | state->tree) { | 1075 | extent_state_in_tree(state)) { |
1073 | node = &state->rb_node; | 1076 | node = &state->rb_node; |
1074 | goto hit_next; | 1077 | goto hit_next; |
1075 | } | 1078 | } |
@@ -1459,7 +1462,7 @@ int find_first_extent_bit(struct extent_io_tree *tree, u64 start, | |||
1459 | spin_lock(&tree->lock); | 1462 | spin_lock(&tree->lock); |
1460 | if (cached_state && *cached_state) { | 1463 | if (cached_state && *cached_state) { |
1461 | state = *cached_state; | 1464 | state = *cached_state; |
1462 | if (state->end == start - 1 && state->tree) { | 1465 | if (state->end == start - 1 && extent_state_in_tree(state)) { |
1463 | n = rb_next(&state->rb_node); | 1466 | n = rb_next(&state->rb_node); |
1464 | while (n) { | 1467 | while (n) { |
1465 | state = rb_entry(n, struct extent_state, | 1468 | state = rb_entry(n, struct extent_state, |
@@ -1905,7 +1908,7 @@ int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, | |||
1905 | int bitset = 0; | 1908 | int bitset = 0; |
1906 | 1909 | ||
1907 | spin_lock(&tree->lock); | 1910 | spin_lock(&tree->lock); |
1908 | if (cached && cached->tree && cached->start <= start && | 1911 | if (cached && extent_state_in_tree(cached) && cached->start <= start && |
1909 | cached->end > start) | 1912 | cached->end > start) |
1910 | node = &cached->rb_node; | 1913 | node = &cached->rb_node; |
1911 | else | 1914 | else |
@@ -1959,27 +1962,7 @@ static void check_page_uptodate(struct extent_io_tree *tree, struct page *page) | |||
1959 | SetPageUptodate(page); | 1962 | SetPageUptodate(page); |
1960 | } | 1963 | } |
1961 | 1964 | ||
1962 | /* | 1965 | int free_io_failure(struct inode *inode, struct io_failure_record *rec) |
1963 | * When IO fails, either with EIO or csum verification fails, we | ||
1964 | * try other mirrors that might have a good copy of the data. This | ||
1965 | * io_failure_record is used to record state as we go through all the | ||
1966 | * mirrors. If another mirror has good data, the page is set up to date | ||
1967 | * and things continue. If a good mirror can't be found, the original | ||
1968 | * bio end_io callback is called to indicate things have failed. | ||
1969 | */ | ||
1970 | struct io_failure_record { | ||
1971 | struct page *page; | ||
1972 | u64 start; | ||
1973 | u64 len; | ||
1974 | u64 logical; | ||
1975 | unsigned long bio_flags; | ||
1976 | int this_mirror; | ||
1977 | int failed_mirror; | ||
1978 | int in_validation; | ||
1979 | }; | ||
1980 | |||
1981 | static int free_io_failure(struct inode *inode, struct io_failure_record *rec, | ||
1982 | int did_repair) | ||
1983 | { | 1966 | { |
1984 | int ret; | 1967 | int ret; |
1985 | int err = 0; | 1968 | int err = 0; |
@@ -2012,10 +1995,10 @@ static int free_io_failure(struct inode *inode, struct io_failure_record *rec, | |||
2012 | * currently, there can be no more than two copies of every data bit. thus, | 1995 | * currently, there can be no more than two copies of every data bit. thus, |
2013 | * exactly one rewrite is required. | 1996 | * exactly one rewrite is required. |
2014 | */ | 1997 | */ |
2015 | int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start, | 1998 | int repair_io_failure(struct inode *inode, u64 start, u64 length, u64 logical, |
2016 | u64 length, u64 logical, struct page *page, | 1999 | struct page *page, unsigned int pg_offset, int mirror_num) |
2017 | int mirror_num) | ||
2018 | { | 2000 | { |
2001 | struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info; | ||
2019 | struct bio *bio; | 2002 | struct bio *bio; |
2020 | struct btrfs_device *dev; | 2003 | struct btrfs_device *dev; |
2021 | u64 map_length = 0; | 2004 | u64 map_length = 0; |
@@ -2053,7 +2036,7 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start, | |||
2053 | return -EIO; | 2036 | return -EIO; |
2054 | } | 2037 | } |
2055 | bio->bi_bdev = dev->bdev; | 2038 | bio->bi_bdev = dev->bdev; |
2056 | bio_add_page(bio, page, length, start - page_offset(page)); | 2039 | bio_add_page(bio, page, length, pg_offset); |
2057 | 2040 | ||
2058 | if (btrfsic_submit_bio_wait(WRITE_SYNC, bio)) { | 2041 | if (btrfsic_submit_bio_wait(WRITE_SYNC, bio)) { |
2059 | /* try to remap that extent elsewhere? */ | 2042 | /* try to remap that extent elsewhere? */ |
@@ -2063,10 +2046,9 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start, | |||
2063 | } | 2046 | } |
2064 | 2047 | ||
2065 | printk_ratelimited_in_rcu(KERN_INFO | 2048 | printk_ratelimited_in_rcu(KERN_INFO |
2066 | "BTRFS: read error corrected: ino %lu off %llu " | 2049 | "BTRFS: read error corrected: ino %llu off %llu (dev %s sector %llu)\n", |
2067 | "(dev %s sector %llu)\n", page->mapping->host->i_ino, | 2050 | btrfs_ino(inode), start, |
2068 | start, rcu_str_deref(dev->name), sector); | 2051 | rcu_str_deref(dev->name), sector); |
2069 | |||
2070 | bio_put(bio); | 2052 | bio_put(bio); |
2071 | return 0; | 2053 | return 0; |
2072 | } | 2054 | } |
@@ -2082,9 +2064,11 @@ int repair_eb_io_failure(struct btrfs_root *root, struct extent_buffer *eb, | |||
2082 | return -EROFS; | 2064 | return -EROFS; |
2083 | 2065 | ||
2084 | for (i = 0; i < num_pages; i++) { | 2066 | for (i = 0; i < num_pages; i++) { |
2085 | struct page *p = extent_buffer_page(eb, i); | 2067 | struct page *p = eb->pages[i]; |
2086 | ret = repair_io_failure(root->fs_info, start, PAGE_CACHE_SIZE, | 2068 | |
2087 | start, p, mirror_num); | 2069 | ret = repair_io_failure(root->fs_info->btree_inode, start, |
2070 | PAGE_CACHE_SIZE, start, p, | ||
2071 | start - page_offset(p), mirror_num); | ||
2088 | if (ret) | 2072 | if (ret) |
2089 | break; | 2073 | break; |
2090 | start += PAGE_CACHE_SIZE; | 2074 | start += PAGE_CACHE_SIZE; |
@@ -2097,16 +2081,15 @@ int repair_eb_io_failure(struct btrfs_root *root, struct extent_buffer *eb, | |||
2097 | * each time an IO finishes, we do a fast check in the IO failure tree | 2081 | * each time an IO finishes, we do a fast check in the IO failure tree |
2098 | * to see if we need to process or clean up an io_failure_record | 2082 | * to see if we need to process or clean up an io_failure_record |
2099 | */ | 2083 | */ |
2100 | static int clean_io_failure(u64 start, struct page *page) | 2084 | int clean_io_failure(struct inode *inode, u64 start, struct page *page, |
2085 | unsigned int pg_offset) | ||
2101 | { | 2086 | { |
2102 | u64 private; | 2087 | u64 private; |
2103 | u64 private_failure; | 2088 | u64 private_failure; |
2104 | struct io_failure_record *failrec; | 2089 | struct io_failure_record *failrec; |
2105 | struct inode *inode = page->mapping->host; | ||
2106 | struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info; | 2090 | struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info; |
2107 | struct extent_state *state; | 2091 | struct extent_state *state; |
2108 | int num_copies; | 2092 | int num_copies; |
2109 | int did_repair = 0; | ||
2110 | int ret; | 2093 | int ret; |
2111 | 2094 | ||
2112 | private = 0; | 2095 | private = 0; |
@@ -2127,7 +2110,6 @@ static int clean_io_failure(u64 start, struct page *page) | |||
2127 | /* there was no real error, just free the record */ | 2110 | /* there was no real error, just free the record */ |
2128 | pr_debug("clean_io_failure: freeing dummy error at %llu\n", | 2111 | pr_debug("clean_io_failure: freeing dummy error at %llu\n", |
2129 | failrec->start); | 2112 | failrec->start); |
2130 | did_repair = 1; | ||
2131 | goto out; | 2113 | goto out; |
2132 | } | 2114 | } |
2133 | if (fs_info->sb->s_flags & MS_RDONLY) | 2115 | if (fs_info->sb->s_flags & MS_RDONLY) |
@@ -2144,55 +2126,70 @@ static int clean_io_failure(u64 start, struct page *page) | |||
2144 | num_copies = btrfs_num_copies(fs_info, failrec->logical, | 2126 | num_copies = btrfs_num_copies(fs_info, failrec->logical, |
2145 | failrec->len); | 2127 | failrec->len); |
2146 | if (num_copies > 1) { | 2128 | if (num_copies > 1) { |
2147 | ret = repair_io_failure(fs_info, start, failrec->len, | 2129 | repair_io_failure(inode, start, failrec->len, |
2148 | failrec->logical, page, | 2130 | failrec->logical, page, |
2149 | failrec->failed_mirror); | 2131 | pg_offset, failrec->failed_mirror); |
2150 | did_repair = !ret; | ||
2151 | } | 2132 | } |
2152 | ret = 0; | ||
2153 | } | 2133 | } |
2154 | 2134 | ||
2155 | out: | 2135 | out: |
2156 | if (!ret) | 2136 | free_io_failure(inode, failrec); |
2157 | ret = free_io_failure(inode, failrec, did_repair); | ||
2158 | 2137 | ||
2159 | return ret; | 2138 | return 0; |
2160 | } | 2139 | } |
2161 | 2140 | ||
2162 | /* | 2141 | /* |
2163 | * this is a generic handler for readpage errors (default | 2142 | * Can be called while: |
2164 | * readpage_io_failed_hook). if other copies exist, read those and write back | 2143 | * - holding the extent lock |
2165 | * good data to the failed position. does not investigate in remapping the | 2144 | * - under an ordered extent |
2166 | * failed extent elsewhere, hoping the device will be smart enough to do this as | 2145 | * - the inode is being freed |
2167 | * needed | ||
2168 | */ | 2146 | */ |
2147 | void btrfs_free_io_failure_record(struct inode *inode, u64 start, u64 end) | ||
2148 | { | ||
2149 | struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree; | ||
2150 | struct io_failure_record *failrec; | ||
2151 | struct extent_state *state, *next; | ||
2169 | 2152 | ||
2170 | static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset, | 2153 | if (RB_EMPTY_ROOT(&failure_tree->state)) |
2171 | struct page *page, u64 start, u64 end, | 2154 | return; |
2172 | int failed_mirror) | 2155 | |
2156 | spin_lock(&failure_tree->lock); | ||
2157 | state = find_first_extent_bit_state(failure_tree, start, EXTENT_DIRTY); | ||
2158 | while (state) { | ||
2159 | if (state->start > end) | ||
2160 | break; | ||
2161 | |||
2162 | ASSERT(state->end <= end); | ||
2163 | |||
2164 | next = next_state(state); | ||
2165 | |||
2166 | failrec = (struct io_failure_record *)state->private; | ||
2167 | free_extent_state(state); | ||
2168 | kfree(failrec); | ||
2169 | |||
2170 | state = next; | ||
2171 | } | ||
2172 | spin_unlock(&failure_tree->lock); | ||
2173 | } | ||
2174 | |||
2175 | int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end, | ||
2176 | struct io_failure_record **failrec_ret) | ||
2173 | { | 2177 | { |
2174 | struct io_failure_record *failrec = NULL; | 2178 | struct io_failure_record *failrec; |
2175 | u64 private; | 2179 | u64 private; |
2176 | struct extent_map *em; | 2180 | struct extent_map *em; |
2177 | struct inode *inode = page->mapping->host; | ||
2178 | struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree; | 2181 | struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree; |
2179 | struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; | 2182 | struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; |
2180 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | 2183 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; |
2181 | struct bio *bio; | ||
2182 | struct btrfs_io_bio *btrfs_failed_bio; | ||
2183 | struct btrfs_io_bio *btrfs_bio; | ||
2184 | int num_copies; | ||
2185 | int ret; | 2184 | int ret; |
2186 | int read_mode; | ||
2187 | u64 logical; | 2185 | u64 logical; |
2188 | 2186 | ||
2189 | BUG_ON(failed_bio->bi_rw & REQ_WRITE); | ||
2190 | |||
2191 | ret = get_state_private(failure_tree, start, &private); | 2187 | ret = get_state_private(failure_tree, start, &private); |
2192 | if (ret) { | 2188 | if (ret) { |
2193 | failrec = kzalloc(sizeof(*failrec), GFP_NOFS); | 2189 | failrec = kzalloc(sizeof(*failrec), GFP_NOFS); |
2194 | if (!failrec) | 2190 | if (!failrec) |
2195 | return -ENOMEM; | 2191 | return -ENOMEM; |
2192 | |||
2196 | failrec->start = start; | 2193 | failrec->start = start; |
2197 | failrec->len = end - start + 1; | 2194 | failrec->len = end - start + 1; |
2198 | failrec->this_mirror = 0; | 2195 | failrec->this_mirror = 0; |
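As the surrounding hunks show, btrfs_get_io_failure_record() is a lookup-or-create path: the first failure over a range allocates and stores a record, and a retry over the same range finds and reuses it. A rough user-space model of that flow, with a fixed-size table standing in for the failure extent tree (hash collisions ignored for brevity; all names illustrative):

#include <stdlib.h>

struct io_failure_rec {
        unsigned long long start, len;
        int this_mirror, failed_mirror, in_validation;
};

#define SLOTS 64
static struct io_failure_rec *table[SLOTS];     /* keyed by start / 4096 */

static struct io_failure_rec *get_failure_record(unsigned long long start,
                                                 unsigned long long end)
{
        unsigned int idx = (start / 4096) % SLOTS;
        struct io_failure_rec *rec = table[idx];

        if (rec)
                return rec;             /* found: a retry of a known failure */

        rec = calloc(1, sizeof(*rec));  /* first failure over this range */
        if (!rec)
                return NULL;
        rec->start = start;
        rec->len = end - start + 1;
        table[idx] = rec;
        return rec;
}

int main(void)
{
        struct io_failure_rec *a = get_failure_record(8192, 12287);
        struct io_failure_rec *b = get_failure_record(8192, 12287);

        return (a && a == b) ? 0 : 1;   /* the retry finds the same record */
}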
@@ -2212,11 +2209,11 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset, | |||
2212 | em = NULL; | 2209 | em = NULL; |
2213 | } | 2210 | } |
2214 | read_unlock(&em_tree->lock); | 2211 | read_unlock(&em_tree->lock); |
2215 | |||
2216 | if (!em) { | 2212 | if (!em) { |
2217 | kfree(failrec); | 2213 | kfree(failrec); |
2218 | return -EIO; | 2214 | return -EIO; |
2219 | } | 2215 | } |
2216 | |||
2220 | logical = start - em->start; | 2217 | logical = start - em->start; |
2221 | logical = em->block_start + logical; | 2218 | logical = em->block_start + logical; |
2222 | if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) { | 2219 | if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) { |
@@ -2225,8 +2222,10 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset, | |||
2225 | extent_set_compress_type(&failrec->bio_flags, | 2222 | extent_set_compress_type(&failrec->bio_flags, |
2226 | em->compress_type); | 2223 | em->compress_type); |
2227 | } | 2224 | } |
2228 | pr_debug("bio_readpage_error: (new) logical=%llu, start=%llu, " | 2225 | |
2229 | "len=%llu\n", logical, start, failrec->len); | 2226 | pr_debug("Get IO Failure Record: (new) logical=%llu, start=%llu, len=%llu\n", |
2227 | logical, start, failrec->len); | ||
2228 | |||
2230 | failrec->logical = logical; | 2229 | failrec->logical = logical; |
2231 | free_extent_map(em); | 2230 | free_extent_map(em); |
2232 | 2231 | ||
@@ -2246,8 +2245,7 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset, | |||
2246 | } | 2245 | } |
2247 | } else { | 2246 | } else { |
2248 | failrec = (struct io_failure_record *)(unsigned long)private; | 2247 | failrec = (struct io_failure_record *)(unsigned long)private; |
2249 | pr_debug("bio_readpage_error: (found) logical=%llu, " | 2248 | pr_debug("Get IO Failure Record: (found) logical=%llu, start=%llu, len=%llu, validation=%d\n", |
2250 | "start=%llu, len=%llu, validation=%d\n", | ||
2251 | failrec->logical, failrec->start, failrec->len, | 2249 | failrec->logical, failrec->start, failrec->len, |
2252 | failrec->in_validation); | 2250 | failrec->in_validation); |
2253 | /* | 2251 | /* |
@@ -2256,6 +2254,17 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset, | |||
2256 | * clean_io_failure() clean all those errors at once. | 2254 | * clean_io_failure() clean all those errors at once. |
2257 | */ | 2255 | */ |
2258 | } | 2256 | } |
2257 | |||
2258 | *failrec_ret = failrec; | ||
2259 | |||
2260 | return 0; | ||
2261 | } | ||
2262 | |||
2263 | int btrfs_check_repairable(struct inode *inode, struct bio *failed_bio, | ||
2264 | struct io_failure_record *failrec, int failed_mirror) | ||
2265 | { | ||
2266 | int num_copies; | ||
2267 | |||
2259 | num_copies = btrfs_num_copies(BTRFS_I(inode)->root->fs_info, | 2268 | num_copies = btrfs_num_copies(BTRFS_I(inode)->root->fs_info, |
2260 | failrec->logical, failrec->len); | 2269 | failrec->logical, failrec->len); |
2261 | if (num_copies == 1) { | 2270 | if (num_copies == 1) { |
@@ -2264,10 +2273,9 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset, | |||
2264 | * all the retry and error correction code that follows. no | 2273 | * all the retry and error correction code that follows. no |
2265 | * matter what the error is, it is very likely to persist. | 2274 | * matter what the error is, it is very likely to persist. |
2266 | */ | 2275 | */ |
2267 | pr_debug("bio_readpage_error: cannot repair, num_copies=%d, next_mirror %d, failed_mirror %d\n", | 2276 | pr_debug("Check Repairable: cannot repair, num_copies=%d, next_mirror %d, failed_mirror %d\n", |
2268 | num_copies, failrec->this_mirror, failed_mirror); | 2277 | num_copies, failrec->this_mirror, failed_mirror); |
2269 | free_io_failure(inode, failrec, 0); | 2278 | return 0; |
2270 | return -EIO; | ||
2271 | } | 2279 | } |
2272 | 2280 | ||
2273 | /* | 2281 | /* |
@@ -2287,7 +2295,6 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset, | |||
2287 | BUG_ON(failrec->in_validation); | 2295 | BUG_ON(failrec->in_validation); |
2288 | failrec->in_validation = 1; | 2296 | failrec->in_validation = 1; |
2289 | failrec->this_mirror = failed_mirror; | 2297 | failrec->this_mirror = failed_mirror; |
2290 | read_mode = READ_SYNC | REQ_FAILFAST_DEV; | ||
2291 | } else { | 2298 | } else { |
2292 | /* | 2299 | /* |
2293 | * we're ready to fulfill a) and b) alongside. get a good copy | 2300 | * we're ready to fulfill a) and b) alongside. get a good copy |
@@ -2303,25 +2310,36 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset, | |||
2303 | failrec->this_mirror++; | 2310 | failrec->this_mirror++; |
2304 | if (failrec->this_mirror == failed_mirror) | 2311 | if (failrec->this_mirror == failed_mirror) |
2305 | failrec->this_mirror++; | 2312 | failrec->this_mirror++; |
2306 | read_mode = READ_SYNC; | ||
2307 | } | 2313 | } |
2308 | 2314 | ||
2309 | if (failrec->this_mirror > num_copies) { | 2315 | if (failrec->this_mirror > num_copies) { |
2310 | pr_debug("bio_readpage_error: (fail) num_copies=%d, next_mirror %d, failed_mirror %d\n", | 2316 | pr_debug("Check Repairable: (fail) num_copies=%d, next_mirror %d, failed_mirror %d\n", |
2311 | num_copies, failrec->this_mirror, failed_mirror); | 2317 | num_copies, failrec->this_mirror, failed_mirror); |
2312 | free_io_failure(inode, failrec, 0); | 2318 | return 0; |
2313 | return -EIO; | ||
2314 | } | 2319 | } |
2315 | 2320 | ||
2321 | return 1; | ||
2322 | } | ||
2323 | |||
2324 | |||
2325 | struct bio *btrfs_create_repair_bio(struct inode *inode, struct bio *failed_bio, | ||
2326 | struct io_failure_record *failrec, | ||
2327 | struct page *page, int pg_offset, int icsum, | ||
2328 | bio_end_io_t *endio_func, void *data) | ||
2329 | { | ||
2330 | struct bio *bio; | ||
2331 | struct btrfs_io_bio *btrfs_failed_bio; | ||
2332 | struct btrfs_io_bio *btrfs_bio; | ||
2333 | |||
2316 | bio = btrfs_io_bio_alloc(GFP_NOFS, 1); | 2334 | bio = btrfs_io_bio_alloc(GFP_NOFS, 1); |
2317 | if (!bio) { | 2335 | if (!bio) |
2318 | free_io_failure(inode, failrec, 0); | 2336 | return NULL; |
2319 | return -EIO; | 2337 | |
2320 | } | 2338 | bio->bi_end_io = endio_func; |
2321 | bio->bi_end_io = failed_bio->bi_end_io; | ||
2322 | bio->bi_iter.bi_sector = failrec->logical >> 9; | 2339 | bio->bi_iter.bi_sector = failrec->logical >> 9; |
2323 | bio->bi_bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; | 2340 | bio->bi_bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; |
2324 | bio->bi_iter.bi_size = 0; | 2341 | bio->bi_iter.bi_size = 0; |
2342 | bio->bi_private = data; | ||
2325 | 2343 | ||
2326 | btrfs_failed_bio = btrfs_io_bio(failed_bio); | 2344 | btrfs_failed_bio = btrfs_io_bio(failed_bio); |
2327 | if (btrfs_failed_bio->csum) { | 2345 | if (btrfs_failed_bio->csum) { |
@@ -2330,21 +2348,73 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset, | |||
2330 | 2348 | ||
2331 | btrfs_bio = btrfs_io_bio(bio); | 2349 | btrfs_bio = btrfs_io_bio(bio); |
2332 | btrfs_bio->csum = btrfs_bio->csum_inline; | 2350 | btrfs_bio->csum = btrfs_bio->csum_inline; |
2333 | phy_offset >>= inode->i_sb->s_blocksize_bits; | 2351 | icsum *= csum_size; |
2334 | phy_offset *= csum_size; | 2352 | memcpy(btrfs_bio->csum, btrfs_failed_bio->csum + icsum, |
2335 | memcpy(btrfs_bio->csum, btrfs_failed_bio->csum + phy_offset, | ||
2336 | csum_size); | 2353 | csum_size); |
2337 | } | 2354 | } |
2338 | 2355 | ||
2339 | bio_add_page(bio, page, failrec->len, start - page_offset(page)); | 2356 | bio_add_page(bio, page, failrec->len, pg_offset); |
2357 | |||
2358 | return bio; | ||
2359 | } | ||
2360 | |||
2361 | /* | ||
2362 | * this is a generic handler for readpage errors (default | ||
2363 | * readpage_io_failed_hook). If other copies exist, read those and write back | ||
2364 | * good data to the failed position. It does not investigate remapping the | ||
2365 | * failed extent elsewhere, hoping the device will be smart enough to do this as | ||
2366 | * needed | ||
2367 | */ | ||
2368 | |||
2369 | static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset, | ||
2370 | struct page *page, u64 start, u64 end, | ||
2371 | int failed_mirror) | ||
2372 | { | ||
2373 | struct io_failure_record *failrec; | ||
2374 | struct inode *inode = page->mapping->host; | ||
2375 | struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; | ||
2376 | struct bio *bio; | ||
2377 | int read_mode; | ||
2378 | int ret; | ||
2379 | |||
2380 | BUG_ON(failed_bio->bi_rw & REQ_WRITE); | ||
2381 | |||
2382 | ret = btrfs_get_io_failure_record(inode, start, end, &failrec); | ||
2383 | if (ret) | ||
2384 | return ret; | ||
2385 | |||
2386 | ret = btrfs_check_repairable(inode, failed_bio, failrec, failed_mirror); | ||
2387 | if (!ret) { | ||
2388 | free_io_failure(inode, failrec); | ||
2389 | return -EIO; | ||
2390 | } | ||
2391 | |||
2392 | if (failed_bio->bi_vcnt > 1) | ||
2393 | read_mode = READ_SYNC | REQ_FAILFAST_DEV; | ||
2394 | else | ||
2395 | read_mode = READ_SYNC; | ||
2396 | |||
2397 | phy_offset >>= inode->i_sb->s_blocksize_bits; | ||
2398 | bio = btrfs_create_repair_bio(inode, failed_bio, failrec, page, | ||
2399 | start - page_offset(page), | ||
2400 | (int)phy_offset, failed_bio->bi_end_io, | ||
2401 | NULL); | ||
2402 | if (!bio) { | ||
2403 | free_io_failure(inode, failrec); | ||
2404 | return -EIO; | ||
2405 | } | ||
2340 | 2406 | ||
2341 | pr_debug("bio_readpage_error: submitting new read[%#x] to " | 2407 | pr_debug("Repair Read Error: submitting new read[%#x] to this_mirror=%d, in_validation=%d\n", |
2342 | "this_mirror=%d, num_copies=%d, in_validation=%d\n", read_mode, | 2408 | read_mode, failrec->this_mirror, failrec->in_validation); |
2343 | failrec->this_mirror, num_copies, failrec->in_validation); | ||
2344 | 2409 | ||
2345 | ret = tree->ops->submit_bio_hook(inode, read_mode, bio, | 2410 | ret = tree->ops->submit_bio_hook(inode, read_mode, bio, |
2346 | failrec->this_mirror, | 2411 | failrec->this_mirror, |
2347 | failrec->bio_flags, 0); | 2412 | failrec->bio_flags, 0); |
2413 | if (ret) { | ||
2414 | free_io_failure(inode, failrec); | ||
2415 | bio_put(bio); | ||
2416 | } | ||
2417 | |||
2348 | return ret; | 2418 | return ret; |
2349 | } | 2419 | } |
2350 | 2420 | ||
@@ -2469,7 +2539,7 @@ static void end_bio_extent_readpage(struct bio *bio, int err) | |||
2469 | struct inode *inode = page->mapping->host; | 2539 | struct inode *inode = page->mapping->host; |
2470 | 2540 | ||
2471 | pr_debug("end_bio_extent_readpage: bi_sector=%llu, err=%d, " | 2541 | pr_debug("end_bio_extent_readpage: bi_sector=%llu, err=%d, " |
2472 | "mirror=%lu\n", (u64)bio->bi_iter.bi_sector, err, | 2542 | "mirror=%u\n", (u64)bio->bi_iter.bi_sector, err, |
2473 | io_bio->mirror_num); | 2543 | io_bio->mirror_num); |
2474 | tree = &BTRFS_I(inode)->io_tree; | 2544 | tree = &BTRFS_I(inode)->io_tree; |
2475 | 2545 | ||
@@ -2503,7 +2573,7 @@ static void end_bio_extent_readpage(struct bio *bio, int err) | |||
2503 | if (ret) | 2573 | if (ret) |
2504 | uptodate = 0; | 2574 | uptodate = 0; |
2505 | else | 2575 | else |
2506 | clean_io_failure(start, page); | 2576 | clean_io_failure(inode, start, page, 0); |
2507 | } | 2577 | } |
2508 | 2578 | ||
2509 | if (likely(uptodate)) | 2579 | if (likely(uptodate)) |
@@ -2532,6 +2602,7 @@ static void end_bio_extent_readpage(struct bio *bio, int err) | |||
2532 | test_bit(BIO_UPTODATE, &bio->bi_flags); | 2602 | test_bit(BIO_UPTODATE, &bio->bi_flags); |
2533 | if (err) | 2603 | if (err) |
2534 | uptodate = 0; | 2604 | uptodate = 0; |
2605 | offset += len; | ||
2535 | continue; | 2606 | continue; |
2536 | } | 2607 | } |
2537 | } | 2608 | } |
@@ -2539,12 +2610,12 @@ readpage_ok: | |||
2539 | if (likely(uptodate)) { | 2610 | if (likely(uptodate)) { |
2540 | loff_t i_size = i_size_read(inode); | 2611 | loff_t i_size = i_size_read(inode); |
2541 | pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; | 2612 | pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; |
2542 | unsigned offset; | 2613 | unsigned off; |
2543 | 2614 | ||
2544 | /* Zero out the end if this page straddles i_size */ | 2615 | /* Zero out the end if this page straddles i_size */ |
2545 | offset = i_size & (PAGE_CACHE_SIZE-1); | 2616 | off = i_size & (PAGE_CACHE_SIZE-1); |
2546 | if (page->index == end_index && offset) | 2617 | if (page->index == end_index && off) |
2547 | zero_user_segment(page, offset, PAGE_CACHE_SIZE); | 2618 | zero_user_segment(page, off, PAGE_CACHE_SIZE); |
2548 | SetPageUptodate(page); | 2619 | SetPageUptodate(page); |
2549 | } else { | 2620 | } else { |
2550 | ClearPageUptodate(page); | 2621 | ClearPageUptodate(page); |
@@ -2617,9 +2688,18 @@ btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, | |||
2617 | 2688 | ||
2618 | struct bio *btrfs_bio_clone(struct bio *bio, gfp_t gfp_mask) | 2689 | struct bio *btrfs_bio_clone(struct bio *bio, gfp_t gfp_mask) |
2619 | { | 2690 | { |
2620 | return bio_clone_bioset(bio, gfp_mask, btrfs_bioset); | 2691 | struct btrfs_io_bio *btrfs_bio; |
2621 | } | 2692 | struct bio *new; |
2622 | 2693 | ||
2694 | new = bio_clone_bioset(bio, gfp_mask, btrfs_bioset); | ||
2695 | if (new) { | ||
2696 | btrfs_bio = btrfs_io_bio(new); | ||
2697 | btrfs_bio->csum = NULL; | ||
2698 | btrfs_bio->csum_allocated = NULL; | ||
2699 | btrfs_bio->end_io = NULL; | ||
2700 | } | ||
2701 | return new; | ||
2702 | } | ||
2623 | 2703 | ||
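The point of the btrfs_bio_clone() change above: bio_clone_bioset() copies only the embedded struct bio, while the allocation comes from btrfs_bioset and therefore carries a surrounding btrfs_io_bio whose csum/csum_allocated/end_io fields hold whatever the slab last contained, so the hunk resets them explicitly. A self-contained userspace model of the same container hazard (hypothetical stand-in types, not btrfs code):

#include <stdlib.h>
#include <stddef.h>
#include <assert.h>

struct bio { int sector; };                      /* stand-in for struct bio */
struct io_wrap { char *csum; struct bio bio; };  /* stand-in for btrfs_io_bio */

#define wrap_of(b) ((struct io_wrap *)((char *)(b) - offsetof(struct io_wrap, bio)))

/* Clone allocates a fresh wrapper but copies only the inner bio,
 * mirroring bio_clone_bioset(): the outer fields start out stale. */
static struct bio *clone_bio(const struct bio *src)
{
        struct io_wrap *w = malloc(sizeof(*w));
        if (!w)
                return NULL;
        w->bio = *src;
        w->csum = NULL;          /* the fix: reset wrapper state explicitly */
        return &w->bio;
}

int main(void)
{
        struct io_wrap orig = { .csum = (char *)"abc", .bio = { 42 } };
        struct bio *new = clone_bio(&orig.bio);

        assert(new && new->sector == 42);
        assert(wrap_of(new)->csum == NULL);   /* safe to inspect/free later */
        free(wrap_of(new));
        return 0;
}

The same pattern applies to any container_of()-style wrapper cloned through an API that only knows about the inner object.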
2624 | /* this also allocates from the btrfs_bioset */ | 2704 | /* this also allocates from the btrfs_bioset */ |
2625 | struct bio *btrfs_io_bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs) | 2705 | struct bio *btrfs_io_bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs) |
@@ -3500,7 +3580,7 @@ lock_extent_buffer_for_io(struct extent_buffer *eb, | |||
3500 | 3580 | ||
3501 | num_pages = num_extent_pages(eb->start, eb->len); | 3581 | num_pages = num_extent_pages(eb->start, eb->len); |
3502 | for (i = 0; i < num_pages; i++) { | 3582 | for (i = 0; i < num_pages; i++) { |
3503 | struct page *p = extent_buffer_page(eb, i); | 3583 | struct page *p = eb->pages[i]; |
3504 | 3584 | ||
3505 | if (!trylock_page(p)) { | 3585 | if (!trylock_page(p)) { |
3506 | if (!flush) { | 3586 | if (!flush) { |
@@ -3521,6 +3601,68 @@ static void end_extent_buffer_writeback(struct extent_buffer *eb) | |||
3521 | wake_up_bit(&eb->bflags, EXTENT_BUFFER_WRITEBACK); | 3601 | wake_up_bit(&eb->bflags, EXTENT_BUFFER_WRITEBACK); |
3522 | } | 3602 | } |
3523 | 3603 | ||
3604 | static void set_btree_ioerr(struct page *page) | ||
3605 | { | ||
3606 | struct extent_buffer *eb = (struct extent_buffer *)page->private; | ||
3607 | struct btrfs_inode *btree_ino = BTRFS_I(eb->fs_info->btree_inode); | ||
3608 | |||
3609 | SetPageError(page); | ||
3610 | if (test_and_set_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags)) | ||
3611 | return; | ||
3612 | |||
3613 | /* | ||
3614 | * If writeback for a btree extent that doesn't belong to a log tree | ||
3615 | * failed, increment the counter transaction->eb_write_errors. | ||
3616 | * We do this because while the transaction is running and before it's | ||
3617 | * committing (when we call filemap_fdata[write|wait]_range against | ||
3618 | * the btree inode), we might have | ||
3619 | * btree_inode->i_mapping->a_ops->writepages() called by the VM - if it | ||
3620 | * returns an error or an error happens during writeback, when we're | ||
3621 | * committing the transaction we wouldn't know about it, since the pages | ||
3622 | * may no longer be dirty nor marked for writeback (if a | ||
3623 | * subsequent modification to the extent buffer didn't happen before the | ||
3624 | * transaction commit), which makes filemap_fdata[write|wait]_range not | ||
3625 | * able to find the pages tagged with SetPageError at transaction | ||
3626 | * commit time. So if this happens we must abort the transaction, | ||
3627 | * otherwise we commit a super block with btree roots that point to | ||
3628 | * btree nodes/leafs whose content on disk is invalid - either garbage | ||
3629 | * btree nodes/leaves whose content on disk is invalid - either garbage | ||
3630 | * cowed or deleted and is no longer valid. | ||
3631 | * | ||
3632 | * Note: setting AS_EIO/AS_ENOSPC in the btree inode's i_mapping would | ||
3633 | * not be enough - we need to distinguish between log tree extents vs | ||
3634 | * non-log tree extents, and the next filemap_fdatawait_range() call | ||
3635 | * will catch and clear such errors in the mapping - and that call might | ||
3636 | * be from a log sync and not from a transaction commit. Also, checking | ||
3637 | * for the eb flag EXTENT_BUFFER_WRITE_ERR at transaction commit time is | ||
3638 | * not done and would not be reliable - the eb might have been released | ||
3639 | * from memory and reading it back again means that flag would not be | ||
3640 | * set (since it's a runtime flag, not persisted on disk). | ||
3641 | * | ||
3642 | * Using the flags below in the btree inode also achieves the goal of | ||
3643 | * AS_EIO/AS_ENOSPC in the case where writepages() returns success, | ||
3644 | * having started writeback for all dirty pages, but the writeback | ||
3645 | * finishes with errors before filemap_fdatawait_range() is called - | ||
3646 | * because we were not using AS_EIO/AS_ENOSPC, | ||
3647 | * filemap_fdatawait_range() would return success, as it could not know | ||
3648 | * that writeback errors happened (the pages were no longer tagged for | ||
3649 | * writeback). | ||
3650 | */ | ||
3651 | switch (eb->log_index) { | ||
3652 | case -1: | ||
3653 | set_bit(BTRFS_INODE_BTREE_ERR, &btree_ino->runtime_flags); | ||
3654 | break; | ||
3655 | case 0: | ||
3656 | set_bit(BTRFS_INODE_BTREE_LOG1_ERR, &btree_ino->runtime_flags); | ||
3657 | break; | ||
3658 | case 1: | ||
3659 | set_bit(BTRFS_INODE_BTREE_LOG2_ERR, &btree_ino->runtime_flags); | ||
3660 | break; | ||
3661 | default: | ||
3662 | BUG(); /* unexpected, logic error */ | ||
3663 | } | ||
3664 | } | ||
3665 | |||
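For context, the consumers of these three bits are the transaction commit and log sync paths, which can test-and-clear them even after the pages have lost their error tags. A rough sketch of the commit-time check, under the assumption that the real call site aborts the transaction on error (the exact placement in transaction.c is not shown in this patch):

/* Sketch only: commit-time check for btree writeback errors recorded
 * by set_btree_ioerr(). BTRFS_INODE_BTREE_ERR covers non-log trees;
 * the two LOG bits would be consumed by the log-sync path instead. */
static int check_btree_write_errors(struct btrfs_fs_info *fs_info,
                                    struct btrfs_trans_handle *trans)
{
        struct btrfs_inode *btree_ino = BTRFS_I(fs_info->btree_inode);

        if (test_and_clear_bit(BTRFS_INODE_BTREE_ERR,
                               &btree_ino->runtime_flags)) {
                /* Must not write a superblock pointing to bad roots. */
                btrfs_abort_transaction(trans, fs_info->tree_root, -EIO);
                return -EIO;
        }
        return 0;
}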
3524 | static void end_bio_extent_buffer_writepage(struct bio *bio, int err) | 3666 | static void end_bio_extent_buffer_writepage(struct bio *bio, int err) |
3525 | { | 3667 | { |
3526 | struct bio_vec *bvec; | 3668 | struct bio_vec *bvec; |
@@ -3534,10 +3676,9 @@ static void end_bio_extent_buffer_writepage(struct bio *bio, int err) | |||
3534 | BUG_ON(!eb); | 3676 | BUG_ON(!eb); |
3535 | done = atomic_dec_and_test(&eb->io_pages); | 3677 | done = atomic_dec_and_test(&eb->io_pages); |
3536 | 3678 | ||
3537 | if (err || test_bit(EXTENT_BUFFER_IOERR, &eb->bflags)) { | 3679 | if (err || test_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags)) { |
3538 | set_bit(EXTENT_BUFFER_IOERR, &eb->bflags); | ||
3539 | ClearPageUptodate(page); | 3680 | ClearPageUptodate(page); |
3540 | SetPageError(page); | 3681 | set_btree_ioerr(page); |
3541 | } | 3682 | } |
3542 | 3683 | ||
3543 | end_page_writeback(page); | 3684 | end_page_writeback(page); |
@@ -3564,14 +3705,14 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb, | |||
3564 | int rw = (epd->sync_io ? WRITE_SYNC : WRITE) | REQ_META; | 3705 | int rw = (epd->sync_io ? WRITE_SYNC : WRITE) | REQ_META; |
3565 | int ret = 0; | 3706 | int ret = 0; |
3566 | 3707 | ||
3567 | clear_bit(EXTENT_BUFFER_IOERR, &eb->bflags); | 3708 | clear_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags); |
3568 | num_pages = num_extent_pages(eb->start, eb->len); | 3709 | num_pages = num_extent_pages(eb->start, eb->len); |
3569 | atomic_set(&eb->io_pages, num_pages); | 3710 | atomic_set(&eb->io_pages, num_pages); |
3570 | if (btrfs_header_owner(eb) == BTRFS_TREE_LOG_OBJECTID) | 3711 | if (btrfs_header_owner(eb) == BTRFS_TREE_LOG_OBJECTID) |
3571 | bio_flags = EXTENT_BIO_TREE_LOG; | 3712 | bio_flags = EXTENT_BIO_TREE_LOG; |
3572 | 3713 | ||
3573 | for (i = 0; i < num_pages; i++) { | 3714 | for (i = 0; i < num_pages; i++) { |
3574 | struct page *p = extent_buffer_page(eb, i); | 3715 | struct page *p = eb->pages[i]; |
3575 | 3716 | ||
3576 | clear_page_dirty_for_io(p); | 3717 | clear_page_dirty_for_io(p); |
3577 | set_page_writeback(p); | 3718 | set_page_writeback(p); |
@@ -3581,8 +3722,8 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb, | |||
3581 | 0, epd->bio_flags, bio_flags); | 3722 | 0, epd->bio_flags, bio_flags); |
3582 | epd->bio_flags = bio_flags; | 3723 | epd->bio_flags = bio_flags; |
3583 | if (ret) { | 3724 | if (ret) { |
3584 | set_bit(EXTENT_BUFFER_IOERR, &eb->bflags); | 3725 | set_btree_ioerr(p); |
3585 | SetPageError(p); | 3726 | end_page_writeback(p); |
3586 | if (atomic_sub_and_test(num_pages - i, &eb->io_pages)) | 3727 | if (atomic_sub_and_test(num_pages - i, &eb->io_pages)) |
3587 | end_extent_buffer_writeback(eb); | 3728 | end_extent_buffer_writeback(eb); |
3588 | ret = -EIO; | 3729 | ret = -EIO; |
@@ -3595,7 +3736,8 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb, | |||
3595 | 3736 | ||
3596 | if (unlikely(ret)) { | 3737 | if (unlikely(ret)) { |
3597 | for (; i < num_pages; i++) { | 3738 | for (; i < num_pages; i++) { |
3598 | struct page *p = extent_buffer_page(eb, i); | 3739 | struct page *p = eb->pages[i]; |
3740 | clear_page_dirty_for_io(p); | ||
3599 | unlock_page(p); | 3741 | unlock_page(p); |
3600 | } | 3742 | } |
3601 | } | 3743 | } |
@@ -4165,19 +4307,6 @@ static struct extent_map *get_extent_skip_holes(struct inode *inode, | |||
4165 | return NULL; | 4307 | return NULL; |
4166 | } | 4308 | } |
4167 | 4309 | ||
4168 | static noinline int count_ext_ref(u64 inum, u64 offset, u64 root_id, void *ctx) | ||
4169 | { | ||
4170 | unsigned long cnt = *((unsigned long *)ctx); | ||
4171 | |||
4172 | cnt++; | ||
4173 | *((unsigned long *)ctx) = cnt; | ||
4174 | |||
4175 | /* Now we're sure that the extent is shared. */ | ||
4176 | if (cnt > 1) | ||
4177 | return 1; | ||
4178 | return 0; | ||
4179 | } | ||
4180 | |||
4181 | int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | 4310 | int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, |
4182 | __u64 start, __u64 len, get_extent_t *get_extent) | 4311 | __u64 start, __u64 len, get_extent_t *get_extent) |
4183 | { | 4312 | { |
@@ -4194,6 +4323,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | |||
4194 | struct extent_map *em = NULL; | 4323 | struct extent_map *em = NULL; |
4195 | struct extent_state *cached_state = NULL; | 4324 | struct extent_state *cached_state = NULL; |
4196 | struct btrfs_path *path; | 4325 | struct btrfs_path *path; |
4326 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
4197 | int end = 0; | 4327 | int end = 0; |
4198 | u64 em_start = 0; | 4328 | u64 em_start = 0; |
4199 | u64 em_len = 0; | 4329 | u64 em_len = 0; |
@@ -4207,15 +4337,15 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | |||
4207 | return -ENOMEM; | 4337 | return -ENOMEM; |
4208 | path->leave_spinning = 1; | 4338 | path->leave_spinning = 1; |
4209 | 4339 | ||
4210 | start = ALIGN(start, BTRFS_I(inode)->root->sectorsize); | 4340 | start = round_down(start, BTRFS_I(inode)->root->sectorsize); |
4211 | len = ALIGN(len, BTRFS_I(inode)->root->sectorsize); | 4341 | len = round_up(max, BTRFS_I(inode)->root->sectorsize) - start; |
4212 | 4342 | ||
4213 | /* | 4343 | /* |
4214 | * lookup the last file extent. We're not using i_size here | 4344 | * lookup the last file extent. We're not using i_size here |
4215 | * because there might be preallocation past i_size | 4345 | * because there might be preallocation past i_size |
4216 | */ | 4346 | */ |
4217 | ret = btrfs_lookup_file_extent(NULL, BTRFS_I(inode)->root, | 4347 | ret = btrfs_lookup_file_extent(NULL, root, path, btrfs_ino(inode), -1, |
4218 | path, btrfs_ino(inode), -1, 0); | 4348 | 0); |
4219 | if (ret < 0) { | 4349 | if (ret < 0) { |
4220 | btrfs_free_path(path); | 4350 | btrfs_free_path(path); |
4221 | return ret; | 4351 | return ret; |
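The round_down/round_up conversion rounds the queried range outward instead of rounding start up, so the caller's [start, start + len) interval is always covered ('max' is presumably the unaligned end, start + len, set in a hunk not shown here). A runnable illustration with userspace re-definitions of the power-of-two kernel macros:

#include <stdio.h>
#include <assert.h>

/* Userspace stand-ins for the kernel helpers (power-of-two alignment). */
#define round_down(x, a)  ((x) & ~((a) - 1))
#define round_up(x, a)    (((x) + (a) - 1) & ~((a) - 1))
#define ALIGN(x, a)       round_up(x, a)

int main(void)
{
        unsigned long long sectorsize = 4096;
        unsigned long long start = 1000, len = 5000;     /* caller range */
        unsigned long long max = start + len;            /* unaligned end */

        /* Old code: start rounds UP past byte 1000, skipping the head. */
        assert(ALIGN(start, sectorsize) == 4096);

        /* New code: expand outward so [start, start + len) stays covered. */
        unsigned long long s = round_down(start, sectorsize);  /* 0 */
        unsigned long long l = round_up(max, sectorsize) - s;  /* 8192 */
        printf("query [%llu, %llu)\n", s, s + l);
        assert(s <= start && s + l >= max);
        return 0;
}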
@@ -4223,7 +4353,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | |||
4223 | WARN_ON(!ret); | 4353 | WARN_ON(!ret); |
4224 | path->slots[0]--; | 4354 | path->slots[0]--; |
4225 | btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]); | 4355 | btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]); |
4226 | found_type = btrfs_key_type(&found_key); | 4356 | found_type = found_key.type; |
4227 | 4357 | ||
4228 | /* No extents, but there might be delalloc bits */ | 4358 | /* No extents, but there might be delalloc bits */ |
4229 | if (found_key.objectid != btrfs_ino(inode) || | 4359 | if (found_key.objectid != btrfs_ino(inode) || |
@@ -4308,25 +4438,27 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | |||
4308 | } else if (em->block_start == EXTENT_MAP_DELALLOC) { | 4438 | } else if (em->block_start == EXTENT_MAP_DELALLOC) { |
4309 | flags |= (FIEMAP_EXTENT_DELALLOC | | 4439 | flags |= (FIEMAP_EXTENT_DELALLOC | |
4310 | FIEMAP_EXTENT_UNKNOWN); | 4440 | FIEMAP_EXTENT_UNKNOWN); |
4311 | } else { | 4441 | } else if (fieinfo->fi_extents_max) { |
4312 | unsigned long ref_cnt = 0; | 4442 | u64 bytenr = em->block_start - |
4443 | (em->start - em->orig_start); | ||
4313 | 4444 | ||
4314 | disko = em->block_start + offset_in_extent; | 4445 | disko = em->block_start + offset_in_extent; |
4315 | 4446 | ||
4316 | /* | 4447 | /* |
4317 | * As btrfs supports shared space, this information | 4448 | * As btrfs supports shared space, this information |
4318 | * can be exported to userspace tools via | 4449 | * can be exported to userspace tools via |
4319 | * flag FIEMAP_EXTENT_SHARED. | 4450 | * flag FIEMAP_EXTENT_SHARED. If fi_extents_max == 0 |
4451 | * then we're just getting a count and we can skip the | ||
4452 | * lookup stuff. | ||
4320 | */ | 4453 | */ |
4321 | ret = iterate_inodes_from_logical( | 4454 | ret = btrfs_check_shared(NULL, root->fs_info, |
4322 | em->block_start, | 4455 | root->objectid, |
4323 | BTRFS_I(inode)->root->fs_info, | 4456 | btrfs_ino(inode), bytenr); |
4324 | path, count_ext_ref, &ref_cnt); | 4457 | if (ret < 0) |
4325 | if (ret < 0 && ret != -ENOENT) | ||
4326 | goto out_free; | 4458 | goto out_free; |
4327 | 4459 | if (ret) | |
4328 | if (ref_cnt > 1) | ||
4329 | flags |= FIEMAP_EXTENT_SHARED; | 4460 | flags |= FIEMAP_EXTENT_SHARED; |
4461 | ret = 0; | ||
4330 | } | 4462 | } |
4331 | if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) | 4463 | if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) |
4332 | flags |= FIEMAP_EXTENT_ENCODED; | 4464 | flags |= FIEMAP_EXTENT_ENCODED; |
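On the userspace side, the shared-extent information this hunk now computes via btrfs_check_shared() (and only when the caller supplied extent buffers, i.e. fi_extents_max != 0) is visible through the FIEMAP ioctl. A minimal consumer, for illustration:

#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fs.h>
#include <linux/fiemap.h>

int main(int argc, char **argv)
{
        if (argc != 2)
                return 1;
        int fd = open(argv[1], O_RDONLY);
        if (fd < 0)
                return 1;

        /* Room for the header plus a handful of extent records. */
        size_t sz = sizeof(struct fiemap) + 16 * sizeof(struct fiemap_extent);
        struct fiemap *fm = calloc(1, sz);
        if (!fm)
                return 1;
        fm->fm_start = 0;
        fm->fm_length = ~0ULL;        /* whole file */
        fm->fm_extent_count = 16;     /* 0 here would mean "count only" */

        if (ioctl(fd, FS_IOC_FIEMAP, fm) == 0) {
                for (unsigned i = 0; i < fm->fm_mapped_extents; i++) {
                        struct fiemap_extent *fe = &fm->fm_extents[i];
                        printf("extent %u: logical=%llu len=%llu%s\n", i,
                               (unsigned long long)fe->fe_logical,
                               (unsigned long long)fe->fe_length,
                               (fe->fe_flags & FIEMAP_EXTENT_SHARED) ?
                               " (shared)" : "");
                }
        }
        free(fm);
        close(fd);
        return 0;
}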
@@ -4380,24 +4512,21 @@ int extent_buffer_under_io(struct extent_buffer *eb) | |||
4380 | /* | 4512 | /* |
4381 | * Helper for releasing extent buffer page. | 4513 | * Helper for releasing extent buffer page. |
4382 | */ | 4514 | */ |
4383 | static void btrfs_release_extent_buffer_page(struct extent_buffer *eb, | 4515 | static void btrfs_release_extent_buffer_page(struct extent_buffer *eb) |
4384 | unsigned long start_idx) | ||
4385 | { | 4516 | { |
4386 | unsigned long index; | 4517 | unsigned long index; |
4387 | unsigned long num_pages; | ||
4388 | struct page *page; | 4518 | struct page *page; |
4389 | int mapped = !test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags); | 4519 | int mapped = !test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags); |
4390 | 4520 | ||
4391 | BUG_ON(extent_buffer_under_io(eb)); | 4521 | BUG_ON(extent_buffer_under_io(eb)); |
4392 | 4522 | ||
4393 | num_pages = num_extent_pages(eb->start, eb->len); | 4523 | index = num_extent_pages(eb->start, eb->len); |
4394 | index = start_idx + num_pages; | 4524 | if (index == 0) |
4395 | if (start_idx >= index) | ||
4396 | return; | 4525 | return; |
4397 | 4526 | ||
4398 | do { | 4527 | do { |
4399 | index--; | 4528 | index--; |
4400 | page = extent_buffer_page(eb, index); | 4529 | page = eb->pages[index]; |
4401 | if (page && mapped) { | 4530 | if (page && mapped) { |
4402 | spin_lock(&page->mapping->private_lock); | 4531 | spin_lock(&page->mapping->private_lock); |
4403 | /* | 4532 | /* |
@@ -4428,7 +4557,7 @@ static void btrfs_release_extent_buffer_page(struct extent_buffer *eb, | |||
4428 | /* One for when we alloced the page */ | 4557 | /* One for when we alloced the page */ |
4429 | page_cache_release(page); | 4558 | page_cache_release(page); |
4430 | } | 4559 | } |
4431 | } while (index != start_idx); | 4560 | } while (index != 0); |
4432 | } | 4561 | } |
4433 | 4562 | ||
4434 | /* | 4563 | /* |
@@ -4436,7 +4565,7 @@ static void btrfs_release_extent_buffer_page(struct extent_buffer *eb, | |||
4436 | */ | 4565 | */ |
4437 | static inline void btrfs_release_extent_buffer(struct extent_buffer *eb) | 4566 | static inline void btrfs_release_extent_buffer(struct extent_buffer *eb) |
4438 | { | 4567 | { |
4439 | btrfs_release_extent_buffer_page(eb, 0); | 4568 | btrfs_release_extent_buffer_page(eb); |
4440 | __free_extent_buffer(eb); | 4569 | __free_extent_buffer(eb); |
4441 | } | 4570 | } |
4442 | 4571 | ||
@@ -4579,7 +4708,8 @@ static void mark_extent_buffer_accessed(struct extent_buffer *eb, | |||
4579 | 4708 | ||
4580 | num_pages = num_extent_pages(eb->start, eb->len); | 4709 | num_pages = num_extent_pages(eb->start, eb->len); |
4581 | for (i = 0; i < num_pages; i++) { | 4710 | for (i = 0; i < num_pages; i++) { |
4582 | struct page *p = extent_buffer_page(eb, i); | 4711 | struct page *p = eb->pages[i]; |
4712 | |||
4583 | if (p != accessed) | 4713 | if (p != accessed) |
4584 | mark_page_accessed(p); | 4714 | mark_page_accessed(p); |
4585 | } | 4715 | } |
@@ -4748,7 +4878,7 @@ again: | |||
4748 | */ | 4878 | */ |
4749 | SetPageChecked(eb->pages[0]); | 4879 | SetPageChecked(eb->pages[0]); |
4750 | for (i = 1; i < num_pages; i++) { | 4880 | for (i = 1; i < num_pages; i++) { |
4751 | p = extent_buffer_page(eb, i); | 4881 | p = eb->pages[i]; |
4752 | ClearPageChecked(p); | 4882 | ClearPageChecked(p); |
4753 | unlock_page(p); | 4883 | unlock_page(p); |
4754 | } | 4884 | } |
@@ -4793,7 +4923,7 @@ static int release_extent_buffer(struct extent_buffer *eb) | |||
4793 | } | 4923 | } |
4794 | 4924 | ||
4795 | /* Should be safe to release our pages at this point */ | 4925 | /* Should be safe to release our pages at this point */ |
4796 | btrfs_release_extent_buffer_page(eb, 0); | 4926 | btrfs_release_extent_buffer_page(eb); |
4797 | call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu); | 4927 | call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu); |
4798 | return 1; | 4928 | return 1; |
4799 | } | 4929 | } |
@@ -4859,7 +4989,7 @@ void clear_extent_buffer_dirty(struct extent_buffer *eb) | |||
4859 | num_pages = num_extent_pages(eb->start, eb->len); | 4989 | num_pages = num_extent_pages(eb->start, eb->len); |
4860 | 4990 | ||
4861 | for (i = 0; i < num_pages; i++) { | 4991 | for (i = 0; i < num_pages; i++) { |
4862 | page = extent_buffer_page(eb, i); | 4992 | page = eb->pages[i]; |
4863 | if (!PageDirty(page)) | 4993 | if (!PageDirty(page)) |
4864 | continue; | 4994 | continue; |
4865 | 4995 | ||
@@ -4895,7 +5025,7 @@ int set_extent_buffer_dirty(struct extent_buffer *eb) | |||
4895 | WARN_ON(!test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)); | 5025 | WARN_ON(!test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)); |
4896 | 5026 | ||
4897 | for (i = 0; i < num_pages; i++) | 5027 | for (i = 0; i < num_pages; i++) |
4898 | set_page_dirty(extent_buffer_page(eb, i)); | 5028 | set_page_dirty(eb->pages[i]); |
4899 | return was_dirty; | 5029 | return was_dirty; |
4900 | } | 5030 | } |
4901 | 5031 | ||
@@ -4908,7 +5038,7 @@ int clear_extent_buffer_uptodate(struct extent_buffer *eb) | |||
4908 | clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); | 5038 | clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); |
4909 | num_pages = num_extent_pages(eb->start, eb->len); | 5039 | num_pages = num_extent_pages(eb->start, eb->len); |
4910 | for (i = 0; i < num_pages; i++) { | 5040 | for (i = 0; i < num_pages; i++) { |
4911 | page = extent_buffer_page(eb, i); | 5041 | page = eb->pages[i]; |
4912 | if (page) | 5042 | if (page) |
4913 | ClearPageUptodate(page); | 5043 | ClearPageUptodate(page); |
4914 | } | 5044 | } |
@@ -4924,7 +5054,7 @@ int set_extent_buffer_uptodate(struct extent_buffer *eb) | |||
4924 | set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); | 5054 | set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); |
4925 | num_pages = num_extent_pages(eb->start, eb->len); | 5055 | num_pages = num_extent_pages(eb->start, eb->len); |
4926 | for (i = 0; i < num_pages; i++) { | 5056 | for (i = 0; i < num_pages; i++) { |
4927 | page = extent_buffer_page(eb, i); | 5057 | page = eb->pages[i]; |
4928 | SetPageUptodate(page); | 5058 | SetPageUptodate(page); |
4929 | } | 5059 | } |
4930 | return 0; | 5060 | return 0; |
@@ -4964,7 +5094,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, | |||
4964 | 5094 | ||
4965 | num_pages = num_extent_pages(eb->start, eb->len); | 5095 | num_pages = num_extent_pages(eb->start, eb->len); |
4966 | for (i = start_i; i < num_pages; i++) { | 5096 | for (i = start_i; i < num_pages; i++) { |
4967 | page = extent_buffer_page(eb, i); | 5097 | page = eb->pages[i]; |
4968 | if (wait == WAIT_NONE) { | 5098 | if (wait == WAIT_NONE) { |
4969 | if (!trylock_page(page)) | 5099 | if (!trylock_page(page)) |
4970 | goto unlock_exit; | 5100 | goto unlock_exit; |
@@ -4983,11 +5113,11 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, | |||
4983 | goto unlock_exit; | 5113 | goto unlock_exit; |
4984 | } | 5114 | } |
4985 | 5115 | ||
4986 | clear_bit(EXTENT_BUFFER_IOERR, &eb->bflags); | 5116 | clear_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags); |
4987 | eb->read_mirror = 0; | 5117 | eb->read_mirror = 0; |
4988 | atomic_set(&eb->io_pages, num_reads); | 5118 | atomic_set(&eb->io_pages, num_reads); |
4989 | for (i = start_i; i < num_pages; i++) { | 5119 | for (i = start_i; i < num_pages; i++) { |
4990 | page = extent_buffer_page(eb, i); | 5120 | page = eb->pages[i]; |
4991 | if (!PageUptodate(page)) { | 5121 | if (!PageUptodate(page)) { |
4992 | ClearPageError(page); | 5122 | ClearPageError(page); |
4993 | err = __extent_read_full_page(tree, page, | 5123 | err = __extent_read_full_page(tree, page, |
@@ -5012,7 +5142,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, | |||
5012 | return ret; | 5142 | return ret; |
5013 | 5143 | ||
5014 | for (i = start_i; i < num_pages; i++) { | 5144 | for (i = start_i; i < num_pages; i++) { |
5015 | page = extent_buffer_page(eb, i); | 5145 | page = eb->pages[i]; |
5016 | wait_on_page_locked(page); | 5146 | wait_on_page_locked(page); |
5017 | if (!PageUptodate(page)) | 5147 | if (!PageUptodate(page)) |
5018 | ret = -EIO; | 5148 | ret = -EIO; |
@@ -5023,7 +5153,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, | |||
5023 | unlock_exit: | 5153 | unlock_exit: |
5024 | i = start_i; | 5154 | i = start_i; |
5025 | while (locked_pages > 0) { | 5155 | while (locked_pages > 0) { |
5026 | page = extent_buffer_page(eb, i); | 5156 | page = eb->pages[i]; |
5027 | i++; | 5157 | i++; |
5028 | unlock_page(page); | 5158 | unlock_page(page); |
5029 | locked_pages--; | 5159 | locked_pages--; |
@@ -5049,7 +5179,7 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv, | |||
5049 | offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1); | 5179 | offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1); |
5050 | 5180 | ||
5051 | while (len > 0) { | 5181 | while (len > 0) { |
5052 | page = extent_buffer_page(eb, i); | 5182 | page = eb->pages[i]; |
5053 | 5183 | ||
5054 | cur = min(len, (PAGE_CACHE_SIZE - offset)); | 5184 | cur = min(len, (PAGE_CACHE_SIZE - offset)); |
5055 | kaddr = page_address(page); | 5185 | kaddr = page_address(page); |
@@ -5081,7 +5211,7 @@ int read_extent_buffer_to_user(struct extent_buffer *eb, void __user *dstv, | |||
5081 | offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1); | 5211 | offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1); |
5082 | 5212 | ||
5083 | while (len > 0) { | 5213 | while (len > 0) { |
5084 | page = extent_buffer_page(eb, i); | 5214 | page = eb->pages[i]; |
5085 | 5215 | ||
5086 | cur = min(len, (PAGE_CACHE_SIZE - offset)); | 5216 | cur = min(len, (PAGE_CACHE_SIZE - offset)); |
5087 | kaddr = page_address(page); | 5217 | kaddr = page_address(page); |
@@ -5130,7 +5260,7 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start, | |||
5130 | return -EINVAL; | 5260 | return -EINVAL; |
5131 | } | 5261 | } |
5132 | 5262 | ||
5133 | p = extent_buffer_page(eb, i); | 5263 | p = eb->pages[i]; |
5134 | kaddr = page_address(p); | 5264 | kaddr = page_address(p); |
5135 | *map = kaddr + offset; | 5265 | *map = kaddr + offset; |
5136 | *map_len = PAGE_CACHE_SIZE - offset; | 5266 | *map_len = PAGE_CACHE_SIZE - offset; |
@@ -5156,7 +5286,7 @@ int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv, | |||
5156 | offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1); | 5286 | offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1); |
5157 | 5287 | ||
5158 | while (len > 0) { | 5288 | while (len > 0) { |
5159 | page = extent_buffer_page(eb, i); | 5289 | page = eb->pages[i]; |
5160 | 5290 | ||
5161 | cur = min(len, (PAGE_CACHE_SIZE - offset)); | 5291 | cur = min(len, (PAGE_CACHE_SIZE - offset)); |
5162 | 5292 | ||
@@ -5190,7 +5320,7 @@ void write_extent_buffer(struct extent_buffer *eb, const void *srcv, | |||
5190 | offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1); | 5320 | offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1); |
5191 | 5321 | ||
5192 | while (len > 0) { | 5322 | while (len > 0) { |
5193 | page = extent_buffer_page(eb, i); | 5323 | page = eb->pages[i]; |
5194 | WARN_ON(!PageUptodate(page)); | 5324 | WARN_ON(!PageUptodate(page)); |
5195 | 5325 | ||
5196 | cur = min(len, PAGE_CACHE_SIZE - offset); | 5326 | cur = min(len, PAGE_CACHE_SIZE - offset); |
@@ -5220,7 +5350,7 @@ void memset_extent_buffer(struct extent_buffer *eb, char c, | |||
5220 | offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1); | 5350 | offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1); |
5221 | 5351 | ||
5222 | while (len > 0) { | 5352 | while (len > 0) { |
5223 | page = extent_buffer_page(eb, i); | 5353 | page = eb->pages[i]; |
5224 | WARN_ON(!PageUptodate(page)); | 5354 | WARN_ON(!PageUptodate(page)); |
5225 | 5355 | ||
5226 | cur = min(len, PAGE_CACHE_SIZE - offset); | 5356 | cur = min(len, PAGE_CACHE_SIZE - offset); |
@@ -5251,7 +5381,7 @@ void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src, | |||
5251 | (PAGE_CACHE_SIZE - 1); | 5381 | (PAGE_CACHE_SIZE - 1); |
5252 | 5382 | ||
5253 | while (len > 0) { | 5383 | while (len > 0) { |
5254 | page = extent_buffer_page(dst, i); | 5384 | page = dst->pages[i]; |
5255 | WARN_ON(!PageUptodate(page)); | 5385 | WARN_ON(!PageUptodate(page)); |
5256 | 5386 | ||
5257 | cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - offset)); | 5387 | cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - offset)); |
@@ -5329,8 +5459,7 @@ void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, | |||
5329 | cur = min_t(unsigned long, cur, | 5459 | cur = min_t(unsigned long, cur, |
5330 | (unsigned long)(PAGE_CACHE_SIZE - dst_off_in_page)); | 5460 | (unsigned long)(PAGE_CACHE_SIZE - dst_off_in_page)); |
5331 | 5461 | ||
5332 | copy_pages(extent_buffer_page(dst, dst_i), | 5462 | copy_pages(dst->pages[dst_i], dst->pages[src_i], |
5333 | extent_buffer_page(dst, src_i), | ||
5334 | dst_off_in_page, src_off_in_page, cur); | 5463 | dst_off_in_page, src_off_in_page, cur); |
5335 | 5464 | ||
5336 | src_offset += cur; | 5465 | src_offset += cur; |
@@ -5376,8 +5505,7 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, | |||
5376 | 5505 | ||
5377 | cur = min_t(unsigned long, len, src_off_in_page + 1); | 5506 | cur = min_t(unsigned long, len, src_off_in_page + 1); |
5378 | cur = min(cur, dst_off_in_page + 1); | 5507 | cur = min(cur, dst_off_in_page + 1); |
5379 | copy_pages(extent_buffer_page(dst, dst_i), | 5508 | copy_pages(dst->pages[dst_i], dst->pages[src_i], |
5380 | extent_buffer_page(dst, src_i), | ||
5381 | dst_off_in_page - cur + 1, | 5509 | dst_off_in_page - cur + 1, |
5382 | src_off_in_page - cur + 1, cur); | 5510 | src_off_in_page - cur + 1, cur); |
5383 | 5511 | ||
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index ccc264e7bde1..6d4b938be986 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h | |||
@@ -11,8 +11,6 @@ | |||
11 | #define EXTENT_NEW (1 << 4) | 11 | #define EXTENT_NEW (1 << 4) |
12 | #define EXTENT_DELALLOC (1 << 5) | 12 | #define EXTENT_DELALLOC (1 << 5) |
13 | #define EXTENT_DEFRAG (1 << 6) | 13 | #define EXTENT_DEFRAG (1 << 6) |
14 | #define EXTENT_DEFRAG_DONE (1 << 7) | ||
15 | #define EXTENT_BUFFER_FILLED (1 << 8) | ||
16 | #define EXTENT_BOUNDARY (1 << 9) | 14 | #define EXTENT_BOUNDARY (1 << 9) |
17 | #define EXTENT_NODATASUM (1 << 10) | 15 | #define EXTENT_NODATASUM (1 << 10) |
18 | #define EXTENT_DO_ACCOUNTING (1 << 11) | 16 | #define EXTENT_DO_ACCOUNTING (1 << 11) |
@@ -34,16 +32,16 @@ | |||
34 | 32 | ||
35 | /* these are bit numbers for test/set bit */ | 33 | /* these are bit numbers for test/set bit */ |
36 | #define EXTENT_BUFFER_UPTODATE 0 | 34 | #define EXTENT_BUFFER_UPTODATE 0 |
37 | #define EXTENT_BUFFER_BLOCKING 1 | ||
38 | #define EXTENT_BUFFER_DIRTY 2 | 35 | #define EXTENT_BUFFER_DIRTY 2 |
39 | #define EXTENT_BUFFER_CORRUPT 3 | 36 | #define EXTENT_BUFFER_CORRUPT 3 |
40 | #define EXTENT_BUFFER_READAHEAD 4 /* this got triggered by readahead */ | 37 | #define EXTENT_BUFFER_READAHEAD 4 /* this got triggered by readahead */ |
41 | #define EXTENT_BUFFER_TREE_REF 5 | 38 | #define EXTENT_BUFFER_TREE_REF 5 |
42 | #define EXTENT_BUFFER_STALE 6 | 39 | #define EXTENT_BUFFER_STALE 6 |
43 | #define EXTENT_BUFFER_WRITEBACK 7 | 40 | #define EXTENT_BUFFER_WRITEBACK 7 |
44 | #define EXTENT_BUFFER_IOERR 8 | 41 | #define EXTENT_BUFFER_READ_ERR 8 /* read IO error */ |
45 | #define EXTENT_BUFFER_DUMMY 9 | 42 | #define EXTENT_BUFFER_DUMMY 9 |
46 | #define EXTENT_BUFFER_IN_TREE 10 | 43 | #define EXTENT_BUFFER_IN_TREE 10 |
44 | #define EXTENT_BUFFER_WRITE_ERR 11 /* write IO error */ | ||
47 | 45 | ||
48 | /* these are flags for extent_clear_unlock_delalloc */ | 46 | /* these are flags for extent_clear_unlock_delalloc */ |
49 | #define PAGE_UNLOCK (1 << 0) | 47 | #define PAGE_UNLOCK (1 << 0) |
@@ -57,7 +55,6 @@ | |||
57 | * map has page->private set to one. | 55 | * map has page->private set to one. |
58 | */ | 56 | */ |
59 | #define EXTENT_PAGE_PRIVATE 1 | 57 | #define EXTENT_PAGE_PRIVATE 1 |
60 | #define EXTENT_PAGE_PRIVATE_FIRST_PAGE 3 | ||
61 | 58 | ||
62 | struct extent_state; | 59 | struct extent_state; |
63 | struct btrfs_root; | 60 | struct btrfs_root; |
@@ -108,7 +105,6 @@ struct extent_state { | |||
108 | struct rb_node rb_node; | 105 | struct rb_node rb_node; |
109 | 106 | ||
110 | /* ADD NEW ELEMENTS AFTER THIS */ | 107 | /* ADD NEW ELEMENTS AFTER THIS */ |
111 | struct extent_io_tree *tree; | ||
112 | wait_queue_head_t wq; | 108 | wait_queue_head_t wq; |
113 | atomic_t refs; | 109 | atomic_t refs; |
114 | unsigned long state; | 110 | unsigned long state; |
@@ -126,8 +122,6 @@ struct extent_state { | |||
126 | struct extent_buffer { | 122 | struct extent_buffer { |
127 | u64 start; | 123 | u64 start; |
128 | unsigned long len; | 124 | unsigned long len; |
129 | unsigned long map_start; | ||
130 | unsigned long map_len; | ||
131 | unsigned long bflags; | 125 | unsigned long bflags; |
132 | struct btrfs_fs_info *fs_info; | 126 | struct btrfs_fs_info *fs_info; |
133 | spinlock_t refs_lock; | 127 | spinlock_t refs_lock; |
@@ -144,7 +138,9 @@ struct extent_buffer { | |||
144 | atomic_t blocking_readers; | 138 | atomic_t blocking_readers; |
145 | atomic_t spinning_readers; | 139 | atomic_t spinning_readers; |
146 | atomic_t spinning_writers; | 140 | atomic_t spinning_writers; |
147 | int lock_nested; | 141 | short lock_nested; |
142 | /* >= 0 if eb belongs to a log tree, -1 otherwise */ | ||
143 | short log_index; | ||
148 | 144 | ||
149 | /* protects write locks */ | 145 | /* protects write locks */ |
150 | rwlock_t lock; | 146 | rwlock_t lock; |
@@ -286,12 +282,6 @@ static inline unsigned long num_extent_pages(u64 start, u64 len) | |||
286 | (start >> PAGE_CACHE_SHIFT); | 282 | (start >> PAGE_CACHE_SHIFT); |
287 | } | 283 | } |
288 | 284 | ||
289 | static inline struct page *extent_buffer_page(struct extent_buffer *eb, | ||
290 | unsigned long i) | ||
291 | { | ||
292 | return eb->pages[i]; | ||
293 | } | ||
294 | |||
295 | static inline void extent_buffer_get(struct extent_buffer *eb) | 285 | static inline void extent_buffer_get(struct extent_buffer *eb) |
296 | { | 286 | { |
297 | atomic_inc(&eb->refs); | 287 | atomic_inc(&eb->refs); |
@@ -341,18 +331,50 @@ struct bio *btrfs_bio_clone(struct bio *bio, gfp_t gfp_mask); | |||
341 | 331 | ||
342 | struct btrfs_fs_info; | 332 | struct btrfs_fs_info; |
343 | 333 | ||
344 | int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start, | 334 | int repair_io_failure(struct inode *inode, u64 start, u64 length, u64 logical, |
345 | u64 length, u64 logical, struct page *page, | 335 | struct page *page, unsigned int pg_offset, |
346 | int mirror_num); | 336 | int mirror_num); |
337 | int clean_io_failure(struct inode *inode, u64 start, struct page *page, | ||
338 | unsigned int pg_offset); | ||
347 | int end_extent_writepage(struct page *page, int err, u64 start, u64 end); | 339 | int end_extent_writepage(struct page *page, int err, u64 start, u64 end); |
348 | int repair_eb_io_failure(struct btrfs_root *root, struct extent_buffer *eb, | 340 | int repair_eb_io_failure(struct btrfs_root *root, struct extent_buffer *eb, |
349 | int mirror_num); | 341 | int mirror_num); |
342 | |||
343 | /* | ||
344 | * When IO fails, either with EIO or csum verification fails, we | ||
345 | * try other mirrors that might have a good copy of the data. This | ||
346 | * io_failure_record is used to record state as we go through all the | ||
347 | * mirrors. If another mirror has good data, the page is set up to date | ||
348 | * and things continue. If a good mirror can't be found, the original | ||
349 | * bio end_io callback is called to indicate things have failed. | ||
350 | */ | ||
351 | struct io_failure_record { | ||
352 | struct page *page; | ||
353 | u64 start; | ||
354 | u64 len; | ||
355 | u64 logical; | ||
356 | unsigned long bio_flags; | ||
357 | int this_mirror; | ||
358 | int failed_mirror; | ||
359 | int in_validation; | ||
360 | }; | ||
361 | |||
362 | void btrfs_free_io_failure_record(struct inode *inode, u64 start, u64 end); | ||
363 | int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end, | ||
364 | struct io_failure_record **failrec_ret); | ||
365 | int btrfs_check_repairable(struct inode *inode, struct bio *failed_bio, | ||
366 | struct io_failure_record *failrec, int fail_mirror); | ||
367 | struct bio *btrfs_create_repair_bio(struct inode *inode, struct bio *failed_bio, | ||
368 | struct io_failure_record *failrec, | ||
369 | struct page *page, int pg_offset, int icsum, | ||
370 | bio_end_io_t *endio_func, void *data); | ||
371 | int free_io_failure(struct inode *inode, struct io_failure_record *rec); | ||
350 | #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS | 372 | #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS |
351 | noinline u64 find_lock_delalloc_range(struct inode *inode, | 373 | noinline u64 find_lock_delalloc_range(struct inode *inode, |
352 | struct extent_io_tree *tree, | 374 | struct extent_io_tree *tree, |
353 | struct page *locked_page, u64 *start, | 375 | struct page *locked_page, u64 *start, |
354 | u64 *end, u64 max_bytes); | 376 | u64 *end, u64 max_bytes); |
377 | #endif | ||
355 | struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info, | 378 | struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info, |
356 | u64 start, unsigned long len); | 379 | u64 start, unsigned long len); |
357 | #endif | 380 | #endif |
358 | #endif | ||
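Taken together, these declarations split the old monolithic repair path into reusable steps, so that other readers (e.g. the direct-IO path) can drive the same machinery. A sketch of the expected calling sequence, mirroring the buffered-read code earlier in this patch (error handling trimmed, icsum simplified to 0 for the sketch):

/* Sketch of the read-repair sequence using the exported helpers. */
static int repair_one_sector(struct inode *inode, struct bio *failed_bio,
                             struct page *page, u64 start, u64 end,
                             int failed_mirror)
{
        struct io_failure_record *failrec;
        struct bio *bio;
        int ret;

        ret = btrfs_get_io_failure_record(inode, start, end, &failrec);
        if (ret)
                return ret;

        if (!btrfs_check_repairable(inode, failed_bio, failrec,
                                    failed_mirror)) {
                free_io_failure(inode, failrec);
                return -EIO;            /* no other mirror left to try */
        }

        bio = btrfs_create_repair_bio(inode, failed_bio, failrec, page,
                                      start - page_offset(page),
                                      0 /* icsum, simplified */,
                                      failed_bio->bi_end_io, NULL);
        if (!bio) {
                free_io_failure(inode, failrec);
                return -EIO;
        }
        /* the caller then submits 'bio' against failrec->this_mirror */
        return 0;
}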
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 54c84daec9b5..783a94355efd 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c | |||
@@ -55,7 +55,7 @@ int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, | |||
55 | return -ENOMEM; | 55 | return -ENOMEM; |
56 | file_key.objectid = objectid; | 56 | file_key.objectid = objectid; |
57 | file_key.offset = pos; | 57 | file_key.offset = pos; |
58 | btrfs_set_key_type(&file_key, BTRFS_EXTENT_DATA_KEY); | 58 | file_key.type = BTRFS_EXTENT_DATA_KEY; |
59 | 59 | ||
60 | path->leave_spinning = 1; | 60 | path->leave_spinning = 1; |
61 | ret = btrfs_insert_empty_item(trans, root, path, &file_key, | 61 | ret = btrfs_insert_empty_item(trans, root, path, &file_key, |
@@ -100,7 +100,7 @@ btrfs_lookup_csum(struct btrfs_trans_handle *trans, | |||
100 | 100 | ||
101 | file_key.objectid = BTRFS_EXTENT_CSUM_OBJECTID; | 101 | file_key.objectid = BTRFS_EXTENT_CSUM_OBJECTID; |
102 | file_key.offset = bytenr; | 102 | file_key.offset = bytenr; |
103 | btrfs_set_key_type(&file_key, BTRFS_EXTENT_CSUM_KEY); | 103 | file_key.type = BTRFS_EXTENT_CSUM_KEY; |
104 | ret = btrfs_search_slot(trans, root, &file_key, path, 0, cow); | 104 | ret = btrfs_search_slot(trans, root, &file_key, path, 0, cow); |
105 | if (ret < 0) | 105 | if (ret < 0) |
106 | goto fail; | 106 | goto fail; |
@@ -111,7 +111,7 @@ btrfs_lookup_csum(struct btrfs_trans_handle *trans, | |||
111 | goto fail; | 111 | goto fail; |
112 | path->slots[0]--; | 112 | path->slots[0]--; |
113 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); | 113 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); |
114 | if (btrfs_key_type(&found_key) != BTRFS_EXTENT_CSUM_KEY) | 114 | if (found_key.type != BTRFS_EXTENT_CSUM_KEY) |
115 | goto fail; | 115 | goto fail; |
116 | 116 | ||
117 | csum_offset = (bytenr - found_key.offset) >> | 117 | csum_offset = (bytenr - found_key.offset) >> |
@@ -148,7 +148,7 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, | |||
148 | 148 | ||
149 | file_key.objectid = objectid; | 149 | file_key.objectid = objectid; |
150 | file_key.offset = offset; | 150 | file_key.offset = offset; |
151 | btrfs_set_key_type(&file_key, BTRFS_EXTENT_DATA_KEY); | 151 | file_key.type = BTRFS_EXTENT_DATA_KEY; |
152 | ret = btrfs_search_slot(trans, root, &file_key, path, ins_len, cow); | 152 | ret = btrfs_search_slot(trans, root, &file_key, path, ins_len, cow); |
153 | return ret; | 153 | return ret; |
154 | } | 154 | } |
@@ -299,19 +299,9 @@ int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, | |||
299 | } | 299 | } |
300 | 300 | ||
301 | int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode, | 301 | int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode, |
302 | struct btrfs_dio_private *dip, struct bio *bio, | 302 | struct bio *bio, u64 offset) |
303 | u64 offset) | ||
304 | { | 303 | { |
305 | int len = (bio->bi_iter.bi_sector << 9) - dip->disk_bytenr; | 304 | return __btrfs_lookup_bio_sums(root, inode, bio, offset, NULL, 1); |
306 | u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy); | ||
307 | int ret; | ||
308 | |||
309 | len >>= inode->i_sb->s_blocksize_bits; | ||
310 | len *= csum_size; | ||
311 | |||
312 | ret = __btrfs_lookup_bio_sums(root, inode, bio, offset, | ||
313 | (u32 *)(dip->csum + len), 1); | ||
314 | return ret; | ||
315 | } | 305 | } |
316 | 306 | ||
317 | int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, | 307 | int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, |
@@ -329,8 +319,8 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, | |||
329 | u64 csum_end; | 319 | u64 csum_end; |
330 | u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy); | 320 | u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy); |
331 | 321 | ||
332 | ASSERT(start == ALIGN(start, root->sectorsize) && | 322 | ASSERT(IS_ALIGNED(start, root->sectorsize) && |
333 | (end + 1) == ALIGN(end + 1, root->sectorsize)); | 323 | IS_ALIGNED(end + 1, root->sectorsize)); |
334 | 324 | ||
335 | path = btrfs_alloc_path(); | 325 | path = btrfs_alloc_path(); |
336 | if (!path) | 326 | if (!path) |
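The ASSERT rewrite is behavior-preserving for power-of-two sector sizes, since x == ALIGN(x, a) holds exactly when IS_ALIGNED(x, a). A quick userspace check of the identity:

#include <assert.h>

#define ALIGN(x, a)      (((x) + (a) - 1) & ~((a) - 1))
#define IS_ALIGNED(x, a) (((x) & ((a) - 1)) == 0)

int main(void)
{
        for (unsigned long x = 0; x < 1UL << 16; x++)
                assert((x == ALIGN(x, 4096UL)) == !!IS_ALIGNED(x, 4096UL));
        return 0;
}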
@@ -720,7 +710,7 @@ again: | |||
720 | bytenr = sums->bytenr + total_bytes; | 710 | bytenr = sums->bytenr + total_bytes; |
721 | file_key.objectid = BTRFS_EXTENT_CSUM_OBJECTID; | 711 | file_key.objectid = BTRFS_EXTENT_CSUM_OBJECTID; |
722 | file_key.offset = bytenr; | 712 | file_key.offset = bytenr; |
723 | btrfs_set_key_type(&file_key, BTRFS_EXTENT_CSUM_KEY); | 713 | file_key.type = BTRFS_EXTENT_CSUM_KEY; |
724 | 714 | ||
725 | item = btrfs_lookup_csum(trans, root, path, bytenr, 1); | 715 | item = btrfs_lookup_csum(trans, root, path, bytenr, 1); |
726 | if (!IS_ERR(item)) { | 716 | if (!IS_ERR(item)) { |
@@ -790,7 +780,7 @@ again: | |||
790 | csum_offset = (bytenr - found_key.offset) >> | 780 | csum_offset = (bytenr - found_key.offset) >> |
791 | root->fs_info->sb->s_blocksize_bits; | 781 | root->fs_info->sb->s_blocksize_bits; |
792 | 782 | ||
793 | if (btrfs_key_type(&found_key) != BTRFS_EXTENT_CSUM_KEY || | 783 | if (found_key.type != BTRFS_EXTENT_CSUM_KEY || |
794 | found_key.objectid != BTRFS_EXTENT_CSUM_OBJECTID || | 784 | found_key.objectid != BTRFS_EXTENT_CSUM_OBJECTID || |
795 | csum_offset >= MAX_CSUM_ITEMS(root, csum_size)) { | 785 | csum_offset >= MAX_CSUM_ITEMS(root, csum_size)) { |
796 | goto insert; | 786 | goto insert; |
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index d3afac292d67..a18ceabd99a8 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
@@ -299,7 +299,7 @@ static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info, | |||
299 | 299 | ||
300 | /* get the inode */ | 300 | /* get the inode */ |
301 | key.objectid = defrag->root; | 301 | key.objectid = defrag->root; |
302 | btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); | 302 | key.type = BTRFS_ROOT_ITEM_KEY; |
303 | key.offset = (u64)-1; | 303 | key.offset = (u64)-1; |
304 | 304 | ||
305 | index = srcu_read_lock(&fs_info->subvol_srcu); | 305 | index = srcu_read_lock(&fs_info->subvol_srcu); |
@@ -311,7 +311,7 @@ static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info, | |||
311 | } | 311 | } |
312 | 312 | ||
313 | key.objectid = defrag->ino; | 313 | key.objectid = defrag->ino; |
314 | btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); | 314 | key.type = BTRFS_INODE_ITEM_KEY; |
315 | key.offset = 0; | 315 | key.offset = 0; |
316 | inode = btrfs_iget(fs_info->sb, &key, inode_root, NULL); | 316 | inode = btrfs_iget(fs_info->sb, &key, inode_root, NULL); |
317 | if (IS_ERR(inode)) { | 317 | if (IS_ERR(inode)) { |
@@ -452,7 +452,7 @@ static noinline int btrfs_copy_from_user(loff_t pos, int num_pages, | |||
452 | if (unlikely(copied == 0)) | 452 | if (unlikely(copied == 0)) |
453 | break; | 453 | break; |
454 | 454 | ||
455 | if (unlikely(copied < PAGE_CACHE_SIZE - offset)) { | 455 | if (copied < PAGE_CACHE_SIZE - offset) { |
456 | offset += copied; | 456 | offset += copied; |
457 | } else { | 457 | } else { |
458 | pg++; | 458 | pg++; |
@@ -1481,9 +1481,8 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, | |||
1481 | bool force_page_uptodate = false; | 1481 | bool force_page_uptodate = false; |
1482 | bool need_unlock; | 1482 | bool need_unlock; |
1483 | 1483 | ||
1484 | nrptrs = min((iov_iter_count(i) + PAGE_CACHE_SIZE - 1) / | 1484 | nrptrs = min(DIV_ROUND_UP(iov_iter_count(i), PAGE_CACHE_SIZE), |
1485 | PAGE_CACHE_SIZE, PAGE_CACHE_SIZE / | 1485 | PAGE_CACHE_SIZE / (sizeof(struct page *))); |
1486 | (sizeof(struct page *))); | ||
1487 | nrptrs = min(nrptrs, current->nr_dirtied_pause - current->nr_dirtied); | 1486 | nrptrs = min(nrptrs, current->nr_dirtied_pause - current->nr_dirtied); |
1488 | nrptrs = max(nrptrs, 8); | 1487 | nrptrs = max(nrptrs, 8); |
1489 | pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL); | 1488 | pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL); |
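This hunk and the following ones replace open-coded ceiling divisions with the stock macro; DIV_ROUND_UP(n, d) expands to (n + d - 1) / d, which for a power-of-two divisor matches the old shift form bit for bit. For instance:

#include <assert.h>

#define PAGE_CACHE_SIZE  4096UL
#define PAGE_CACHE_SHIFT 12
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
        for (unsigned long bytes = 0; bytes < 5 * PAGE_CACHE_SIZE; bytes++)
                assert(DIV_ROUND_UP(bytes, PAGE_CACHE_SIZE) ==
                       (bytes + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT);
        return 0;
}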
@@ -1497,8 +1496,8 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, | |||
1497 | size_t write_bytes = min(iov_iter_count(i), | 1496 | size_t write_bytes = min(iov_iter_count(i), |
1498 | nrptrs * (size_t)PAGE_CACHE_SIZE - | 1497 | nrptrs * (size_t)PAGE_CACHE_SIZE - |
1499 | offset); | 1498 | offset); |
1500 | size_t num_pages = (write_bytes + offset + | 1499 | size_t num_pages = DIV_ROUND_UP(write_bytes + offset, |
1501 | PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | 1500 | PAGE_CACHE_SIZE); |
1502 | size_t reserve_bytes; | 1501 | size_t reserve_bytes; |
1503 | size_t dirty_pages; | 1502 | size_t dirty_pages; |
1504 | size_t copied; | 1503 | size_t copied; |
@@ -1526,9 +1525,8 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, | |||
1526 | * our prealloc extent may be smaller than | 1525 | * our prealloc extent may be smaller than |
1527 | * write_bytes, so scale down. | 1526 | * write_bytes, so scale down. |
1528 | */ | 1527 | */ |
1529 | num_pages = (write_bytes + offset + | 1528 | num_pages = DIV_ROUND_UP(write_bytes + offset, |
1530 | PAGE_CACHE_SIZE - 1) >> | 1529 | PAGE_CACHE_SIZE); |
1531 | PAGE_CACHE_SHIFT; | ||
1532 | reserve_bytes = num_pages << PAGE_CACHE_SHIFT; | 1530 | reserve_bytes = num_pages << PAGE_CACHE_SHIFT; |
1533 | ret = 0; | 1531 | ret = 0; |
1534 | } else { | 1532 | } else { |
@@ -1590,9 +1588,8 @@ again: | |||
1590 | dirty_pages = 0; | 1588 | dirty_pages = 0; |
1591 | } else { | 1589 | } else { |
1592 | force_page_uptodate = false; | 1590 | force_page_uptodate = false; |
1593 | dirty_pages = (copied + offset + | 1591 | dirty_pages = DIV_ROUND_UP(copied + offset, |
1594 | PAGE_CACHE_SIZE - 1) >> | 1592 | PAGE_CACHE_SIZE); |
1595 | PAGE_CACHE_SHIFT; | ||
1596 | } | 1593 | } |
1597 | 1594 | ||
1598 | /* | 1595 | /* |
@@ -1653,7 +1650,7 @@ again: | |||
1653 | cond_resched(); | 1650 | cond_resched(); |
1654 | 1651 | ||
1655 | balance_dirty_pages_ratelimited(inode->i_mapping); | 1652 | balance_dirty_pages_ratelimited(inode->i_mapping); |
1656 | if (dirty_pages < (root->leafsize >> PAGE_CACHE_SHIFT) + 1) | 1653 | if (dirty_pages < (root->nodesize >> PAGE_CACHE_SHIFT) + 1) |
1657 | btrfs_btree_balance_dirty(root); | 1654 | btrfs_btree_balance_dirty(root); |
1658 | 1655 | ||
1659 | pos += copied; | 1656 | pos += copied; |
@@ -1795,7 +1792,7 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb, | |||
1795 | if (sync) | 1792 | if (sync) |
1796 | atomic_inc(&BTRFS_I(inode)->sync_writers); | 1793 | atomic_inc(&BTRFS_I(inode)->sync_writers); |
1797 | 1794 | ||
1798 | if (unlikely(file->f_flags & O_DIRECT)) { | 1795 | if (file->f_flags & O_DIRECT) { |
1799 | num_written = __btrfs_direct_write(iocb, from, pos); | 1796 | num_written = __btrfs_direct_write(iocb, from, pos); |
1800 | } else { | 1797 | } else { |
1801 | num_written = __btrfs_buffered_write(file, from, pos); | 1798 | num_written = __btrfs_buffered_write(file, from, pos); |
@@ -1840,10 +1837,32 @@ int btrfs_release_file(struct inode *inode, struct file *filp) | |||
1840 | { | 1837 | { |
1841 | if (filp->private_data) | 1838 | if (filp->private_data) |
1842 | btrfs_ioctl_trans_end(filp); | 1839 | btrfs_ioctl_trans_end(filp); |
1843 | filemap_flush(inode->i_mapping); | 1840 | /* |
1841 | * ordered_data_close is set by setattr when we are about to truncate | ||
1842 | * a file from a non-zero size to a zero size. This tries to | ||
1843 | * flush down new bytes that may have been written if the | ||
1844 | * application were using truncate to replace a file in place. | ||
1845 | */ | ||
1846 | if (test_and_clear_bit(BTRFS_INODE_ORDERED_DATA_CLOSE, | ||
1847 | &BTRFS_I(inode)->runtime_flags)) | ||
1848 | filemap_flush(inode->i_mapping); | ||
1844 | return 0; | 1849 | return 0; |
1845 | } | 1850 | } |
1846 | 1851 | ||
1852 | static int start_ordered_ops(struct inode *inode, loff_t start, loff_t end) | ||
1853 | { | ||
1854 | int ret; | ||
1855 | |||
1856 | atomic_inc(&BTRFS_I(inode)->sync_writers); | ||
1857 | ret = filemap_fdatawrite_range(inode->i_mapping, start, end); | ||
1858 | if (!ret && test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, | ||
1859 | &BTRFS_I(inode)->runtime_flags)) | ||
1860 | ret = filemap_fdatawrite_range(inode->i_mapping, start, end); | ||
1861 | atomic_dec(&BTRFS_I(inode)->sync_writers); | ||
1862 | |||
1863 | return ret; | ||
1864 | } | ||
1865 | |||
1847 | /* | 1866 | /* |
1848 | * fsync call for both files and directories. This logs the inode into | 1867 | * fsync call for both files and directories. This logs the inode into |
1849 | * the tree log instead of forcing full commits whenever possible. | 1868 | * the tree log instead of forcing full commits whenever possible. |
@@ -1873,30 +1892,64 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
1873 | * multi-task, and make the performance up. See | 1892 | * multi-task, and make the performance up. See |
1874 | * btrfs_wait_ordered_range for an explanation of the ASYNC check. | 1893 | * btrfs_wait_ordered_range for an explanation of the ASYNC check. |
1875 | */ | 1894 | */ |
1876 | atomic_inc(&BTRFS_I(inode)->sync_writers); | 1895 | ret = start_ordered_ops(inode, start, end); |
1877 | ret = filemap_fdatawrite_range(inode->i_mapping, start, end); | ||
1878 | if (!ret && test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, | ||
1879 | &BTRFS_I(inode)->runtime_flags)) | ||
1880 | ret = filemap_fdatawrite_range(inode->i_mapping, start, end); | ||
1881 | atomic_dec(&BTRFS_I(inode)->sync_writers); | ||
1882 | if (ret) | 1896 | if (ret) |
1883 | return ret; | 1897 | return ret; |
1884 | 1898 | ||
1885 | mutex_lock(&inode->i_mutex); | 1899 | mutex_lock(&inode->i_mutex); |
1886 | |||
1887 | /* | ||
1888 | * We flush the dirty pages again to avoid some dirty pages in the | ||
1889 | * range being left. | ||
1890 | */ | ||
1891 | atomic_inc(&root->log_batch); | 1900 | atomic_inc(&root->log_batch); |
1892 | full_sync = test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, | 1901 | full_sync = test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, |
1893 | &BTRFS_I(inode)->runtime_flags); | 1902 | &BTRFS_I(inode)->runtime_flags); |
1903 | /* | ||
1904 | * We might have had more pages made dirty after calling | ||
1905 | * start_ordered_ops and before acquiring the inode's i_mutex. | ||
1906 | */ | ||
1894 | if (full_sync) { | 1907 | if (full_sync) { |
1908 | /* | ||
1909 | * For a full sync, we need to make sure any ordered operations | ||
1910 | * start and finish before we start logging the inode, so that | ||
1911 | * all extents are persisted and the respective file extent | ||
1912 | * items are in the fs/subvol btree. | ||
1913 | */ | ||
1895 | ret = btrfs_wait_ordered_range(inode, start, end - start + 1); | 1914 | ret = btrfs_wait_ordered_range(inode, start, end - start + 1); |
1896 | if (ret) { | 1915 | } else { |
1897 | mutex_unlock(&inode->i_mutex); | 1916 | /* |
1898 | goto out; | 1917 | * Start any new ordered operations before starting to log the |
1899 | } | 1918 | * inode. We will wait for them to finish in btrfs_sync_log(). |
1919 | * | ||
1920 | * Right before acquiring the inode's mutex, we might have new | ||
1921 | * writes dirtying pages, which won't immediately start the | ||
1922 | * respective ordered operations - that is done through the | ||
1923 | * fill_delalloc callbacks invoked from the writepage and | ||
1924 | * writepages address space operations. So make sure we start | ||
1925 | * all ordered operations before starting to log our inode. Not | ||
1926 | * doing this means that while logging the inode, writeback | ||
1927 | * could start and invoke writepage/writepages, which would call | ||
1928 | * the fill_delalloc callbacks (cow_file_range, | ||
1929 | * submit_compressed_extents). These callbacks add first an | ||
1930 | * extent map to the modified list of extents and then create | ||
1931 | * the respective ordered operation, which means in | ||
1932 | * tree-log.c:btrfs_log_inode() we might capture all existing | ||
1933 | * ordered operations (with btrfs_get_logged_extents()) before | ||
1934 | * the fill_delalloc callback adds its ordered operation, and by | ||
1935 | * the time we visit the modified list of extent maps (with | ||
1936 | * btrfs_log_changed_extents()), we see and process the extent | ||
1937 | * map they created. We then use the extent map to construct a | ||
1938 | * file extent item for logging without waiting for the | ||
1939 | * respective ordered operation to finish - this file extent | ||
1940 | * item points to a disk location that might not have yet been | ||
1941 | * written to, containing random data - so after a crash a log | ||
1942 | * replay will make our inode have file extent items that point | ||
1943 | * to disk locations containing invalid data, as we returned | ||
1944 | * success to userspace without waiting for the respective | ||
1945 | * ordered operation to finish, because it wasn't captured by | ||
1946 | * btrfs_get_logged_extents(). | ||
1947 | */ | ||
1948 | ret = start_ordered_ops(inode, start, end); | ||
1949 | } | ||
1950 | if (ret) { | ||
1951 | mutex_unlock(&inode->i_mutex); | ||
1952 | goto out; | ||
1900 | } | 1953 | } |
1901 | atomic_inc(&root->log_batch); | 1954 | atomic_inc(&root->log_batch); |
1902 | 1955 | ||
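Condensed, the ordering this hunk establishes is: flush once before taking i_mutex, then either wait for ordered extents (full sync) or start them again to catch writes that dirtied pages in the window before the lock. A simplified restatement of the resulting control flow (fragment; error paths elided):

/* Simplified shape of btrfs_sync_file() after this hunk: */
int ret, full_sync;

ret = start_ordered_ops(inode, start, end);     /* before i_mutex */
if (ret)
        return ret;

mutex_lock(&inode->i_mutex);
atomic_inc(&root->log_batch);
full_sync = test_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
                     &BTRFS_I(inode)->runtime_flags);
if (full_sync)
        /* wait: all file extent items must reach the subvol btree */
        ret = btrfs_wait_ordered_range(inode, start, end - start + 1);
else
        /* start (not wait): btrfs_sync_log() waits for them later */
        ret = start_ordered_ops(inode, start, end);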
@@ -1958,7 +2011,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
1958 | 2011 | ||
1959 | btrfs_init_log_ctx(&ctx); | 2012 | btrfs_init_log_ctx(&ctx); |
1960 | 2013 | ||
1961 | ret = btrfs_log_dentry_safe(trans, root, dentry, &ctx); | 2014 | ret = btrfs_log_dentry_safe(trans, root, dentry, start, end, &ctx); |
1962 | if (ret < 0) { | 2015 | if (ret < 0) { |
1963 | /* Fallthrough and commit/free transaction. */ | 2016 | /* Fallthrough and commit/free transaction. */ |
1964 | ret = 1; | 2017 | ret = 1; |
@@ -1976,6 +2029,25 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
1976 | */ | 2029 | */ |
1977 | mutex_unlock(&inode->i_mutex); | 2030 | mutex_unlock(&inode->i_mutex); |
1978 | 2031 | ||
2032 | /* | ||
2033 | * If any of the ordered extents had an error, just return it to user | ||
2034 | * space, so that the application knows some writes didn't succeed and | ||
2035 | * can take proper action (e.g. retry). Blindly committing the | ||
2036 | * transaction in this case would fool userspace into thinking everything | ||
2037 | * successful. And we also want to make sure our log doesn't contain | ||
2038 | * file extent items pointing to extents that weren't fully written to - | ||
2039 | * just like in the non-fast fsync path, where we check for the ordered | ||
2040 | * operation's error flag before writing to the log tree and return -EIO | ||
2041 | * if any of them had this flag set (btrfs_wait_ordered_range) - | ||
2042 | * therefore we need to check for errors in the ordered operations, | ||
2043 | * which are indicated by ctx.io_err. | ||
2044 | */ | ||
2045 | if (ctx.io_err) { | ||
2046 | btrfs_end_transaction(trans, root); | ||
2047 | ret = ctx.io_err; | ||
2048 | goto out; | ||
2049 | } | ||
2050 | |||
1979 | if (ret != BTRFS_NO_LOG_SYNC) { | 2051 | if (ret != BTRFS_NO_LOG_SYNC) { |
1980 | if (!ret) { | 2052 | if (!ret) { |
1981 | ret = btrfs_sync_log(trans, root, &ctx); | 2053 | ret = btrfs_sync_log(trans, root, &ctx); |
@@ -2088,10 +2160,9 @@ static int fill_holes(struct btrfs_trans_handle *trans, struct inode *inode, | |||
2088 | goto out; | 2160 | goto out; |
2089 | } | 2161 | } |
2090 | 2162 | ||
2091 | if (hole_mergeable(inode, leaf, path->slots[0]+1, offset, end)) { | 2163 | if (hole_mergeable(inode, leaf, path->slots[0], offset, end)) { |
2092 | u64 num_bytes; | 2164 | u64 num_bytes; |
2093 | 2165 | ||
2094 | path->slots[0]++; | ||
2095 | key.offset = offset; | 2166 | key.offset = offset; |
2096 | btrfs_set_item_key_safe(root, path, &key); | 2167 | btrfs_set_item_key_safe(root, path, &key); |
2097 | fi = btrfs_item_ptr(leaf, path->slots[0], | 2168 | fi = btrfs_item_ptr(leaf, path->slots[0], |
@@ -2216,7 +2287,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) | |||
2216 | goto out_only_mutex; | 2287 | goto out_only_mutex; |
2217 | } | 2288 | } |
2218 | 2289 | ||
2219 | lockstart = round_up(offset , BTRFS_I(inode)->root->sectorsize); | 2290 | lockstart = round_up(offset, BTRFS_I(inode)->root->sectorsize); |
2220 | lockend = round_down(offset + len, | 2291 | lockend = round_down(offset + len, |
2221 | BTRFS_I(inode)->root->sectorsize) - 1; | 2292 | BTRFS_I(inode)->root->sectorsize) - 1; |
2222 | same_page = ((offset >> PAGE_CACHE_SHIFT) == | 2293 | same_page = ((offset >> PAGE_CACHE_SHIFT) == |
@@ -2277,7 +2348,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) | |||
2277 | tail_start + tail_len, 0, 1); | 2348 | tail_start + tail_len, 0, 1); |
2278 | if (ret) | 2349 | if (ret) |
2279 | goto out_only_mutex; | 2350 | goto out_only_mutex; |
2280 | } | 2351 | } |
2281 | } | 2352 | } |
2282 | } | 2353 | } |
2283 | 2354 | ||
@@ -2614,23 +2685,28 @@ static int find_desired_extent(struct inode *inode, loff_t *offset, int whence) | |||
2614 | struct btrfs_root *root = BTRFS_I(inode)->root; | 2685 | struct btrfs_root *root = BTRFS_I(inode)->root; |
2615 | struct extent_map *em = NULL; | 2686 | struct extent_map *em = NULL; |
2616 | struct extent_state *cached_state = NULL; | 2687 | struct extent_state *cached_state = NULL; |
2617 | u64 lockstart = *offset; | 2688 | u64 lockstart; |
2618 | u64 lockend = i_size_read(inode); | 2689 | u64 lockend; |
2619 | u64 start = *offset; | 2690 | u64 start; |
2620 | u64 len = i_size_read(inode); | 2691 | u64 len; |
2621 | int ret = 0; | 2692 | int ret = 0; |
2622 | 2693 | ||
2623 | lockend = max_t(u64, root->sectorsize, lockend); | 2694 | if (inode->i_size == 0) |
2695 | return -ENXIO; | ||
2696 | |||
2697 | /* | ||
2698 | * *offset can be negative, in this case we start finding DATA/HOLE from | ||
2699 | * the very start of the file. | ||
2700 | */ | ||
2701 | start = max_t(loff_t, 0, *offset); | ||
2702 | |||
2703 | lockstart = round_down(start, root->sectorsize); | ||
2704 | lockend = round_up(i_size_read(inode), root->sectorsize); | ||
2624 | if (lockend <= lockstart) | 2705 | if (lockend <= lockstart) |
2625 | lockend = lockstart + root->sectorsize; | 2706 | lockend = lockstart + root->sectorsize; |
2626 | |||
2627 | lockend--; | 2707 | lockend--; |
2628 | len = lockend - lockstart + 1; | 2708 | len = lockend - lockstart + 1; |
2629 | 2709 | ||
2630 | len = max_t(u64, len, root->sectorsize); | ||
2631 | if (inode->i_size == 0) | ||
2632 | return -ENXIO; | ||
2633 | |||
2634 | lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend, 0, | 2710 | lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend, 0, |
2635 | &cached_state); | 2711 | &cached_state); |
2636 | 2712 | ||
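The reworked SEEK_DATA/SEEK_HOLE bounds above boil down to a little alignment arithmetic: clamp a possibly negative *offset to zero, round the start down and i_size up to the sector size, and keep the locked range at least one sector wide. A minimal userspace sketch of that computation, assuming a power-of-two sectorsize (which btrfs guarantees) and made-up values:

    #include <stdio.h>
    #include <stdint.h>

    #define round_down(x, y) ((x) & ~((uint64_t)(y) - 1))
    #define round_up(x, y)   round_down((x) + (y) - 1, (y))

    int main(void)
    {
        uint64_t sectorsize = 4096, isize = 6000;
        int64_t offset = -100;             /* negative offsets are legal here */
        uint64_t start = offset < 0 ? 0 : (uint64_t)offset;
        uint64_t lockstart = round_down(start, sectorsize);
        uint64_t lockend = round_up(isize, sectorsize);

        if (lockend <= lockstart)          /* offset at/past EOF: keep one sector */
            lockend = lockstart + sectorsize;
        lockend--;
        printf("lock [%llu, %llu], len %llu\n",
               (unsigned long long)lockstart,
               (unsigned long long)lockend,
               (unsigned long long)(lockend - lockstart + 1));
        return 0;
    }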
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 2b0a627cb5f9..33848196550e 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c | |||
@@ -279,8 +279,7 @@ static int io_ctl_init(struct io_ctl *io_ctl, struct inode *inode, | |||
279 | int num_pages; | 279 | int num_pages; |
280 | int check_crcs = 0; | 280 | int check_crcs = 0; |
281 | 281 | ||
282 | num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> | 282 | num_pages = DIV_ROUND_UP(i_size_read(inode), PAGE_CACHE_SIZE); |
283 | PAGE_CACHE_SHIFT; | ||
284 | 283 | ||
285 | if (btrfs_ino(inode) != BTRFS_FREE_INO_OBJECTID) | 284 | if (btrfs_ino(inode) != BTRFS_FREE_INO_OBJECTID) |
286 | check_crcs = 1; | 285 | check_crcs = 1; |
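For reference, DIV_ROUND_UP() from linux/kernel.h is plain integer ceiling division, so for a power-of-two page size it computes exactly what the removed shift did:

    #define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

    /* e.g. a 5000-byte inode with 4096-byte pages:
     *   (5000 + 4096 - 1) >> PAGE_CACHE_SHIFT == 2
     *   DIV_ROUND_UP(5000, 4096)              == 2
     */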
@@ -1998,6 +1997,128 @@ static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl, | |||
1998 | return merged; | 1997 | return merged; |
1999 | } | 1998 | } |
2000 | 1999 | ||
2000 | static bool steal_from_bitmap_to_end(struct btrfs_free_space_ctl *ctl, | ||
2001 | struct btrfs_free_space *info, | ||
2002 | bool update_stat) | ||
2003 | { | ||
2004 | struct btrfs_free_space *bitmap; | ||
2005 | unsigned long i; | ||
2006 | unsigned long j; | ||
2007 | const u64 end = info->offset + info->bytes; | ||
2008 | const u64 bitmap_offset = offset_to_bitmap(ctl, end); | ||
2009 | u64 bytes; | ||
2010 | |||
2011 | bitmap = tree_search_offset(ctl, bitmap_offset, 1, 0); | ||
2012 | if (!bitmap) | ||
2013 | return false; | ||
2014 | |||
2015 | i = offset_to_bit(bitmap->offset, ctl->unit, end); | ||
2016 | j = find_next_zero_bit(bitmap->bitmap, BITS_PER_BITMAP, i); | ||
2017 | if (j == i) | ||
2018 | return false; | ||
2019 | bytes = (j - i) * ctl->unit; | ||
2020 | info->bytes += bytes; | ||
2021 | |||
2022 | if (update_stat) | ||
2023 | bitmap_clear_bits(ctl, bitmap, end, bytes); | ||
2024 | else | ||
2025 | __bitmap_clear_bits(ctl, bitmap, end, bytes); | ||
2026 | |||
2027 | if (!bitmap->bytes) | ||
2028 | free_bitmap(ctl, bitmap); | ||
2029 | |||
2030 | return true; | ||
2031 | } | ||
2032 | |||
2033 | static bool steal_from_bitmap_to_front(struct btrfs_free_space_ctl *ctl, | ||
2034 | struct btrfs_free_space *info, | ||
2035 | bool update_stat) | ||
2036 | { | ||
2037 | struct btrfs_free_space *bitmap; | ||
2038 | u64 bitmap_offset; | ||
2039 | unsigned long i; | ||
2040 | unsigned long j; | ||
2041 | unsigned long prev_j; | ||
2042 | u64 bytes; | ||
2043 | |||
2044 | bitmap_offset = offset_to_bitmap(ctl, info->offset); | ||
2045 | /* If we're on a boundary, try the previous logical bitmap. */ | ||
2046 | if (bitmap_offset == info->offset) { | ||
2047 | if (info->offset == 0) | ||
2048 | return false; | ||
2049 | bitmap_offset = offset_to_bitmap(ctl, info->offset - 1); | ||
2050 | } | ||
2051 | |||
2052 | bitmap = tree_search_offset(ctl, bitmap_offset, 1, 0); | ||
2053 | if (!bitmap) | ||
2054 | return false; | ||
2055 | |||
2056 | i = offset_to_bit(bitmap->offset, ctl->unit, info->offset) - 1; | ||
2057 | j = 0; | ||
2058 | prev_j = (unsigned long)-1; | ||
2059 | for_each_clear_bit_from(j, bitmap->bitmap, BITS_PER_BITMAP) { | ||
2060 | if (j > i) | ||
2061 | break; | ||
2062 | prev_j = j; | ||
2063 | } | ||
2064 | if (prev_j == i) | ||
2065 | return false; | ||
2066 | |||
2067 | if (prev_j == (unsigned long)-1) | ||
2068 | bytes = (i + 1) * ctl->unit; | ||
2069 | else | ||
2070 | bytes = (i - prev_j) * ctl->unit; | ||
2071 | |||
2072 | info->offset -= bytes; | ||
2073 | info->bytes += bytes; | ||
2074 | |||
2075 | if (update_stat) | ||
2076 | bitmap_clear_bits(ctl, bitmap, info->offset, bytes); | ||
2077 | else | ||
2078 | __bitmap_clear_bits(ctl, bitmap, info->offset, bytes); | ||
2079 | |||
2080 | if (!bitmap->bytes) | ||
2081 | free_bitmap(ctl, bitmap); | ||
2082 | |||
2083 | return true; | ||
2084 | } | ||
2085 | |||
2086 | /* | ||
2087 | * We prefer always to allocate from extent entries, both for clustered and | ||
2088 | * non-clustered allocation requests. So when attempting to add a new extent | ||
2089 | * entry, try to see if there's adjacent free space in bitmap entries, and if | ||
2090 | * there is, migrate that space from the bitmaps to the extent. | ||
2092 | * This way we get a better chance of satisfying space allocation requests | ||
2092 | * because we attempt to satisfy them based on a single cache entry, and never | ||
2093 | * on 2 or more entries - even if the entries represent a contiguous free space | ||
2094 | * region (e.g. 1 extent entry + 1 bitmap entry starting where the extent entry | ||
2095 | * ends). | ||
2096 | */ | ||
2097 | static void steal_from_bitmap(struct btrfs_free_space_ctl *ctl, | ||
2098 | struct btrfs_free_space *info, | ||
2099 | bool update_stat) | ||
2100 | { | ||
2101 | /* | ||
2102 | * Only work with disconnected entries, as we can change their offset, | ||
2103 | * and they must be extent entries (not bitmaps). | ||
2104 | */ | ||
2105 | ASSERT(!info->bitmap); | ||
2106 | ASSERT(RB_EMPTY_NODE(&info->offset_index)); | ||
2107 | |||
2108 | if (ctl->total_bitmaps > 0) { | ||
2109 | bool stole_end; | ||
2110 | bool stole_front = false; | ||
2111 | |||
2112 | stole_end = steal_from_bitmap_to_end(ctl, info, update_stat); | ||
2113 | if (ctl->total_bitmaps > 0) | ||
2114 | stole_front = steal_from_bitmap_to_front(ctl, info, | ||
2115 | update_stat); | ||
2116 | |||
2117 | if (stole_end || stole_front) | ||
2118 | try_merge_free_space(ctl, info, update_stat); | ||
2119 | } | ||
2120 | } | ||
2121 | |||
2001 | int __btrfs_add_free_space(struct btrfs_free_space_ctl *ctl, | 2122 | int __btrfs_add_free_space(struct btrfs_free_space_ctl *ctl, |
2002 | u64 offset, u64 bytes) | 2123 | u64 offset, u64 bytes) |
2003 | { | 2124 | { |
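In these free-space bitmaps a set bit means one free unit. steal_from_bitmap_to_front() walks the clear bits from bit 0 and remembers the last clear bit at or before i (the bit just in front of the extent entry); the set bits between that clear bit and i form the contiguous free run it can migrate. A userspace toy of the scan, with a hand-built 8-bit bitmap standing in for the real bitmap and for_each_clear_bit_from():

    #include <stdio.h>

    int main(void)
    {
        unsigned long bmp = 0xF9;  /* bits 0 and 3-7 set (free), 1-2 clear */
        unsigned long i = 7;       /* bit just before the extent entry */
        unsigned long prev_j = (unsigned long)-1, j;

        for (j = 0; j < 8; j++) {  /* models for_each_clear_bit_from(j, ...) */
            if (bmp & (1UL << j))
                continue;          /* set bit: skip, we iterate clear bits */
            if (j > i)
                break;
            prev_j = j;
        }
        if (prev_j == i)
            printf("bit %lu itself is clear, nothing to steal\n", i);
        else if (prev_j == (unsigned long)-1)
            printf("steal bits 0..%lu (%lu units)\n", i, i + 1);
        else
            printf("steal bits %lu..%lu (%lu units)\n",
                   prev_j + 1, i, i - prev_j);
        return 0;
    }

With this input the run 3..7 (5 units) would be cleared from the bitmap and merged into the extent entry, matching the bytes = (i - prev_j) * ctl->unit case above.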
@@ -2010,6 +2131,7 @@ int __btrfs_add_free_space(struct btrfs_free_space_ctl *ctl, | |||
2010 | 2131 | ||
2011 | info->offset = offset; | 2132 | info->offset = offset; |
2012 | info->bytes = bytes; | 2133 | info->bytes = bytes; |
2134 | RB_CLEAR_NODE(&info->offset_index); | ||
2013 | 2135 | ||
2014 | spin_lock(&ctl->tree_lock); | 2136 | spin_lock(&ctl->tree_lock); |
2015 | 2137 | ||
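The RB_CLEAR_NODE() call matters because steal_from_bitmap() asserts detachment with RB_EMPTY_NODE(), and that test only holds for nodes explicitly cleared: the rbtree convention is that a detached node points to itself as its own parent. A userspace model of just that convention (names mirror linux/rbtree.h, but this is a toy, not the kernel implementation):

    #include <stdio.h>

    struct rb_node { struct rb_node *parent; };

    #define RB_CLEAR_NODE(n)  ((n)->parent = (n))
    #define RB_EMPTY_NODE(n)  ((n)->parent == (n))

    int main(void)
    {
        struct rb_node a = { .parent = NULL };  /* fresh node looks inserted */

        printf("before clear: %d\n", RB_EMPTY_NODE(&a));  /* 0: assert trips */
        RB_CLEAR_NODE(&a);
        printf("after clear:  %d\n", RB_EMPTY_NODE(&a));  /* 1: detached */
        return 0;
    }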
@@ -2029,6 +2151,14 @@ int __btrfs_add_free_space(struct btrfs_free_space_ctl *ctl, | |||
2029 | goto out; | 2151 | goto out; |
2030 | } | 2152 | } |
2031 | link: | 2153 | link: |
2154 | /* | ||
2155 | * Only steal free space from adjacent bitmaps if we're sure we're not | ||
2156 | * going to add the new free space to existing bitmap entries - because | ||
2157 | * that would mean unnecessary work that would be reverted. Therefore | ||
2158 | * attempt to steal space from bitmaps if we're adding an extent entry. | ||
2159 | */ | ||
2160 | steal_from_bitmap(ctl, info, true); | ||
2161 | |||
2032 | ret = link_free_space(ctl, info); | 2162 | ret = link_free_space(ctl, info); |
2033 | if (ret) | 2163 | if (ret) |
2034 | kmem_cache_free(btrfs_free_space_cachep, info); | 2164 | kmem_cache_free(btrfs_free_space_cachep, info); |
@@ -2205,10 +2335,13 @@ __btrfs_return_cluster_to_free_space( | |||
2205 | entry = rb_entry(node, struct btrfs_free_space, offset_index); | 2335 | entry = rb_entry(node, struct btrfs_free_space, offset_index); |
2206 | node = rb_next(&entry->offset_index); | 2336 | node = rb_next(&entry->offset_index); |
2207 | rb_erase(&entry->offset_index, &cluster->root); | 2337 | rb_erase(&entry->offset_index, &cluster->root); |
2338 | RB_CLEAR_NODE(&entry->offset_index); | ||
2208 | 2339 | ||
2209 | bitmap = (entry->bitmap != NULL); | 2340 | bitmap = (entry->bitmap != NULL); |
2210 | if (!bitmap) | 2341 | if (!bitmap) { |
2211 | try_merge_free_space(ctl, entry, false); | 2342 | try_merge_free_space(ctl, entry, false); |
2343 | steal_from_bitmap(ctl, entry, false); | ||
2344 | } | ||
2212 | tree_insert_offset(&ctl->free_space_offset, | 2345 | tree_insert_offset(&ctl->free_space_offset, |
2213 | entry->offset, &entry->offset_index, bitmap); | 2346 | entry->offset, &entry->offset_index, bitmap); |
2214 | } | 2347 | } |
@@ -3033,10 +3166,10 @@ struct inode *lookup_free_ino_inode(struct btrfs_root *root, | |||
3033 | { | 3166 | { |
3034 | struct inode *inode = NULL; | 3167 | struct inode *inode = NULL; |
3035 | 3168 | ||
3036 | spin_lock(&root->cache_lock); | 3169 | spin_lock(&root->ino_cache_lock); |
3037 | if (root->cache_inode) | 3170 | if (root->ino_cache_inode) |
3038 | inode = igrab(root->cache_inode); | 3171 | inode = igrab(root->ino_cache_inode); |
3039 | spin_unlock(&root->cache_lock); | 3172 | spin_unlock(&root->ino_cache_lock); |
3040 | if (inode) | 3173 | if (inode) |
3041 | return inode; | 3174 | return inode; |
3042 | 3175 | ||
@@ -3044,10 +3177,10 @@ struct inode *lookup_free_ino_inode(struct btrfs_root *root, | |||
3044 | if (IS_ERR(inode)) | 3177 | if (IS_ERR(inode)) |
3045 | return inode; | 3178 | return inode; |
3046 | 3179 | ||
3047 | spin_lock(&root->cache_lock); | 3180 | spin_lock(&root->ino_cache_lock); |
3048 | if (!btrfs_fs_closing(root->fs_info)) | 3181 | if (!btrfs_fs_closing(root->fs_info)) |
3049 | root->cache_inode = igrab(inode); | 3182 | root->ino_cache_inode = igrab(inode); |
3050 | spin_unlock(&root->cache_lock); | 3183 | spin_unlock(&root->ino_cache_lock); |
3051 | 3184 | ||
3052 | return inode; | 3185 | return inode; |
3053 | } | 3186 | } |
@@ -3176,6 +3309,7 @@ again: | |||
3176 | map = NULL; | 3309 | map = NULL; |
3177 | add_new_bitmap(ctl, info, offset); | 3310 | add_new_bitmap(ctl, info, offset); |
3178 | bitmap_info = info; | 3311 | bitmap_info = info; |
3312 | info = NULL; | ||
3179 | } | 3313 | } |
3180 | 3314 | ||
3181 | bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes); | 3315 | bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes); |
@@ -3186,6 +3320,8 @@ again: | |||
3186 | if (bytes) | 3320 | if (bytes) |
3187 | goto again; | 3321 | goto again; |
3188 | 3322 | ||
3323 | if (info) | ||
3324 | kmem_cache_free(btrfs_free_space_cachep, info); | ||
3189 | if (map) | 3325 | if (map) |
3190 | kfree(map); | 3326 | kfree(map); |
3191 | return 0; | 3327 | return 0; |
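The two hunks above are a classic consume-or-free fix: NULL the pointer once add_new_bitmap() takes ownership of the entry, then free whatever is still owned on the way out. The shape of the fix, as a standalone sketch (consume() is a hypothetical stand-in for add_new_bitmap()):

    #include <stdlib.h>

    static void consume(char *p)
    {
        free(p);  /* toy: the real code links the entry into the ctl tree */
    }

    static void insert_into_bitmap_sketch(void)
    {
        char *info = malloc(32);
        int need_new_bitmap = 1;   /* stand-in for the real condition */

        if (!info)
            return;
        if (need_new_bitmap) {
            consume(info);
            info = NULL;           /* mirrors the new 'info = NULL' line */
        }
        /* ... the real code may 'goto again' and allocate another info ... */
        free(info);                /* mirrors the new kmem_cache_free(); */
    }                              /* no-op when info was consumed */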
@@ -3260,6 +3396,7 @@ have_info: | |||
3260 | goto have_info; | 3396 | goto have_info; |
3261 | } | 3397 | } |
3262 | 3398 | ||
3399 | ret = 0; | ||
3263 | goto out; | 3400 | goto out; |
3264 | } | 3401 | } |
3265 | 3402 | ||
diff --git a/fs/btrfs/hash.c b/fs/btrfs/hash.c index 85889aa82c62..64f15bb30a81 100644 --- a/fs/btrfs/hash.c +++ b/fs/btrfs/hash.c | |||
@@ -20,10 +20,8 @@ static struct crypto_shash *tfm; | |||
20 | int __init btrfs_hash_init(void) | 20 | int __init btrfs_hash_init(void) |
21 | { | 21 | { |
22 | tfm = crypto_alloc_shash("crc32c", 0, 0); | 22 | tfm = crypto_alloc_shash("crc32c", 0, 0); |
23 | if (IS_ERR(tfm)) | ||
24 | return PTR_ERR(tfm); | ||
25 | 23 | ||
26 | return 0; | 24 | return PTR_ERR_OR_ZERO(tfm); |
27 | } | 25 | } |
28 | 26 | ||
29 | void btrfs_hash_exit(void) | 27 | void btrfs_hash_exit(void) |
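PTR_ERR_OR_ZERO() comes from include/linux/err.h and just folds the removed two-branch pattern into a single expression; it behaves like this open-coded equivalent (kernel-context sketch, not runnable in userspace):

    #include <linux/err.h>

    static inline int ptr_err_or_zero(const void *ptr)
    {
        return IS_ERR(ptr) ? PTR_ERR(ptr) : 0;
    }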
diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c index 2be38df703c9..8ffa4783cbf4 100644 --- a/fs/btrfs/inode-item.c +++ b/fs/btrfs/inode-item.c | |||
@@ -135,7 +135,7 @@ static int btrfs_del_inode_extref(struct btrfs_trans_handle *trans, | |||
135 | u32 item_size; | 135 | u32 item_size; |
136 | 136 | ||
137 | key.objectid = inode_objectid; | 137 | key.objectid = inode_objectid; |
138 | btrfs_set_key_type(&key, BTRFS_INODE_EXTREF_KEY); | 138 | key.type = BTRFS_INODE_EXTREF_KEY; |
139 | key.offset = btrfs_extref_hash(ref_objectid, name, name_len); | 139 | key.offset = btrfs_extref_hash(ref_objectid, name, name_len); |
140 | 140 | ||
141 | path = btrfs_alloc_path(); | 141 | path = btrfs_alloc_path(); |
@@ -209,7 +209,7 @@ int btrfs_del_inode_ref(struct btrfs_trans_handle *trans, | |||
209 | 209 | ||
210 | key.objectid = inode_objectid; | 210 | key.objectid = inode_objectid; |
211 | key.offset = ref_objectid; | 211 | key.offset = ref_objectid; |
212 | btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY); | 212 | key.type = BTRFS_INODE_REF_KEY; |
213 | 213 | ||
214 | path = btrfs_alloc_path(); | 214 | path = btrfs_alloc_path(); |
215 | if (!path) | 215 | if (!path) |
@@ -337,7 +337,7 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans, | |||
337 | 337 | ||
338 | key.objectid = inode_objectid; | 338 | key.objectid = inode_objectid; |
339 | key.offset = ref_objectid; | 339 | key.offset = ref_objectid; |
340 | btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY); | 340 | key.type = BTRFS_INODE_REF_KEY; |
341 | 341 | ||
342 | path = btrfs_alloc_path(); | 342 | path = btrfs_alloc_path(); |
343 | if (!path) | 343 | if (!path) |
@@ -400,7 +400,7 @@ int btrfs_insert_empty_inode(struct btrfs_trans_handle *trans, | |||
400 | struct btrfs_key key; | 400 | struct btrfs_key key; |
401 | int ret; | 401 | int ret; |
402 | key.objectid = objectid; | 402 | key.objectid = objectid; |
403 | btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); | 403 | key.type = BTRFS_INODE_ITEM_KEY; |
404 | key.offset = 0; | 404 | key.offset = 0; |
405 | 405 | ||
406 | ret = btrfs_insert_empty_item(trans, root, path, &key, | 406 | ret = btrfs_insert_empty_item(trans, root, path, &key, |
@@ -420,13 +420,13 @@ int btrfs_lookup_inode(struct btrfs_trans_handle *trans, struct btrfs_root | |||
420 | struct btrfs_key found_key; | 420 | struct btrfs_key found_key; |
421 | 421 | ||
422 | ret = btrfs_search_slot(trans, root, location, path, ins_len, cow); | 422 | ret = btrfs_search_slot(trans, root, location, path, ins_len, cow); |
423 | if (ret > 0 && btrfs_key_type(location) == BTRFS_ROOT_ITEM_KEY && | 423 | if (ret > 0 && location->type == BTRFS_ROOT_ITEM_KEY && |
424 | location->offset == (u64)-1 && path->slots[0] != 0) { | 424 | location->offset == (u64)-1 && path->slots[0] != 0) { |
425 | slot = path->slots[0] - 1; | 425 | slot = path->slots[0] - 1; |
426 | leaf = path->nodes[0]; | 426 | leaf = path->nodes[0]; |
427 | btrfs_item_key_to_cpu(leaf, &found_key, slot); | 427 | btrfs_item_key_to_cpu(leaf, &found_key, slot); |
428 | if (found_key.objectid == location->objectid && | 428 | if (found_key.objectid == location->objectid && |
429 | btrfs_key_type(&found_key) == btrfs_key_type(location)) { | 429 | found_key.type == location->type) { |
430 | path->slots[0]--; | 430 | path->slots[0]--; |
431 | return 0; | 431 | return 0; |
432 | } | 432 | } |
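All of these key-type conversions are mechanical: btrfs_set_key_type() and btrfs_key_type() were one-line wrappers around the .type member of the CPU-order key. A standalone toy showing the equivalence (struct layout as in fs/btrfs/ctree.h; 12 is the kernel's value for BTRFS_INODE_REF_KEY):

    #include <stdio.h>
    #include <stdint.h>

    struct btrfs_key {
        uint64_t objectid;
        uint8_t  type;
        uint64_t offset;
    } __attribute__((__packed__));

    #define BTRFS_INODE_REF_KEY 12

    static void btrfs_set_key_type(struct btrfs_key *k, uint8_t t)
    {
        k->type = t;  /* all the old wrapper ever did */
    }

    int main(void)
    {
        struct btrfs_key a = {0}, b = {0};

        a.type = BTRFS_INODE_REF_KEY;                 /* new style */
        btrfs_set_key_type(&b, BTRFS_INODE_REF_KEY);  /* old style */
        printf("identical: %d\n", a.type == b.type);
        return 0;
    }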
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index 888fbe19079f..83d646bd2e4b 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c | |||
@@ -87,7 +87,7 @@ again: | |||
87 | */ | 87 | */ |
88 | btrfs_item_key_to_cpu(leaf, &key, 0); | 88 | btrfs_item_key_to_cpu(leaf, &key, 0); |
89 | btrfs_release_path(path); | 89 | btrfs_release_path(path); |
90 | root->cache_progress = last; | 90 | root->ino_cache_progress = last; |
91 | up_read(&fs_info->commit_root_sem); | 91 | up_read(&fs_info->commit_root_sem); |
92 | schedule_timeout(1); | 92 | schedule_timeout(1); |
93 | goto again; | 93 | goto again; |
@@ -106,7 +106,7 @@ again: | |||
106 | if (last != (u64)-1 && last + 1 != key.objectid) { | 106 | if (last != (u64)-1 && last + 1 != key.objectid) { |
107 | __btrfs_add_free_space(ctl, last + 1, | 107 | __btrfs_add_free_space(ctl, last + 1, |
108 | key.objectid - last - 1); | 108 | key.objectid - last - 1); |
109 | wake_up(&root->cache_wait); | 109 | wake_up(&root->ino_cache_wait); |
110 | } | 110 | } |
111 | 111 | ||
112 | last = key.objectid; | 112 | last = key.objectid; |
@@ -119,14 +119,14 @@ next: | |||
119 | root->highest_objectid - last - 1); | 119 | root->highest_objectid - last - 1); |
120 | } | 120 | } |
121 | 121 | ||
122 | spin_lock(&root->cache_lock); | 122 | spin_lock(&root->ino_cache_lock); |
123 | root->cached = BTRFS_CACHE_FINISHED; | 123 | root->ino_cache_state = BTRFS_CACHE_FINISHED; |
124 | spin_unlock(&root->cache_lock); | 124 | spin_unlock(&root->ino_cache_lock); |
125 | 125 | ||
126 | root->cache_progress = (u64)-1; | 126 | root->ino_cache_progress = (u64)-1; |
127 | btrfs_unpin_free_ino(root); | 127 | btrfs_unpin_free_ino(root); |
128 | out: | 128 | out: |
129 | wake_up(&root->cache_wait); | 129 | wake_up(&root->ino_cache_wait); |
130 | up_read(&fs_info->commit_root_sem); | 130 | up_read(&fs_info->commit_root_sem); |
131 | 131 | ||
132 | btrfs_free_path(path); | 132 | btrfs_free_path(path); |
@@ -144,20 +144,20 @@ static void start_caching(struct btrfs_root *root) | |||
144 | if (!btrfs_test_opt(root, INODE_MAP_CACHE)) | 144 | if (!btrfs_test_opt(root, INODE_MAP_CACHE)) |
145 | return; | 145 | return; |
146 | 146 | ||
147 | spin_lock(&root->cache_lock); | 147 | spin_lock(&root->ino_cache_lock); |
148 | if (root->cached != BTRFS_CACHE_NO) { | 148 | if (root->ino_cache_state != BTRFS_CACHE_NO) { |
149 | spin_unlock(&root->cache_lock); | 149 | spin_unlock(&root->ino_cache_lock); |
150 | return; | 150 | return; |
151 | } | 151 | } |
152 | 152 | ||
153 | root->cached = BTRFS_CACHE_STARTED; | 153 | root->ino_cache_state = BTRFS_CACHE_STARTED; |
154 | spin_unlock(&root->cache_lock); | 154 | spin_unlock(&root->ino_cache_lock); |
155 | 155 | ||
156 | ret = load_free_ino_cache(root->fs_info, root); | 156 | ret = load_free_ino_cache(root->fs_info, root); |
157 | if (ret == 1) { | 157 | if (ret == 1) { |
158 | spin_lock(&root->cache_lock); | 158 | spin_lock(&root->ino_cache_lock); |
159 | root->cached = BTRFS_CACHE_FINISHED; | 159 | root->ino_cache_state = BTRFS_CACHE_FINISHED; |
160 | spin_unlock(&root->cache_lock); | 160 | spin_unlock(&root->ino_cache_lock); |
161 | return; | 161 | return; |
162 | } | 162 | } |
163 | 163 | ||
@@ -196,11 +196,11 @@ again: | |||
196 | 196 | ||
197 | start_caching(root); | 197 | start_caching(root); |
198 | 198 | ||
199 | wait_event(root->cache_wait, | 199 | wait_event(root->ino_cache_wait, |
200 | root->cached == BTRFS_CACHE_FINISHED || | 200 | root->ino_cache_state == BTRFS_CACHE_FINISHED || |
201 | root->free_ino_ctl->free_space > 0); | 201 | root->free_ino_ctl->free_space > 0); |
202 | 202 | ||
203 | if (root->cached == BTRFS_CACHE_FINISHED && | 203 | if (root->ino_cache_state == BTRFS_CACHE_FINISHED && |
204 | root->free_ino_ctl->free_space == 0) | 204 | root->free_ino_ctl->free_space == 0) |
205 | return -ENOSPC; | 205 | return -ENOSPC; |
206 | else | 206 | else |
@@ -214,17 +214,17 @@ void btrfs_return_ino(struct btrfs_root *root, u64 objectid) | |||
214 | if (!btrfs_test_opt(root, INODE_MAP_CACHE)) | 214 | if (!btrfs_test_opt(root, INODE_MAP_CACHE)) |
215 | return; | 215 | return; |
216 | again: | 216 | again: |
217 | if (root->cached == BTRFS_CACHE_FINISHED) { | 217 | if (root->ino_cache_state == BTRFS_CACHE_FINISHED) { |
218 | __btrfs_add_free_space(pinned, objectid, 1); | 218 | __btrfs_add_free_space(pinned, objectid, 1); |
219 | } else { | 219 | } else { |
220 | down_write(&root->fs_info->commit_root_sem); | 220 | down_write(&root->fs_info->commit_root_sem); |
221 | spin_lock(&root->cache_lock); | 221 | spin_lock(&root->ino_cache_lock); |
222 | if (root->cached == BTRFS_CACHE_FINISHED) { | 222 | if (root->ino_cache_state == BTRFS_CACHE_FINISHED) { |
223 | spin_unlock(&root->cache_lock); | 223 | spin_unlock(&root->ino_cache_lock); |
224 | up_write(&root->fs_info->commit_root_sem); | 224 | up_write(&root->fs_info->commit_root_sem); |
225 | goto again; | 225 | goto again; |
226 | } | 226 | } |
227 | spin_unlock(&root->cache_lock); | 227 | spin_unlock(&root->ino_cache_lock); |
228 | 228 | ||
229 | start_caching(root); | 229 | start_caching(root); |
230 | 230 | ||
@@ -235,10 +235,10 @@ again: | |||
235 | } | 235 | } |
236 | 236 | ||
237 | /* | 237 | /* |
238 | * When a transaction is committed, we'll move those inode numbers which | 238 | * When a transaction is committed, we'll move those inode numbers which are |
239 | * are smaller than root->cache_progress from pinned tree to free_ino tree, | 239 | * smaller than root->ino_cache_progress from pinned tree to free_ino tree, and |
240 | * and others will just be dropped, because the commit root we were | 240 | * others will just be dropped, because the commit root we were searching has |
241 | * searching has changed. | 241 | * changed. |
242 | * | 242 | * |
243 | * Must be called with root->fs_info->commit_root_sem held | 243 | * Must be called with root->fs_info->commit_root_sem held |
244 | */ | 244 | */ |
@@ -261,10 +261,10 @@ void btrfs_unpin_free_ino(struct btrfs_root *root) | |||
261 | info = rb_entry(n, struct btrfs_free_space, offset_index); | 261 | info = rb_entry(n, struct btrfs_free_space, offset_index); |
262 | BUG_ON(info->bitmap); /* Logic error */ | 262 | BUG_ON(info->bitmap); /* Logic error */ |
263 | 263 | ||
264 | if (info->offset > root->cache_progress) | 264 | if (info->offset > root->ino_cache_progress) |
265 | goto free; | 265 | goto free; |
266 | else if (info->offset + info->bytes > root->cache_progress) | 266 | else if (info->offset + info->bytes > root->ino_cache_progress) |
267 | count = root->cache_progress - info->offset + 1; | 267 | count = root->ino_cache_progress - info->offset + 1; |
268 | else | 268 | else |
269 | count = info->bytes; | 269 | count = info->bytes; |
270 | 270 | ||
@@ -462,13 +462,13 @@ again: | |||
462 | } | 462 | } |
463 | } | 463 | } |
464 | 464 | ||
465 | spin_lock(&root->cache_lock); | 465 | spin_lock(&root->ino_cache_lock); |
466 | if (root->cached != BTRFS_CACHE_FINISHED) { | 466 | if (root->ino_cache_state != BTRFS_CACHE_FINISHED) { |
467 | ret = -1; | 467 | ret = -1; |
468 | spin_unlock(&root->cache_lock); | 468 | spin_unlock(&root->ino_cache_lock); |
469 | goto out_put; | 469 | goto out_put; |
470 | } | 470 | } |
471 | spin_unlock(&root->cache_lock); | 471 | spin_unlock(&root->ino_cache_lock); |
472 | 472 | ||
473 | spin_lock(&ctl->tree_lock); | 473 | spin_lock(&ctl->tree_lock); |
474 | prealloc = sizeof(struct btrfs_free_space) * ctl->free_extents; | 474 | prealloc = sizeof(struct btrfs_free_space) * ctl->free_extents; |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 03708ef3deef..fc9c0439caa3 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -153,7 +153,7 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans, | |||
153 | 153 | ||
154 | key.objectid = btrfs_ino(inode); | 154 | key.objectid = btrfs_ino(inode); |
155 | key.offset = start; | 155 | key.offset = start; |
156 | btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); | 156 | key.type = BTRFS_EXTENT_DATA_KEY; |
157 | 157 | ||
158 | datasize = btrfs_file_extent_calc_inline_size(cur_size); | 158 | datasize = btrfs_file_extent_calc_inline_size(cur_size); |
159 | path->leave_spinning = 1; | 159 | path->leave_spinning = 1; |
@@ -249,8 +249,8 @@ static noinline int cow_file_range_inline(struct btrfs_root *root, | |||
249 | data_len = compressed_size; | 249 | data_len = compressed_size; |
250 | 250 | ||
251 | if (start > 0 || | 251 | if (start > 0 || |
252 | actual_end >= PAGE_CACHE_SIZE || | 252 | actual_end > PAGE_CACHE_SIZE || |
253 | data_len >= BTRFS_MAX_INLINE_DATA_SIZE(root) || | 253 | data_len > BTRFS_MAX_INLINE_DATA_SIZE(root) || |
254 | (!compressed_size && | 254 | (!compressed_size && |
255 | (actual_end & (root->sectorsize - 1)) == 0) || | 255 | (actual_end & (root->sectorsize - 1)) == 0) || |
256 | end + 1 < isize || | 256 | end + 1 < isize || |
@@ -348,6 +348,23 @@ static noinline int add_async_extent(struct async_cow *cow, | |||
348 | return 0; | 348 | return 0; |
349 | } | 349 | } |
350 | 350 | ||
351 | static inline int inode_need_compress(struct inode *inode) | ||
352 | { | ||
353 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
354 | |||
355 | /* force compress */ | ||
356 | if (btrfs_test_opt(root, FORCE_COMPRESS)) | ||
357 | return 1; | ||
358 | /* the inode was flagged as producing bad compression ratios */ | ||
359 | if (BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS) | ||
360 | return 0; | ||
361 | if (btrfs_test_opt(root, COMPRESS) || | ||
362 | BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS || | ||
363 | BTRFS_I(inode)->force_compress) | ||
364 | return 1; | ||
365 | return 0; | ||
366 | } | ||
367 | |||
351 | /* | 368 | /* |
352 | * we create compressed extents in two phases. The first | 369 | * we create compressed extents in two phases. The first |
353 | * phase compresses a range of pages that have already been | 370 | * phase compresses a range of pages that have already been |
@@ -444,10 +461,7 @@ again: | |||
444 | * inode has not been flagged as nocompress. This flag can | 461 | * inode has not been flagged as nocompress. This flag can |
445 | * change at any time if we discover bad compression ratios. | 462 | * change at any time if we discover bad compression ratios. |
446 | */ | 463 | */ |
447 | if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS) && | 464 | if (inode_need_compress(inode)) { |
448 | (btrfs_test_opt(root, COMPRESS) || | ||
449 | (BTRFS_I(inode)->force_compress) || | ||
450 | (BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS))) { | ||
451 | WARN_ON(pages); | 465 | WARN_ON(pages); |
452 | pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS); | 466 | pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS); |
453 | if (!pages) { | 467 | if (!pages) { |
@@ -778,8 +792,12 @@ retry: | |||
778 | ins.offset, | 792 | ins.offset, |
779 | BTRFS_ORDERED_COMPRESSED, | 793 | BTRFS_ORDERED_COMPRESSED, |
780 | async_extent->compress_type); | 794 | async_extent->compress_type); |
781 | if (ret) | 795 | if (ret) { |
796 | btrfs_drop_extent_cache(inode, async_extent->start, | ||
797 | async_extent->start + | ||
798 | async_extent->ram_size - 1, 0); | ||
782 | goto out_free_reserve; | 799 | goto out_free_reserve; |
800 | } | ||
783 | 801 | ||
784 | /* | 802 | /* |
785 | * clear dirty, set writeback and unlock the pages. | 803 | * clear dirty, set writeback and unlock the pages. |
@@ -971,14 +989,14 @@ static noinline int cow_file_range(struct inode *inode, | |||
971 | ret = btrfs_add_ordered_extent(inode, start, ins.objectid, | 989 | ret = btrfs_add_ordered_extent(inode, start, ins.objectid, |
972 | ram_size, cur_alloc_size, 0); | 990 | ram_size, cur_alloc_size, 0); |
973 | if (ret) | 991 | if (ret) |
974 | goto out_reserve; | 992 | goto out_drop_extent_cache; |
975 | 993 | ||
976 | if (root->root_key.objectid == | 994 | if (root->root_key.objectid == |
977 | BTRFS_DATA_RELOC_TREE_OBJECTID) { | 995 | BTRFS_DATA_RELOC_TREE_OBJECTID) { |
978 | ret = btrfs_reloc_clone_csums(inode, start, | 996 | ret = btrfs_reloc_clone_csums(inode, start, |
979 | cur_alloc_size); | 997 | cur_alloc_size); |
980 | if (ret) | 998 | if (ret) |
981 | goto out_reserve; | 999 | goto out_drop_extent_cache; |
982 | } | 1000 | } |
983 | 1001 | ||
984 | if (disk_num_bytes < cur_alloc_size) | 1002 | if (disk_num_bytes < cur_alloc_size) |
@@ -1006,6 +1024,8 @@ static noinline int cow_file_range(struct inode *inode, | |||
1006 | out: | 1024 | out: |
1007 | return ret; | 1025 | return ret; |
1008 | 1026 | ||
1027 | out_drop_extent_cache: | ||
1028 | btrfs_drop_extent_cache(inode, start, start + ram_size - 1, 0); | ||
1009 | out_reserve: | 1029 | out_reserve: |
1010 | btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1); | 1030 | btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1); |
1011 | out_unlock: | 1031 | out_unlock: |
@@ -1088,7 +1108,8 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page, | |||
1088 | async_cow->locked_page = locked_page; | 1108 | async_cow->locked_page = locked_page; |
1089 | async_cow->start = start; | 1109 | async_cow->start = start; |
1090 | 1110 | ||
1091 | if (BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS) | 1111 | if (BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS && |
1112 | !btrfs_test_opt(root, FORCE_COMPRESS)) | ||
1092 | cur_end = end; | 1113 | cur_end = end; |
1093 | else | 1114 | else |
1094 | cur_end = min(end, start + 512 * 1024 - 1); | 1115 | cur_end = min(end, start + 512 * 1024 - 1); |
@@ -1096,8 +1117,10 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page, | |||
1096 | async_cow->end = cur_end; | 1117 | async_cow->end = cur_end; |
1097 | INIT_LIST_HEAD(&async_cow->extents); | 1118 | INIT_LIST_HEAD(&async_cow->extents); |
1098 | 1119 | ||
1099 | btrfs_init_work(&async_cow->work, async_cow_start, | 1120 | btrfs_init_work(&async_cow->work, |
1100 | async_cow_submit, async_cow_free); | 1121 | btrfs_delalloc_helper, |
1122 | async_cow_start, async_cow_submit, | ||
1123 | async_cow_free); | ||
1101 | 1124 | ||
1102 | nr_pages = (cur_end - start + PAGE_CACHE_SIZE) >> | 1125 | nr_pages = (cur_end - start + PAGE_CACHE_SIZE) >> |
1103 | PAGE_CACHE_SHIFT; | 1126 | PAGE_CACHE_SHIFT; |
@@ -1437,6 +1460,26 @@ error: | |||
1437 | return ret; | 1460 | return ret; |
1438 | } | 1461 | } |
1439 | 1462 | ||
1463 | static inline int need_force_cow(struct inode *inode, u64 start, u64 end) | ||
1464 | { | ||
1465 | |||
1466 | if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) && | ||
1467 | !(BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC)) | ||
1468 | return 0; | ||
1469 | |||
1470 | /* | ||
1471 | * @defrag_bytes is a hint value; no spinlock is held here. | ||
1472 | * If it is not zero, it means the file is being defragged. | ||
1473 | * Force COW if the given extent needs to be defragged. | ||
1474 | */ | ||
1475 | if (BTRFS_I(inode)->defrag_bytes && | ||
1476 | test_range_bit(&BTRFS_I(inode)->io_tree, start, end, | ||
1477 | EXTENT_DEFRAG, 0, NULL)) | ||
1478 | return 1; | ||
1479 | |||
1480 | return 0; | ||
1481 | } | ||
1482 | |||
1440 | /* | 1483 | /* |
1441 | * extent_io.c call back to do delayed allocation processing | 1484 | * extent_io.c call back to do delayed allocation processing |
1442 | */ | 1485 | */ |
@@ -1445,17 +1488,15 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page, | |||
1445 | unsigned long *nr_written) | 1488 | unsigned long *nr_written) |
1446 | { | 1489 | { |
1447 | int ret; | 1490 | int ret; |
1448 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1491 | int force_cow = need_force_cow(inode, start, end); |
1449 | 1492 | ||
1450 | if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) { | 1493 | if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW && !force_cow) { |
1451 | ret = run_delalloc_nocow(inode, locked_page, start, end, | 1494 | ret = run_delalloc_nocow(inode, locked_page, start, end, |
1452 | page_started, 1, nr_written); | 1495 | page_started, 1, nr_written); |
1453 | } else if (BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC) { | 1496 | } else if (BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC && !force_cow) { |
1454 | ret = run_delalloc_nocow(inode, locked_page, start, end, | 1497 | ret = run_delalloc_nocow(inode, locked_page, start, end, |
1455 | page_started, 0, nr_written); | 1498 | page_started, 0, nr_written); |
1456 | } else if (!btrfs_test_opt(root, COMPRESS) && | 1499 | } else if (!inode_need_compress(inode)) { |
1457 | !(BTRFS_I(inode)->force_compress) && | ||
1458 | !(BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS)) { | ||
1459 | ret = cow_file_range(inode, locked_page, start, end, | 1500 | ret = cow_file_range(inode, locked_page, start, end, |
1460 | page_started, nr_written, 1); | 1501 | page_started, nr_written, 1); |
1461 | } else { | 1502 | } else { |
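Roughly, the new dispatch order in run_delalloc_range() is the following, with the defrag check able to override both nocow paths:

    1. NODATACOW inode, range not marked EXTENT_DEFRAG -> run_delalloc_nocow()
    2. PREALLOC inode, range not marked EXTENT_DEFRAG  -> run_delalloc_nocow()
       (nocow for preallocated extents)
    3. inode_need_compress() returns 0                 -> cow_file_range()
    4. otherwise                                       -> async COW with compression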
@@ -1547,6 +1588,8 @@ static void btrfs_set_bit_hook(struct inode *inode, | |||
1547 | struct extent_state *state, unsigned long *bits) | 1588 | struct extent_state *state, unsigned long *bits) |
1548 | { | 1589 | { |
1549 | 1590 | ||
1591 | if ((*bits & EXTENT_DEFRAG) && !(*bits & EXTENT_DELALLOC)) | ||
1592 | WARN_ON(1); | ||
1550 | /* | 1593 | /* |
1551 | * set_bit and clear bit hooks normally require _irqsave/restore | 1594 | * set_bit and clear bit hooks normally require _irqsave/restore |
1552 | * but in this case, we are only testing for the DELALLOC | 1595 | * but in this case, we are only testing for the DELALLOC |
@@ -1569,6 +1612,8 @@ static void btrfs_set_bit_hook(struct inode *inode, | |||
1569 | root->fs_info->delalloc_batch); | 1612 | root->fs_info->delalloc_batch); |
1570 | spin_lock(&BTRFS_I(inode)->lock); | 1613 | spin_lock(&BTRFS_I(inode)->lock); |
1571 | BTRFS_I(inode)->delalloc_bytes += len; | 1614 | BTRFS_I(inode)->delalloc_bytes += len; |
1615 | if (*bits & EXTENT_DEFRAG) | ||
1616 | BTRFS_I(inode)->defrag_bytes += len; | ||
1572 | if (do_list && !test_bit(BTRFS_INODE_IN_DELALLOC_LIST, | 1617 | if (do_list && !test_bit(BTRFS_INODE_IN_DELALLOC_LIST, |
1573 | &BTRFS_I(inode)->runtime_flags)) | 1618 | &BTRFS_I(inode)->runtime_flags)) |
1574 | btrfs_add_delalloc_inodes(root, inode); | 1619 | btrfs_add_delalloc_inodes(root, inode); |
@@ -1583,6 +1628,13 @@ static void btrfs_clear_bit_hook(struct inode *inode, | |||
1583 | struct extent_state *state, | 1628 | struct extent_state *state, |
1584 | unsigned long *bits) | 1629 | unsigned long *bits) |
1585 | { | 1630 | { |
1631 | u64 len = state->end + 1 - state->start; | ||
1632 | |||
1633 | spin_lock(&BTRFS_I(inode)->lock); | ||
1634 | if ((state->state & EXTENT_DEFRAG) && (*bits & EXTENT_DEFRAG)) | ||
1635 | BTRFS_I(inode)->defrag_bytes -= len; | ||
1636 | spin_unlock(&BTRFS_I(inode)->lock); | ||
1637 | |||
1586 | /* | 1638 | /* |
1587 | * set_bit and clear bit hooks normally require _irqsave/restore | 1639 | * set_bit and clear bit hooks normally require _irqsave/restore |
1588 | * but in this case, we are only testing for the DELALLOC | 1640 | * but in this case, we are only testing for the DELALLOC |
@@ -1590,7 +1642,6 @@ static void btrfs_clear_bit_hook(struct inode *inode, | |||
1590 | */ | 1642 | */ |
1591 | if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { | 1643 | if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { |
1592 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1644 | struct btrfs_root *root = BTRFS_I(inode)->root; |
1593 | u64 len = state->end + 1 - state->start; | ||
1594 | bool do_list = !btrfs_is_free_space_inode(inode); | 1645 | bool do_list = !btrfs_is_free_space_inode(inode); |
1595 | 1646 | ||
1596 | if (*bits & EXTENT_FIRST_DELALLOC) { | 1647 | if (*bits & EXTENT_FIRST_DELALLOC) { |
@@ -1881,7 +1932,8 @@ static int btrfs_writepage_start_hook(struct page *page, u64 start, u64 end) | |||
1881 | 1932 | ||
1882 | SetPageChecked(page); | 1933 | SetPageChecked(page); |
1883 | page_cache_get(page); | 1934 | page_cache_get(page); |
1884 | btrfs_init_work(&fixup->work, btrfs_writepage_fixup_worker, NULL, NULL); | 1935 | btrfs_init_work(&fixup->work, btrfs_fixup_helper, |
1936 | btrfs_writepage_fixup_worker, NULL, NULL); | ||
1885 | fixup->page = page; | 1937 | fixup->page = page; |
1886 | btrfs_queue_work(root->fs_info->fixup_workers, &fixup->work); | 1938 | btrfs_queue_work(root->fs_info->fixup_workers, &fixup->work); |
1887 | return -EBUSY; | 1939 | return -EBUSY; |
@@ -2651,6 +2703,10 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) | |||
2651 | goto out; | 2703 | goto out; |
2652 | } | 2704 | } |
2653 | 2705 | ||
2706 | btrfs_free_io_failure_record(inode, ordered_extent->file_offset, | ||
2707 | ordered_extent->file_offset + | ||
2708 | ordered_extent->len - 1); | ||
2709 | |||
2654 | if (test_bit(BTRFS_ORDERED_TRUNCATED, &ordered_extent->flags)) { | 2710 | if (test_bit(BTRFS_ORDERED_TRUNCATED, &ordered_extent->flags)) { |
2655 | truncated = true; | 2711 | truncated = true; |
2656 | logical_len = ordered_extent->truncated_len; | 2712 | logical_len = ordered_extent->truncated_len; |
@@ -2822,7 +2878,8 @@ static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, | |||
2822 | struct inode *inode = page->mapping->host; | 2878 | struct inode *inode = page->mapping->host; |
2823 | struct btrfs_root *root = BTRFS_I(inode)->root; | 2879 | struct btrfs_root *root = BTRFS_I(inode)->root; |
2824 | struct btrfs_ordered_extent *ordered_extent = NULL; | 2880 | struct btrfs_ordered_extent *ordered_extent = NULL; |
2825 | struct btrfs_workqueue *workers; | 2881 | struct btrfs_workqueue *wq; |
2882 | btrfs_work_func_t func; | ||
2826 | 2883 | ||
2827 | trace_btrfs_writepage_end_io_hook(page, start, end, uptodate); | 2884 | trace_btrfs_writepage_end_io_hook(page, start, end, uptodate); |
2828 | 2885 | ||
@@ -2831,17 +2888,55 @@ static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, | |||
2831 | end - start + 1, uptodate)) | 2888 | end - start + 1, uptodate)) |
2832 | return 0; | 2889 | return 0; |
2833 | 2890 | ||
2834 | btrfs_init_work(&ordered_extent->work, finish_ordered_fn, NULL, NULL); | 2891 | if (btrfs_is_free_space_inode(inode)) { |
2892 | wq = root->fs_info->endio_freespace_worker; | ||
2893 | func = btrfs_freespace_write_helper; | ||
2894 | } else { | ||
2895 | wq = root->fs_info->endio_write_workers; | ||
2896 | func = btrfs_endio_write_helper; | ||
2897 | } | ||
2835 | 2898 | ||
2836 | if (btrfs_is_free_space_inode(inode)) | 2899 | btrfs_init_work(&ordered_extent->work, func, finish_ordered_fn, NULL, |
2837 | workers = root->fs_info->endio_freespace_worker; | 2900 | NULL); |
2838 | else | 2901 | btrfs_queue_work(wq, &ordered_extent->work); |
2839 | workers = root->fs_info->endio_write_workers; | ||
2840 | btrfs_queue_work(workers, &ordered_extent->work); | ||
2841 | 2902 | ||
2842 | return 0; | 2903 | return 0; |
2843 | } | 2904 | } |
2844 | 2905 | ||
2906 | static int __readpage_endio_check(struct inode *inode, | ||
2907 | struct btrfs_io_bio *io_bio, | ||
2908 | int icsum, struct page *page, | ||
2909 | int pgoff, u64 start, size_t len) | ||
2910 | { | ||
2911 | char *kaddr; | ||
2912 | u32 csum_expected; | ||
2913 | u32 csum = ~(u32)0; | ||
2914 | static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL, | ||
2915 | DEFAULT_RATELIMIT_BURST); | ||
2916 | |||
2917 | csum_expected = *(((u32 *)io_bio->csum) + icsum); | ||
2918 | |||
2919 | kaddr = kmap_atomic(page); | ||
2920 | csum = btrfs_csum_data(kaddr + pgoff, csum, len); | ||
2921 | btrfs_csum_final(csum, (char *)&csum); | ||
2922 | if (csum != csum_expected) | ||
2923 | goto zeroit; | ||
2924 | |||
2925 | kunmap_atomic(kaddr); | ||
2926 | return 0; | ||
2927 | zeroit: | ||
2928 | if (__ratelimit(&_rs)) | ||
2929 | btrfs_info(BTRFS_I(inode)->root->fs_info, | ||
2930 | "csum failed ino %llu off %llu csum %u expected csum %u", | ||
2931 | btrfs_ino(inode), start, csum, csum_expected); | ||
2932 | memset(kaddr + pgoff, 1, len); | ||
2933 | flush_dcache_page(page); | ||
2934 | kunmap_atomic(kaddr); | ||
2935 | if (csum_expected == 0) | ||
2936 | return 0; | ||
2937 | return -EIO; | ||
2938 | } | ||
2939 | |||
2845 | /* | 2940 | /* |
2846 | * when reads are done, we need to check csums to verify the data is correct | 2941 | * when reads are done, we need to check csums to verify the data is correct |
2847 | * if there's a match, we allow the bio to finish. If not, the code in | 2942 | * if there's a match, we allow the bio to finish. If not, the code in |
@@ -2854,20 +2949,15 @@ static int btrfs_readpage_end_io_hook(struct btrfs_io_bio *io_bio, | |||
2854 | size_t offset = start - page_offset(page); | 2949 | size_t offset = start - page_offset(page); |
2855 | struct inode *inode = page->mapping->host; | 2950 | struct inode *inode = page->mapping->host; |
2856 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | 2951 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; |
2857 | char *kaddr; | ||
2858 | struct btrfs_root *root = BTRFS_I(inode)->root; | 2952 | struct btrfs_root *root = BTRFS_I(inode)->root; |
2859 | u32 csum_expected; | ||
2860 | u32 csum = ~(u32)0; | ||
2861 | static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL, | ||
2862 | DEFAULT_RATELIMIT_BURST); | ||
2863 | 2953 | ||
2864 | if (PageChecked(page)) { | 2954 | if (PageChecked(page)) { |
2865 | ClearPageChecked(page); | 2955 | ClearPageChecked(page); |
2866 | goto good; | 2956 | return 0; |
2867 | } | 2957 | } |
2868 | 2958 | ||
2869 | if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) | 2959 | if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) |
2870 | goto good; | 2960 | return 0; |
2871 | 2961 | ||
2872 | if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID && | 2962 | if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID && |
2873 | test_range_bit(io_tree, start, end, EXTENT_NODATASUM, 1, NULL)) { | 2963 | test_range_bit(io_tree, start, end, EXTENT_NODATASUM, 1, NULL)) { |
@@ -2877,28 +2967,8 @@ static int btrfs_readpage_end_io_hook(struct btrfs_io_bio *io_bio, | |||
2877 | } | 2967 | } |
2878 | 2968 | ||
2879 | phy_offset >>= inode->i_sb->s_blocksize_bits; | 2969 | phy_offset >>= inode->i_sb->s_blocksize_bits; |
2880 | csum_expected = *(((u32 *)io_bio->csum) + phy_offset); | 2970 | return __readpage_endio_check(inode, io_bio, phy_offset, page, offset, |
2881 | 2971 | start, (size_t)(end - start + 1)); | |
2882 | kaddr = kmap_atomic(page); | ||
2883 | csum = btrfs_csum_data(kaddr + offset, csum, end - start + 1); | ||
2884 | btrfs_csum_final(csum, (char *)&csum); | ||
2885 | if (csum != csum_expected) | ||
2886 | goto zeroit; | ||
2887 | |||
2888 | kunmap_atomic(kaddr); | ||
2889 | good: | ||
2890 | return 0; | ||
2891 | |||
2892 | zeroit: | ||
2893 | if (__ratelimit(&_rs)) | ||
2894 | btrfs_info(root->fs_info, "csum failed ino %llu off %llu csum %u expected csum %u", | ||
2895 | btrfs_ino(page->mapping->host), start, csum, csum_expected); | ||
2896 | memset(kaddr + offset, 1, end - start + 1); | ||
2897 | flush_dcache_page(page); | ||
2898 | kunmap_atomic(kaddr); | ||
2899 | if (csum_expected == 0) | ||
2900 | return 0; | ||
2901 | return -EIO; | ||
2902 | } | 2972 | } |
2903 | 2973 | ||
2904 | struct delayed_iput { | 2974 | struct delayed_iput { |
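The factored-out __readpage_endio_check() keeps the old semantics: checksum the block, compare against the value the bio carried, and on mismatch poison the page range (memset to 1) so stale data is never handed to the reader. A userspace toy of the flow, with csum32() as a deliberately fake stand-in for btrfs_csum_data()/btrfs_csum_final():

    #include <errno.h>
    #include <stdio.h>
    #include <string.h>
    #include <stdint.h>

    static uint32_t csum32(const void *buf, size_t len)
    {
        const uint8_t *p = buf;
        uint32_t c = ~0u;              /* toy checksum, not real crc32c */

        while (len--)
            c = (c << 1) ^ *p++;
        return c;
    }

    static int readpage_check(uint8_t *data, size_t len, uint32_t expected)
    {
        if (csum32(data, len) == expected)
            return 0;
        memset(data, 1, len);          /* 'zeroit': poison the bad block */
        return expected == 0 ? 0 : -EIO;
    }

    int main(void)
    {
        uint8_t blk[16] = "hello";
        uint32_t good = csum32(blk, sizeof(blk));

        printf("match:   %d\n", readpage_check(blk, sizeof(blk), good));
        printf("corrupt: %d\n", readpage_check(blk, sizeof(blk), good + 1));
        return 0;
    }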
@@ -3145,7 +3215,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) | |||
3145 | path->reada = -1; | 3215 | path->reada = -1; |
3146 | 3216 | ||
3147 | key.objectid = BTRFS_ORPHAN_OBJECTID; | 3217 | key.objectid = BTRFS_ORPHAN_OBJECTID; |
3148 | btrfs_set_key_type(&key, BTRFS_ORPHAN_ITEM_KEY); | 3218 | key.type = BTRFS_ORPHAN_ITEM_KEY; |
3149 | key.offset = (u64)-1; | 3219 | key.offset = (u64)-1; |
3150 | 3220 | ||
3151 | while (1) { | 3221 | while (1) { |
@@ -3172,7 +3242,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) | |||
3172 | /* make sure the item matches what we want */ | 3242 | /* make sure the item matches what we want */ |
3173 | if (found_key.objectid != BTRFS_ORPHAN_OBJECTID) | 3243 | if (found_key.objectid != BTRFS_ORPHAN_OBJECTID) |
3174 | break; | 3244 | break; |
3175 | if (btrfs_key_type(&found_key) != BTRFS_ORPHAN_ITEM_KEY) | 3245 | if (found_key.type != BTRFS_ORPHAN_ITEM_KEY) |
3176 | break; | 3246 | break; |
3177 | 3247 | ||
3178 | /* release the path since we're done with it */ | 3248 | /* release the path since we're done with it */ |
@@ -3648,7 +3718,8 @@ noinline int btrfs_update_inode(struct btrfs_trans_handle *trans, | |||
3648 | * without delay | 3718 | * without delay |
3649 | */ | 3719 | */ |
3650 | if (!btrfs_is_free_space_inode(inode) | 3720 | if (!btrfs_is_free_space_inode(inode) |
3651 | && root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) { | 3721 | && root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID |
3722 | && !root->fs_info->log_root_recovering) { | ||
3652 | btrfs_update_root_times(trans, root); | 3723 | btrfs_update_root_times(trans, root); |
3653 | 3724 | ||
3654 | ret = btrfs_delayed_update_inode(trans, root, inode); | 3725 | ret = btrfs_delayed_update_inode(trans, root, inode); |
@@ -4071,7 +4142,7 @@ search_again: | |||
4071 | fi = NULL; | 4142 | fi = NULL; |
4072 | leaf = path->nodes[0]; | 4143 | leaf = path->nodes[0]; |
4073 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); | 4144 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); |
4074 | found_type = btrfs_key_type(&found_key); | 4145 | found_type = found_key.type; |
4075 | 4146 | ||
4076 | if (found_key.objectid != ino) | 4147 | if (found_key.objectid != ino) |
4077 | break; | 4148 | break; |
@@ -4234,7 +4305,8 @@ out: | |||
4234 | btrfs_abort_transaction(trans, root, ret); | 4305 | btrfs_abort_transaction(trans, root, ret); |
4235 | } | 4306 | } |
4236 | error: | 4307 | error: |
4237 | if (last_size != (u64)-1) | 4308 | if (last_size != (u64)-1 && |
4309 | root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) | ||
4238 | btrfs_ordered_update_i_size(inode, last_size, NULL); | 4310 | btrfs_ordered_update_i_size(inode, last_size, NULL); |
4239 | btrfs_free_path(path); | 4311 | btrfs_free_path(path); |
4240 | return err; | 4312 | return err; |
@@ -4674,6 +4746,11 @@ static void evict_inode_truncate_pages(struct inode *inode) | |||
4674 | clear_bit(EXTENT_FLAG_LOGGING, &em->flags); | 4746 | clear_bit(EXTENT_FLAG_LOGGING, &em->flags); |
4675 | remove_extent_mapping(map_tree, em); | 4747 | remove_extent_mapping(map_tree, em); |
4676 | free_extent_map(em); | 4748 | free_extent_map(em); |
4749 | if (need_resched()) { | ||
4750 | write_unlock(&map_tree->lock); | ||
4751 | cond_resched(); | ||
4752 | write_lock(&map_tree->lock); | ||
4753 | } | ||
4677 | } | 4754 | } |
4678 | write_unlock(&map_tree->lock); | 4755 | write_unlock(&map_tree->lock); |
4679 | 4756 | ||
@@ -4696,6 +4773,7 @@ static void evict_inode_truncate_pages(struct inode *inode) | |||
4696 | &cached_state, GFP_NOFS); | 4773 | &cached_state, GFP_NOFS); |
4697 | free_extent_state(state); | 4774 | free_extent_state(state); |
4698 | 4775 | ||
4776 | cond_resched(); | ||
4699 | spin_lock(&io_tree->lock); | 4777 | spin_lock(&io_tree->lock); |
4700 | } | 4778 | } |
4701 | spin_unlock(&io_tree->lock); | 4779 | spin_unlock(&io_tree->lock); |
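Both hunks in evict_inode_truncate_pages() apply roughly the same latency fix: a potentially long eviction loop now yields the CPU, and it must drop its lock first, since rescheduling with a spinning lock held is forbidden. The general pattern, as a kernel-style sketch where work_left() and do_one_unit() are hypothetical placeholders:

    write_lock(&tree->lock);
    while (work_left(tree)) {
        do_one_unit(tree);
        if (need_resched()) {
            write_unlock(&tree->lock);  /* never reschedule with it held */
            cond_resched();
            write_lock(&tree->lock);    /* re-acquire; state may have moved */
        }
    }
    write_unlock(&tree->lock);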
@@ -4726,6 +4804,8 @@ void btrfs_evict_inode(struct inode *inode) | |||
4726 | /* do we really want it for ->i_nlink > 0 and zero btrfs_root_refs? */ | 4804 | /* do we really want it for ->i_nlink > 0 and zero btrfs_root_refs? */ |
4727 | btrfs_wait_ordered_range(inode, 0, (u64)-1); | 4805 | btrfs_wait_ordered_range(inode, 0, (u64)-1); |
4728 | 4806 | ||
4807 | btrfs_free_io_failure_record(inode, 0, (u64)-1); | ||
4808 | |||
4729 | if (root->fs_info->log_root_recovering) { | 4809 | if (root->fs_info->log_root_recovering) { |
4730 | BUG_ON(test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, | 4810 | BUG_ON(test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, |
4731 | &BTRFS_I(inode)->runtime_flags)); | 4811 | &BTRFS_I(inode)->runtime_flags)); |
@@ -5181,6 +5261,42 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) | |||
5181 | iput(inode); | 5261 | iput(inode); |
5182 | inode = ERR_PTR(ret); | 5262 | inode = ERR_PTR(ret); |
5183 | } | 5263 | } |
5264 | /* | ||
5265 | * If orphan cleanup did remove any orphans, it means the tree | ||
5266 | * was modified and therefore the commit root is not the same as | ||
5267 | * the current root anymore. This is a problem, because send | ||
5268 | * uses the commit root and therefore can see inode items that | ||
5269 | * don't exist in the current root anymore, and for example make | ||
5270 | * calls to btrfs_iget, which will do tree lookups based on the | ||
5271 | * current root and not on the commit root. Those lookups will | ||
5272 | * fail, returning a -ESTALE error, and making send fail with | ||
5273 | * that error. So make sure a send does not see any orphans we | ||
5274 | * have just removed, and that it will see the same inodes | ||
5275 | * regardless of whether a transaction commit happened before | ||
5276 | * it started (meaning that the commit root will be the same as | ||
5277 | * the current root) or not. | ||
5278 | */ | ||
5279 | if (sub_root->node != sub_root->commit_root) { | ||
5280 | u64 sub_flags = btrfs_root_flags(&sub_root->root_item); | ||
5281 | |||
5282 | if (sub_flags & BTRFS_ROOT_SUBVOL_RDONLY) { | ||
5283 | struct extent_buffer *eb; | ||
5284 | |||
5285 | /* | ||
5286 | * Assert we can't have races between dentry | ||
5287 | * lookup called through the snapshot creation | ||
5288 | * ioctl and the VFS. | ||
5289 | */ | ||
5290 | ASSERT(mutex_is_locked(&dir->i_mutex)); | ||
5291 | |||
5292 | down_write(&root->fs_info->commit_root_sem); | ||
5293 | eb = sub_root->commit_root; | ||
5294 | sub_root->commit_root = | ||
5295 | btrfs_root_node(sub_root); | ||
5296 | up_write(&root->fs_info->commit_root_sem); | ||
5297 | free_extent_buffer(eb); | ||
5298 | } | ||
5299 | } | ||
5184 | } | 5300 | } |
5185 | 5301 | ||
5186 | return inode; | 5302 | return inode; |
@@ -5274,7 +5390,7 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx) | |||
5274 | btrfs_get_delayed_items(inode, &ins_list, &del_list); | 5390 | btrfs_get_delayed_items(inode, &ins_list, &del_list); |
5275 | } | 5391 | } |
5276 | 5392 | ||
5277 | btrfs_set_key_type(&key, key_type); | 5393 | key.type = key_type; |
5278 | key.offset = ctx->pos; | 5394 | key.offset = ctx->pos; |
5279 | key.objectid = btrfs_ino(inode); | 5395 | key.objectid = btrfs_ino(inode); |
5280 | 5396 | ||
@@ -5299,7 +5415,7 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx) | |||
5299 | 5415 | ||
5300 | if (found_key.objectid != key.objectid) | 5416 | if (found_key.objectid != key.objectid) |
5301 | break; | 5417 | break; |
5302 | if (btrfs_key_type(&found_key) != key_type) | 5418 | if (found_key.type != key_type) |
5303 | break; | 5419 | break; |
5304 | if (found_key.offset < ctx->pos) | 5420 | if (found_key.offset < ctx->pos) |
5305 | goto next; | 5421 | goto next; |
@@ -5511,7 +5627,7 @@ static int btrfs_set_inode_index_count(struct inode *inode) | |||
5511 | int ret; | 5627 | int ret; |
5512 | 5628 | ||
5513 | key.objectid = btrfs_ino(inode); | 5629 | key.objectid = btrfs_ino(inode); |
5514 | btrfs_set_key_type(&key, BTRFS_DIR_INDEX_KEY); | 5630 | key.type = BTRFS_DIR_INDEX_KEY; |
5515 | key.offset = (u64)-1; | 5631 | key.offset = (u64)-1; |
5516 | 5632 | ||
5517 | path = btrfs_alloc_path(); | 5633 | path = btrfs_alloc_path(); |
@@ -5543,7 +5659,7 @@ static int btrfs_set_inode_index_count(struct inode *inode) | |||
5543 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); | 5659 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); |
5544 | 5660 | ||
5545 | if (found_key.objectid != btrfs_ino(inode) || | 5661 | if (found_key.objectid != btrfs_ino(inode) || |
5546 | btrfs_key_type(&found_key) != BTRFS_DIR_INDEX_KEY) { | 5662 | found_key.type != BTRFS_DIR_INDEX_KEY) { |
5547 | BTRFS_I(inode)->index_cnt = 2; | 5663 | BTRFS_I(inode)->index_cnt = 2; |
5548 | goto out; | 5664 | goto out; |
5549 | } | 5665 | } |
@@ -5577,6 +5693,17 @@ int btrfs_set_inode_index(struct inode *dir, u64 *index) | |||
5577 | return ret; | 5693 | return ret; |
5578 | } | 5694 | } |
5579 | 5695 | ||
5696 | static int btrfs_insert_inode_locked(struct inode *inode) | ||
5697 | { | ||
5698 | struct btrfs_iget_args args; | ||
5699 | args.location = &BTRFS_I(inode)->location; | ||
5700 | args.root = BTRFS_I(inode)->root; | ||
5701 | |||
5702 | return insert_inode_locked4(inode, | ||
5703 | btrfs_inode_hash(inode->i_ino, BTRFS_I(inode)->root), | ||
5704 | btrfs_find_actor, &args); | ||
5705 | } | ||
5706 | |||
5580 | static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, | 5707 | static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, |
5581 | struct btrfs_root *root, | 5708 | struct btrfs_root *root, |
5582 | struct inode *dir, | 5709 | struct inode *dir, |
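insert_inode_locked4() is the VFS helper that hashes a brand-new inode with I_NEW set, so concurrent lookups on the same key sleep until unlock_new_inode() is called. The hunks that follow move btrfs inode creation onto that protocol; a sketch of the intended ordering (not the literal kernel flow):

    inode = btrfs_new_inode(trans, root, dir, ...);
    /* inside btrfs_new_inode():
     *   - fill BTRFS_I(inode)->location
     *   - btrfs_insert_inode_locked(inode)   hashed, I_NEW set
     *   - insert the inode item and ref
     */
    err = btrfs_init_inode_security(trans, inode, dir, name);  /* still locked */
    err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
    unlock_new_inode(inode);       /* wake anyone blocked in iget/igrab */
    d_instantiate(dentry, inode);  /* expose through the dcache last */

Any failure after the hash insert has to call unlock_new_inode() before iput(), which is what the new fail_unlock and out_unlock_inode labels are for.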
@@ -5606,6 +5733,13 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, | |||
5606 | } | 5733 | } |
5607 | 5734 | ||
5608 | /* | 5735 | /* |
5736 | * O_TMPFILE, set link count to 0, so that after this point, | ||
5737 | * we fill in an inode item with the correct link count. | ||
5738 | */ | ||
5739 | if (!name) | ||
5740 | set_nlink(inode, 0); | ||
5741 | |||
5742 | /* | ||
5609 | * we have to initialize this early, so we can reclaim the inode | 5743 | * we have to initialize this early, so we can reclaim the inode |
5610 | * number if we fail afterwards in this function. | 5744 | * number if we fail afterwards in this function. |
5611 | */ | 5745 | */ |
@@ -5643,7 +5777,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, | |||
5643 | set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags); | 5777 | set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags); |
5644 | 5778 | ||
5645 | key[0].objectid = objectid; | 5779 | key[0].objectid = objectid; |
5646 | btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY); | 5780 | key[0].type = BTRFS_INODE_ITEM_KEY; |
5647 | key[0].offset = 0; | 5781 | key[0].offset = 0; |
5648 | 5782 | ||
5649 | sizes[0] = sizeof(struct btrfs_inode_item); | 5783 | sizes[0] = sizeof(struct btrfs_inode_item); |
@@ -5656,16 +5790,25 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, | |||
5656 | * add more hard links than can fit in the ref item. | 5790 | * add more hard links than can fit in the ref item. |
5657 | */ | 5791 | */ |
5658 | key[1].objectid = objectid; | 5792 | key[1].objectid = objectid; |
5659 | btrfs_set_key_type(&key[1], BTRFS_INODE_REF_KEY); | 5793 | key[1].type = BTRFS_INODE_REF_KEY; |
5660 | key[1].offset = ref_objectid; | 5794 | key[1].offset = ref_objectid; |
5661 | 5795 | ||
5662 | sizes[1] = name_len + sizeof(*ref); | 5796 | sizes[1] = name_len + sizeof(*ref); |
5663 | } | 5797 | } |
5664 | 5798 | ||
5799 | location = &BTRFS_I(inode)->location; | ||
5800 | location->objectid = objectid; | ||
5801 | location->offset = 0; | ||
5802 | location->type = BTRFS_INODE_ITEM_KEY; | ||
5803 | |||
5804 | ret = btrfs_insert_inode_locked(inode); | ||
5805 | if (ret < 0) | ||
5806 | goto fail; | ||
5807 | |||
5665 | path->leave_spinning = 1; | 5808 | path->leave_spinning = 1; |
5666 | ret = btrfs_insert_empty_items(trans, root, path, key, sizes, nitems); | 5809 | ret = btrfs_insert_empty_items(trans, root, path, key, sizes, nitems); |
5667 | if (ret != 0) | 5810 | if (ret != 0) |
5668 | goto fail; | 5811 | goto fail_unlock; |
5669 | 5812 | ||
5670 | inode_init_owner(inode, dir, mode); | 5813 | inode_init_owner(inode, dir, mode); |
5671 | inode_set_bytes(inode, 0); | 5814 | inode_set_bytes(inode, 0); |
@@ -5688,11 +5831,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, | |||
5688 | btrfs_mark_buffer_dirty(path->nodes[0]); | 5831 | btrfs_mark_buffer_dirty(path->nodes[0]); |
5689 | btrfs_free_path(path); | 5832 | btrfs_free_path(path); |
5690 | 5833 | ||
5691 | location = &BTRFS_I(inode)->location; | ||
5692 | location->objectid = objectid; | ||
5693 | location->offset = 0; | ||
5694 | btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY); | ||
5695 | |||
5696 | btrfs_inherit_iflags(inode, dir); | 5834 | btrfs_inherit_iflags(inode, dir); |
5697 | 5835 | ||
5698 | if (S_ISREG(mode)) { | 5836 | if (S_ISREG(mode)) { |
@@ -5703,7 +5841,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, | |||
5703 | BTRFS_INODE_NODATASUM; | 5841 | BTRFS_INODE_NODATASUM; |
5704 | } | 5842 | } |
5705 | 5843 | ||
5706 | btrfs_insert_inode_hash(inode); | ||
5707 | inode_tree_add(inode); | 5844 | inode_tree_add(inode); |
5708 | 5845 | ||
5709 | trace_btrfs_inode_new(inode); | 5846 | trace_btrfs_inode_new(inode); |
@@ -5718,6 +5855,9 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, | |||
5718 | btrfs_ino(inode), root->root_key.objectid, ret); | 5855 | btrfs_ino(inode), root->root_key.objectid, ret); |
5719 | 5856 | ||
5720 | return inode; | 5857 | return inode; |
5858 | |||
5859 | fail_unlock: | ||
5860 | unlock_new_inode(inode); | ||
5721 | fail: | 5861 | fail: |
5722 | if (dir && name) | 5862 | if (dir && name) |
5723 | BTRFS_I(dir)->index_cnt--; | 5863 | BTRFS_I(dir)->index_cnt--; |
@@ -5751,7 +5891,7 @@ int btrfs_add_link(struct btrfs_trans_handle *trans, | |||
5751 | memcpy(&key, &BTRFS_I(inode)->root->root_key, sizeof(key)); | 5891 | memcpy(&key, &BTRFS_I(inode)->root->root_key, sizeof(key)); |
5752 | } else { | 5892 | } else { |
5753 | key.objectid = ino; | 5893 | key.objectid = ino; |
5754 | btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); | 5894 | key.type = BTRFS_INODE_ITEM_KEY; |
5755 | key.offset = 0; | 5895 | key.offset = 0; |
5756 | } | 5896 | } |
5757 | 5897 | ||
@@ -5852,28 +5992,28 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, | |||
5852 | goto out_unlock; | 5992 | goto out_unlock; |
5853 | } | 5993 | } |
5854 | 5994 | ||
5855 | err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name); | ||
5856 | if (err) { | ||
5857 | drop_inode = 1; | ||
5858 | goto out_unlock; | ||
5859 | } | ||
5860 | |||
5861 | /* | 5995 | /* |
5862 | * If the active LSM wants to access the inode during | 5996 | * If the active LSM wants to access the inode during |
5863 | * d_instantiate it needs these. Smack checks to see | 5997 | * d_instantiate it needs these. Smack checks to see |
5864 | * if the filesystem supports xattrs by looking at the | 5998 | * if the filesystem supports xattrs by looking at the |
5865 | * ops vector. | 5999 | * ops vector. |
5866 | */ | 6000 | */ |
5867 | |||
5868 | inode->i_op = &btrfs_special_inode_operations; | 6001 | inode->i_op = &btrfs_special_inode_operations; |
5869 | err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); | 6002 | init_special_inode(inode, inode->i_mode, rdev); |
6003 | |||
6004 | err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name); | ||
5870 | if (err) | 6005 | if (err) |
5871 | drop_inode = 1; | 6006 | goto out_unlock_inode; |
5872 | else { | 6007 | |
5873 | init_special_inode(inode, inode->i_mode, rdev); | 6008 | err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); |
6009 | if (err) { | ||
6010 | goto out_unlock_inode; | ||
6011 | } else { | ||
5874 | btrfs_update_inode(trans, root, inode); | 6012 | btrfs_update_inode(trans, root, inode); |
6013 | unlock_new_inode(inode); | ||
5875 | d_instantiate(dentry, inode); | 6014 | d_instantiate(dentry, inode); |
5876 | } | 6015 | } |
6016 | |||
5877 | out_unlock: | 6017 | out_unlock: |
5878 | btrfs_end_transaction(trans, root); | 6018 | btrfs_end_transaction(trans, root); |
5879 | btrfs_balance_delayed_items(root); | 6019 | btrfs_balance_delayed_items(root); |
@@ -5883,6 +6023,12 @@ out_unlock: | |||
5883 | iput(inode); | 6023 | iput(inode); |
5884 | } | 6024 | } |
5885 | return err; | 6025 | return err; |
6026 | |||
6027 | out_unlock_inode: | ||
6028 | drop_inode = 1; | ||
6029 | unlock_new_inode(inode); | ||
6030 | goto out_unlock; | ||
6031 | |||
5886 | } | 6032 | } |
5887 | 6033 | ||
5888 | static int btrfs_create(struct inode *dir, struct dentry *dentry, | 6034 | static int btrfs_create(struct inode *dir, struct dentry *dentry, |
@@ -5917,15 +6063,6 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, | |||
5917 | goto out_unlock; | 6063 | goto out_unlock; |
5918 | } | 6064 | } |
5919 | drop_inode_on_err = 1; | 6065 | drop_inode_on_err = 1; |
5920 | |||
5921 | err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name); | ||
5922 | if (err) | ||
5923 | goto out_unlock; | ||
5924 | |||
5925 | err = btrfs_update_inode(trans, root, inode); | ||
5926 | if (err) | ||
5927 | goto out_unlock; | ||
5928 | |||
5929 | /* | 6066 | /* |
5930 | * If the active LSM wants to access the inode during | 6067 | * If the active LSM wants to access the inode during |
5931 | * d_instantiate it needs these. Smack checks to see | 6068 | * d_instantiate it needs these. Smack checks to see |
@@ -5934,14 +6071,23 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, | |||
5934 | */ | 6071 | */ |
5935 | inode->i_fop = &btrfs_file_operations; | 6072 | inode->i_fop = &btrfs_file_operations; |
5936 | inode->i_op = &btrfs_file_inode_operations; | 6073 | inode->i_op = &btrfs_file_inode_operations; |
6074 | inode->i_mapping->a_ops = &btrfs_aops; | ||
6075 | inode->i_mapping->backing_dev_info = &root->fs_info->bdi; | ||
6076 | |||
6077 | err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name); | ||
6078 | if (err) | ||
6079 | goto out_unlock_inode; | ||
6080 | |||
6081 | err = btrfs_update_inode(trans, root, inode); | ||
6082 | if (err) | ||
6083 | goto out_unlock_inode; | ||
5937 | 6084 | ||
5938 | err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); | 6085 | err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); |
5939 | if (err) | 6086 | if (err) |
5940 | goto out_unlock; | 6087 | goto out_unlock_inode; |
5941 | 6088 | ||
5942 | inode->i_mapping->a_ops = &btrfs_aops; | ||
5943 | inode->i_mapping->backing_dev_info = &root->fs_info->bdi; | ||
5944 | BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; | 6089 | BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; |
6090 | unlock_new_inode(inode); | ||
5945 | d_instantiate(dentry, inode); | 6091 | d_instantiate(dentry, inode); |
5946 | 6092 | ||
5947 | out_unlock: | 6093 | out_unlock: |
@@ -5953,6 +6099,11 @@ out_unlock: | |||
5953 | btrfs_balance_delayed_items(root); | 6099 | btrfs_balance_delayed_items(root); |
5954 | btrfs_btree_balance_dirty(root); | 6100 | btrfs_btree_balance_dirty(root); |
5955 | return err; | 6101 | return err; |
6102 | |||
6103 | out_unlock_inode: | ||
6104 | unlock_new_inode(inode); | ||
6105 | goto out_unlock; | ||
6106 | |||
5956 | } | 6107 | } |
5957 | 6108 | ||
5958 | static int btrfs_link(struct dentry *old_dentry, struct inode *dir, | 6109 | static int btrfs_link(struct dentry *old_dentry, struct inode *dir, |
@@ -6060,25 +6211,30 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) | |||
6060 | } | 6211 | } |
6061 | 6212 | ||
6062 | drop_on_err = 1; | 6213 | drop_on_err = 1; |
6214 | /* these must be set before we unlock the inode */ | ||
6215 | inode->i_op = &btrfs_dir_inode_operations; | ||
6216 | inode->i_fop = &btrfs_dir_file_operations; | ||
6063 | 6217 | ||
6064 | err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name); | 6218 | err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name); |
6065 | if (err) | 6219 | if (err) |
6066 | goto out_fail; | 6220 | goto out_fail_inode; |
6067 | |||
6068 | inode->i_op = &btrfs_dir_inode_operations; | ||
6069 | inode->i_fop = &btrfs_dir_file_operations; | ||
6070 | 6221 | ||
6071 | btrfs_i_size_write(inode, 0); | 6222 | btrfs_i_size_write(inode, 0); |
6072 | err = btrfs_update_inode(trans, root, inode); | 6223 | err = btrfs_update_inode(trans, root, inode); |
6073 | if (err) | 6224 | if (err) |
6074 | goto out_fail; | 6225 | goto out_fail_inode; |
6075 | 6226 | ||
6076 | err = btrfs_add_link(trans, dir, inode, dentry->d_name.name, | 6227 | err = btrfs_add_link(trans, dir, inode, dentry->d_name.name, |
6077 | dentry->d_name.len, 0, index); | 6228 | dentry->d_name.len, 0, index); |
6078 | if (err) | 6229 | if (err) |
6079 | goto out_fail; | 6230 | goto out_fail_inode; |
6080 | 6231 | ||
6081 | d_instantiate(dentry, inode); | 6232 | d_instantiate(dentry, inode); |
6233 | /* | ||
6234 | * mkdir is special. We're unlocking after we call d_instantiate | ||
6235 | * to avoid a race with nfsd calling d_instantiate. | ||
6236 | */ | ||
6237 | unlock_new_inode(inode); | ||
6082 | drop_on_err = 0; | 6238 | drop_on_err = 0; |
6083 | 6239 | ||
6084 | out_fail: | 6240 | out_fail: |
@@ -6088,23 +6244,66 @@ out_fail: | |||
6088 | btrfs_balance_delayed_items(root); | 6244 | btrfs_balance_delayed_items(root); |
6089 | btrfs_btree_balance_dirty(root); | 6245 | btrfs_btree_balance_dirty(root); |
6090 | return err; | 6246 | return err; |
6247 | |||
6248 | out_fail_inode: | ||
6249 | unlock_new_inode(inode); | ||
6250 | goto out_fail; | ||
6251 | } | ||
6252 | |||
6253 | /* Find the next extent map after a given extent map; the caller must hold the tree lock */ | ||
6254 | static struct extent_map *next_extent_map(struct extent_map *em) | ||
6255 | { | ||
6256 | struct rb_node *next; | ||
6257 | |||
6258 | next = rb_next(&em->rb_node); | ||
6259 | if (!next) | ||
6260 | return NULL; | ||
6261 | return container_of(next, struct extent_map, rb_node); | ||
6262 | } | ||
6263 | |||
6264 | static struct extent_map *prev_extent_map(struct extent_map *em) | ||
6265 | { | ||
6266 | struct rb_node *prev; | ||
6267 | |||
6268 | prev = rb_prev(&em->rb_node); | ||
6269 | if (!prev) | ||
6270 | return NULL; | ||
6271 | return container_of(prev, struct extent_map, rb_node); | ||
6091 | } | 6272 | } |
6092 | 6273 | ||
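next_extent_map()/prev_extent_map() just step along the rb-tree and recover the embedding struct with container_of(). A self-contained model of that recovery using only stock C (the struct names are stand-ins for rb_node/extent_map, not the kernel headers):

    #include <stdio.h>
    #include <stddef.h>

    #define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

    struct node { struct node *next; };          /* stand-in for rb_node */
    struct extent { unsigned long start; struct node n; };

    int main(void)
    {
        struct extent a = { .start = 0 }, b = { .start = 4096 };
        a.n.next = &b.n;

        struct node *raw = a.n.next;             /* what rb_next() would hand back */
        struct extent *em = container_of(raw, struct extent, n);
        printf("neighbour starts at %lu\n", em->start);   /* prints 4096 */
        return 0;
    }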
6093 | /* helper for btrfs_get_extent. Given an existing extent in the tree, | 6274 | /* helper for btrfs_get_extent. Given an existing extent in the tree, |
6275 | * the existing extent is the nearest extent to map_start, | ||
6094 | * and an extent that you want to insert, deal with overlap and insert | 6276 | * and an extent that you want to insert, deal with overlap and insert |
6095 | * the new extent into the tree. | 6277 | * the best-fitting new extent into the tree. |
6096 | */ | 6278 | */ |
6097 | static int merge_extent_mapping(struct extent_map_tree *em_tree, | 6279 | static int merge_extent_mapping(struct extent_map_tree *em_tree, |
6098 | struct extent_map *existing, | 6280 | struct extent_map *existing, |
6099 | struct extent_map *em, | 6281 | struct extent_map *em, |
6100 | u64 map_start, u64 map_len) | 6282 | u64 map_start) |
6101 | { | 6283 | { |
6284 | struct extent_map *prev; | ||
6285 | struct extent_map *next; | ||
6286 | u64 start; | ||
6287 | u64 end; | ||
6102 | u64 start_diff; | 6288 | u64 start_diff; |
6103 | 6289 | ||
6104 | BUG_ON(map_start < em->start || map_start >= extent_map_end(em)); | 6290 | BUG_ON(map_start < em->start || map_start >= extent_map_end(em)); |
6105 | start_diff = map_start - em->start; | 6291 | |
6106 | em->start = map_start; | 6292 | if (existing->start > map_start) { |
6107 | em->len = map_len; | 6293 | next = existing; |
6294 | prev = prev_extent_map(next); | ||
6295 | } else { | ||
6296 | prev = existing; | ||
6297 | next = next_extent_map(prev); | ||
6298 | } | ||
6299 | |||
6300 | start = prev ? extent_map_end(prev) : em->start; | ||
6301 | start = max_t(u64, start, em->start); | ||
6302 | end = next ? next->start : extent_map_end(em); | ||
6303 | end = min_t(u64, end, extent_map_end(em)); | ||
6304 | start_diff = start - em->start; | ||
6305 | em->start = start; | ||
6306 | em->len = end - start; | ||
6108 | if (em->block_start < EXTENT_MAP_LAST_BYTE && | 6307 | if (em->block_start < EXTENT_MAP_LAST_BYTE && |
6109 | !test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) { | 6308 | !test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) { |
6110 | em->block_start += start_diff; | 6309 | em->block_start += start_diff; |
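The rewritten merge_extent_mapping() clamps the incoming map to the gap [end(prev), start(next)) around map_start instead of blindly taking [map_start, map_start + map_len). A small sketch of just that clamp, with made-up extent positions:

    #include <stdio.h>

    struct em { unsigned long long start, len; };
    static unsigned long long em_end(const struct em *e) { return e->start + e->len; }
    #define MAX(a, b) ((a) > (b) ? (a) : (b))
    #define MIN(a, b) ((a) < (b) ? (a) : (b))

    int main(void)
    {
        struct em prev = { 0, 4096 };        /* existing mapping, ends at 4096 */
        struct em next = { 16384, 4096 };    /* next existing mapping */
        struct em em   = { 0, 32768 };       /* candidate insert, overlaps both */

        unsigned long long start = MAX(em_end(&prev), em.start);  /* 4096  */
        unsigned long long end   = MIN(next.start, em_end(&em));  /* 16384 */

        em.len   = end - start;
        em.start = start;
        printf("clamped to [%llu, %llu)\n", em.start, em.start + em.len);
        return 0;
    }

block_start is then advanced by the same start_diff, as the unchanged lines below the clamp show.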
@@ -6232,7 +6431,7 @@ again: | |||
6232 | struct btrfs_file_extent_item); | 6431 | struct btrfs_file_extent_item); |
6233 | /* are we inside the extent that was found? */ | 6432 | /* are we inside the extent that was found? */ |
6234 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); | 6433 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); |
6235 | found_type = btrfs_key_type(&found_key); | 6434 | found_type = found_key.type; |
6236 | if (found_key.objectid != objectid || | 6435 | if (found_key.objectid != objectid || |
6237 | found_type != BTRFS_EXTENT_DATA_KEY) { | 6436 | found_type != BTRFS_EXTENT_DATA_KEY) { |
6238 | /* | 6437 | /* |
@@ -6275,6 +6474,8 @@ next: | |||
6275 | goto not_found; | 6474 | goto not_found; |
6276 | if (start + len <= found_key.offset) | 6475 | if (start + len <= found_key.offset) |
6277 | goto not_found; | 6476 | goto not_found; |
6477 | if (start > found_key.offset) | ||
6478 | goto next; | ||
6278 | em->start = start; | 6479 | em->start = start; |
6279 | em->orig_start = start; | 6480 | em->orig_start = start; |
6280 | em->len = found_key.offset - start; | 6481 | em->len = found_key.offset - start; |
@@ -6379,26 +6580,21 @@ insert: | |||
6379 | 6580 | ||
6380 | ret = 0; | 6581 | ret = 0; |
6381 | 6582 | ||
6382 | existing = lookup_extent_mapping(em_tree, start, len); | 6583 | existing = search_extent_mapping(em_tree, start, len); |
6383 | if (existing && (existing->start > start || | 6584 | /* |
6384 | existing->start + existing->len <= start)) { | 6585 | * existing will always be non-NULL, since there must be |
6586 | * an extent causing the -EEXIST. | ||
6587 | */ | ||
6588 | if (start >= extent_map_end(existing) || | ||
6589 | start <= existing->start) { | ||
6590 | /* | ||
6591 | * The existing extent map is the one nearest to | ||
6592 | * the [start, start + len) range that overlaps it | ||
6593 | */ | ||
6594 | err = merge_extent_mapping(em_tree, existing, | ||
6595 | em, start); | ||
6385 | free_extent_map(existing); | 6596 | free_extent_map(existing); |
6386 | existing = NULL; | 6597 | if (err) { |
6387 | } | ||
6388 | if (!existing) { | ||
6389 | existing = lookup_extent_mapping(em_tree, em->start, | ||
6390 | em->len); | ||
6391 | if (existing) { | ||
6392 | err = merge_extent_mapping(em_tree, existing, | ||
6393 | em, start, | ||
6394 | root->sectorsize); | ||
6395 | free_extent_map(existing); | ||
6396 | if (err) { | ||
6397 | free_extent_map(em); | ||
6398 | em = NULL; | ||
6399 | } | ||
6400 | } else { | ||
6401 | err = -EIO; | ||
6402 | free_extent_map(em); | 6598 | free_extent_map(em); |
6403 | em = NULL; | 6599 | em = NULL; |
6404 | } | 6600 | } |
@@ -7010,8 +7206,10 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, | |||
7010 | block_start, len, | 7206 | block_start, len, |
7011 | orig_block_len, | 7207 | orig_block_len, |
7012 | ram_bytes, type); | 7208 | ram_bytes, type); |
7013 | if (IS_ERR(em)) | 7209 | if (IS_ERR(em)) { |
7210 | ret = PTR_ERR(em); | ||
7014 | goto unlock_err; | 7211 | goto unlock_err; |
7212 | } | ||
7015 | } | 7213 | } |
7016 | 7214 | ||
7017 | ret = btrfs_add_ordered_extent_dio(inode, start, | 7215 | ret = btrfs_add_ordered_extent_dio(inode, start, |
@@ -7086,45 +7284,277 @@ unlock_err: | |||
7086 | return ret; | 7284 | return ret; |
7087 | } | 7285 | } |
7088 | 7286 | ||
7089 | static void btrfs_endio_direct_read(struct bio *bio, int err) | 7287 | static inline int submit_dio_repair_bio(struct inode *inode, struct bio *bio, |
7288 | int rw, int mirror_num) | ||
7090 | { | 7289 | { |
7091 | struct btrfs_dio_private *dip = bio->bi_private; | ||
7092 | struct bio_vec *bvec; | ||
7093 | struct inode *inode = dip->inode; | ||
7094 | struct btrfs_root *root = BTRFS_I(inode)->root; | 7290 | struct btrfs_root *root = BTRFS_I(inode)->root; |
7095 | struct bio *dio_bio; | 7291 | int ret; |
7096 | u32 *csums = (u32 *)dip->csum; | 7292 | |
7293 | BUG_ON(rw & REQ_WRITE); | ||
7294 | |||
7295 | bio_get(bio); | ||
7296 | |||
7297 | ret = btrfs_bio_wq_end_io(root->fs_info, bio, | ||
7298 | BTRFS_WQ_ENDIO_DIO_REPAIR); | ||
7299 | if (ret) | ||
7300 | goto err; | ||
7301 | |||
7302 | ret = btrfs_map_bio(root, rw, bio, mirror_num, 0); | ||
7303 | err: | ||
7304 | bio_put(bio); | ||
7305 | return ret; | ||
7306 | } | ||
7307 | |||
7308 | static int btrfs_check_dio_repairable(struct inode *inode, | ||
7309 | struct bio *failed_bio, | ||
7310 | struct io_failure_record *failrec, | ||
7311 | int failed_mirror) | ||
7312 | { | ||
7313 | int num_copies; | ||
7314 | |||
7315 | num_copies = btrfs_num_copies(BTRFS_I(inode)->root->fs_info, | ||
7316 | failrec->logical, failrec->len); | ||
7317 | if (num_copies == 1) { | ||
7318 | /* | ||
7319 | * we only have a single copy of the data, so don't bother with | ||
7320 | * all the retry and error correction code that follows. no | ||
7321 | * matter what the error is, it is very likely to persist. | ||
7322 | */ | ||
7323 | pr_debug("Check DIO Repairable: cannot repair, num_copies=%d, next_mirror %d, failed_mirror %d\n", | ||
7324 | num_copies, failrec->this_mirror, failed_mirror); | ||
7325 | return 0; | ||
7326 | } | ||
7327 | |||
7328 | failrec->failed_mirror = failed_mirror; | ||
7329 | failrec->this_mirror++; | ||
7330 | if (failrec->this_mirror == failed_mirror) | ||
7331 | failrec->this_mirror++; | ||
7332 | |||
7333 | if (failrec->this_mirror > num_copies) { | ||
7334 | pr_debug("Check DIO Repairable: (fail) num_copies=%d, next_mirror %d, failed_mirror %d\n", | ||
7335 | num_copies, failrec->this_mirror, failed_mirror); | ||
7336 | return 0; | ||
7337 | } | ||
7338 | |||
7339 | return 1; | ||
7340 | } | ||
7341 | |||
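btrfs_check_dio_repairable() advances this_mirror, skips the mirror that just failed, and gives up once every copy has been tried. The selection rule in isolation, as a plain C sketch with illustrative values:

    #include <stdio.h>

    /* Returns the next mirror to try, or 0 when all copies are exhausted. */
    static int next_mirror(int this_mirror, int failed_mirror, int num_copies)
    {
        if (num_copies == 1)
            return 0;                     /* single copy: the error will persist */
        this_mirror++;
        if (this_mirror == failed_mirror) /* don't re-read the known-bad copy */
            this_mirror++;
        return this_mirror > num_copies ? 0 : this_mirror;
    }

    int main(void)
    {
        /* mirror 2 of 3 failed: retries use mirror 1, then 3, then stop */
        for (int m = 0; (m = next_mirror(m, 2, 3)) != 0; )
            printf("try mirror %d\n", m);
        return 0;
    }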
7342 | static int dio_read_error(struct inode *inode, struct bio *failed_bio, | ||
7343 | struct page *page, u64 start, u64 end, | ||
7344 | int failed_mirror, bio_end_io_t *repair_endio, | ||
7345 | void *repair_arg) | ||
7346 | { | ||
7347 | struct io_failure_record *failrec; | ||
7348 | struct bio *bio; | ||
7349 | int isector; | ||
7350 | int read_mode; | ||
7351 | int ret; | ||
7352 | |||
7353 | BUG_ON(failed_bio->bi_rw & REQ_WRITE); | ||
7354 | |||
7355 | ret = btrfs_get_io_failure_record(inode, start, end, &failrec); | ||
7356 | if (ret) | ||
7357 | return ret; | ||
7358 | |||
7359 | ret = btrfs_check_dio_repairable(inode, failed_bio, failrec, | ||
7360 | failed_mirror); | ||
7361 | if (!ret) { | ||
7362 | free_io_failure(inode, failrec); | ||
7363 | return -EIO; | ||
7364 | } | ||
7365 | |||
7366 | if (failed_bio->bi_vcnt > 1) | ||
7367 | read_mode = READ_SYNC | REQ_FAILFAST_DEV; | ||
7368 | else | ||
7369 | read_mode = READ_SYNC; | ||
7370 | |||
7371 | isector = start - btrfs_io_bio(failed_bio)->logical; | ||
7372 | isector >>= inode->i_sb->s_blocksize_bits; | ||
7373 | bio = btrfs_create_repair_bio(inode, failed_bio, failrec, page, | ||
7374 | 0, isector, repair_endio, repair_arg); | ||
7375 | if (!bio) { | ||
7376 | free_io_failure(inode, failrec); | ||
7377 | return -EIO; | ||
7378 | } | ||
7379 | |||
7380 | btrfs_debug(BTRFS_I(inode)->root->fs_info, | ||
7381 | "Repair DIO Read Error: submitting new dio read[%#x] to this_mirror=%d, in_validation=%d\n", | ||
7382 | read_mode, failrec->this_mirror, failrec->in_validation); | ||
7383 | |||
7384 | ret = submit_dio_repair_bio(inode, bio, read_mode, | ||
7385 | failrec->this_mirror); | ||
7386 | if (ret) { | ||
7387 | free_io_failure(inode, failrec); | ||
7388 | bio_put(bio); | ||
7389 | } | ||
7390 | |||
7391 | return ret; | ||
7392 | } | ||
7393 | |||
7394 | struct btrfs_retry_complete { | ||
7395 | struct completion done; | ||
7396 | struct inode *inode; | ||
7097 | u64 start; | 7397 | u64 start; |
7398 | int uptodate; | ||
7399 | }; | ||
7400 | |||
7401 | static void btrfs_retry_endio_nocsum(struct bio *bio, int err) | ||
7402 | { | ||
7403 | struct btrfs_retry_complete *done = bio->bi_private; | ||
7404 | struct bio_vec *bvec; | ||
7098 | int i; | 7405 | int i; |
7099 | 7406 | ||
7100 | start = dip->logical_offset; | 7407 | if (err) |
7408 | goto end; | ||
7409 | |||
7410 | done->uptodate = 1; | ||
7411 | bio_for_each_segment_all(bvec, bio, i) | ||
7412 | clean_io_failure(done->inode, done->start, bvec->bv_page, 0); | ||
7413 | end: | ||
7414 | complete(&done->done); | ||
7415 | bio_put(bio); | ||
7416 | } | ||
7417 | |||
7418 | static int __btrfs_correct_data_nocsum(struct inode *inode, | ||
7419 | struct btrfs_io_bio *io_bio) | ||
7420 | { | ||
7421 | struct bio_vec *bvec; | ||
7422 | struct btrfs_retry_complete done; | ||
7423 | u64 start; | ||
7424 | int i; | ||
7425 | int ret; | ||
7426 | |||
7427 | start = io_bio->logical; | ||
7428 | done.inode = inode; | ||
7429 | |||
7430 | bio_for_each_segment_all(bvec, &io_bio->bio, i) { | ||
7431 | try_again: | ||
7432 | done.uptodate = 0; | ||
7433 | done.start = start; | ||
7434 | init_completion(&done.done); | ||
7435 | |||
7436 | ret = dio_read_error(inode, &io_bio->bio, bvec->bv_page, start, | ||
7437 | start + bvec->bv_len - 1, | ||
7438 | io_bio->mirror_num, | ||
7439 | btrfs_retry_endio_nocsum, &done); | ||
7440 | if (ret) | ||
7441 | return ret; | ||
7442 | |||
7443 | wait_for_completion(&done.done); | ||
7444 | |||
7445 | if (!done.uptodate) { | ||
7446 | /* We might have another mirror, so try again */ | ||
7447 | goto try_again; | ||
7448 | } | ||
7449 | |||
7450 | start += bvec->bv_len; | ||
7451 | } | ||
7452 | |||
7453 | return 0; | ||
7454 | } | ||
7455 | |||
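__btrfs_correct_data_nocsum() re-submits each failing page synchronously: initialise an on-stack completion, fire the repair bio, wait, and loop back to try_again while the page is still not up to date. A single-threaded model of that control flow — the "submission" here is a stub that succeeds on the second attempt, whereas the kernel loop is bounded by dio_read_error() returning -EIO once the mirrors are exhausted:

    #include <stdio.h>
    #include <stdbool.h>

    struct retry { bool uptodate; int attempts; };

    /* Stub for dio_read_error() + wait_for_completion(): mirror 2 is good. */
    static void submit_and_wait(struct retry *done)
    {
        done->attempts++;
        done->uptodate = (done->attempts >= 2);
    }

    int main(void)
    {
        struct retry done = { 0 };
        do {
            done.uptodate = false;    /* reset per attempt, like done.uptodate = 0 */
            submit_and_wait(&done);
        } while (!done.uptodate);     /* "we might have another mirror, try again" */
        printf("page repaired after %d attempt(s)\n", done.attempts);
        return 0;
    }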
7456 | static void btrfs_retry_endio(struct bio *bio, int err) | ||
7457 | { | ||
7458 | struct btrfs_retry_complete *done = bio->bi_private; | ||
7459 | struct btrfs_io_bio *io_bio = btrfs_io_bio(bio); | ||
7460 | struct bio_vec *bvec; | ||
7461 | int uptodate; | ||
7462 | int ret; | ||
7463 | int i; | ||
7464 | |||
7465 | if (err) | ||
7466 | goto end; | ||
7467 | |||
7468 | uptodate = 1; | ||
7101 | bio_for_each_segment_all(bvec, bio, i) { | 7469 | bio_for_each_segment_all(bvec, bio, i) { |
7102 | if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) { | 7470 | ret = __readpage_endio_check(done->inode, io_bio, i, |
7103 | struct page *page = bvec->bv_page; | 7471 | bvec->bv_page, 0, |
7104 | char *kaddr; | 7472 | done->start, bvec->bv_len); |
7105 | u32 csum = ~(u32)0; | 7473 | if (!ret) |
7106 | unsigned long flags; | 7474 | clean_io_failure(done->inode, done->start, |
7107 | 7475 | bvec->bv_page, 0); | |
7108 | local_irq_save(flags); | 7476 | else |
7109 | kaddr = kmap_atomic(page); | 7477 | uptodate = 0; |
7110 | csum = btrfs_csum_data(kaddr + bvec->bv_offset, | 7478 | } |
7111 | csum, bvec->bv_len); | 7479 | |
7112 | btrfs_csum_final(csum, (char *)&csum); | 7480 | done->uptodate = uptodate; |
7113 | kunmap_atomic(kaddr); | 7481 | end: |
7114 | local_irq_restore(flags); | 7482 | complete(&done->done); |
7115 | 7483 | bio_put(bio); | |
7116 | flush_dcache_page(bvec->bv_page); | 7484 | } |
7117 | if (csum != csums[i]) { | 7485 | |
7118 | btrfs_err(root->fs_info, "csum failed ino %llu off %llu csum %u expected csum %u", | 7486 | static int __btrfs_subio_endio_read(struct inode *inode, |
7119 | btrfs_ino(inode), start, csum, | 7487 | struct btrfs_io_bio *io_bio, int err) |
7120 | csums[i]); | 7488 | { |
7121 | err = -EIO; | 7489 | struct bio_vec *bvec; |
7122 | } | 7490 | struct btrfs_retry_complete done; |
7491 | u64 start; | ||
7492 | u64 offset = 0; | ||
7493 | int i; | ||
7494 | int ret; | ||
7495 | |||
7496 | err = 0; | ||
7497 | start = io_bio->logical; | ||
7498 | done.inode = inode; | ||
7499 | |||
7500 | bio_for_each_segment_all(bvec, &io_bio->bio, i) { | ||
7501 | ret = __readpage_endio_check(inode, io_bio, i, bvec->bv_page, | ||
7502 | 0, start, bvec->bv_len); | ||
7503 | if (likely(!ret)) | ||
7504 | goto next; | ||
7505 | try_again: | ||
7506 | done.uptodate = 0; | ||
7507 | done.start = start; | ||
7508 | init_completion(&done.done); | ||
7509 | |||
7510 | ret = dio_read_error(inode, &io_bio->bio, bvec->bv_page, start, | ||
7511 | start + bvec->bv_len - 1, | ||
7512 | io_bio->mirror_num, | ||
7513 | btrfs_retry_endio, &done); | ||
7514 | if (ret) { | ||
7515 | err = ret; | ||
7516 | goto next; | ||
7123 | } | 7517 | } |
7124 | 7518 | ||
7519 | wait_for_completion(&done.done); | ||
7520 | |||
7521 | if (!done.uptodate) { | ||
7522 | /* We might have another mirror, so try again */ | ||
7523 | goto try_again; | ||
7524 | } | ||
7525 | next: | ||
7526 | offset += bvec->bv_len; | ||
7125 | start += bvec->bv_len; | 7527 | start += bvec->bv_len; |
7126 | } | 7528 | } |
7127 | 7529 | ||
7530 | return err; | ||
7531 | } | ||
7532 | |||
7533 | static int btrfs_subio_endio_read(struct inode *inode, | ||
7534 | struct btrfs_io_bio *io_bio, int err) | ||
7535 | { | ||
7536 | bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; | ||
7537 | |||
7538 | if (skip_csum) { | ||
7539 | if (unlikely(err)) | ||
7540 | return __btrfs_correct_data_nocsum(inode, io_bio); | ||
7541 | else | ||
7542 | return 0; | ||
7543 | } else { | ||
7544 | return __btrfs_subio_endio_read(inode, io_bio, err); | ||
7545 | } | ||
7546 | } | ||
7547 | |||
7548 | static void btrfs_endio_direct_read(struct bio *bio, int err) | ||
7549 | { | ||
7550 | struct btrfs_dio_private *dip = bio->bi_private; | ||
7551 | struct inode *inode = dip->inode; | ||
7552 | struct bio *dio_bio; | ||
7553 | struct btrfs_io_bio *io_bio = btrfs_io_bio(bio); | ||
7554 | |||
7555 | if (dip->flags & BTRFS_DIO_ORIG_BIO_SUBMITTED) | ||
7556 | err = btrfs_subio_endio_read(inode, io_bio, err); | ||
7557 | |||
7128 | unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset, | 7558 | unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset, |
7129 | dip->logical_offset + dip->bytes - 1); | 7559 | dip->logical_offset + dip->bytes - 1); |
7130 | dio_bio = dip->dio_bio; | 7560 | dio_bio = dip->dio_bio; |
@@ -7135,6 +7565,9 @@ static void btrfs_endio_direct_read(struct bio *bio, int err) | |||
7135 | if (err) | 7565 | if (err) |
7136 | clear_bit(BIO_UPTODATE, &dio_bio->bi_flags); | 7566 | clear_bit(BIO_UPTODATE, &dio_bio->bi_flags); |
7137 | dio_end_io(dio_bio, err); | 7567 | dio_end_io(dio_bio, err); |
7568 | |||
7569 | if (io_bio->end_io) | ||
7570 | io_bio->end_io(io_bio, err); | ||
7138 | bio_put(bio); | 7571 | bio_put(bio); |
7139 | } | 7572 | } |
7140 | 7573 | ||
@@ -7158,7 +7591,8 @@ again: | |||
7158 | if (!ret) | 7591 | if (!ret) |
7159 | goto out_test; | 7592 | goto out_test; |
7160 | 7593 | ||
7161 | btrfs_init_work(&ordered->work, finish_ordered_fn, NULL, NULL); | 7594 | btrfs_init_work(&ordered->work, btrfs_endio_write_helper, |
7595 | finish_ordered_fn, NULL, NULL); | ||
7162 | btrfs_queue_work(root->fs_info->endio_write_workers, | 7596 | btrfs_queue_work(root->fs_info->endio_write_workers, |
7163 | &ordered->work); | 7597 | &ordered->work); |
7164 | out_test: | 7598 | out_test: |
@@ -7199,12 +7633,17 @@ static void btrfs_end_dio_bio(struct bio *bio, int err) | |||
7199 | { | 7633 | { |
7200 | struct btrfs_dio_private *dip = bio->bi_private; | 7634 | struct btrfs_dio_private *dip = bio->bi_private; |
7201 | 7635 | ||
7636 | if (err) | ||
7637 | btrfs_warn(BTRFS_I(dip->inode)->root->fs_info, | ||
7638 | "direct IO failed ino %llu rw %lu sector %#Lx len %u err no %d", | ||
7639 | btrfs_ino(dip->inode), bio->bi_rw, | ||
7640 | (unsigned long long)bio->bi_iter.bi_sector, | ||
7641 | bio->bi_iter.bi_size, err); | ||
7642 | |||
7643 | if (dip->subio_endio) | ||
7644 | err = dip->subio_endio(dip->inode, btrfs_io_bio(bio), err); | ||
7645 | |||
7202 | if (err) { | 7646 | if (err) { |
7203 | btrfs_err(BTRFS_I(dip->inode)->root->fs_info, | ||
7204 | "direct IO failed ino %llu rw %lu sector %#Lx len %u err no %d", | ||
7205 | btrfs_ino(dip->inode), bio->bi_rw, | ||
7206 | (unsigned long long)bio->bi_iter.bi_sector, | ||
7207 | bio->bi_iter.bi_size, err); | ||
7208 | dip->errors = 1; | 7647 | dip->errors = 1; |
7209 | 7648 | ||
7210 | /* | 7649 | /* |
@@ -7235,6 +7674,38 @@ static struct bio *btrfs_dio_bio_alloc(struct block_device *bdev, | |||
7235 | return btrfs_bio_alloc(bdev, first_sector, nr_vecs, gfp_flags); | 7674 | return btrfs_bio_alloc(bdev, first_sector, nr_vecs, gfp_flags); |
7236 | } | 7675 | } |
7237 | 7676 | ||
7677 | static inline int btrfs_lookup_and_bind_dio_csum(struct btrfs_root *root, | ||
7678 | struct inode *inode, | ||
7679 | struct btrfs_dio_private *dip, | ||
7680 | struct bio *bio, | ||
7681 | u64 file_offset) | ||
7682 | { | ||
7683 | struct btrfs_io_bio *io_bio = btrfs_io_bio(bio); | ||
7684 | struct btrfs_io_bio *orig_io_bio = btrfs_io_bio(dip->orig_bio); | ||
7685 | int ret; | ||
7686 | |||
7687 | /* | ||
7688 | * We load all the csum data we need when we submit | ||
7689 | * the first bio to reduce the csum tree search and | ||
7690 | * contention. | ||
7691 | */ | ||
7692 | if (dip->logical_offset == file_offset) { | ||
7693 | ret = btrfs_lookup_bio_sums_dio(root, inode, dip->orig_bio, | ||
7694 | file_offset); | ||
7695 | if (ret) | ||
7696 | return ret; | ||
7697 | } | ||
7698 | |||
7699 | if (bio == dip->orig_bio) | ||
7700 | return 0; | ||
7701 | |||
7702 | file_offset -= dip->logical_offset; | ||
7703 | file_offset >>= inode->i_sb->s_blocksize_bits; | ||
7704 | io_bio->csum = (u8 *)(((u32 *)orig_io_bio->csum) + file_offset); | ||
7705 | |||
7706 | return 0; | ||
7707 | } | ||
7708 | |||
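btrfs_lookup_and_bind_dio_csum() loads the checksums for the whole original bio once, then points each split bio's csum at the right slot: the byte delta from the start of the DIO is converted to a block count and used as an index into the u32 csum array. The arithmetic by itself, assuming 4K blocks (values are made up for illustration):

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        const unsigned blocksize_bits = 12;       /* assume 4096-byte blocks */
        uint32_t csums[8] = { 0 };                /* csums for the whole orig bio */

        uint64_t logical_offset = 1 << 20;              /* DIO starts at 1 MiB */
        uint64_t file_offset    = (1 << 20) + 3 * 4096; /* this split bio's offset */

        uint64_t slot = (file_offset - logical_offset) >> blocksize_bits;
        uint32_t *csum = csums + slot;            /* io_bio->csum for the split */

        printf("split bio uses csum slot %llu\n", (unsigned long long)slot);
        return 0;
    }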
7238 | static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, | 7709 | static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, |
7239 | int rw, u64 file_offset, int skip_sum, | 7710 | int rw, u64 file_offset, int skip_sum, |
7240 | int async_submit) | 7711 | int async_submit) |
@@ -7250,7 +7721,8 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, | |||
7250 | bio_get(bio); | 7721 | bio_get(bio); |
7251 | 7722 | ||
7252 | if (!write) { | 7723 | if (!write) { |
7253 | ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); | 7724 | ret = btrfs_bio_wq_end_io(root->fs_info, bio, |
7725 | BTRFS_WQ_ENDIO_DATA); | ||
7254 | if (ret) | 7726 | if (ret) |
7255 | goto err; | 7727 | goto err; |
7256 | } | 7728 | } |
@@ -7273,13 +7745,12 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, | |||
7273 | ret = btrfs_csum_one_bio(root, inode, bio, file_offset, 1); | 7745 | ret = btrfs_csum_one_bio(root, inode, bio, file_offset, 1); |
7274 | if (ret) | 7746 | if (ret) |
7275 | goto err; | 7747 | goto err; |
7276 | } else if (!skip_sum) { | 7748 | } else { |
7277 | ret = btrfs_lookup_bio_sums_dio(root, inode, dip, bio, | 7749 | ret = btrfs_lookup_and_bind_dio_csum(root, inode, dip, bio, |
7278 | file_offset); | 7750 | file_offset); |
7279 | if (ret) | 7751 | if (ret) |
7280 | goto err; | 7752 | goto err; |
7281 | } | 7753 | } |
7282 | |||
7283 | map: | 7754 | map: |
7284 | ret = btrfs_map_bio(root, rw, bio, 0, async_submit); | 7755 | ret = btrfs_map_bio(root, rw, bio, 0, async_submit); |
7285 | err: | 7756 | err: |
@@ -7300,19 +7771,18 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip, | |||
7300 | u64 submit_len = 0; | 7771 | u64 submit_len = 0; |
7301 | u64 map_length; | 7772 | u64 map_length; |
7302 | int nr_pages = 0; | 7773 | int nr_pages = 0; |
7303 | int ret = 0; | 7774 | int ret; |
7304 | int async_submit = 0; | 7775 | int async_submit = 0; |
7305 | 7776 | ||
7306 | map_length = orig_bio->bi_iter.bi_size; | 7777 | map_length = orig_bio->bi_iter.bi_size; |
7307 | ret = btrfs_map_block(root->fs_info, rw, start_sector << 9, | 7778 | ret = btrfs_map_block(root->fs_info, rw, start_sector << 9, |
7308 | &map_length, NULL, 0); | 7779 | &map_length, NULL, 0); |
7309 | if (ret) { | 7780 | if (ret) |
7310 | bio_put(orig_bio); | ||
7311 | return -EIO; | 7781 | return -EIO; |
7312 | } | ||
7313 | 7782 | ||
7314 | if (map_length >= orig_bio->bi_iter.bi_size) { | 7783 | if (map_length >= orig_bio->bi_iter.bi_size) { |
7315 | bio = orig_bio; | 7784 | bio = orig_bio; |
7785 | dip->flags |= BTRFS_DIO_ORIG_BIO_SUBMITTED; | ||
7316 | goto submit; | 7786 | goto submit; |
7317 | } | 7787 | } |
7318 | 7788 | ||
@@ -7326,14 +7796,16 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip, | |||
7326 | bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, start_sector, GFP_NOFS); | 7796 | bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, start_sector, GFP_NOFS); |
7327 | if (!bio) | 7797 | if (!bio) |
7328 | return -ENOMEM; | 7798 | return -ENOMEM; |
7799 | |||
7329 | bio->bi_private = dip; | 7800 | bio->bi_private = dip; |
7330 | bio->bi_end_io = btrfs_end_dio_bio; | 7801 | bio->bi_end_io = btrfs_end_dio_bio; |
7802 | btrfs_io_bio(bio)->logical = file_offset; | ||
7331 | atomic_inc(&dip->pending_bios); | 7803 | atomic_inc(&dip->pending_bios); |
7332 | 7804 | ||
7333 | while (bvec <= (orig_bio->bi_io_vec + orig_bio->bi_vcnt - 1)) { | 7805 | while (bvec <= (orig_bio->bi_io_vec + orig_bio->bi_vcnt - 1)) { |
7334 | if (unlikely(map_length < submit_len + bvec->bv_len || | 7806 | if (map_length < submit_len + bvec->bv_len || |
7335 | bio_add_page(bio, bvec->bv_page, bvec->bv_len, | 7807 | bio_add_page(bio, bvec->bv_page, bvec->bv_len, |
7336 | bvec->bv_offset) < bvec->bv_len)) { | 7808 | bvec->bv_offset) < bvec->bv_len) { |
7337 | /* | 7809 | /* |
7338 | * inc the count before we submit the bio so | 7810 | * inc the count before we submit the bio so |
7339 | * we know the end IO handler won't happen before | 7811 | * we know the end IO handler won't happen before |
@@ -7362,6 +7834,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip, | |||
7362 | goto out_err; | 7834 | goto out_err; |
7363 | bio->bi_private = dip; | 7835 | bio->bi_private = dip; |
7364 | bio->bi_end_io = btrfs_end_dio_bio; | 7836 | bio->bi_end_io = btrfs_end_dio_bio; |
7837 | btrfs_io_bio(bio)->logical = file_offset; | ||
7365 | 7838 | ||
7366 | map_length = orig_bio->bi_iter.bi_size; | 7839 | map_length = orig_bio->bi_iter.bi_size; |
7367 | ret = btrfs_map_block(root->fs_info, rw, | 7840 | ret = btrfs_map_block(root->fs_info, rw, |
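When the original bio has to be split, dip->pending_bios is bumped before each submit so the shared end-io handler cannot see the count reach zero while later pieces are still being built. The same publish-before-submit pattern with C11 atomics — a userspace stand-in, not the kernel atomic type:

    #include <stdatomic.h>
    #include <stdio.h>

    static atomic_int pending = 1;   /* 1 for the request as a whole, like the dip */

    static void end_io(void)         /* runs once per completed piece */
    {
        if (atomic_fetch_sub(&pending, 1) == 1)
            printf("last piece done: complete the whole DIO\n");
    }

    int main(void)
    {
        for (int piece = 0; piece < 3; piece++) {
            /* inc the count BEFORE submitting, so the end_io of an earlier
             * piece cannot drop the count to zero prematurely */
            atomic_fetch_add(&pending, 1);
            end_io();                /* pretend the piece completed instantly */
        }
        end_io();                    /* drop the initial reference */
        return 0;
    }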
@@ -7405,11 +7878,10 @@ static void btrfs_submit_direct(int rw, struct bio *dio_bio, | |||
7405 | struct btrfs_root *root = BTRFS_I(inode)->root; | 7878 | struct btrfs_root *root = BTRFS_I(inode)->root; |
7406 | struct btrfs_dio_private *dip; | 7879 | struct btrfs_dio_private *dip; |
7407 | struct bio *io_bio; | 7880 | struct bio *io_bio; |
7881 | struct btrfs_io_bio *btrfs_bio; | ||
7408 | int skip_sum; | 7882 | int skip_sum; |
7409 | int sum_len; | ||
7410 | int write = rw & REQ_WRITE; | 7883 | int write = rw & REQ_WRITE; |
7411 | int ret = 0; | 7884 | int ret = 0; |
7412 | u16 csum_size; | ||
7413 | 7885 | ||
7414 | skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; | 7886 | skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; |
7415 | 7887 | ||
@@ -7419,16 +7891,7 @@ static void btrfs_submit_direct(int rw, struct bio *dio_bio, | |||
7419 | goto free_ordered; | 7891 | goto free_ordered; |
7420 | } | 7892 | } |
7421 | 7893 | ||
7422 | if (!skip_sum && !write) { | 7894 | dip = kzalloc(sizeof(*dip), GFP_NOFS); |
7423 | csum_size = btrfs_super_csum_size(root->fs_info->super_copy); | ||
7424 | sum_len = dio_bio->bi_iter.bi_size >> | ||
7425 | inode->i_sb->s_blocksize_bits; | ||
7426 | sum_len *= csum_size; | ||
7427 | } else { | ||
7428 | sum_len = 0; | ||
7429 | } | ||
7430 | |||
7431 | dip = kmalloc(sizeof(*dip) + sum_len, GFP_NOFS); | ||
7432 | if (!dip) { | 7895 | if (!dip) { |
7433 | ret = -ENOMEM; | 7896 | ret = -ENOMEM; |
7434 | goto free_io_bio; | 7897 | goto free_io_bio; |
@@ -7440,20 +7903,25 @@ static void btrfs_submit_direct(int rw, struct bio *dio_bio, | |||
7440 | dip->bytes = dio_bio->bi_iter.bi_size; | 7903 | dip->bytes = dio_bio->bi_iter.bi_size; |
7441 | dip->disk_bytenr = (u64)dio_bio->bi_iter.bi_sector << 9; | 7904 | dip->disk_bytenr = (u64)dio_bio->bi_iter.bi_sector << 9; |
7442 | io_bio->bi_private = dip; | 7905 | io_bio->bi_private = dip; |
7443 | dip->errors = 0; | ||
7444 | dip->orig_bio = io_bio; | 7906 | dip->orig_bio = io_bio; |
7445 | dip->dio_bio = dio_bio; | 7907 | dip->dio_bio = dio_bio; |
7446 | atomic_set(&dip->pending_bios, 0); | 7908 | atomic_set(&dip->pending_bios, 0); |
7909 | btrfs_bio = btrfs_io_bio(io_bio); | ||
7910 | btrfs_bio->logical = file_offset; | ||
7447 | 7911 | ||
7448 | if (write) | 7912 | if (write) { |
7449 | io_bio->bi_end_io = btrfs_endio_direct_write; | 7913 | io_bio->bi_end_io = btrfs_endio_direct_write; |
7450 | else | 7914 | } else { |
7451 | io_bio->bi_end_io = btrfs_endio_direct_read; | 7915 | io_bio->bi_end_io = btrfs_endio_direct_read; |
7916 | dip->subio_endio = btrfs_subio_endio_read; | ||
7917 | } | ||
7452 | 7918 | ||
7453 | ret = btrfs_submit_direct_hook(rw, dip, skip_sum); | 7919 | ret = btrfs_submit_direct_hook(rw, dip, skip_sum); |
7454 | if (!ret) | 7920 | if (!ret) |
7455 | return; | 7921 | return; |
7456 | 7922 | ||
7923 | if (btrfs_bio->end_io) | ||
7924 | btrfs_bio->end_io(btrfs_bio, ret); | ||
7457 | free_io_bio: | 7925 | free_io_bio: |
7458 | bio_put(io_bio); | 7926 | bio_put(io_bio); |
7459 | 7927 | ||
@@ -7534,7 +8002,8 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, | |||
7534 | count = iov_iter_count(iter); | 8002 | count = iov_iter_count(iter); |
7535 | if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, | 8003 | if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, |
7536 | &BTRFS_I(inode)->runtime_flags)) | 8004 | &BTRFS_I(inode)->runtime_flags)) |
7537 | filemap_fdatawrite_range(inode->i_mapping, offset, count); | 8005 | filemap_fdatawrite_range(inode->i_mapping, offset, |
8006 | offset + count - 1); | ||
7538 | 8007 | ||
7539 | if (rw & WRITE) { | 8008 | if (rw & WRITE) { |
7540 | /* | 8009 | /* |
@@ -7549,8 +8018,8 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, | |||
7549 | ret = btrfs_delalloc_reserve_space(inode, count); | 8018 | ret = btrfs_delalloc_reserve_space(inode, count); |
7550 | if (ret) | 8019 | if (ret) |
7551 | goto out; | 8020 | goto out; |
7552 | } else if (unlikely(test_bit(BTRFS_INODE_READDIO_NEED_LOCK, | 8021 | } else if (test_bit(BTRFS_INODE_READDIO_NEED_LOCK, |
7553 | &BTRFS_I(inode)->runtime_flags))) { | 8022 | &BTRFS_I(inode)->runtime_flags)) { |
7554 | inode_dio_done(inode); | 8023 | inode_dio_done(inode); |
7555 | flags = DIO_LOCKING | DIO_SKIP_HOLES; | 8024 | flags = DIO_LOCKING | DIO_SKIP_HOLES; |
7556 | wakeup = false; | 8025 | wakeup = false; |
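The filemap_fdatawrite_range() fix above matters because the function's last argument is an inclusive end byte (lend), not a length — passing count wrote back the wrong window entirely. The off-by-one in isolation, with illustrative numbers:

    #include <stdio.h>

    int main(void)
    {
        unsigned long long offset = 8192, count = 4096;

        unsigned long long old_lend = count;              /* old call: end = 4096  */
        unsigned long long new_lend = offset + count - 1; /* fixed:    end = 12287 */

        printf("old end byte: %llu, fixed end byte: %llu\n", old_lend, new_lend);
        return 0;
    }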
@@ -8041,6 +8510,7 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, | |||
8041 | 8510 | ||
8042 | set_nlink(inode, 1); | 8511 | set_nlink(inode, 1); |
8043 | btrfs_i_size_write(inode, 0); | 8512 | btrfs_i_size_write(inode, 0); |
8513 | unlock_new_inode(inode); | ||
8044 | 8514 | ||
8045 | err = btrfs_subvol_inherit_props(trans, new_root, parent_root); | 8515 | err = btrfs_subvol_inherit_props(trans, new_root, parent_root); |
8046 | if (err) | 8516 | if (err) |
@@ -8069,6 +8539,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) | |||
8069 | ei->last_sub_trans = 0; | 8539 | ei->last_sub_trans = 0; |
8070 | ei->logged_trans = 0; | 8540 | ei->logged_trans = 0; |
8071 | ei->delalloc_bytes = 0; | 8541 | ei->delalloc_bytes = 0; |
8542 | ei->defrag_bytes = 0; | ||
8072 | ei->disk_i_size = 0; | 8543 | ei->disk_i_size = 0; |
8073 | ei->flags = 0; | 8544 | ei->flags = 0; |
8074 | ei->csum_bytes = 0; | 8545 | ei->csum_bytes = 0; |
@@ -8127,6 +8598,7 @@ void btrfs_destroy_inode(struct inode *inode) | |||
8127 | WARN_ON(BTRFS_I(inode)->reserved_extents); | 8598 | WARN_ON(BTRFS_I(inode)->reserved_extents); |
8128 | WARN_ON(BTRFS_I(inode)->delalloc_bytes); | 8599 | WARN_ON(BTRFS_I(inode)->delalloc_bytes); |
8129 | WARN_ON(BTRFS_I(inode)->csum_bytes); | 8600 | WARN_ON(BTRFS_I(inode)->csum_bytes); |
8601 | WARN_ON(BTRFS_I(inode)->defrag_bytes); | ||
8130 | 8602 | ||
8131 | /* | 8603 | /* |
8132 | * This can happen where we create an inode, but somebody else also | 8604 | * This can happen where we create an inode, but somebody else also |
@@ -8495,7 +8967,9 @@ struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode, | |||
8495 | work->inode = inode; | 8967 | work->inode = inode; |
8496 | work->wait = wait; | 8968 | work->wait = wait; |
8497 | work->delay_iput = delay_iput; | 8969 | work->delay_iput = delay_iput; |
8498 | btrfs_init_work(&work->work, btrfs_run_delalloc_work, NULL, NULL); | 8970 | WARN_ON_ONCE(!inode); |
8971 | btrfs_init_work(&work->work, btrfs_flush_delalloc_helper, | ||
8972 | btrfs_run_delalloc_work, NULL, NULL); | ||
8499 | 8973 | ||
8500 | return work; | 8974 | return work; |
8501 | } | 8975 | } |
@@ -8540,7 +9014,7 @@ static int __start_delalloc_inodes(struct btrfs_root *root, int delay_iput, | |||
8540 | spin_unlock(&root->delalloc_lock); | 9014 | spin_unlock(&root->delalloc_lock); |
8541 | 9015 | ||
8542 | work = btrfs_alloc_delalloc_work(inode, 0, delay_iput); | 9016 | work = btrfs_alloc_delalloc_work(inode, 0, delay_iput); |
8543 | if (unlikely(!work)) { | 9017 | if (!work) { |
8544 | if (delay_iput) | 9018 | if (delay_iput) |
8545 | btrfs_add_delayed_iput(inode); | 9019 | btrfs_add_delayed_iput(inode); |
8546 | else | 9020 | else |
@@ -8699,12 +9173,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, | |||
8699 | goto out_unlock; | 9173 | goto out_unlock; |
8700 | } | 9174 | } |
8701 | 9175 | ||
8702 | err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name); | ||
8703 | if (err) { | ||
8704 | drop_inode = 1; | ||
8705 | goto out_unlock; | ||
8706 | } | ||
8707 | |||
8708 | /* | 9176 | /* |
8709 | * If the active LSM wants to access the inode during | 9177 | * If the active LSM wants to access the inode during |
8710 | * d_instantiate it needs these. Smack checks to see | 9178 | * d_instantiate it needs these. Smack checks to see |
@@ -8713,34 +9181,32 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, | |||
8713 | */ | 9181 | */ |
8714 | inode->i_fop = &btrfs_file_operations; | 9182 | inode->i_fop = &btrfs_file_operations; |
8715 | inode->i_op = &btrfs_file_inode_operations; | 9183 | inode->i_op = &btrfs_file_inode_operations; |
9184 | inode->i_mapping->a_ops = &btrfs_aops; | ||
9185 | inode->i_mapping->backing_dev_info = &root->fs_info->bdi; | ||
9186 | BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; | ||
9187 | |||
9188 | err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name); | ||
9189 | if (err) | ||
9190 | goto out_unlock_inode; | ||
8716 | 9191 | ||
8717 | err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); | 9192 | err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); |
8718 | if (err) | 9193 | if (err) |
8719 | drop_inode = 1; | 9194 | goto out_unlock_inode; |
8720 | else { | ||
8721 | inode->i_mapping->a_ops = &btrfs_aops; | ||
8722 | inode->i_mapping->backing_dev_info = &root->fs_info->bdi; | ||
8723 | BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; | ||
8724 | } | ||
8725 | if (drop_inode) | ||
8726 | goto out_unlock; | ||
8727 | 9195 | ||
8728 | path = btrfs_alloc_path(); | 9196 | path = btrfs_alloc_path(); |
8729 | if (!path) { | 9197 | if (!path) { |
8730 | err = -ENOMEM; | 9198 | err = -ENOMEM; |
8731 | drop_inode = 1; | 9199 | goto out_unlock_inode; |
8732 | goto out_unlock; | ||
8733 | } | 9200 | } |
8734 | key.objectid = btrfs_ino(inode); | 9201 | key.objectid = btrfs_ino(inode); |
8735 | key.offset = 0; | 9202 | key.offset = 0; |
8736 | btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); | 9203 | key.type = BTRFS_EXTENT_DATA_KEY; |
8737 | datasize = btrfs_file_extent_calc_inline_size(name_len); | 9204 | datasize = btrfs_file_extent_calc_inline_size(name_len); |
8738 | err = btrfs_insert_empty_item(trans, root, path, &key, | 9205 | err = btrfs_insert_empty_item(trans, root, path, &key, |
8739 | datasize); | 9206 | datasize); |
8740 | if (err) { | 9207 | if (err) { |
8741 | drop_inode = 1; | ||
8742 | btrfs_free_path(path); | 9208 | btrfs_free_path(path); |
8743 | goto out_unlock; | 9209 | goto out_unlock_inode; |
8744 | } | 9210 | } |
8745 | leaf = path->nodes[0]; | 9211 | leaf = path->nodes[0]; |
8746 | ei = btrfs_item_ptr(leaf, path->slots[0], | 9212 | ei = btrfs_item_ptr(leaf, path->slots[0], |
@@ -8764,12 +9230,15 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, | |||
8764 | inode_set_bytes(inode, name_len); | 9230 | inode_set_bytes(inode, name_len); |
8765 | btrfs_i_size_write(inode, name_len); | 9231 | btrfs_i_size_write(inode, name_len); |
8766 | err = btrfs_update_inode(trans, root, inode); | 9232 | err = btrfs_update_inode(trans, root, inode); |
8767 | if (err) | 9233 | if (err) { |
8768 | drop_inode = 1; | 9234 | drop_inode = 1; |
9235 | goto out_unlock_inode; | ||
9236 | } | ||
9237 | |||
9238 | unlock_new_inode(inode); | ||
9239 | d_instantiate(dentry, inode); | ||
8769 | 9240 | ||
8770 | out_unlock: | 9241 | out_unlock: |
8771 | if (!err) | ||
8772 | d_instantiate(dentry, inode); | ||
8773 | btrfs_end_transaction(trans, root); | 9242 | btrfs_end_transaction(trans, root); |
8774 | if (drop_inode) { | 9243 | if (drop_inode) { |
8775 | inode_dec_link_count(inode); | 9244 | inode_dec_link_count(inode); |
@@ -8777,6 +9246,11 @@ out_unlock: | |||
8777 | } | 9246 | } |
8778 | btrfs_btree_balance_dirty(root); | 9247 | btrfs_btree_balance_dirty(root); |
8779 | return err; | 9248 | return err; |
9249 | |||
9250 | out_unlock_inode: | ||
9251 | drop_inode = 1; | ||
9252 | unlock_new_inode(inode); | ||
9253 | goto out_unlock; | ||
8780 | } | 9254 | } |
8781 | 9255 | ||
8782 | static int __btrfs_prealloc_file_range(struct inode *inode, int mode, | 9256 | static int __btrfs_prealloc_file_range(struct inode *inode, int mode, |
@@ -8960,14 +9434,6 @@ static int btrfs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) | |||
8960 | goto out; | 9434 | goto out; |
8961 | } | 9435 | } |
8962 | 9436 | ||
8963 | ret = btrfs_init_inode_security(trans, inode, dir, NULL); | ||
8964 | if (ret) | ||
8965 | goto out; | ||
8966 | |||
8967 | ret = btrfs_update_inode(trans, root, inode); | ||
8968 | if (ret) | ||
8969 | goto out; | ||
8970 | |||
8971 | inode->i_fop = &btrfs_file_operations; | 9437 | inode->i_fop = &btrfs_file_operations; |
8972 | inode->i_op = &btrfs_file_inode_operations; | 9438 | inode->i_op = &btrfs_file_inode_operations; |
8973 | 9439 | ||
@@ -8975,10 +9441,26 @@ static int btrfs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) | |||
8975 | inode->i_mapping->backing_dev_info = &root->fs_info->bdi; | 9441 | inode->i_mapping->backing_dev_info = &root->fs_info->bdi; |
8976 | BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; | 9442 | BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; |
8977 | 9443 | ||
9444 | ret = btrfs_init_inode_security(trans, inode, dir, NULL); | ||
9445 | if (ret) | ||
9446 | goto out_inode; | ||
9447 | |||
9448 | ret = btrfs_update_inode(trans, root, inode); | ||
9449 | if (ret) | ||
9450 | goto out_inode; | ||
8978 | ret = btrfs_orphan_add(trans, inode); | 9451 | ret = btrfs_orphan_add(trans, inode); |
8979 | if (ret) | 9452 | if (ret) |
8980 | goto out; | 9453 | goto out_inode; |
8981 | 9454 | ||
9455 | /* | ||
9456 | * We set the number of links to 0 in btrfs_new_inode(), and here we set | ||
9457 | * it to 1 because d_tmpfile() will issue a warning if the count is 0, | ||
9458 | * through: | ||
9459 | * | ||
9460 | * d_tmpfile() -> inode_dec_link_count() -> drop_nlink() | ||
9461 | */ | ||
9462 | set_nlink(inode, 1); | ||
9463 | unlock_new_inode(inode); | ||
8982 | d_tmpfile(dentry, inode); | 9464 | d_tmpfile(dentry, inode); |
8983 | mark_inode_dirty(inode); | 9465 | mark_inode_dirty(inode); |
8984 | 9466 | ||
@@ -8988,8 +9470,12 @@ out: | |||
8988 | iput(inode); | 9470 | iput(inode); |
8989 | btrfs_balance_delayed_items(root); | 9471 | btrfs_balance_delayed_items(root); |
8990 | btrfs_btree_balance_dirty(root); | 9472 | btrfs_btree_balance_dirty(root); |
8991 | |||
8992 | return ret; | 9473 | return ret; |
9474 | |||
9475 | out_inode: | ||
9476 | unlock_new_inode(inode); | ||
9477 | goto out; | ||
9478 | |||
8993 | } | 9479 | } |
8994 | 9480 | ||
8995 | static const struct inode_operations btrfs_dir_inode_operations = { | 9481 | static const struct inode_operations btrfs_dir_inode_operations = { |
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 47aceb494d1d..e732274f1afd 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
@@ -332,6 +332,9 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) | |||
332 | goto out_drop; | 332 | goto out_drop; |
333 | 333 | ||
334 | } else { | 334 | } else { |
335 | ret = btrfs_set_prop(inode, "btrfs.compression", NULL, 0, 0); | ||
336 | if (ret && ret != -ENODATA) | ||
337 | goto out_drop; | ||
335 | ip->flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS); | 338 | ip->flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS); |
336 | } | 339 | } |
337 | 340 | ||
@@ -477,8 +480,7 @@ static noinline int create_subvol(struct inode *dir, | |||
477 | if (ret) | 480 | if (ret) |
478 | goto fail; | 481 | goto fail; |
479 | 482 | ||
480 | leaf = btrfs_alloc_free_block(trans, root, root->leafsize, | 483 | leaf = btrfs_alloc_tree_block(trans, root, 0, objectid, NULL, 0, 0, 0); |
481 | 0, objectid, NULL, 0, 0, 0); | ||
482 | if (IS_ERR(leaf)) { | 484 | if (IS_ERR(leaf)) { |
483 | ret = PTR_ERR(leaf); | 485 | ret = PTR_ERR(leaf); |
484 | goto fail; | 486 | goto fail; |
@@ -503,7 +505,7 @@ static noinline int create_subvol(struct inode *dir, | |||
503 | btrfs_set_stack_inode_generation(inode_item, 1); | 505 | btrfs_set_stack_inode_generation(inode_item, 1); |
504 | btrfs_set_stack_inode_size(inode_item, 3); | 506 | btrfs_set_stack_inode_size(inode_item, 3); |
505 | btrfs_set_stack_inode_nlink(inode_item, 1); | 507 | btrfs_set_stack_inode_nlink(inode_item, 1); |
506 | btrfs_set_stack_inode_nbytes(inode_item, root->leafsize); | 508 | btrfs_set_stack_inode_nbytes(inode_item, root->nodesize); |
507 | btrfs_set_stack_inode_mode(inode_item, S_IFDIR | 0755); | 509 | btrfs_set_stack_inode_mode(inode_item, S_IFDIR | 0755); |
508 | 510 | ||
509 | btrfs_set_root_flags(&root_item, 0); | 511 | btrfs_set_root_flags(&root_item, 0); |
@@ -535,7 +537,7 @@ static noinline int create_subvol(struct inode *dir, | |||
535 | 537 | ||
536 | key.objectid = objectid; | 538 | key.objectid = objectid; |
537 | key.offset = 0; | 539 | key.offset = 0; |
538 | btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); | 540 | key.type = BTRFS_ROOT_ITEM_KEY; |
539 | ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, | 541 | ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, |
540 | &root_item); | 542 | &root_item); |
541 | if (ret) | 543 | if (ret) |
@@ -711,39 +713,6 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir, | |||
711 | if (ret) | 713 | if (ret) |
712 | goto fail; | 714 | goto fail; |
713 | 715 | ||
714 | ret = btrfs_orphan_cleanup(pending_snapshot->snap); | ||
715 | if (ret) | ||
716 | goto fail; | ||
717 | |||
718 | /* | ||
719 | * If orphan cleanup did remove any orphans, it means the tree was | ||
720 | * modified and therefore the commit root is not the same as the | ||
721 | * current root anymore. This is a problem, because send uses the | ||
722 | * commit root and therefore can see inode items that don't exist | ||
723 | * in the current root anymore, and for example make calls to | ||
724 | * btrfs_iget, which will do tree lookups based on the current root | ||
725 | * and not on the commit root. Those lookups will fail, returning a | ||
726 | * -ESTALE error, and making send fail with that error. So make sure | ||
727 | * a send does not see any orphans we have just removed, and that it | ||
728 | * will see the same inodes regardless of whether a transaction | ||
729 | * commit happened before it started (meaning that the commit root | ||
730 | * will be the same as the current root) or not. | ||
731 | */ | ||
732 | if (readonly && pending_snapshot->snap->node != | ||
733 | pending_snapshot->snap->commit_root) { | ||
734 | trans = btrfs_join_transaction(pending_snapshot->snap); | ||
735 | if (IS_ERR(trans) && PTR_ERR(trans) != -ENOENT) { | ||
736 | ret = PTR_ERR(trans); | ||
737 | goto fail; | ||
738 | } | ||
739 | if (!IS_ERR(trans)) { | ||
740 | ret = btrfs_commit_transaction(trans, | ||
741 | pending_snapshot->snap); | ||
742 | if (ret) | ||
743 | goto fail; | ||
744 | } | ||
745 | } | ||
746 | |||
747 | inode = btrfs_lookup_dentry(dentry->d_parent->d_inode, dentry); | 716 | inode = btrfs_lookup_dentry(dentry->d_parent->d_inode, dentry); |
748 | if (IS_ERR(inode)) { | 717 | if (IS_ERR(inode)) { |
749 | ret = PTR_ERR(inode); | 718 | ret = PTR_ERR(inode); |
@@ -915,7 +884,7 @@ out_unlock: | |||
915 | * file you want to defrag, we return 0 to let you know to skip this | 884 | * file you want to defrag, we return 0 to let you know to skip this |
916 | * part of the file | 885 | * part of the file |
917 | */ | 886 | */ |
918 | static int check_defrag_in_cache(struct inode *inode, u64 offset, int thresh) | 887 | static int check_defrag_in_cache(struct inode *inode, u64 offset, u32 thresh) |
919 | { | 888 | { |
920 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | 889 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; |
921 | struct extent_map *em = NULL; | 890 | struct extent_map *em = NULL; |
@@ -950,7 +919,7 @@ static int check_defrag_in_cache(struct inode *inode, u64 offset, int thresh) | |||
950 | */ | 919 | */ |
951 | static int find_new_extents(struct btrfs_root *root, | 920 | static int find_new_extents(struct btrfs_root *root, |
952 | struct inode *inode, u64 newer_than, | 921 | struct inode *inode, u64 newer_than, |
953 | u64 *off, int thresh) | 922 | u64 *off, u32 thresh) |
954 | { | 923 | { |
955 | struct btrfs_path *path; | 924 | struct btrfs_path *path; |
956 | struct btrfs_key min_key; | 925 | struct btrfs_key min_key; |
@@ -969,12 +938,9 @@ static int find_new_extents(struct btrfs_root *root, | |||
969 | min_key.offset = *off; | 938 | min_key.offset = *off; |
970 | 939 | ||
971 | while (1) { | 940 | while (1) { |
972 | path->keep_locks = 1; | ||
973 | ret = btrfs_search_forward(root, &min_key, path, newer_than); | 941 | ret = btrfs_search_forward(root, &min_key, path, newer_than); |
974 | if (ret != 0) | 942 | if (ret != 0) |
975 | goto none; | 943 | goto none; |
976 | path->keep_locks = 0; | ||
977 | btrfs_unlock_up_safe(path, 1); | ||
978 | process_slot: | 944 | process_slot: |
979 | if (min_key.objectid != ino) | 945 | if (min_key.objectid != ino) |
980 | goto none; | 946 | goto none; |
@@ -1052,15 +1018,17 @@ static bool defrag_check_next_extent(struct inode *inode, struct extent_map *em) | |||
1052 | return false; | 1018 | return false; |
1053 | 1019 | ||
1054 | next = defrag_lookup_extent(inode, em->start + em->len); | 1020 | next = defrag_lookup_extent(inode, em->start + em->len); |
1055 | if (!next || next->block_start >= EXTENT_MAP_LAST_BYTE || | 1021 | if (!next || next->block_start >= EXTENT_MAP_LAST_BYTE) |
1056 | (em->block_start + em->block_len == next->block_start)) | 1022 | ret = false; |
1023 | else if ((em->block_start + em->block_len == next->block_start) && | ||
1024 | (em->block_len > 128 * 1024 && next->block_len > 128 * 1024)) | ||
1057 | ret = false; | 1025 | ret = false; |
1058 | 1026 | ||
1059 | free_extent_map(next); | 1027 | free_extent_map(next); |
1060 | return ret; | 1028 | return ret; |
1061 | } | 1029 | } |
1062 | 1030 | ||
1063 | static int should_defrag_range(struct inode *inode, u64 start, int thresh, | 1031 | static int should_defrag_range(struct inode *inode, u64 start, u32 thresh, |
1064 | u64 *last_len, u64 *skip, u64 *defrag_end, | 1032 | u64 *last_len, u64 *skip, u64 *defrag_end, |
1065 | int compress) | 1033 | int compress) |
1066 | { | 1034 | { |
@@ -1088,7 +1056,6 @@ static int should_defrag_range(struct inode *inode, u64 start, int thresh, | |||
1088 | } | 1056 | } |
1089 | 1057 | ||
1090 | next_mergeable = defrag_check_next_extent(inode, em); | 1058 | next_mergeable = defrag_check_next_extent(inode, em); |
1091 | |||
1092 | /* | 1059 | /* |
1093 | * we hit a real extent, if it is big or the next extent is not a | 1060 | * we hit a real extent, if it is big or the next extent is not a |
1094 | * real extent, don't bother defragging it | 1061 | * real extent, don't bother defragging it |
@@ -1291,7 +1258,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, | |||
1291 | int ret; | 1258 | int ret; |
1292 | int defrag_count = 0; | 1259 | int defrag_count = 0; |
1293 | int compress_type = BTRFS_COMPRESS_ZLIB; | 1260 | int compress_type = BTRFS_COMPRESS_ZLIB; |
1294 | int extent_thresh = range->extent_thresh; | 1261 | u32 extent_thresh = range->extent_thresh; |
1295 | unsigned long max_cluster = (256 * 1024) >> PAGE_CACHE_SHIFT; | 1262 | unsigned long max_cluster = (256 * 1024) >> PAGE_CACHE_SHIFT; |
1296 | unsigned long cluster = max_cluster; | 1263 | unsigned long cluster = max_cluster; |
1297 | u64 new_align = ~((u64)128 * 1024 - 1); | 1264 | u64 new_align = ~((u64)128 * 1024 - 1); |
@@ -1367,8 +1334,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, | |||
1367 | inode->i_mapping->writeback_index = i; | 1334 | inode->i_mapping->writeback_index = i; |
1368 | 1335 | ||
1369 | while (i <= last_index && defrag_count < max_to_defrag && | 1336 | while (i <= last_index && defrag_count < max_to_defrag && |
1370 | (i < (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> | 1337 | (i < DIV_ROUND_UP(i_size_read(inode), PAGE_CACHE_SIZE))) { |
1371 | PAGE_CACHE_SHIFT)) { | ||
1372 | /* | 1338 | /* |
1373 | * make sure we stop running if someone unmounts | 1339 | * make sure we stop running if someone unmounts |
1374 | * the FS | 1340 | * the FS |
@@ -1391,7 +1357,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, | |||
1391 | * the should_defrag function tells us how much to skip | 1357 | * the should_defrag function tells us how much to skip |
1392 | * bump our counter by the suggested amount | 1358 | * bump our counter by the suggested amount |
1393 | */ | 1359 | */ |
1394 | next = (skip + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | 1360 | next = DIV_ROUND_UP(skip, PAGE_CACHE_SIZE); |
1395 | i = max(i + 1, next); | 1361 | i = max(i + 1, next); |
1396 | continue; | 1362 | continue; |
1397 | } | 1363 | } |
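Several open-coded round-ups of the form (x + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT become DIV_ROUND_UP(x, PAGE_CACHE_SIZE) in these hunks. For a power-of-two page size the two forms are identical; a quick self-contained check:

    #include <stdio.h>

    #define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

    int main(void)
    {
        const unsigned long PAGE_SIZE_ = 4096, PAGE_SHIFT_ = 12;
        for (unsigned long x = 0; x < 3 * PAGE_SIZE_; x += 1000) {
            unsigned long a = (x + PAGE_SIZE_ - 1) >> PAGE_SHIFT_;  /* old form */
            unsigned long b = DIV_ROUND_UP(x, PAGE_SIZE_);          /* new form */
            if (a != b)
                printf("mismatch at %lu\n", x);
        }
        printf("forms agree\n");
        return 0;
    }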
@@ -1586,7 +1552,7 @@ static noinline int btrfs_ioctl_resize(struct file *file, | |||
1586 | goto out_free; | 1552 | goto out_free; |
1587 | } | 1553 | } |
1588 | 1554 | ||
1589 | old_size = device->total_bytes; | 1555 | old_size = btrfs_device_get_total_bytes(device); |
1590 | 1556 | ||
1591 | if (mod < 0) { | 1557 | if (mod < 0) { |
1592 | if (new_size > old_size) { | 1558 | if (new_size > old_size) { |
@@ -1735,7 +1701,7 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file, | |||
1735 | ~(BTRFS_SUBVOL_CREATE_ASYNC | BTRFS_SUBVOL_RDONLY | | 1701 | ~(BTRFS_SUBVOL_CREATE_ASYNC | BTRFS_SUBVOL_RDONLY | |
1736 | BTRFS_SUBVOL_QGROUP_INHERIT)) { | 1702 | BTRFS_SUBVOL_QGROUP_INHERIT)) { |
1737 | ret = -EOPNOTSUPP; | 1703 | ret = -EOPNOTSUPP; |
1738 | goto out; | 1704 | goto free_args; |
1739 | } | 1705 | } |
1740 | 1706 | ||
1741 | if (vol_args->flags & BTRFS_SUBVOL_CREATE_ASYNC) | 1707 | if (vol_args->flags & BTRFS_SUBVOL_CREATE_ASYNC) |
@@ -1745,27 +1711,31 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file, | |||
1745 | if (vol_args->flags & BTRFS_SUBVOL_QGROUP_INHERIT) { | 1711 | if (vol_args->flags & BTRFS_SUBVOL_QGROUP_INHERIT) { |
1746 | if (vol_args->size > PAGE_CACHE_SIZE) { | 1712 | if (vol_args->size > PAGE_CACHE_SIZE) { |
1747 | ret = -EINVAL; | 1713 | ret = -EINVAL; |
1748 | goto out; | 1714 | goto free_args; |
1749 | } | 1715 | } |
1750 | inherit = memdup_user(vol_args->qgroup_inherit, vol_args->size); | 1716 | inherit = memdup_user(vol_args->qgroup_inherit, vol_args->size); |
1751 | if (IS_ERR(inherit)) { | 1717 | if (IS_ERR(inherit)) { |
1752 | ret = PTR_ERR(inherit); | 1718 | ret = PTR_ERR(inherit); |
1753 | goto out; | 1719 | goto free_args; |
1754 | } | 1720 | } |
1755 | } | 1721 | } |
1756 | 1722 | ||
1757 | ret = btrfs_ioctl_snap_create_transid(file, vol_args->name, | 1723 | ret = btrfs_ioctl_snap_create_transid(file, vol_args->name, |
1758 | vol_args->fd, subvol, ptr, | 1724 | vol_args->fd, subvol, ptr, |
1759 | readonly, inherit); | 1725 | readonly, inherit); |
1726 | if (ret) | ||
1727 | goto free_inherit; | ||
1760 | 1728 | ||
1761 | if (ret == 0 && ptr && | 1729 | if (ptr && copy_to_user(arg + |
1762 | copy_to_user(arg + | 1730 | offsetof(struct btrfs_ioctl_vol_args_v2, |
1763 | offsetof(struct btrfs_ioctl_vol_args_v2, | 1731 | transid), |
1764 | transid), ptr, sizeof(*ptr))) | 1732 | ptr, sizeof(*ptr))) |
1765 | ret = -EFAULT; | 1733 | ret = -EFAULT; |
1766 | out: | 1734 | |
1767 | kfree(vol_args); | 1735 | free_inherit: |
1768 | kfree(inherit); | 1736 | kfree(inherit); |
1737 | free_args: | ||
1738 | kfree(vol_args); | ||
1769 | return ret; | 1739 | return ret; |
1770 | } | 1740 | } |
1771 | 1741 | ||
@@ -2117,8 +2087,6 @@ static noinline int search_ioctl(struct inode *inode, | |||
2117 | key.type = sk->min_type; | 2087 | key.type = sk->min_type; |
2118 | key.offset = sk->min_offset; | 2088 | key.offset = sk->min_offset; |
2119 | 2089 | ||
2120 | path->keep_locks = 1; | ||
2121 | |||
2122 | while (1) { | 2090 | while (1) { |
2123 | ret = btrfs_search_forward(root, &key, path, sk->min_transid); | 2091 | ret = btrfs_search_forward(root, &key, path, sk->min_transid); |
2124 | if (ret != 0) { | 2092 | if (ret != 0) { |
@@ -2554,9 +2522,9 @@ out_unlock: | |||
2554 | ASSERT(dest->send_in_progress == 0); | 2522 | ASSERT(dest->send_in_progress == 0); |
2555 | 2523 | ||
2556 | /* the last ref */ | 2524 | /* the last ref */ |
2557 | if (dest->cache_inode) { | 2525 | if (dest->ino_cache_inode) { |
2558 | iput(dest->cache_inode); | 2526 | iput(dest->ino_cache_inode); |
2559 | dest->cache_inode = NULL; | 2527 | dest->ino_cache_inode = NULL; |
2560 | } | 2528 | } |
2561 | } | 2529 | } |
2562 | out_dput: | 2530 | out_dput: |
@@ -2662,6 +2630,9 @@ static long btrfs_ioctl_add_dev(struct btrfs_root *root, void __user *arg) | |||
2662 | vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; | 2630 | vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; |
2663 | ret = btrfs_init_new_device(root, vol_args->name); | 2631 | ret = btrfs_init_new_device(root, vol_args->name); |
2664 | 2632 | ||
2633 | if (!ret) | ||
2634 | btrfs_info(root->fs_info, "disk added %s", vol_args->name); | ||
2635 | |||
2665 | kfree(vol_args); | 2636 | kfree(vol_args); |
2666 | out: | 2637 | out: |
2667 | mutex_unlock(&root->fs_info->volume_mutex); | 2638 | mutex_unlock(&root->fs_info->volume_mutex); |
@@ -2685,7 +2656,7 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg) | |||
2685 | vol_args = memdup_user(arg, sizeof(*vol_args)); | 2656 | vol_args = memdup_user(arg, sizeof(*vol_args)); |
2686 | if (IS_ERR(vol_args)) { | 2657 | if (IS_ERR(vol_args)) { |
2687 | ret = PTR_ERR(vol_args); | 2658 | ret = PTR_ERR(vol_args); |
2688 | goto out; | 2659 | goto err_drop; |
2689 | } | 2660 | } |
2690 | 2661 | ||
2691 | vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; | 2662 | vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; |
@@ -2701,8 +2672,12 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg) | |||
2701 | mutex_unlock(&root->fs_info->volume_mutex); | 2672 | mutex_unlock(&root->fs_info->volume_mutex); |
2702 | atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0); | 2673 | atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0); |
2703 | 2674 | ||
2675 | if (!ret) | ||
2676 | btrfs_info(root->fs_info, "disk deleted %s", vol_args->name); | ||
2677 | |||
2704 | out: | 2678 | out: |
2705 | kfree(vol_args); | 2679 | kfree(vol_args); |
2680 | err_drop: | ||
2706 | mnt_drop_write_file(file); | 2681 | mnt_drop_write_file(file); |
2707 | return ret; | 2682 | return ret; |
2708 | } | 2683 | } |
@@ -2764,8 +2739,8 @@ static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg) | |||
2764 | } | 2739 | } |
2765 | 2740 | ||
2766 | di_args->devid = dev->devid; | 2741 | di_args->devid = dev->devid; |
2767 | di_args->bytes_used = dev->bytes_used; | 2742 | di_args->bytes_used = btrfs_device_get_bytes_used(dev); |
2768 | di_args->total_bytes = dev->total_bytes; | 2743 | di_args->total_bytes = btrfs_device_get_total_bytes(dev); |
2769 | memcpy(di_args->uuid, dev->uuid, sizeof(di_args->uuid)); | 2744 | memcpy(di_args->uuid, dev->uuid, sizeof(di_args->uuid)); |
2770 | if (dev->name) { | 2745 | if (dev->name) { |
2771 | struct rcu_string *name; | 2746 | struct rcu_string *name; |
@@ -3191,7 +3166,7 @@ static void clone_update_extent_map(struct inode *inode, | |||
3191 | em->start + em->len - 1, 0); | 3166 | em->start + em->len - 1, 0); |
3192 | } | 3167 | } |
3193 | 3168 | ||
3194 | if (unlikely(ret)) | 3169 | if (ret) |
3195 | set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, | 3170 | set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, |
3196 | &BTRFS_I(inode)->runtime_flags); | 3171 | &BTRFS_I(inode)->runtime_flags); |
3197 | } | 3172 | } |
@@ -3226,7 +3201,7 @@ static int btrfs_clone(struct inode *src, struct inode *inode, | |||
3226 | u64 last_dest_end = destoff; | 3201 | u64 last_dest_end = destoff; |
3227 | 3202 | ||
3228 | ret = -ENOMEM; | 3203 | ret = -ENOMEM; |
3229 | buf = vmalloc(btrfs_level_size(root, 0)); | 3204 | buf = vmalloc(root->nodesize); |
3230 | if (!buf) | 3205 | if (!buf) |
3231 | return ret; | 3206 | return ret; |
3232 | 3207 | ||
@@ -3279,11 +3254,11 @@ process_slot: | |||
3279 | slot = path->slots[0]; | 3254 | slot = path->slots[0]; |
3280 | 3255 | ||
3281 | btrfs_item_key_to_cpu(leaf, &key, slot); | 3256 | btrfs_item_key_to_cpu(leaf, &key, slot); |
3282 | if (btrfs_key_type(&key) > BTRFS_EXTENT_DATA_KEY || | 3257 | if (key.type > BTRFS_EXTENT_DATA_KEY || |
3283 | key.objectid != btrfs_ino(src)) | 3258 | key.objectid != btrfs_ino(src)) |
3284 | break; | 3259 | break; |
3285 | 3260 | ||
3286 | if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY) { | 3261 | if (key.type == BTRFS_EXTENT_DATA_KEY) { |
3287 | struct btrfs_file_extent_item *extent; | 3262 | struct btrfs_file_extent_item *extent; |
3288 | int type; | 3263 | int type; |
3289 | u32 size; | 3264 | u32 size; |
@@ -3527,7 +3502,8 @@ process_slot: | |||
3527 | btrfs_mark_buffer_dirty(leaf); | 3502 | btrfs_mark_buffer_dirty(leaf); |
3528 | btrfs_release_path(path); | 3503 | btrfs_release_path(path); |
3529 | 3504 | ||
3530 | last_dest_end = new_key.offset + datal; | 3505 | last_dest_end = ALIGN(new_key.offset + datal, |
3506 | root->sectorsize); | ||
3531 | ret = clone_finish_inode_update(trans, inode, | 3507 | ret = clone_finish_inode_update(trans, inode, |
3532 | last_dest_end, | 3508 | last_dest_end, |
3533 | destoff, olen); | 3509 | destoff, olen); |
@@ -5309,6 +5285,12 @@ long btrfs_ioctl(struct file *file, unsigned int | |||
5309 | if (ret) | 5285 | if (ret) |
5310 | return ret; | 5286 | return ret; |
5311 | ret = btrfs_sync_fs(file->f_dentry->d_sb, 1); | 5287 | ret = btrfs_sync_fs(file->f_dentry->d_sb, 1); |
5288 | /* | ||
5289 | * The transaction thread may want to do more work, | ||
5290 | * namely it pokes the cleaner kthread that will start | ||
5291 | * processing uncleaned subvols. | ||
5292 | */ | ||
5293 | wake_up_process(root->fs_info->transaction_kthread); | ||
5312 | return ret; | 5294 | return ret; |
5313 | } | 5295 | } |
5314 | case BTRFS_IOC_START_SYNC: | 5296 | case BTRFS_IOC_START_SYNC: |
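
The snap_create_v2 change above converts the single out: label into ordered free_inherit/free_args labels so each buffer is freed exactly once on every exit path. Below is a minimal, self-contained userspace sketch of the same unwind pattern; the names and helpers are illustrative, not the btrfs code itself.

/* Ordered-unwind sketch: release resources in reverse order of
 * acquisition; each label frees exactly what exists at that point. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static int do_op(const char *name)
{
    int ret = 0;
    char *args = strdup(name);      /* first allocation */
    char *inherit = NULL;

    if (!args)
        return -1;

    if (name[0] == '\0') {
        ret = -1;
        goto free_args;             /* only 'args' exists yet */
    }

    inherit = malloc(32);           /* second allocation */
    if (!inherit) {
        ret = -1;
        goto free_args;
    }

    if (snprintf(inherit, 32, "inherit:%s", args) < 0) {
        ret = -1;
        goto free_inherit;          /* both allocations exist */
    }
    puts(inherit);

free_inherit:                       /* unwind in reverse order */
    free(inherit);
free_args:
    free(args);
    return ret;
}

int main(void)
{
    return do_op("subvol") ? EXIT_FAILURE : EXIT_SUCCESS;
}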
diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c index dfad8514f0da..78285f30909e 100644 --- a/fs/btrfs/lzo.c +++ b/fs/btrfs/lzo.c | |||
@@ -266,8 +266,7 @@ static int lzo_decompress_biovec(struct list_head *ws, | |||
266 | char *data_in; | 266 | char *data_in; |
267 | unsigned long page_in_index = 0; | 267 | unsigned long page_in_index = 0; |
268 | unsigned long page_out_index = 0; | 268 | unsigned long page_out_index = 0; |
269 | unsigned long total_pages_in = (srclen + PAGE_CACHE_SIZE - 1) / | 269 | unsigned long total_pages_in = DIV_ROUND_UP(srclen, PAGE_CACHE_SIZE); |
270 | PAGE_CACHE_SIZE; | ||
271 | unsigned long buf_start; | 270 | unsigned long buf_start; |
272 | unsigned long buf_offset = 0; | 271 | unsigned long buf_offset = 0; |
273 | unsigned long bytes; | 272 | unsigned long bytes; |
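
This hunk, like several in ioctl.c and raid56.c, replaces the open-coded round-up (n + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT with DIV_ROUND_UP(). The macro body below matches the kernel's definition in include/linux/kernel.h; the 4096-byte page size and shift are assumptions made only for this standalone check.

#include <assert.h>
#include <stdio.h>

/* Same definition as the kernel's DIV_ROUND_UP in include/linux/kernel.h. */
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
    const unsigned long PAGE_SIZE_ = 4096;  /* assumed page size */
    const unsigned long PAGE_SHIFT_ = 12;   /* log2 of the above */

    for (unsigned long srclen = 0; srclen < 3 * PAGE_SIZE_; srclen += 511) {
        unsigned long open_coded =
            (srclen + PAGE_SIZE_ - 1) >> PAGE_SHIFT_;
        assert(open_coded == DIV_ROUND_UP(srclen, PAGE_SIZE_));
    }
    printf("DIV_ROUND_UP matches the open-coded round-up\n");
    return 0;
}

The shift and the division agree for any power-of-two divisor, which is why the conversion throughout this commit is purely mechanical.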
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 963895c1f801..ac734ec4cc20 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c | |||
@@ -615,6 +615,7 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr) | |||
615 | spin_unlock(&root->ordered_extent_lock); | 615 | spin_unlock(&root->ordered_extent_lock); |
616 | 616 | ||
617 | btrfs_init_work(&ordered->flush_work, | 617 | btrfs_init_work(&ordered->flush_work, |
618 | btrfs_flush_delalloc_helper, | ||
618 | btrfs_run_ordered_extent_work, NULL, NULL); | 619 | btrfs_run_ordered_extent_work, NULL, NULL); |
619 | list_add_tail(&ordered->work_list, &works); | 620 | list_add_tail(&ordered->work_list, &works); |
620 | btrfs_queue_work(root->fs_info->flush_workers, | 621 | btrfs_queue_work(root->fs_info->flush_workers, |
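
This hunk and the matching btrfs_init_work() changes later in qgroup.c, raid56.c, reada.c and scrub.c thread a per-type helper function into every work item, giving each work type a function address that outlives the work struct it is embedded in. A simplified userspace sketch of that pattern follows; the macro and struct shapes are a reconstruction, not the code in fs/btrfs/async-thread.c.

#include <stdio.h>

struct work {
    void (*helper)(struct work *w);
    const char *name;
};

static void normal_work_helper(struct work *w)
{
    printf("running %s\n", w->name);
}

/* Each expansion yields a distinct function with its own address, so
 * tracing and dependency logic can tell work types apart even after
 * the embedding structure has been freed. */
#define WORK_HELPER(fn)             \
static void fn(struct work *w)      \
{                                   \
    normal_work_helper(w);          \
}

WORK_HELPER(flush_delalloc_helper)
WORK_HELPER(qgroup_rescan_helper)

int main(void)
{
    struct work a = { flush_delalloc_helper, "ordered extent flush" };
    struct work b = { qgroup_rescan_helper, "qgroup rescan" };

    a.helper(&a);
    b.helper(&b);
    printf("distinct helper addresses: %d\n", a.helper != b.helper);
    return 0;
}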
diff --git a/fs/btrfs/orphan.c b/fs/btrfs/orphan.c index 65793edb38ca..47767d5b8f0b 100644 --- a/fs/btrfs/orphan.c +++ b/fs/btrfs/orphan.c | |||
@@ -27,7 +27,7 @@ int btrfs_insert_orphan_item(struct btrfs_trans_handle *trans, | |||
27 | int ret = 0; | 27 | int ret = 0; |
28 | 28 | ||
29 | key.objectid = BTRFS_ORPHAN_OBJECTID; | 29 | key.objectid = BTRFS_ORPHAN_OBJECTID; |
30 | btrfs_set_key_type(&key, BTRFS_ORPHAN_ITEM_KEY); | 30 | key.type = BTRFS_ORPHAN_ITEM_KEY; |
31 | key.offset = offset; | 31 | key.offset = offset; |
32 | 32 | ||
33 | path = btrfs_alloc_path(); | 33 | path = btrfs_alloc_path(); |
@@ -48,7 +48,7 @@ int btrfs_del_orphan_item(struct btrfs_trans_handle *trans, | |||
48 | int ret = 0; | 48 | int ret = 0; |
49 | 49 | ||
50 | key.objectid = BTRFS_ORPHAN_OBJECTID; | 50 | key.objectid = BTRFS_ORPHAN_OBJECTID; |
51 | btrfs_set_key_type(&key, BTRFS_ORPHAN_ITEM_KEY); | 51 | key.type = BTRFS_ORPHAN_ITEM_KEY; |
52 | key.offset = offset; | 52 | key.offset = offset; |
53 | 53 | ||
54 | path = btrfs_alloc_path(); | 54 | path = btrfs_alloc_path(); |
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index 9626b4ad3b9a..647ab12fdf5d 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c | |||
@@ -195,7 +195,7 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) | |||
195 | for (i = 0 ; i < nr ; i++) { | 195 | for (i = 0 ; i < nr ; i++) { |
196 | item = btrfs_item_nr(i); | 196 | item = btrfs_item_nr(i); |
197 | btrfs_item_key_to_cpu(l, &key, i); | 197 | btrfs_item_key_to_cpu(l, &key, i); |
198 | type = btrfs_key_type(&key); | 198 | type = key.type; |
199 | printk(KERN_INFO "\titem %d key (%llu %u %llu) itemoff %d " | 199 | printk(KERN_INFO "\titem %d key (%llu %u %llu) itemoff %d " |
200 | "itemsize %d\n", | 200 | "itemsize %d\n", |
201 | i, key.objectid, type, key.offset, | 201 | i, key.objectid, type, key.offset, |
@@ -336,7 +336,6 @@ void btrfs_print_tree(struct btrfs_root *root, struct extent_buffer *c) | |||
336 | for (i = 0; i < nr; i++) { | 336 | for (i = 0; i < nr; i++) { |
337 | struct extent_buffer *next = read_tree_block(root, | 337 | struct extent_buffer *next = read_tree_block(root, |
338 | btrfs_node_blockptr(c, i), | 338 | btrfs_node_blockptr(c, i), |
339 | btrfs_level_size(root, level - 1), | ||
340 | btrfs_node_ptr_generation(c, i)); | 339 | btrfs_node_ptr_generation(c, i)); |
341 | if (btrfs_is_leaf(next) && | 340 | if (btrfs_is_leaf(next) && |
342 | level != 1) | 341 | level != 1) |
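
The orphan.c and print-tree.c hunks drop the btrfs_key_type()/btrfs_set_key_type() wrappers in favor of touching key.type directly; for a CPU-order key the wrappers did no conversion at all. The sketch below mirrors their assumed one-line bodies, with an illustrative key-type value.

#include <assert.h>

struct demo_key {
    unsigned long long objectid;
    unsigned char type;
    unsigned long long offset;
};

/* Assumed shape of the removed helpers: thin one-liners over the
 * CPU-order key, with no byte swapping or side effects. */
static unsigned char demo_key_type(const struct demo_key *k)
{
    return k->type;
}

static void demo_set_key_type(struct demo_key *k, unsigned char t)
{
    k->type = t;
}

int main(void)
{
    struct demo_key key = { 0, 0, 0 };

    demo_set_key_type(&key, 108);           /* illustrative type value */
    assert(demo_key_type(&key) == key.type); /* wrapper adds nothing */
    return 0;
}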
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index b497498484be..48b60dbf807f 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c | |||
@@ -539,10 +539,9 @@ static int add_qgroup_item(struct btrfs_trans_handle *trans, | |||
539 | struct extent_buffer *leaf; | 539 | struct extent_buffer *leaf; |
540 | struct btrfs_key key; | 540 | struct btrfs_key key; |
541 | 541 | ||
542 | #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS | 542 | if (btrfs_test_is_dummy_root(quota_root)) |
543 | if (unlikely(test_bit(BTRFS_ROOT_DUMMY_ROOT, "a_root->state))) | ||
544 | return 0; | 543 | return 0; |
545 | #endif | 544 | |
546 | path = btrfs_alloc_path(); | 545 | path = btrfs_alloc_path(); |
547 | if (!path) | 546 | if (!path) |
548 | return -ENOMEM; | 547 | return -ENOMEM; |
@@ -551,9 +550,15 @@ static int add_qgroup_item(struct btrfs_trans_handle *trans, | |||
551 | key.type = BTRFS_QGROUP_INFO_KEY; | 550 | key.type = BTRFS_QGROUP_INFO_KEY; |
552 | key.offset = qgroupid; | 551 | key.offset = qgroupid; |
553 | 552 | ||
553 | /* | ||
554 | * Avoid a transaction abort by catching -EEXIST here. In that | ||
555 | * case, we proceed by re-initializing the existing structure | ||
556 | * on disk. | ||
557 | */ | ||
558 | |||
554 | ret = btrfs_insert_empty_item(trans, quota_root, path, &key, | 559 | ret = btrfs_insert_empty_item(trans, quota_root, path, &key, |
555 | sizeof(*qgroup_info)); | 560 | sizeof(*qgroup_info)); |
556 | if (ret) | 561 | if (ret && ret != -EEXIST) |
557 | goto out; | 562 | goto out; |
558 | 563 | ||
559 | leaf = path->nodes[0]; | 564 | leaf = path->nodes[0]; |
@@ -572,7 +577,7 @@ static int add_qgroup_item(struct btrfs_trans_handle *trans, | |||
572 | key.type = BTRFS_QGROUP_LIMIT_KEY; | 577 | key.type = BTRFS_QGROUP_LIMIT_KEY; |
573 | ret = btrfs_insert_empty_item(trans, quota_root, path, &key, | 578 | ret = btrfs_insert_empty_item(trans, quota_root, path, &key, |
574 | sizeof(*qgroup_limit)); | 579 | sizeof(*qgroup_limit)); |
575 | if (ret) | 580 | if (ret && ret != -EEXIST) |
576 | goto out; | 581 | goto out; |
577 | 582 | ||
578 | leaf = path->nodes[0]; | 583 | leaf = path->nodes[0]; |
@@ -692,10 +697,9 @@ static int update_qgroup_info_item(struct btrfs_trans_handle *trans, | |||
692 | int ret; | 697 | int ret; |
693 | int slot; | 698 | int slot; |
694 | 699 | ||
695 | #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS | 700 | if (btrfs_test_is_dummy_root(root)) |
696 | if (unlikely(test_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state))) | ||
697 | return 0; | 701 | return 0; |
698 | #endif | 702 | |
699 | key.objectid = 0; | 703 | key.objectid = 0; |
700 | key.type = BTRFS_QGROUP_INFO_KEY; | 704 | key.type = BTRFS_QGROUP_INFO_KEY; |
701 | key.offset = qgroup->qgroupid; | 705 | key.offset = qgroup->qgroupid; |
@@ -1335,6 +1339,8 @@ int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans, | |||
1335 | INIT_LIST_HEAD(&oper->elem.list); | 1339 | INIT_LIST_HEAD(&oper->elem.list); |
1336 | oper->elem.seq = 0; | 1340 | oper->elem.seq = 0; |
1337 | 1341 | ||
1342 | trace_btrfs_qgroup_record_ref(oper); | ||
1343 | |||
1338 | if (type == BTRFS_QGROUP_OPER_SUB_SUBTREE) { | 1344 | if (type == BTRFS_QGROUP_OPER_SUB_SUBTREE) { |
1339 | /* | 1345 | /* |
1340 | * If any operation for this bytenr/ref_root combo | 1346 | * If any operation for this bytenr/ref_root combo |
@@ -1973,7 +1979,7 @@ static int qgroup_subtree_accounting(struct btrfs_trans_handle *trans, | |||
1973 | elem.seq, &roots); | 1979 | elem.seq, &roots); |
1974 | btrfs_put_tree_mod_seq(fs_info, &elem); | 1980 | btrfs_put_tree_mod_seq(fs_info, &elem); |
1975 | if (ret < 0) | 1981 | if (ret < 0) |
1976 | return ret; | 1982 | goto out; |
1977 | 1983 | ||
1978 | if (roots->nnodes != 1) | 1984 | if (roots->nnodes != 1) |
1979 | goto out; | 1985 | goto out; |
@@ -2077,6 +2083,8 @@ static int btrfs_qgroup_account(struct btrfs_trans_handle *trans, | |||
2077 | 2083 | ||
2078 | ASSERT(is_fstree(oper->ref_root)); | 2084 | ASSERT(is_fstree(oper->ref_root)); |
2079 | 2085 | ||
2086 | trace_btrfs_qgroup_account(oper); | ||
2087 | |||
2080 | switch (oper->type) { | 2088 | switch (oper->type) { |
2081 | case BTRFS_QGROUP_OPER_ADD_EXCL: | 2089 | case BTRFS_QGROUP_OPER_ADD_EXCL: |
2082 | case BTRFS_QGROUP_OPER_SUB_EXCL: | 2090 | case BTRFS_QGROUP_OPER_SUB_EXCL: |
@@ -2237,7 +2245,6 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, | |||
2237 | if (srcid) { | 2245 | if (srcid) { |
2238 | struct btrfs_root *srcroot; | 2246 | struct btrfs_root *srcroot; |
2239 | struct btrfs_key srckey; | 2247 | struct btrfs_key srckey; |
2240 | int srcroot_level; | ||
2241 | 2248 | ||
2242 | srckey.objectid = srcid; | 2249 | srckey.objectid = srcid; |
2243 | srckey.type = BTRFS_ROOT_ITEM_KEY; | 2250 | srckey.type = BTRFS_ROOT_ITEM_KEY; |
@@ -2249,8 +2256,7 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, | |||
2249 | } | 2256 | } |
2250 | 2257 | ||
2251 | rcu_read_lock(); | 2258 | rcu_read_lock(); |
2252 | srcroot_level = btrfs_header_level(srcroot->node); | 2259 | level_size = srcroot->nodesize; |
2253 | level_size = btrfs_level_size(srcroot, srcroot_level); | ||
2254 | rcu_read_unlock(); | 2260 | rcu_read_unlock(); |
2255 | } | 2261 | } |
2256 | 2262 | ||
@@ -2566,7 +2572,7 @@ qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path, | |||
2566 | found.type != BTRFS_METADATA_ITEM_KEY) | 2572 | found.type != BTRFS_METADATA_ITEM_KEY) |
2567 | continue; | 2573 | continue; |
2568 | if (found.type == BTRFS_METADATA_ITEM_KEY) | 2574 | if (found.type == BTRFS_METADATA_ITEM_KEY) |
2569 | num_bytes = fs_info->extent_root->leafsize; | 2575 | num_bytes = fs_info->extent_root->nodesize; |
2570 | else | 2576 | else |
2571 | num_bytes = found.offset; | 2577 | num_bytes = found.offset; |
2572 | 2578 | ||
@@ -2720,6 +2726,7 @@ qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid, | |||
2720 | memset(&fs_info->qgroup_rescan_work, 0, | 2726 | memset(&fs_info->qgroup_rescan_work, 0, |
2721 | sizeof(fs_info->qgroup_rescan_work)); | 2727 | sizeof(fs_info->qgroup_rescan_work)); |
2722 | btrfs_init_work(&fs_info->qgroup_rescan_work, | 2728 | btrfs_init_work(&fs_info->qgroup_rescan_work, |
2729 | btrfs_qgroup_rescan_helper, | ||
2723 | btrfs_qgroup_rescan_worker, NULL, NULL); | 2730 | btrfs_qgroup_rescan_worker, NULL, NULL); |
2724 | 2731 | ||
2725 | if (ret) { | 2732 | if (ret) { |
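
The add_qgroup_item() hunks above stop treating -EEXIST as fatal: a leftover on-disk item is re-initialized instead of aborting the transaction. A self-contained sketch of that insert-or-reinitialize pattern, with a slot array and helpers invented for the demo:

#include <errno.h>
#include <stdio.h>

#define NSLOTS 4
static int used[NSLOTS];
static char payload[NSLOTS][16];

static int insert_empty_item(int slot)
{
    if (used[slot])
        return -EEXIST;     /* item already present on "disk" */
    used[slot] = 1;
    return 0;
}

static int add_item(int slot, const char *data)
{
    int ret = insert_empty_item(slot);

    if (ret && ret != -EEXIST)  /* only real errors abort */
        return ret;
    /* on -EEXIST, fall through and re-initialize the existing payload */
    snprintf(payload[slot], sizeof(payload[slot]), "%s", data);
    return 0;
}

int main(void)
{
    add_item(1, "first");
    int ret = add_item(1, "second");    /* hits -EEXIST, still succeeds */

    printf("ret=%d payload=%s\n", ret, payload[1]);
    return 0;
}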
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index 4a88f073fdd7..6a41631cb959 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c | |||
@@ -912,7 +912,7 @@ static struct page *page_in_rbio(struct btrfs_raid_bio *rbio, | |||
912 | static unsigned long rbio_nr_pages(unsigned long stripe_len, int nr_stripes) | 912 | static unsigned long rbio_nr_pages(unsigned long stripe_len, int nr_stripes) |
913 | { | 913 | { |
914 | unsigned long nr = stripe_len * nr_stripes; | 914 | unsigned long nr = stripe_len * nr_stripes; |
915 | return (nr + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | 915 | return DIV_ROUND_UP(nr, PAGE_CACHE_SIZE); |
916 | } | 916 | } |
917 | 917 | ||
918 | /* | 918 | /* |
@@ -1416,7 +1416,8 @@ cleanup: | |||
1416 | 1416 | ||
1417 | static void async_rmw_stripe(struct btrfs_raid_bio *rbio) | 1417 | static void async_rmw_stripe(struct btrfs_raid_bio *rbio) |
1418 | { | 1418 | { |
1419 | btrfs_init_work(&rbio->work, rmw_work, NULL, NULL); | 1419 | btrfs_init_work(&rbio->work, btrfs_rmw_helper, |
1420 | rmw_work, NULL, NULL); | ||
1420 | 1421 | ||
1421 | btrfs_queue_work(rbio->fs_info->rmw_workers, | 1422 | btrfs_queue_work(rbio->fs_info->rmw_workers, |
1422 | &rbio->work); | 1423 | &rbio->work); |
@@ -1424,7 +1425,8 @@ static void async_rmw_stripe(struct btrfs_raid_bio *rbio) | |||
1424 | 1425 | ||
1425 | static void async_read_rebuild(struct btrfs_raid_bio *rbio) | 1426 | static void async_read_rebuild(struct btrfs_raid_bio *rbio) |
1426 | { | 1427 | { |
1427 | btrfs_init_work(&rbio->work, read_rebuild_work, NULL, NULL); | 1428 | btrfs_init_work(&rbio->work, btrfs_rmw_helper, |
1429 | read_rebuild_work, NULL, NULL); | ||
1428 | 1430 | ||
1429 | btrfs_queue_work(rbio->fs_info->rmw_workers, | 1431 | btrfs_queue_work(rbio->fs_info->rmw_workers, |
1430 | &rbio->work); | 1432 | &rbio->work); |
@@ -1440,7 +1442,7 @@ static int raid56_rmw_stripe(struct btrfs_raid_bio *rbio) | |||
1440 | struct btrfs_bio *bbio = rbio->bbio; | 1442 | struct btrfs_bio *bbio = rbio->bbio; |
1441 | struct bio_list bio_list; | 1443 | struct bio_list bio_list; |
1442 | int ret; | 1444 | int ret; |
1443 | int nr_pages = (rbio->stripe_len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | 1445 | int nr_pages = DIV_ROUND_UP(rbio->stripe_len, PAGE_CACHE_SIZE); |
1444 | int pagenr; | 1446 | int pagenr; |
1445 | int stripe; | 1447 | int stripe; |
1446 | struct bio *bio; | 1448 | struct bio *bio; |
@@ -1665,7 +1667,8 @@ static void btrfs_raid_unplug(struct blk_plug_cb *cb, bool from_schedule) | |||
1665 | plug = container_of(cb, struct btrfs_plug_cb, cb); | 1667 | plug = container_of(cb, struct btrfs_plug_cb, cb); |
1666 | 1668 | ||
1667 | if (from_schedule) { | 1669 | if (from_schedule) { |
1668 | btrfs_init_work(&plug->work, unplug_work, NULL, NULL); | 1670 | btrfs_init_work(&plug->work, btrfs_rmw_helper, |
1671 | unplug_work, NULL, NULL); | ||
1669 | btrfs_queue_work(plug->info->rmw_workers, | 1672 | btrfs_queue_work(plug->info->rmw_workers, |
1670 | &plug->work); | 1673 | &plug->work); |
1671 | return; | 1674 | return; |
@@ -1722,7 +1725,7 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio) | |||
1722 | int pagenr, stripe; | 1725 | int pagenr, stripe; |
1723 | void **pointers; | 1726 | void **pointers; |
1724 | int faila = -1, failb = -1; | 1727 | int faila = -1, failb = -1; |
1725 | int nr_pages = (rbio->stripe_len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | 1728 | int nr_pages = DIV_ROUND_UP(rbio->stripe_len, PAGE_CACHE_SIZE); |
1726 | struct page *page; | 1729 | struct page *page; |
1727 | int err; | 1730 | int err; |
1728 | int i; | 1731 | int i; |
@@ -1937,7 +1940,7 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio) | |||
1937 | struct btrfs_bio *bbio = rbio->bbio; | 1940 | struct btrfs_bio *bbio = rbio->bbio; |
1938 | struct bio_list bio_list; | 1941 | struct bio_list bio_list; |
1939 | int ret; | 1942 | int ret; |
1940 | int nr_pages = (rbio->stripe_len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | 1943 | int nr_pages = DIV_ROUND_UP(rbio->stripe_len, PAGE_CACHE_SIZE); |
1941 | int pagenr; | 1944 | int pagenr; |
1942 | int stripe; | 1945 | int stripe; |
1943 | struct bio *bio; | 1946 | struct bio *bio; |
diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c index 09230cf3a244..b63ae20618fb 100644 --- a/fs/btrfs/reada.c +++ b/fs/btrfs/reada.c | |||
@@ -347,7 +347,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, | |||
347 | if (!re) | 347 | if (!re) |
348 | return NULL; | 348 | return NULL; |
349 | 349 | ||
350 | blocksize = btrfs_level_size(root, level); | 350 | blocksize = root->nodesize; |
351 | re->logical = logical; | 351 | re->logical = logical; |
352 | re->blocksize = blocksize; | 352 | re->blocksize = blocksize; |
353 | re->top = *top; | 353 | re->top = *top; |
@@ -798,7 +798,8 @@ static void reada_start_machine(struct btrfs_fs_info *fs_info) | |||
798 | /* FIXME we cannot handle this properly right now */ | 798 | /* FIXME we cannot handle this properly right now */ |
799 | BUG(); | 799 | BUG(); |
800 | } | 800 | } |
801 | btrfs_init_work(&rmw->work, reada_start_machine_worker, NULL, NULL); | 801 | btrfs_init_work(&rmw->work, btrfs_readahead_helper, |
802 | reada_start_machine_worker, NULL, NULL); | ||
802 | rmw->fs_info = fs_info; | 803 | rmw->fs_info = fs_info; |
803 | 804 | ||
804 | btrfs_queue_work(fs_info->readahead_workers, &rmw->work); | 805 | btrfs_queue_work(fs_info->readahead_workers, &rmw->work); |
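
reada.c shows the simplest instance of a conversion running through this commit (also in qgroup.c, relocation.c and scrub.c): btrfs_level_size() call sites collapse to root->nodesize. The old helper returned leafsize for level 0 and nodesize otherwise, and the two sizes are now required to be equal; the helper body below is an assumed reconstruction, not the kernel source.

#include <assert.h>

struct demo_root {
    unsigned int nodesize;
    unsigned int leafsize;
};

/* Assumed shape of the removed helper: leaves (level 0) used leafsize,
 * interior nodes used nodesize. */
static unsigned int level_size(const struct demo_root *r, int level)
{
    return level ? r->nodesize : r->leafsize;
}

int main(void)
{
    /* with the two sizes equal, as this series now requires, the
     * helper always returns nodesize, whatever the level */
    struct demo_root root = { .nodesize = 16384, .leafsize = 16384 };

    for (int level = 0; level < 8; level++)
        assert(level_size(&root, level) == root.nodesize);
    return 0;
}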
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 65245a07275b..74257d6436ad 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c | |||
@@ -736,7 +736,8 @@ again: | |||
736 | err = ret; | 736 | err = ret; |
737 | goto out; | 737 | goto out; |
738 | } | 738 | } |
739 | BUG_ON(!ret || !path1->slots[0]); | 739 | ASSERT(ret); |
740 | ASSERT(path1->slots[0]); | ||
740 | 741 | ||
741 | path1->slots[0]--; | 742 | path1->slots[0]--; |
742 | 743 | ||
@@ -746,10 +747,10 @@ again: | |||
746 | * the backref was added previously when processing | 747 | * the backref was added previously when processing |
747 | * backref of type BTRFS_TREE_BLOCK_REF_KEY | 748 | * backref of type BTRFS_TREE_BLOCK_REF_KEY |
748 | */ | 749 | */ |
749 | BUG_ON(!list_is_singular(&cur->upper)); | 750 | ASSERT(list_is_singular(&cur->upper)); |
750 | edge = list_entry(cur->upper.next, struct backref_edge, | 751 | edge = list_entry(cur->upper.next, struct backref_edge, |
751 | list[LOWER]); | 752 | list[LOWER]); |
752 | BUG_ON(!list_empty(&edge->list[UPPER])); | 753 | ASSERT(list_empty(&edge->list[UPPER])); |
753 | exist = edge->node[UPPER]; | 754 | exist = edge->node[UPPER]; |
754 | /* | 755 | /* |
755 | * add the upper level block to pending list if we need | 756 | * add the upper level block to pending list if we need |
@@ -831,7 +832,7 @@ again: | |||
831 | cur->cowonly = 1; | 832 | cur->cowonly = 1; |
832 | } | 833 | } |
833 | #else | 834 | #else |
834 | BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY); | 835 | ASSERT(key.type != BTRFS_EXTENT_REF_V0_KEY); |
835 | if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) { | 836 | if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) { |
836 | #endif | 837 | #endif |
837 | if (key.objectid == key.offset) { | 838 | if (key.objectid == key.offset) { |
@@ -840,7 +841,7 @@ again: | |||
840 | * backref of this type. | 841 | * backref of this type. |
841 | */ | 842 | */ |
842 | root = find_reloc_root(rc, cur->bytenr); | 843 | root = find_reloc_root(rc, cur->bytenr); |
843 | BUG_ON(!root); | 844 | ASSERT(root); |
844 | cur->root = root; | 845 | cur->root = root; |
845 | break; | 846 | break; |
846 | } | 847 | } |
@@ -868,7 +869,7 @@ again: | |||
868 | } else { | 869 | } else { |
869 | upper = rb_entry(rb_node, struct backref_node, | 870 | upper = rb_entry(rb_node, struct backref_node, |
870 | rb_node); | 871 | rb_node); |
871 | BUG_ON(!upper->checked); | 872 | ASSERT(upper->checked); |
872 | INIT_LIST_HEAD(&edge->list[UPPER]); | 873 | INIT_LIST_HEAD(&edge->list[UPPER]); |
873 | } | 874 | } |
874 | list_add_tail(&edge->list[LOWER], &cur->upper); | 875 | list_add_tail(&edge->list[LOWER], &cur->upper); |
@@ -892,7 +893,7 @@ again: | |||
892 | 893 | ||
893 | if (btrfs_root_level(&root->root_item) == cur->level) { | 894 | if (btrfs_root_level(&root->root_item) == cur->level) { |
894 | /* tree root */ | 895 | /* tree root */ |
895 | BUG_ON(btrfs_root_bytenr(&root->root_item) != | 896 | ASSERT(btrfs_root_bytenr(&root->root_item) == |
896 | cur->bytenr); | 897 | cur->bytenr); |
897 | if (should_ignore_root(root)) | 898 | if (should_ignore_root(root)) |
898 | list_add(&cur->list, &useless); | 899 | list_add(&cur->list, &useless); |
@@ -927,7 +928,7 @@ again: | |||
927 | need_check = true; | 928 | need_check = true; |
928 | for (; level < BTRFS_MAX_LEVEL; level++) { | 929 | for (; level < BTRFS_MAX_LEVEL; level++) { |
929 | if (!path2->nodes[level]) { | 930 | if (!path2->nodes[level]) { |
930 | BUG_ON(btrfs_root_bytenr(&root->root_item) != | 931 | ASSERT(btrfs_root_bytenr(&root->root_item) == |
931 | lower->bytenr); | 932 | lower->bytenr); |
932 | if (should_ignore_root(root)) | 933 | if (should_ignore_root(root)) |
933 | list_add(&lower->list, &useless); | 934 | list_add(&lower->list, &useless); |
@@ -977,12 +978,15 @@ again: | |||
977 | need_check = false; | 978 | need_check = false; |
978 | list_add_tail(&edge->list[UPPER], | 979 | list_add_tail(&edge->list[UPPER], |
979 | &list); | 980 | &list); |
980 | } else | 981 | } else { |
982 | if (upper->checked) | ||
983 | need_check = true; | ||
981 | INIT_LIST_HEAD(&edge->list[UPPER]); | 984 | INIT_LIST_HEAD(&edge->list[UPPER]); |
985 | } | ||
982 | } else { | 986 | } else { |
983 | upper = rb_entry(rb_node, struct backref_node, | 987 | upper = rb_entry(rb_node, struct backref_node, |
984 | rb_node); | 988 | rb_node); |
985 | BUG_ON(!upper->checked); | 989 | ASSERT(upper->checked); |
986 | INIT_LIST_HEAD(&edge->list[UPPER]); | 990 | INIT_LIST_HEAD(&edge->list[UPPER]); |
987 | if (!upper->owner) | 991 | if (!upper->owner) |
988 | upper->owner = btrfs_header_owner(eb); | 992 | upper->owner = btrfs_header_owner(eb); |
@@ -1026,7 +1030,7 @@ next: | |||
1026 | * everything goes well, connect backref nodes and insert backref nodes | 1030 | * everything goes well, connect backref nodes and insert backref nodes |
1027 | * into the cache. | 1031 | * into the cache. |
1028 | */ | 1032 | */ |
1029 | BUG_ON(!node->checked); | 1033 | ASSERT(node->checked); |
1030 | cowonly = node->cowonly; | 1034 | cowonly = node->cowonly; |
1031 | if (!cowonly) { | 1035 | if (!cowonly) { |
1032 | rb_node = tree_insert(&cache->rb_root, node->bytenr, | 1036 | rb_node = tree_insert(&cache->rb_root, node->bytenr, |
@@ -1062,8 +1066,21 @@ next: | |||
1062 | continue; | 1066 | continue; |
1063 | } | 1067 | } |
1064 | 1068 | ||
1065 | BUG_ON(!upper->checked); | 1069 | if (!upper->checked) { |
1066 | BUG_ON(cowonly != upper->cowonly); | 1070 | /* |
1071 | * Still want to blow up for developers since this is a | ||
1072 | * logic bug. | ||
1073 | */ | ||
1074 | ASSERT(0); | ||
1075 | err = -EINVAL; | ||
1076 | goto out; | ||
1077 | } | ||
1078 | if (cowonly != upper->cowonly) { | ||
1079 | ASSERT(0); | ||
1080 | err = -EINVAL; | ||
1081 | goto out; | ||
1082 | } | ||
1083 | |||
1067 | if (!cowonly) { | 1084 | if (!cowonly) { |
1068 | rb_node = tree_insert(&cache->rb_root, upper->bytenr, | 1085 | rb_node = tree_insert(&cache->rb_root, upper->bytenr, |
1069 | &upper->rb_node); | 1086 | &upper->rb_node); |
@@ -1086,7 +1103,7 @@ next: | |||
1086 | while (!list_empty(&useless)) { | 1103 | while (!list_empty(&useless)) { |
1087 | upper = list_entry(useless.next, struct backref_node, list); | 1104 | upper = list_entry(useless.next, struct backref_node, list); |
1088 | list_del_init(&upper->list); | 1105 | list_del_init(&upper->list); |
1089 | BUG_ON(!list_empty(&upper->upper)); | 1106 | ASSERT(list_empty(&upper->upper)); |
1090 | if (upper == node) | 1107 | if (upper == node) |
1091 | node = NULL; | 1108 | node = NULL; |
1092 | if (upper->lowest) { | 1109 | if (upper->lowest) { |
@@ -1119,29 +1136,45 @@ out: | |||
1119 | if (err) { | 1136 | if (err) { |
1120 | while (!list_empty(&useless)) { | 1137 | while (!list_empty(&useless)) { |
1121 | lower = list_entry(useless.next, | 1138 | lower = list_entry(useless.next, |
1122 | struct backref_node, upper); | 1139 | struct backref_node, list); |
1123 | list_del_init(&lower->upper); | 1140 | list_del_init(&lower->list); |
1124 | } | 1141 | } |
1125 | upper = node; | 1142 | while (!list_empty(&list)) { |
1126 | INIT_LIST_HEAD(&list); | 1143 | edge = list_first_entry(&list, struct backref_edge, |
1127 | while (upper) { | 1144 | list[UPPER]); |
1128 | if (RB_EMPTY_NODE(&upper->rb_node)) { | 1145 | list_del(&edge->list[UPPER]); |
1129 | list_splice_tail(&upper->upper, &list); | ||
1130 | free_backref_node(cache, upper); | ||
1131 | } | ||
1132 | |||
1133 | if (list_empty(&list)) | ||
1134 | break; | ||
1135 | |||
1136 | edge = list_entry(list.next, struct backref_edge, | ||
1137 | list[LOWER]); | ||
1138 | list_del(&edge->list[LOWER]); | 1146 | list_del(&edge->list[LOWER]); |
1147 | lower = edge->node[LOWER]; | ||
1139 | upper = edge->node[UPPER]; | 1148 | upper = edge->node[UPPER]; |
1140 | free_backref_edge(cache, edge); | 1149 | free_backref_edge(cache, edge); |
1150 | |||
1151 | /* | ||
1152 | * Lower is no longer linked to any upper backref nodes | ||
1153 | * and isn't in the cache, so we can free it ourselves. | ||
1154 | */ | ||
1155 | if (list_empty(&lower->upper) && | ||
1156 | RB_EMPTY_NODE(&lower->rb_node)) | ||
1157 | list_add(&lower->list, &useless); | ||
1158 | |||
1159 | if (!RB_EMPTY_NODE(&upper->rb_node)) | ||
1160 | continue; | ||
1161 | |||
1162 | /* Add this guy's upper edges to the list to process */ | ||
1163 | list_for_each_entry(edge, &upper->upper, list[LOWER]) | ||
1164 | list_add_tail(&edge->list[UPPER], &list); | ||
1165 | if (list_empty(&upper->upper)) | ||
1166 | list_add(&upper->list, &useless); | ||
1167 | } | ||
1168 | |||
1169 | while (!list_empty(&useless)) { | ||
1170 | lower = list_entry(useless.next, | ||
1171 | struct backref_node, list); | ||
1172 | list_del_init(&lower->list); | ||
1173 | free_backref_node(cache, lower); | ||
1141 | } | 1174 | } |
1142 | return ERR_PTR(err); | 1175 | return ERR_PTR(err); |
1143 | } | 1176 | } |
1144 | BUG_ON(node && node->detached); | 1177 | ASSERT(!node || !node->detached); |
1145 | return node; | 1178 | return node; |
1146 | } | 1179 | } |
1147 | 1180 | ||
@@ -1787,7 +1820,7 @@ again: | |||
1787 | btrfs_node_key_to_cpu(parent, next_key, slot + 1); | 1820 | btrfs_node_key_to_cpu(parent, next_key, slot + 1); |
1788 | 1821 | ||
1789 | old_bytenr = btrfs_node_blockptr(parent, slot); | 1822 | old_bytenr = btrfs_node_blockptr(parent, slot); |
1790 | blocksize = btrfs_level_size(dest, level - 1); | 1823 | blocksize = dest->nodesize; |
1791 | old_ptr_gen = btrfs_node_ptr_generation(parent, slot); | 1824 | old_ptr_gen = btrfs_node_ptr_generation(parent, slot); |
1792 | 1825 | ||
1793 | if (level <= max_level) { | 1826 | if (level <= max_level) { |
@@ -1813,8 +1846,7 @@ again: | |||
1813 | break; | 1846 | break; |
1814 | } | 1847 | } |
1815 | 1848 | ||
1816 | eb = read_tree_block(dest, old_bytenr, blocksize, | 1849 | eb = read_tree_block(dest, old_bytenr, old_ptr_gen); |
1817 | old_ptr_gen); | ||
1818 | if (!eb || !extent_buffer_uptodate(eb)) { | 1850 | if (!eb || !extent_buffer_uptodate(eb)) { |
1819 | ret = (!eb) ? -ENOMEM : -EIO; | 1851 | ret = (!eb) ? -ENOMEM : -EIO; |
1820 | free_extent_buffer(eb); | 1852 | free_extent_buffer(eb); |
@@ -1944,7 +1976,6 @@ int walk_down_reloc_tree(struct btrfs_root *root, struct btrfs_path *path, | |||
1944 | u64 bytenr; | 1976 | u64 bytenr; |
1945 | u64 ptr_gen = 0; | 1977 | u64 ptr_gen = 0; |
1946 | u64 last_snapshot; | 1978 | u64 last_snapshot; |
1947 | u32 blocksize; | ||
1948 | u32 nritems; | 1979 | u32 nritems; |
1949 | 1980 | ||
1950 | last_snapshot = btrfs_root_last_snapshot(&root->root_item); | 1981 | last_snapshot = btrfs_root_last_snapshot(&root->root_item); |
@@ -1970,8 +2001,7 @@ int walk_down_reloc_tree(struct btrfs_root *root, struct btrfs_path *path, | |||
1970 | } | 2001 | } |
1971 | 2002 | ||
1972 | bytenr = btrfs_node_blockptr(eb, path->slots[i]); | 2003 | bytenr = btrfs_node_blockptr(eb, path->slots[i]); |
1973 | blocksize = btrfs_level_size(root, i - 1); | 2004 | eb = read_tree_block(root, bytenr, ptr_gen); |
1974 | eb = read_tree_block(root, bytenr, blocksize, ptr_gen); | ||
1975 | if (!eb || !extent_buffer_uptodate(eb)) { | 2005 | if (!eb || !extent_buffer_uptodate(eb)) { |
1976 | free_extent_buffer(eb); | 2006 | free_extent_buffer(eb); |
1977 | return -EIO; | 2007 | return -EIO; |
@@ -2316,7 +2346,7 @@ void free_reloc_roots(struct list_head *list) | |||
2316 | } | 2346 | } |
2317 | 2347 | ||
2318 | static noinline_for_stack | 2348 | static noinline_for_stack |
2319 | int merge_reloc_roots(struct reloc_control *rc) | 2349 | void merge_reloc_roots(struct reloc_control *rc) |
2320 | { | 2350 | { |
2321 | struct btrfs_root *root; | 2351 | struct btrfs_root *root; |
2322 | struct btrfs_root *reloc_root; | 2352 | struct btrfs_root *reloc_root; |
@@ -2397,7 +2427,6 @@ out: | |||
2397 | } | 2427 | } |
2398 | 2428 | ||
2399 | BUG_ON(!RB_EMPTY_ROOT(&rc->reloc_root_tree.rb_root)); | 2429 | BUG_ON(!RB_EMPTY_ROOT(&rc->reloc_root_tree.rb_root)); |
2400 | return ret; | ||
2401 | } | 2430 | } |
2402 | 2431 | ||
2403 | static void free_block_list(struct rb_root *blocks) | 2432 | static void free_block_list(struct rb_root *blocks) |
@@ -2544,8 +2573,7 @@ u64 calcu_metadata_size(struct reloc_control *rc, | |||
2544 | if (next->processed && (reserve || next != node)) | 2573 | if (next->processed && (reserve || next != node)) |
2545 | break; | 2574 | break; |
2546 | 2575 | ||
2547 | num_bytes += btrfs_level_size(rc->extent_root, | 2576 | num_bytes += rc->extent_root->nodesize; |
2548 | next->level); | ||
2549 | 2577 | ||
2550 | if (list_empty(&next->upper)) | 2578 | if (list_empty(&next->upper)) |
2551 | break; | 2579 | break; |
@@ -2679,9 +2707,9 @@ static int do_relocation(struct btrfs_trans_handle *trans, | |||
2679 | goto next; | 2707 | goto next; |
2680 | } | 2708 | } |
2681 | 2709 | ||
2682 | blocksize = btrfs_level_size(root, node->level); | 2710 | blocksize = root->nodesize; |
2683 | generation = btrfs_node_ptr_generation(upper->eb, slot); | 2711 | generation = btrfs_node_ptr_generation(upper->eb, slot); |
2684 | eb = read_tree_block(root, bytenr, blocksize, generation); | 2712 | eb = read_tree_block(root, bytenr, generation); |
2685 | if (!eb || !extent_buffer_uptodate(eb)) { | 2713 | if (!eb || !extent_buffer_uptodate(eb)) { |
2686 | free_extent_buffer(eb); | 2714 | free_extent_buffer(eb); |
2687 | err = -EIO; | 2715 | err = -EIO; |
@@ -2789,7 +2817,7 @@ static void __mark_block_processed(struct reloc_control *rc, | |||
2789 | u32 blocksize; | 2817 | u32 blocksize; |
2790 | if (node->level == 0 || | 2818 | if (node->level == 0 || |
2791 | in_block_group(node->bytenr, rc->block_group)) { | 2819 | in_block_group(node->bytenr, rc->block_group)) { |
2792 | blocksize = btrfs_level_size(rc->extent_root, node->level); | 2820 | blocksize = rc->extent_root->nodesize; |
2793 | mark_block_processed(rc, node->bytenr, blocksize); | 2821 | mark_block_processed(rc, node->bytenr, blocksize); |
2794 | } | 2822 | } |
2795 | node->processed = 1; | 2823 | node->processed = 1; |
@@ -2843,7 +2871,7 @@ static int get_tree_block_key(struct reloc_control *rc, | |||
2843 | 2871 | ||
2844 | BUG_ON(block->key_ready); | 2872 | BUG_ON(block->key_ready); |
2845 | eb = read_tree_block(rc->extent_root, block->bytenr, | 2873 | eb = read_tree_block(rc->extent_root, block->bytenr, |
2846 | block->key.objectid, block->key.offset); | 2874 | block->key.offset); |
2847 | if (!eb || !extent_buffer_uptodate(eb)) { | 2875 | if (!eb || !extent_buffer_uptodate(eb)) { |
2848 | free_extent_buffer(eb); | 2876 | free_extent_buffer(eb); |
2849 | return -EIO; | 2877 | return -EIO; |
@@ -2858,20 +2886,6 @@ static int get_tree_block_key(struct reloc_control *rc, | |||
2858 | return 0; | 2886 | return 0; |
2859 | } | 2887 | } |
2860 | 2888 | ||
2861 | static int reada_tree_block(struct reloc_control *rc, | ||
2862 | struct tree_block *block) | ||
2863 | { | ||
2864 | BUG_ON(block->key_ready); | ||
2865 | if (block->key.type == BTRFS_METADATA_ITEM_KEY) | ||
2866 | readahead_tree_block(rc->extent_root, block->bytenr, | ||
2867 | block->key.objectid, | ||
2868 | rc->extent_root->leafsize); | ||
2869 | else | ||
2870 | readahead_tree_block(rc->extent_root, block->bytenr, | ||
2871 | block->key.objectid, block->key.offset); | ||
2872 | return 0; | ||
2873 | } | ||
2874 | |||
2875 | /* | 2889 | /* |
2876 | * helper function to relocate a tree block | 2890 | * helper function to relocate a tree block |
2877 | */ | 2891 | */ |
@@ -2951,7 +2965,8 @@ int relocate_tree_blocks(struct btrfs_trans_handle *trans, | |||
2951 | while (rb_node) { | 2965 | while (rb_node) { |
2952 | block = rb_entry(rb_node, struct tree_block, rb_node); | 2966 | block = rb_entry(rb_node, struct tree_block, rb_node); |
2953 | if (!block->key_ready) | 2967 | if (!block->key_ready) |
2954 | reada_tree_block(rc, block); | 2968 | readahead_tree_block(rc->extent_root, block->bytenr, |
2969 | block->key.objectid); | ||
2955 | rb_node = rb_next(rb_node); | 2970 | rb_node = rb_next(rb_node); |
2956 | } | 2971 | } |
2957 | 2972 | ||
@@ -3313,7 +3328,7 @@ static int add_tree_block(struct reloc_control *rc, | |||
3313 | return -ENOMEM; | 3328 | return -ENOMEM; |
3314 | 3329 | ||
3315 | block->bytenr = extent_key->objectid; | 3330 | block->bytenr = extent_key->objectid; |
3316 | block->key.objectid = rc->extent_root->leafsize; | 3331 | block->key.objectid = rc->extent_root->nodesize; |
3317 | block->key.offset = generation; | 3332 | block->key.offset = generation; |
3318 | block->level = level; | 3333 | block->level = level; |
3319 | block->key_ready = 0; | 3334 | block->key_ready = 0; |
@@ -3640,7 +3655,7 @@ int add_data_references(struct reloc_control *rc, | |||
3640 | struct btrfs_extent_inline_ref *iref; | 3655 | struct btrfs_extent_inline_ref *iref; |
3641 | unsigned long ptr; | 3656 | unsigned long ptr; |
3642 | unsigned long end; | 3657 | unsigned long end; |
3643 | u32 blocksize = btrfs_level_size(rc->extent_root, 0); | 3658 | u32 blocksize = rc->extent_root->nodesize; |
3644 | int ret = 0; | 3659 | int ret = 0; |
3645 | int err = 0; | 3660 | int err = 0; |
3646 | 3661 | ||
@@ -3783,7 +3798,7 @@ next: | |||
3783 | } | 3798 | } |
3784 | 3799 | ||
3785 | if (key.type == BTRFS_METADATA_ITEM_KEY && | 3800 | if (key.type == BTRFS_METADATA_ITEM_KEY && |
3786 | key.objectid + rc->extent_root->leafsize <= | 3801 | key.objectid + rc->extent_root->nodesize <= |
3787 | rc->search_start) { | 3802 | rc->search_start) { |
3788 | path->slots[0]++; | 3803 | path->slots[0]++; |
3789 | goto next; | 3804 | goto next; |
@@ -3801,7 +3816,7 @@ next: | |||
3801 | rc->search_start = key.objectid + key.offset; | 3816 | rc->search_start = key.objectid + key.offset; |
3802 | else | 3817 | else |
3803 | rc->search_start = key.objectid + | 3818 | rc->search_start = key.objectid + |
3804 | rc->extent_root->leafsize; | 3819 | rc->extent_root->nodesize; |
3805 | memcpy(extent_key, &key, sizeof(key)); | 3820 | memcpy(extent_key, &key, sizeof(key)); |
3806 | return 0; | 3821 | return 0; |
3807 | } | 3822 | } |
@@ -4096,7 +4111,6 @@ static int __insert_orphan_inode(struct btrfs_trans_handle *trans, | |||
4096 | btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS | | 4111 | btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS | |
4097 | BTRFS_INODE_PREALLOC); | 4112 | BTRFS_INODE_PREALLOC); |
4098 | btrfs_mark_buffer_dirty(leaf); | 4113 | btrfs_mark_buffer_dirty(leaf); |
4099 | btrfs_release_path(path); | ||
4100 | out: | 4114 | out: |
4101 | btrfs_free_path(path); | 4115 | btrfs_free_path(path); |
4102 | return ret; | 4116 | return ret; |
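
The relocation.c conversion above swaps BUG_ON() for ASSERT() plus a real error return: debug builds still crash loudly on the logic bug, while production builds unwind the backref cache and return -EINVAL. A userspace sketch of that split follows; the ASSERT shape is modeled on, not copied from, the kernel's CONFIG_BTRFS_ASSERT variant.

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

#ifdef DEBUG
#define ASSERT(expr)                                                \
    do {                                                            \
        if (!(expr)) {                                              \
            fprintf(stderr, "assertion failed: %s\n", #expr);       \
            abort();                                                \
        }                                                           \
    } while (0)
#else
#define ASSERT(expr) ((void)0)  /* compiled out, as without CONFIG_BTRFS_ASSERT */
#endif

static int check_backref_node(int checked)
{
    if (!checked) {
        /* Still want to blow up for developers: this is a logic bug. */
        ASSERT(0);
        return -EINVAL; /* production builds unwind instead of BUG() */
    }
    return 0;
}

int main(void)
{
    printf("ret = %d\n", check_backref_node(0));
    return 0;
}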
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index b6d198f5181e..efa083113827 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c | |||
@@ -137,7 +137,6 @@ struct scrub_ctx { | |||
137 | int pages_per_rd_bio; | 137 | int pages_per_rd_bio; |
138 | u32 sectorsize; | 138 | u32 sectorsize; |
139 | u32 nodesize; | 139 | u32 nodesize; |
140 | u32 leafsize; | ||
141 | 140 | ||
142 | int is_dev_replace; | 141 | int is_dev_replace; |
143 | struct scrub_wr_ctx wr_ctx; | 142 | struct scrub_wr_ctx wr_ctx; |
@@ -178,17 +177,12 @@ struct scrub_copy_nocow_ctx { | |||
178 | struct scrub_warning { | 177 | struct scrub_warning { |
179 | struct btrfs_path *path; | 178 | struct btrfs_path *path; |
180 | u64 extent_item_size; | 179 | u64 extent_item_size; |
181 | char *scratch_buf; | ||
182 | char *msg_buf; | ||
183 | const char *errstr; | 180 | const char *errstr; |
184 | sector_t sector; | 181 | sector_t sector; |
185 | u64 logical; | 182 | u64 logical; |
186 | struct btrfs_device *dev; | 183 | struct btrfs_device *dev; |
187 | int msg_bufsize; | ||
188 | int scratch_bufsize; | ||
189 | }; | 184 | }; |
190 | 185 | ||
191 | |||
192 | static void scrub_pending_bio_inc(struct scrub_ctx *sctx); | 186 | static void scrub_pending_bio_inc(struct scrub_ctx *sctx); |
193 | static void scrub_pending_bio_dec(struct scrub_ctx *sctx); | 187 | static void scrub_pending_bio_dec(struct scrub_ctx *sctx); |
194 | static void scrub_pending_trans_workers_inc(struct scrub_ctx *sctx); | 188 | static void scrub_pending_trans_workers_inc(struct scrub_ctx *sctx); |
@@ -428,8 +422,8 @@ struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace) | |||
428 | sbio->index = i; | 422 | sbio->index = i; |
429 | sbio->sctx = sctx; | 423 | sbio->sctx = sctx; |
430 | sbio->page_count = 0; | 424 | sbio->page_count = 0; |
431 | btrfs_init_work(&sbio->work, scrub_bio_end_io_worker, | 425 | btrfs_init_work(&sbio->work, btrfs_scrub_helper, |
432 | NULL, NULL); | 426 | scrub_bio_end_io_worker, NULL, NULL); |
433 | 427 | ||
434 | if (i != SCRUB_BIOS_PER_SCTX - 1) | 428 | if (i != SCRUB_BIOS_PER_SCTX - 1) |
435 | sctx->bios[i]->next_free = i + 1; | 429 | sctx->bios[i]->next_free = i + 1; |
@@ -438,7 +432,6 @@ struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace) | |||
438 | } | 432 | } |
439 | sctx->first_free = 0; | 433 | sctx->first_free = 0; |
440 | sctx->nodesize = dev->dev_root->nodesize; | 434 | sctx->nodesize = dev->dev_root->nodesize; |
441 | sctx->leafsize = dev->dev_root->leafsize; | ||
442 | sctx->sectorsize = dev->dev_root->sectorsize; | 435 | sctx->sectorsize = dev->dev_root->sectorsize; |
443 | atomic_set(&sctx->bios_in_flight, 0); | 436 | atomic_set(&sctx->bios_in_flight, 0); |
444 | atomic_set(&sctx->workers_pending, 0); | 437 | atomic_set(&sctx->workers_pending, 0); |
@@ -553,7 +546,6 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock) | |||
553 | u64 ref_root; | 546 | u64 ref_root; |
554 | u32 item_size; | 547 | u32 item_size; |
555 | u8 ref_level; | 548 | u8 ref_level; |
556 | const int bufsize = 4096; | ||
557 | int ret; | 549 | int ret; |
558 | 550 | ||
559 | WARN_ON(sblock->page_count < 1); | 551 | WARN_ON(sblock->page_count < 1); |
@@ -561,18 +553,13 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock) | |||
561 | fs_info = sblock->sctx->dev_root->fs_info; | 553 | fs_info = sblock->sctx->dev_root->fs_info; |
562 | 554 | ||
563 | path = btrfs_alloc_path(); | 555 | path = btrfs_alloc_path(); |
556 | if (!path) | ||
557 | return; | ||
564 | 558 | ||
565 | swarn.scratch_buf = kmalloc(bufsize, GFP_NOFS); | ||
566 | swarn.msg_buf = kmalloc(bufsize, GFP_NOFS); | ||
567 | swarn.sector = (sblock->pagev[0]->physical) >> 9; | 559 | swarn.sector = (sblock->pagev[0]->physical) >> 9; |
568 | swarn.logical = sblock->pagev[0]->logical; | 560 | swarn.logical = sblock->pagev[0]->logical; |
569 | swarn.errstr = errstr; | 561 | swarn.errstr = errstr; |
570 | swarn.dev = NULL; | 562 | swarn.dev = NULL; |
571 | swarn.msg_bufsize = bufsize; | ||
572 | swarn.scratch_bufsize = bufsize; | ||
573 | |||
574 | if (!path || !swarn.scratch_buf || !swarn.msg_buf) | ||
575 | goto out; | ||
576 | 563 | ||
577 | ret = extent_from_logical(fs_info, swarn.logical, path, &found_key, | 564 | ret = extent_from_logical(fs_info, swarn.logical, path, &found_key, |
578 | &flags); | 565 | &flags); |
@@ -613,8 +600,6 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock) | |||
613 | 600 | ||
614 | out: | 601 | out: |
615 | btrfs_free_path(path); | 602 | btrfs_free_path(path); |
616 | kfree(swarn.scratch_buf); | ||
617 | kfree(swarn.msg_buf); | ||
618 | } | 603 | } |
619 | 604 | ||
620 | static int scrub_fixup_readpage(u64 inum, u64 offset, u64 root, void *fixup_ctx) | 605 | static int scrub_fixup_readpage(u64 inum, u64 offset, u64 root, void *fixup_ctx) |
@@ -681,9 +666,9 @@ static int scrub_fixup_readpage(u64 inum, u64 offset, u64 root, void *fixup_ctx) | |||
681 | ret = -EIO; | 666 | ret = -EIO; |
682 | goto out; | 667 | goto out; |
683 | } | 668 | } |
684 | fs_info = BTRFS_I(inode)->root->fs_info; | 669 | ret = repair_io_failure(inode, offset, PAGE_SIZE, |
685 | ret = repair_io_failure(fs_info, offset, PAGE_SIZE, | ||
686 | fixup->logical, page, | 670 | fixup->logical, page, |
671 | offset - page_offset(page), | ||
687 | fixup->mirror_num); | 672 | fixup->mirror_num); |
688 | unlock_page(page); | 673 | unlock_page(page); |
689 | corrected = !ret; | 674 | corrected = !ret; |
@@ -999,8 +984,8 @@ nodatasum_case: | |||
999 | fixup_nodatasum->root = fs_info->extent_root; | 984 | fixup_nodatasum->root = fs_info->extent_root; |
1000 | fixup_nodatasum->mirror_num = failed_mirror_index + 1; | 985 | fixup_nodatasum->mirror_num = failed_mirror_index + 1; |
1001 | scrub_pending_trans_workers_inc(sctx); | 986 | scrub_pending_trans_workers_inc(sctx); |
1002 | btrfs_init_work(&fixup_nodatasum->work, scrub_fixup_nodatasum, | 987 | btrfs_init_work(&fixup_nodatasum->work, btrfs_scrub_helper, |
1003 | NULL, NULL); | 988 | scrub_fixup_nodatasum, NULL, NULL); |
1004 | btrfs_queue_work(fs_info->scrub_workers, | 989 | btrfs_queue_work(fs_info->scrub_workers, |
1005 | &fixup_nodatasum->work); | 990 | &fixup_nodatasum->work); |
1006 | goto out; | 991 | goto out; |
@@ -1361,6 +1346,16 @@ static void scrub_recheck_block(struct btrfs_fs_info *fs_info, | |||
1361 | return; | 1346 | return; |
1362 | } | 1347 | } |
1363 | 1348 | ||
1349 | static inline int scrub_check_fsid(u8 fsid[], | ||
1350 | struct scrub_page *spage) | ||
1351 | { | ||
1352 | struct btrfs_fs_devices *fs_devices = spage->dev->fs_devices; | ||
1353 | int ret; | ||
1354 | |||
1355 | ret = memcmp(fsid, fs_devices->fsid, BTRFS_UUID_SIZE); | ||
1356 | return !ret; | ||
1357 | } | ||
1358 | |||
1364 | static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info, | 1359 | static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info, |
1365 | struct scrub_block *sblock, | 1360 | struct scrub_block *sblock, |
1366 | int is_metadata, int have_csum, | 1361 | int is_metadata, int have_csum, |
@@ -1380,7 +1375,7 @@ static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info, | |||
1380 | h = (struct btrfs_header *)mapped_buffer; | 1375 | h = (struct btrfs_header *)mapped_buffer; |
1381 | 1376 | ||
1382 | if (sblock->pagev[0]->logical != btrfs_stack_header_bytenr(h) || | 1377 | if (sblock->pagev[0]->logical != btrfs_stack_header_bytenr(h) || |
1383 | memcmp(h->fsid, fs_info->fsid, BTRFS_UUID_SIZE) || | 1378 | !scrub_check_fsid(h->fsid, sblock->pagev[0]) || |
1384 | memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid, | 1379 | memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid, |
1385 | BTRFS_UUID_SIZE)) { | 1380 | BTRFS_UUID_SIZE)) { |
1386 | sblock->header_error = 1; | 1381 | sblock->header_error = 1; |
@@ -1616,7 +1611,8 @@ static void scrub_wr_bio_end_io(struct bio *bio, int err) | |||
1616 | sbio->err = err; | 1611 | sbio->err = err; |
1617 | sbio->bio = bio; | 1612 | sbio->bio = bio; |
1618 | 1613 | ||
1619 | btrfs_init_work(&sbio->work, scrub_wr_bio_end_io_worker, NULL, NULL); | 1614 | btrfs_init_work(&sbio->work, btrfs_scrubwrc_helper, |
1615 | scrub_wr_bio_end_io_worker, NULL, NULL); | ||
1620 | btrfs_queue_work(fs_info->scrub_wr_completion_workers, &sbio->work); | 1616 | btrfs_queue_work(fs_info->scrub_wr_completion_workers, &sbio->work); |
1621 | } | 1617 | } |
1622 | 1618 | ||
@@ -1750,14 +1746,13 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock) | |||
1750 | if (sblock->pagev[0]->generation != btrfs_stack_header_generation(h)) | 1746 | if (sblock->pagev[0]->generation != btrfs_stack_header_generation(h)) |
1751 | ++fail; | 1747 | ++fail; |
1752 | 1748 | ||
1753 | if (memcmp(h->fsid, fs_info->fsid, BTRFS_UUID_SIZE)) | 1749 | if (!scrub_check_fsid(h->fsid, sblock->pagev[0])) |
1754 | ++fail; | 1750 | ++fail; |
1755 | 1751 | ||
1756 | if (memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid, | 1752 | if (memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid, |
1757 | BTRFS_UUID_SIZE)) | 1753 | BTRFS_UUID_SIZE)) |
1758 | ++fail; | 1754 | ++fail; |
1759 | 1755 | ||
1760 | WARN_ON(sctx->nodesize != sctx->leafsize); | ||
1761 | len = sctx->nodesize - BTRFS_CSUM_SIZE; | 1756 | len = sctx->nodesize - BTRFS_CSUM_SIZE; |
1762 | mapped_size = PAGE_SIZE - BTRFS_CSUM_SIZE; | 1757 | mapped_size = PAGE_SIZE - BTRFS_CSUM_SIZE; |
1763 | p = ((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE; | 1758 | p = ((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE; |
@@ -1790,8 +1785,6 @@ static int scrub_checksum_super(struct scrub_block *sblock) | |||
1790 | { | 1785 | { |
1791 | struct btrfs_super_block *s; | 1786 | struct btrfs_super_block *s; |
1792 | struct scrub_ctx *sctx = sblock->sctx; | 1787 | struct scrub_ctx *sctx = sblock->sctx; |
1793 | struct btrfs_root *root = sctx->dev_root; | ||
1794 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
1795 | u8 calculated_csum[BTRFS_CSUM_SIZE]; | 1788 | u8 calculated_csum[BTRFS_CSUM_SIZE]; |
1796 | u8 on_disk_csum[BTRFS_CSUM_SIZE]; | 1789 | u8 on_disk_csum[BTRFS_CSUM_SIZE]; |
1797 | struct page *page; | 1790 | struct page *page; |
@@ -1816,7 +1809,7 @@ static int scrub_checksum_super(struct scrub_block *sblock) | |||
1816 | if (sblock->pagev[0]->generation != btrfs_super_generation(s)) | 1809 | if (sblock->pagev[0]->generation != btrfs_super_generation(s)) |
1817 | ++fail_gen; | 1810 | ++fail_gen; |
1818 | 1811 | ||
1819 | if (memcmp(s->fsid, fs_info->fsid, BTRFS_UUID_SIZE)) | 1812 | if (!scrub_check_fsid(s->fsid, sblock->pagev[0])) |
1820 | ++fail_cor; | 1813 | ++fail_cor; |
1821 | 1814 | ||
1822 | len = BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE; | 1815 | len = BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE; |
@@ -2195,7 +2188,6 @@ static int scrub_extent(struct scrub_ctx *sctx, u64 logical, u64 len, | |||
2195 | sctx->stat.data_bytes_scrubbed += len; | 2188 | sctx->stat.data_bytes_scrubbed += len; |
2196 | spin_unlock(&sctx->stat_lock); | 2189 | spin_unlock(&sctx->stat_lock); |
2197 | } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { | 2190 | } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { |
2198 | WARN_ON(sctx->nodesize != sctx->leafsize); | ||
2199 | blocksize = sctx->nodesize; | 2191 | blocksize = sctx->nodesize; |
2200 | spin_lock(&sctx->stat_lock); | 2192 | spin_lock(&sctx->stat_lock); |
2201 | sctx->stat.tree_extents_scrubbed++; | 2193 | sctx->stat.tree_extents_scrubbed++; |
@@ -2486,7 +2478,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, | |||
2486 | btrfs_item_key_to_cpu(l, &key, slot); | 2478 | btrfs_item_key_to_cpu(l, &key, slot); |
2487 | 2479 | ||
2488 | if (key.type == BTRFS_METADATA_ITEM_KEY) | 2480 | if (key.type == BTRFS_METADATA_ITEM_KEY) |
2489 | bytes = root->leafsize; | 2481 | bytes = root->nodesize; |
2490 | else | 2482 | else |
2491 | bytes = key.offset; | 2483 | bytes = key.offset; |
2492 | 2484 | ||
@@ -2713,7 +2705,7 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx, | |||
2713 | if (found_key.objectid != scrub_dev->devid) | 2705 | if (found_key.objectid != scrub_dev->devid) |
2714 | break; | 2706 | break; |
2715 | 2707 | ||
2716 | if (btrfs_key_type(&found_key) != BTRFS_DEV_EXTENT_KEY) | 2708 | if (found_key.type != BTRFS_DEV_EXTENT_KEY) |
2717 | break; | 2709 | break; |
2718 | 2710 | ||
2719 | if (found_key.offset >= end) | 2711 | if (found_key.offset >= end) |
@@ -2827,11 +2819,16 @@ static noinline_for_stack int scrub_supers(struct scrub_ctx *sctx, | |||
2827 | if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) | 2819 | if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) |
2828 | return -EIO; | 2820 | return -EIO; |
2829 | 2821 | ||
2830 | gen = root->fs_info->last_trans_committed; | 2822 | /* Seed devices of a new filesystem have their own generation. */ |
2823 | if (scrub_dev->fs_devices != root->fs_info->fs_devices) | ||
2824 | gen = scrub_dev->generation; | ||
2825 | else | ||
2826 | gen = root->fs_info->last_trans_committed; | ||
2831 | 2827 | ||
2832 | for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { | 2828 | for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { |
2833 | bytenr = btrfs_sb_offset(i); | 2829 | bytenr = btrfs_sb_offset(i); |
2834 | if (bytenr + BTRFS_SUPER_INFO_SIZE > scrub_dev->total_bytes) | 2830 | if (bytenr + BTRFS_SUPER_INFO_SIZE > |
2831 | scrub_dev->commit_total_bytes) | ||
2835 | break; | 2832 | break; |
2836 | 2833 | ||
2837 | ret = scrub_pages(sctx, bytenr, BTRFS_SUPER_INFO_SIZE, bytenr, | 2834 | ret = scrub_pages(sctx, bytenr, BTRFS_SUPER_INFO_SIZE, bytenr, |
@@ -2904,21 +2901,11 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, | |||
2904 | struct scrub_ctx *sctx; | 2901 | struct scrub_ctx *sctx; |
2905 | int ret; | 2902 | int ret; |
2906 | struct btrfs_device *dev; | 2903 | struct btrfs_device *dev; |
2904 | struct rcu_string *name; | ||
2907 | 2905 | ||
2908 | if (btrfs_fs_closing(fs_info)) | 2906 | if (btrfs_fs_closing(fs_info)) |
2909 | return -EINVAL; | 2907 | return -EINVAL; |
2910 | 2908 | ||
2911 | /* | ||
2912 | * check some assumptions | ||
2913 | */ | ||
2914 | if (fs_info->chunk_root->nodesize != fs_info->chunk_root->leafsize) { | ||
2915 | btrfs_err(fs_info, | ||
2916 | "scrub: size assumption nodesize == leafsize (%d == %d) fails", | ||
2917 | fs_info->chunk_root->nodesize, | ||
2918 | fs_info->chunk_root->leafsize); | ||
2919 | return -EINVAL; | ||
2920 | } | ||
2921 | |||
2922 | if (fs_info->chunk_root->nodesize > BTRFS_STRIPE_LEN) { | 2909 | if (fs_info->chunk_root->nodesize > BTRFS_STRIPE_LEN) { |
2923 | /* | 2910 | /* |
2924 | * in this case scrub is unable to calculate the checksum | 2911 | * in this case scrub is unable to calculate the checksum |
@@ -2965,6 +2952,16 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, | |||
2965 | return -ENODEV; | 2952 | return -ENODEV; |
2966 | } | 2953 | } |
2967 | 2954 | ||
2955 | if (!is_dev_replace && !readonly && !dev->writeable) { | ||
2956 | mutex_unlock(&fs_info->fs_devices->device_list_mutex); | ||
2957 | rcu_read_lock(); | ||
2958 | name = rcu_dereference(dev->name); | ||
2959 | btrfs_err(fs_info, "scrub: device %s is not writable", | ||
2960 | name->str); | ||
2961 | rcu_read_unlock(); | ||
2962 | return -EROFS; | ||
2963 | } | ||
2964 | |||
2968 | mutex_lock(&fs_info->scrub_lock); | 2965 | mutex_lock(&fs_info->scrub_lock); |
2969 | if (!dev->in_fs_metadata || dev->is_tgtdev_for_dev_replace) { | 2966 | if (!dev->in_fs_metadata || dev->is_tgtdev_for_dev_replace) { |
2970 | mutex_unlock(&fs_info->scrub_lock); | 2967 | mutex_unlock(&fs_info->scrub_lock); |
@@ -3203,7 +3200,8 @@ static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len, | |||
3203 | nocow_ctx->len = len; | 3200 | nocow_ctx->len = len; |
3204 | nocow_ctx->mirror_num = mirror_num; | 3201 | nocow_ctx->mirror_num = mirror_num; |
3205 | nocow_ctx->physical_for_dev_replace = physical_for_dev_replace; | 3202 | nocow_ctx->physical_for_dev_replace = physical_for_dev_replace; |
3206 | btrfs_init_work(&nocow_ctx->work, copy_nocow_pages_worker, NULL, NULL); | 3203 | btrfs_init_work(&nocow_ctx->work, btrfs_scrubnc_helper, |
3204 | copy_nocow_pages_worker, NULL, NULL); | ||
3207 | INIT_LIST_HEAD(&nocow_ctx->inodes); | 3205 | INIT_LIST_HEAD(&nocow_ctx->inodes); |
3208 | btrfs_queue_work(fs_info->scrub_nocow_workers, | 3206 | btrfs_queue_work(fs_info->scrub_nocow_workers, |
3209 | &nocow_ctx->work); | 3207 | &nocow_ctx->work); |
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 6528aa662181..874828dd0a86 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c | |||
@@ -515,7 +515,8 @@ static int write_buf(struct file *filp, const void *buf, u32 len, loff_t *off) | |||
515 | set_fs(KERNEL_DS); | 515 | set_fs(KERNEL_DS); |
516 | 516 | ||
517 | while (pos < len) { | 517 | while (pos < len) { |
518 | ret = vfs_write(filp, (char *)buf + pos, len - pos, off); | 518 | ret = vfs_write(filp, (__force const char __user *)buf + pos, |
519 | len - pos, off); | ||
519 | /* TODO handle that correctly */ | 520 | /* TODO handle that correctly */ |
520 | /*if (ret == -ERESTARTSYS) { | 521 | /*if (ret == -ERESTARTSYS) { |
521 | continue; | 522 | continue; |
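An aside on the cast in this hunk: vfs_write() takes a const char __user * pointer, so handing it a kernel buffer only works under set_fs(KERNEL_DS), and the __force annotation tells sparse that the address-space violation is intentional. A minimal sketch of the idiom, with error handling elided (the old_fs save/restore is assumed from the surrounding function, not shown in this hunk):

    mm_segment_t old_fs = get_fs();

    set_fs(KERNEL_DS);	/* "user" pointers may now be kernel addresses */
    ret = vfs_write(filp, (__force const char __user *)buf + pos,
                    len - pos, off);
    set_fs(old_fs);	/* always restore the previous address limit */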
@@ -985,11 +986,13 @@ static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path, | |||
985 | int num; | 986 | int num; |
986 | u8 type; | 987 | u8 type; |
987 | 988 | ||
988 | if (found_key->type == BTRFS_XATTR_ITEM_KEY) | 989 | /* |
989 | buf_len = BTRFS_MAX_XATTR_SIZE(root); | 990 | * Start with a small buffer (1 page). If later we end up needing more |
990 | else | 991 | * space, which can happen for xattrs on a fs with a leaf size greater |
991 | buf_len = PATH_MAX; | 992 | * than the page size, attempt to increase the buffer. Typically xattr |
992 | 993 | * values are small. | |
994 | */ | ||
995 | buf_len = PATH_MAX; | ||
993 | buf = kmalloc(buf_len, GFP_NOFS); | 996 | buf = kmalloc(buf_len, GFP_NOFS); |
994 | if (!buf) { | 997 | if (!buf) { |
995 | ret = -ENOMEM; | 998 | ret = -ENOMEM; |
@@ -1016,7 +1019,7 @@ static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path, | |||
1016 | ret = -ENAMETOOLONG; | 1019 | ret = -ENAMETOOLONG; |
1017 | goto out; | 1020 | goto out; |
1018 | } | 1021 | } |
1019 | if (name_len + data_len > buf_len) { | 1022 | if (name_len + data_len > BTRFS_MAX_XATTR_SIZE(root)) { |
1020 | ret = -E2BIG; | 1023 | ret = -E2BIG; |
1021 | goto out; | 1024 | goto out; |
1022 | } | 1025 | } |
@@ -1024,12 +1027,34 @@ static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path, | |||
1024 | /* | 1027 | /* |
1025 | * Path too long | 1028 | * Path too long |
1026 | */ | 1029 | */ |
1027 | if (name_len + data_len > buf_len) { | 1030 | if (name_len + data_len > PATH_MAX) { |
1028 | ret = -ENAMETOOLONG; | 1031 | ret = -ENAMETOOLONG; |
1029 | goto out; | 1032 | goto out; |
1030 | } | 1033 | } |
1031 | } | 1034 | } |
1032 | 1035 | ||
1036 | if (name_len + data_len > buf_len) { | ||
1037 | buf_len = name_len + data_len; | ||
1038 | if (is_vmalloc_addr(buf)) { | ||
1039 | vfree(buf); | ||
1040 | buf = NULL; | ||
1041 | } else { | ||
1042 | char *tmp = krealloc(buf, buf_len, | ||
1043 | GFP_NOFS | __GFP_NOWARN); | ||
1044 | |||
1045 | if (!tmp) | ||
1046 | kfree(buf); | ||
1047 | buf = tmp; | ||
1048 | } | ||
1049 | if (!buf) { | ||
1050 | buf = vmalloc(buf_len); | ||
1051 | if (!buf) { | ||
1052 | ret = -ENOMEM; | ||
1053 | goto out; | ||
1054 | } | ||
1055 | } | ||
1056 | } | ||
1057 | |||
1033 | read_extent_buffer(eb, buf, (unsigned long)(di + 1), | 1058 | read_extent_buffer(eb, buf, (unsigned long)(di + 1), |
1034 | name_len + data_len); | 1059 | name_len + data_len); |
1035 | 1060 | ||
@@ -1050,7 +1075,7 @@ static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path, | |||
1050 | } | 1075 | } |
1051 | 1076 | ||
1052 | out: | 1077 | out: |
1053 | kfree(buf); | 1078 | kvfree(buf); |
1054 | return ret; | 1079 | return ret; |
1055 | } | 1080 | } |
1056 | 1081 | ||
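The growth path above follows a common kernel pattern: try krealloc() while the buffer is slab-backed, fall back to vmalloc() when that fails or when the buffer already came from vmalloc, and release with kvfree() at the end so either allocator is handled. A hedged sketch of the same logic as a standalone helper (grow_buf is a hypothetical name, not part of the btrfs source):

    static void *grow_buf(void *buf, size_t new_len)
    {
    	void *tmp;

    	if (is_vmalloc_addr(buf)) {
    		/* vmalloc'ed memory cannot be krealloc'ed; start over */
    		vfree(buf);
    	} else {
    		tmp = krealloc(buf, new_len, GFP_NOFS | __GFP_NOWARN);
    		if (tmp)
    			return tmp;
    		kfree(buf);	/* krealloc failed; drop the old buffer */
    	}
    	return vmalloc(new_len);	/* caller must check for NULL */
    }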
@@ -3302,7 +3327,7 @@ static int wait_for_parent_move(struct send_ctx *sctx, | |||
3302 | if (ret < 0 && ret != -ENOENT) { | 3327 | if (ret < 0 && ret != -ENOENT) { |
3303 | goto out; | 3328 | goto out; |
3304 | } else if (ret == -ENOENT) { | 3329 | } else if (ret == -ENOENT) { |
3305 | ret = 1; | 3330 | ret = 0; |
3306 | break; | 3331 | break; |
3307 | } | 3332 | } |
3308 | 3333 | ||
@@ -5703,7 +5728,7 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) | |||
5703 | NULL); | 5728 | NULL); |
5704 | sort_clone_roots = 1; | 5729 | sort_clone_roots = 1; |
5705 | 5730 | ||
5706 | current->journal_info = (void *)BTRFS_SEND_TRANS_STUB; | 5731 | current->journal_info = BTRFS_SEND_TRANS_STUB; |
5707 | ret = send_subvol(sctx); | 5732 | ret = send_subvol(sctx); |
5708 | current->journal_info = NULL; | 5733 | current->journal_info = NULL; |
5709 | if (ret < 0) | 5734 | if (ret < 0) |
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index c4124de4435b..a2b97ef10317 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c | |||
@@ -60,6 +60,7 @@ | |||
60 | #include "backref.h" | 60 | #include "backref.h" |
61 | #include "tests/btrfs-tests.h" | 61 | #include "tests/btrfs-tests.h" |
62 | 62 | ||
63 | #include "qgroup.h" | ||
63 | #define CREATE_TRACE_POINTS | 64 | #define CREATE_TRACE_POINTS |
64 | #include <trace/events/btrfs.h> | 65 | #include <trace/events/btrfs.h> |
65 | 66 | ||
@@ -307,13 +308,7 @@ void __btrfs_panic(struct btrfs_fs_info *fs_info, const char *function, | |||
307 | 308 | ||
308 | static void btrfs_put_super(struct super_block *sb) | 309 | static void btrfs_put_super(struct super_block *sb) |
309 | { | 310 | { |
310 | (void)close_ctree(btrfs_sb(sb)->tree_root); | 311 | close_ctree(btrfs_sb(sb)->tree_root); |
311 | /* FIXME: need to fix VFS to return error? */ | ||
312 | /* AV: return it _where_? ->put_super() can be triggered by any number | ||
313 | * of async events, up to and including delivery of SIGKILL to the | ||
314 | * last process that kept it busy. Or segfault in the aforementioned | ||
315 | * process... Whom would you report that to? | ||
316 | */ | ||
317 | } | 312 | } |
318 | 313 | ||
319 | enum { | 314 | enum { |
@@ -400,7 +395,6 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
400 | int ret = 0; | 395 | int ret = 0; |
401 | char *compress_type; | 396 | char *compress_type; |
402 | bool compress_force = false; | 397 | bool compress_force = false; |
403 | bool compress = false; | ||
404 | 398 | ||
405 | cache_gen = btrfs_super_cache_generation(root->fs_info->super_copy); | 399 | cache_gen = btrfs_super_cache_generation(root->fs_info->super_copy); |
406 | if (cache_gen) | 400 | if (cache_gen) |
@@ -478,7 +472,6 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
478 | /* Fallthrough */ | 472 | /* Fallthrough */ |
479 | case Opt_compress: | 473 | case Opt_compress: |
480 | case Opt_compress_type: | 474 | case Opt_compress_type: |
481 | compress = true; | ||
482 | if (token == Opt_compress || | 475 | if (token == Opt_compress || |
483 | token == Opt_compress_force || | 476 | token == Opt_compress_force || |
484 | strcmp(args[0].from, "zlib") == 0) { | 477 | strcmp(args[0].from, "zlib") == 0) { |
@@ -508,11 +501,18 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
508 | btrfs_set_and_info(root, FORCE_COMPRESS, | 501 | btrfs_set_and_info(root, FORCE_COMPRESS, |
509 | "force %s compression", | 502 | "force %s compression", |
510 | compress_type); | 503 | compress_type); |
511 | } else if (compress) { | 504 | } else { |
512 | if (!btrfs_test_opt(root, COMPRESS)) | 505 | if (!btrfs_test_opt(root, COMPRESS)) |
513 | btrfs_info(root->fs_info, | 506 | btrfs_info(root->fs_info, |
514 | "btrfs: use %s compression", | 507 | "btrfs: use %s compression", |
515 | compress_type); | 508 | compress_type); |
509 | /* | ||
510 | * If we remount from compress-force=xxx to | ||
511 | * compress=xxx, we need clear FORCE_COMPRESS | ||
512 | * flag, otherwise, there is no way for users | ||
513 | * to disable forcible compression separately. | ||
514 | */ | ||
515 | btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS); | ||
516 | } | 516 | } |
517 | break; | 517 | break; |
518 | case Opt_ssd: | 518 | case Opt_ssd: |
@@ -1014,7 +1014,7 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry) | |||
1014 | seq_puts(seq, ",nodatacow"); | 1014 | seq_puts(seq, ",nodatacow"); |
1015 | if (btrfs_test_opt(root, NOBARRIER)) | 1015 | if (btrfs_test_opt(root, NOBARRIER)) |
1016 | seq_puts(seq, ",nobarrier"); | 1016 | seq_puts(seq, ",nobarrier"); |
1017 | if (info->max_inline != 8192 * 1024) | 1017 | if (info->max_inline != BTRFS_DEFAULT_MAX_INLINE) |
1018 | seq_printf(seq, ",max_inline=%llu", info->max_inline); | 1018 | seq_printf(seq, ",max_inline=%llu", info->max_inline); |
1019 | if (info->alloc_start != 0) | 1019 | if (info->alloc_start != 0) |
1020 | seq_printf(seq, ",alloc_start=%llu", info->alloc_start); | 1020 | seq_printf(seq, ",alloc_start=%llu", info->alloc_start); |
@@ -1215,6 +1215,56 @@ static struct dentry *mount_subvol(const char *subvol_name, int flags, | |||
1215 | return root; | 1215 | return root; |
1216 | } | 1216 | } |
1217 | 1217 | ||
1218 | static int parse_security_options(char *orig_opts, | ||
1219 | struct security_mnt_opts *sec_opts) | ||
1220 | { | ||
1221 | char *secdata = NULL; | ||
1222 | int ret = 0; | ||
1223 | |||
1224 | secdata = alloc_secdata(); | ||
1225 | if (!secdata) | ||
1226 | return -ENOMEM; | ||
1227 | ret = security_sb_copy_data(orig_opts, secdata); | ||
1228 | if (ret) { | ||
1229 | free_secdata(secdata); | ||
1230 | return ret; | ||
1231 | } | ||
1232 | ret = security_sb_parse_opts_str(secdata, sec_opts); | ||
1233 | free_secdata(secdata); | ||
1234 | return ret; | ||
1235 | } | ||
1236 | |||
1237 | static int setup_security_options(struct btrfs_fs_info *fs_info, | ||
1238 | struct super_block *sb, | ||
1239 | struct security_mnt_opts *sec_opts) | ||
1240 | { | ||
1241 | int ret = 0; | ||
1242 | |||
1243 | /* | ||
1244 | * Call security_sb_set_mnt_opts() to check whether new sec_opts | ||
1245 | * is valid. | ||
1246 | */ | ||
1247 | ret = security_sb_set_mnt_opts(sb, sec_opts, 0, NULL); | ||
1248 | if (ret) | ||
1249 | return ret; | ||
1250 | |||
1251 | #ifdef CONFIG_SECURITY | ||
1252 | if (!fs_info->security_opts.num_mnt_opts) { | ||
1253 | /* first time security setup, copy sec_opts to fs_info */ | ||
1254 | memcpy(&fs_info->security_opts, sec_opts, sizeof(*sec_opts)); | ||
1255 | } else { | ||
1256 | /* | ||
1257 | * Since SELinux (the only LSM that supports security_mnt_opts) | ||
1258 | * does NOT support changing context during remount/mount of the | ||
1259 | * same sb, this must be the same or part of the same security | ||
1260 | * options, so just free it. | ||
1261 | */ | ||
1262 | security_free_mnt_opts(sec_opts); | ||
1263 | } | ||
1264 | #endif | ||
1265 | return ret; | ||
1266 | } | ||
1267 | |||
1218 | /* | 1268 | /* |
1219 | * Find a superblock for the given device / mount point. | 1269 | * Find a superblock for the given device / mount point. |
1220 | * | 1270 | * |
@@ -1229,6 +1279,7 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, | |||
1229 | struct dentry *root; | 1279 | struct dentry *root; |
1230 | struct btrfs_fs_devices *fs_devices = NULL; | 1280 | struct btrfs_fs_devices *fs_devices = NULL; |
1231 | struct btrfs_fs_info *fs_info = NULL; | 1281 | struct btrfs_fs_info *fs_info = NULL; |
1282 | struct security_mnt_opts new_sec_opts; | ||
1232 | fmode_t mode = FMODE_READ; | 1283 | fmode_t mode = FMODE_READ; |
1233 | char *subvol_name = NULL; | 1284 | char *subvol_name = NULL; |
1234 | u64 subvol_objectid = 0; | 1285 | u64 subvol_objectid = 0; |
@@ -1251,9 +1302,16 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, | |||
1251 | return root; | 1302 | return root; |
1252 | } | 1303 | } |
1253 | 1304 | ||
1305 | security_init_mnt_opts(&new_sec_opts); | ||
1306 | if (data) { | ||
1307 | error = parse_security_options(data, &new_sec_opts); | ||
1308 | if (error) | ||
1309 | return ERR_PTR(error); | ||
1310 | } | ||
1311 | |||
1254 | error = btrfs_scan_one_device(device_name, mode, fs_type, &fs_devices); | 1312 | error = btrfs_scan_one_device(device_name, mode, fs_type, &fs_devices); |
1255 | if (error) | 1313 | if (error) |
1256 | return ERR_PTR(error); | 1314 | goto error_sec_opts; |
1257 | 1315 | ||
1258 | /* | 1316 | /* |
1259 | * Setup a dummy root and fs_info for test/set super. This is because | 1317 | * Setup a dummy root and fs_info for test/set super. This is because |
@@ -1262,13 +1320,16 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, | |||
1262 | * then open_ctree will properly initialize everything later. | 1320 | * then open_ctree will properly initialize everything later. |
1263 | */ | 1321 | */ |
1264 | fs_info = kzalloc(sizeof(struct btrfs_fs_info), GFP_NOFS); | 1322 | fs_info = kzalloc(sizeof(struct btrfs_fs_info), GFP_NOFS); |
1265 | if (!fs_info) | 1323 | if (!fs_info) { |
1266 | return ERR_PTR(-ENOMEM); | 1324 | error = -ENOMEM; |
1325 | goto error_sec_opts; | ||
1326 | } | ||
1267 | 1327 | ||
1268 | fs_info->fs_devices = fs_devices; | 1328 | fs_info->fs_devices = fs_devices; |
1269 | 1329 | ||
1270 | fs_info->super_copy = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_NOFS); | 1330 | fs_info->super_copy = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_NOFS); |
1271 | fs_info->super_for_commit = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_NOFS); | 1331 | fs_info->super_for_commit = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_NOFS); |
1332 | security_init_mnt_opts(&fs_info->security_opts); | ||
1272 | if (!fs_info->super_copy || !fs_info->super_for_commit) { | 1333 | if (!fs_info->super_copy || !fs_info->super_for_commit) { |
1273 | error = -ENOMEM; | 1334 | error = -ENOMEM; |
1274 | goto error_fs_info; | 1335 | goto error_fs_info; |
@@ -1306,8 +1367,19 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, | |||
1306 | } | 1367 | } |
1307 | 1368 | ||
1308 | root = !error ? get_default_root(s, subvol_objectid) : ERR_PTR(error); | 1369 | root = !error ? get_default_root(s, subvol_objectid) : ERR_PTR(error); |
1309 | if (IS_ERR(root)) | 1370 | if (IS_ERR(root)) { |
1371 | deactivate_locked_super(s); | ||
1372 | error = PTR_ERR(root); | ||
1373 | goto error_sec_opts; | ||
1374 | } | ||
1375 | |||
1376 | fs_info = btrfs_sb(s); | ||
1377 | error = setup_security_options(fs_info, s, &new_sec_opts); | ||
1378 | if (error) { | ||
1379 | dput(root); | ||
1310 | deactivate_locked_super(s); | 1380 | deactivate_locked_super(s); |
1381 | goto error_sec_opts; | ||
1382 | } | ||
1311 | 1383 | ||
1312 | return root; | 1384 | return root; |
1313 | 1385 | ||
@@ -1315,6 +1387,8 @@ error_close_devices: | |||
1315 | btrfs_close_devices(fs_devices); | 1387 | btrfs_close_devices(fs_devices); |
1316 | error_fs_info: | 1388 | error_fs_info: |
1317 | free_fs_info(fs_info); | 1389 | free_fs_info(fs_info); |
1390 | error_sec_opts: | ||
1391 | security_free_mnt_opts(&new_sec_opts); | ||
1318 | return ERR_PTR(error); | 1392 | return ERR_PTR(error); |
1319 | } | 1393 | } |
1320 | 1394 | ||
@@ -1396,6 +1470,21 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) | |||
1396 | sync_filesystem(sb); | 1470 | sync_filesystem(sb); |
1397 | btrfs_remount_prepare(fs_info); | 1471 | btrfs_remount_prepare(fs_info); |
1398 | 1472 | ||
1473 | if (data) { | ||
1474 | struct security_mnt_opts new_sec_opts; | ||
1475 | |||
1476 | security_init_mnt_opts(&new_sec_opts); | ||
1477 | ret = parse_security_options(data, &new_sec_opts); | ||
1478 | if (ret) | ||
1479 | goto restore; | ||
1480 | ret = setup_security_options(fs_info, sb, | ||
1481 | &new_sec_opts); | ||
1482 | if (ret) { | ||
1483 | security_free_mnt_opts(&new_sec_opts); | ||
1484 | goto restore; | ||
1485 | } | ||
1486 | } | ||
1487 | |||
1399 | ret = btrfs_parse_options(root, data); | 1488 | ret = btrfs_parse_options(root, data); |
1400 | if (ret) { | 1489 | if (ret) { |
1401 | ret = -EINVAL; | 1490 | ret = -EINVAL; |
@@ -1694,7 +1783,11 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
1694 | struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv; | 1783 | struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv; |
1695 | int ret; | 1784 | int ret; |
1696 | 1785 | ||
1697 | /* holding chunk_mutex to avoid allocating new chunks */ | 1786 | /* |
1787 | * holding chunk_mutex to avoid allocating new chunks, holding | ||
1788 | * device_list_mutex to avoid the device being removed | ||
1789 | */ | ||
1790 | mutex_lock(&fs_info->fs_devices->device_list_mutex); | ||
1698 | mutex_lock(&fs_info->chunk_mutex); | 1791 | mutex_lock(&fs_info->chunk_mutex); |
1699 | rcu_read_lock(); | 1792 | rcu_read_lock(); |
1700 | list_for_each_entry_rcu(found, head, list) { | 1793 | list_for_each_entry_rcu(found, head, list) { |
@@ -1735,11 +1828,13 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
1735 | ret = btrfs_calc_avail_data_space(fs_info->tree_root, &total_free_data); | 1828 | ret = btrfs_calc_avail_data_space(fs_info->tree_root, &total_free_data); |
1736 | if (ret) { | 1829 | if (ret) { |
1737 | mutex_unlock(&fs_info->chunk_mutex); | 1830 | mutex_unlock(&fs_info->chunk_mutex); |
1831 | mutex_unlock(&fs_info->fs_devices->device_list_mutex); | ||
1738 | return ret; | 1832 | return ret; |
1739 | } | 1833 | } |
1740 | buf->f_bavail += div_u64(total_free_data, factor); | 1834 | buf->f_bavail += div_u64(total_free_data, factor); |
1741 | buf->f_bavail = buf->f_bavail >> bits; | 1835 | buf->f_bavail = buf->f_bavail >> bits; |
1742 | mutex_unlock(&fs_info->chunk_mutex); | 1836 | mutex_unlock(&fs_info->chunk_mutex); |
1837 | mutex_unlock(&fs_info->fs_devices->device_list_mutex); | ||
1743 | 1838 | ||
1744 | buf->f_type = BTRFS_SUPER_MAGIC; | 1839 | buf->f_type = BTRFS_SUPER_MAGIC; |
1745 | buf->f_bsize = dentry->d_sb->s_blocksize; | 1840 | buf->f_bsize = dentry->d_sb->s_blocksize; |
@@ -1769,7 +1864,7 @@ static struct file_system_type btrfs_fs_type = { | |||
1769 | .name = "btrfs", | 1864 | .name = "btrfs", |
1770 | .mount = btrfs_mount, | 1865 | .mount = btrfs_mount, |
1771 | .kill_sb = btrfs_kill_super, | 1866 | .kill_sb = btrfs_kill_super, |
1772 | .fs_flags = FS_REQUIRES_DEV, | 1867 | .fs_flags = FS_REQUIRES_DEV | FS_BINARY_MOUNTDATA, |
1773 | }; | 1868 | }; |
1774 | MODULE_ALIAS_FS("btrfs"); | 1869 | MODULE_ALIAS_FS("btrfs"); |
1775 | 1870 | ||
@@ -1993,11 +2088,15 @@ static int __init init_btrfs_fs(void) | |||
1993 | 2088 | ||
1994 | err = btrfs_prelim_ref_init(); | 2089 | err = btrfs_prelim_ref_init(); |
1995 | if (err) | 2090 | if (err) |
2091 | goto free_delayed_ref; | ||
2092 | |||
2093 | err = btrfs_end_io_wq_init(); | ||
2094 | if (err) | ||
1996 | goto free_prelim_ref; | 2095 | goto free_prelim_ref; |
1997 | 2096 | ||
1998 | err = btrfs_interface_init(); | 2097 | err = btrfs_interface_init(); |
1999 | if (err) | 2098 | if (err) |
2000 | goto free_delayed_ref; | 2099 | goto free_end_io_wq; |
2001 | 2100 | ||
2002 | btrfs_init_lockdep(); | 2101 | btrfs_init_lockdep(); |
2003 | 2102 | ||
@@ -2015,6 +2114,8 @@ static int __init init_btrfs_fs(void) | |||
2015 | 2114 | ||
2016 | unregister_ioctl: | 2115 | unregister_ioctl: |
2017 | btrfs_interface_exit(); | 2116 | btrfs_interface_exit(); |
2117 | free_end_io_wq: | ||
2118 | btrfs_end_io_wq_exit(); | ||
2018 | free_prelim_ref: | 2119 | free_prelim_ref: |
2019 | btrfs_prelim_ref_exit(); | 2120 | btrfs_prelim_ref_exit(); |
2020 | free_delayed_ref: | 2121 | free_delayed_ref: |
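The reshuffled error labels above keep the usual init/unwind symmetry: inserting btrfs_end_io_wq_init() between btrfs_prelim_ref_init() and btrfs_interface_init() means the step after it now jumps to a new free_end_io_wq label, and each label tears down exactly what was initialized before the failing step, in reverse order. Schematically (abbreviated names, not actual btrfs functions):

    err = a_init();
    if (err)
    	return err;
    err = b_init();		/* newly inserted step */
    if (err)
    	goto free_a;
    err = c_init();
    if (err)
    	goto free_b;	/* previously jumped to free_a */
    return 0;

    free_b:
    	b_exit();
    free_a:
    	a_exit();
    	return err;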
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index 78699364f537..b2e7bb4393f6 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c | |||
@@ -242,7 +242,7 @@ static ssize_t global_rsv_size_show(struct kobject *kobj, | |||
242 | struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv; | 242 | struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv; |
243 | return btrfs_show_u64(&block_rsv->size, &block_rsv->lock, buf); | 243 | return btrfs_show_u64(&block_rsv->size, &block_rsv->lock, buf); |
244 | } | 244 | } |
245 | BTRFS_ATTR(global_rsv_size, 0444, global_rsv_size_show); | 245 | BTRFS_ATTR(global_rsv_size, global_rsv_size_show); |
246 | 246 | ||
247 | static ssize_t global_rsv_reserved_show(struct kobject *kobj, | 247 | static ssize_t global_rsv_reserved_show(struct kobject *kobj, |
248 | struct kobj_attribute *a, char *buf) | 248 | struct kobj_attribute *a, char *buf) |
@@ -251,7 +251,7 @@ static ssize_t global_rsv_reserved_show(struct kobject *kobj, | |||
251 | struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv; | 251 | struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv; |
252 | return btrfs_show_u64(&block_rsv->reserved, &block_rsv->lock, buf); | 252 | return btrfs_show_u64(&block_rsv->reserved, &block_rsv->lock, buf); |
253 | } | 253 | } |
254 | BTRFS_ATTR(global_rsv_reserved, 0444, global_rsv_reserved_show); | 254 | BTRFS_ATTR(global_rsv_reserved, global_rsv_reserved_show); |
255 | 255 | ||
256 | #define to_space_info(_kobj) container_of(_kobj, struct btrfs_space_info, kobj) | 256 | #define to_space_info(_kobj) container_of(_kobj, struct btrfs_space_info, kobj) |
257 | #define to_raid_kobj(_kobj) container_of(_kobj, struct raid_kobject, kobj) | 257 | #define to_raid_kobj(_kobj) container_of(_kobj, struct raid_kobject, kobj) |
@@ -306,7 +306,7 @@ static ssize_t btrfs_space_info_show_##field(struct kobject *kobj, \ | |||
306 | struct btrfs_space_info *sinfo = to_space_info(kobj); \ | 306 | struct btrfs_space_info *sinfo = to_space_info(kobj); \ |
307 | return btrfs_show_u64(&sinfo->field, &sinfo->lock, buf); \ | 307 | return btrfs_show_u64(&sinfo->field, &sinfo->lock, buf); \ |
308 | } \ | 308 | } \ |
309 | BTRFS_ATTR(field, 0444, btrfs_space_info_show_##field) | 309 | BTRFS_ATTR(field, btrfs_space_info_show_##field) |
310 | 310 | ||
311 | static ssize_t btrfs_space_info_show_total_bytes_pinned(struct kobject *kobj, | 311 | static ssize_t btrfs_space_info_show_total_bytes_pinned(struct kobject *kobj, |
312 | struct kobj_attribute *a, | 312 | struct kobj_attribute *a, |
@@ -325,7 +325,7 @@ SPACE_INFO_ATTR(bytes_reserved); | |||
325 | SPACE_INFO_ATTR(bytes_may_use); | 325 | SPACE_INFO_ATTR(bytes_may_use); |
326 | SPACE_INFO_ATTR(disk_used); | 326 | SPACE_INFO_ATTR(disk_used); |
327 | SPACE_INFO_ATTR(disk_total); | 327 | SPACE_INFO_ATTR(disk_total); |
328 | BTRFS_ATTR(total_bytes_pinned, 0444, btrfs_space_info_show_total_bytes_pinned); | 328 | BTRFS_ATTR(total_bytes_pinned, btrfs_space_info_show_total_bytes_pinned); |
329 | 329 | ||
330 | static struct attribute *space_info_attrs[] = { | 330 | static struct attribute *space_info_attrs[] = { |
331 | BTRFS_ATTR_PTR(flags), | 331 | BTRFS_ATTR_PTR(flags), |
@@ -363,7 +363,8 @@ static ssize_t btrfs_label_show(struct kobject *kobj, | |||
363 | struct kobj_attribute *a, char *buf) | 363 | struct kobj_attribute *a, char *buf) |
364 | { | 364 | { |
365 | struct btrfs_fs_info *fs_info = to_fs_info(kobj); | 365 | struct btrfs_fs_info *fs_info = to_fs_info(kobj); |
366 | return snprintf(buf, PAGE_SIZE, "%s\n", fs_info->super_copy->label); | 366 | char *label = fs_info->super_copy->label; |
367 | return snprintf(buf, PAGE_SIZE, label[0] ? "%s\n" : "%s", label); | ||
367 | } | 368 | } |
368 | 369 | ||
369 | static ssize_t btrfs_label_store(struct kobject *kobj, | 370 | static ssize_t btrfs_label_store(struct kobject *kobj, |
@@ -374,8 +375,18 @@ static ssize_t btrfs_label_store(struct kobject *kobj, | |||
374 | struct btrfs_trans_handle *trans; | 375 | struct btrfs_trans_handle *trans; |
375 | struct btrfs_root *root = fs_info->fs_root; | 376 | struct btrfs_root *root = fs_info->fs_root; |
376 | int ret; | 377 | int ret; |
378 | size_t p_len; | ||
377 | 379 | ||
378 | if (len >= BTRFS_LABEL_SIZE) | 380 | if (fs_info->sb->s_flags & MS_RDONLY) |
381 | return -EROFS; | ||
382 | |||
383 | /* | ||
384 | * p_len is the length until the first occurrence of either | ||
385 | * '\n' or '\0' | ||
386 | */ | ||
387 | p_len = strcspn(buf, "\n"); | ||
388 | |||
389 | if (p_len >= BTRFS_LABEL_SIZE) | ||
379 | return -EINVAL; | 390 | return -EINVAL; |
380 | 391 | ||
381 | trans = btrfs_start_transaction(root, 0); | 392 | trans = btrfs_start_transaction(root, 0); |
@@ -383,7 +394,8 @@ static ssize_t btrfs_label_store(struct kobject *kobj, | |||
383 | return PTR_ERR(trans); | 394 | return PTR_ERR(trans); |
384 | 395 | ||
385 | spin_lock(&root->fs_info->super_lock); | 396 | spin_lock(&root->fs_info->super_lock); |
386 | strcpy(fs_info->super_copy->label, buf); | 397 | memset(fs_info->super_copy->label, 0, BTRFS_LABEL_SIZE); |
398 | memcpy(fs_info->super_copy->label, buf, p_len); | ||
387 | spin_unlock(&root->fs_info->super_lock); | 399 | spin_unlock(&root->fs_info->super_lock); |
388 | ret = btrfs_commit_transaction(trans, root); | 400 | ret = btrfs_commit_transaction(trans, root); |
389 | 401 | ||
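For reference, strcspn(buf, "\n") returns the number of bytes before the first '\n', or the full string length when there is none, so the trailing newline from echo is measured off rather than stored in the label. A tiny user-space illustration (assumed example, not from the kernel tree):

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
    	const char *buf = "mylabel\n";		/* what echo writes to sysfs */
    	size_t p_len = strcspn(buf, "\n");	/* 7: bytes before the '\n' */

    	printf("stored label length: %zu\n", p_len);
    	return 0;
    }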
@@ -392,14 +404,7 @@ static ssize_t btrfs_label_store(struct kobject *kobj, | |||
392 | 404 | ||
393 | return ret; | 405 | return ret; |
394 | } | 406 | } |
395 | BTRFS_ATTR_RW(label, 0644, btrfs_label_show, btrfs_label_store); | 407 | BTRFS_ATTR_RW(label, btrfs_label_show, btrfs_label_store); |
396 | |||
397 | static ssize_t btrfs_no_store(struct kobject *kobj, | ||
398 | struct kobj_attribute *a, | ||
399 | const char *buf, size_t len) | ||
400 | { | ||
401 | return -EPERM; | ||
402 | } | ||
403 | 408 | ||
404 | static ssize_t btrfs_nodesize_show(struct kobject *kobj, | 409 | static ssize_t btrfs_nodesize_show(struct kobject *kobj, |
405 | struct kobj_attribute *a, char *buf) | 410 | struct kobj_attribute *a, char *buf) |
@@ -409,7 +414,7 @@ static ssize_t btrfs_nodesize_show(struct kobject *kobj, | |||
409 | return snprintf(buf, PAGE_SIZE, "%u\n", fs_info->super_copy->nodesize); | 414 | return snprintf(buf, PAGE_SIZE, "%u\n", fs_info->super_copy->nodesize); |
410 | } | 415 | } |
411 | 416 | ||
412 | BTRFS_ATTR_RW(nodesize, 0444, btrfs_nodesize_show, btrfs_no_store); | 417 | BTRFS_ATTR(nodesize, btrfs_nodesize_show); |
413 | 418 | ||
414 | static ssize_t btrfs_sectorsize_show(struct kobject *kobj, | 419 | static ssize_t btrfs_sectorsize_show(struct kobject *kobj, |
415 | struct kobj_attribute *a, char *buf) | 420 | struct kobj_attribute *a, char *buf) |
@@ -419,7 +424,7 @@ static ssize_t btrfs_sectorsize_show(struct kobject *kobj, | |||
419 | return snprintf(buf, PAGE_SIZE, "%u\n", fs_info->super_copy->sectorsize); | 424 | return snprintf(buf, PAGE_SIZE, "%u\n", fs_info->super_copy->sectorsize); |
420 | } | 425 | } |
421 | 426 | ||
422 | BTRFS_ATTR_RW(sectorsize, 0444, btrfs_sectorsize_show, btrfs_no_store); | 427 | BTRFS_ATTR(sectorsize, btrfs_sectorsize_show); |
423 | 428 | ||
424 | static ssize_t btrfs_clone_alignment_show(struct kobject *kobj, | 429 | static ssize_t btrfs_clone_alignment_show(struct kobject *kobj, |
425 | struct kobj_attribute *a, char *buf) | 430 | struct kobj_attribute *a, char *buf) |
@@ -429,7 +434,7 @@ static ssize_t btrfs_clone_alignment_show(struct kobject *kobj, | |||
429 | return snprintf(buf, PAGE_SIZE, "%u\n", fs_info->super_copy->sectorsize); | 434 | return snprintf(buf, PAGE_SIZE, "%u\n", fs_info->super_copy->sectorsize); |
430 | } | 435 | } |
431 | 436 | ||
432 | BTRFS_ATTR_RW(clone_alignment, 0444, btrfs_clone_alignment_show, btrfs_no_store); | 437 | BTRFS_ATTR(clone_alignment, btrfs_clone_alignment_show); |
433 | 438 | ||
434 | static struct attribute *btrfs_attrs[] = { | 439 | static struct attribute *btrfs_attrs[] = { |
435 | BTRFS_ATTR_PTR(label), | 440 | BTRFS_ATTR_PTR(label), |
@@ -614,7 +619,7 @@ int btrfs_kobj_rm_device(struct btrfs_fs_info *fs_info, | |||
614 | if (!fs_info->device_dir_kobj) | 619 | if (!fs_info->device_dir_kobj) |
615 | return -EINVAL; | 620 | return -EINVAL; |
616 | 621 | ||
617 | if (one_device) { | 622 | if (one_device && one_device->bdev) { |
618 | disk = one_device->bdev->bd_part; | 623 | disk = one_device->bdev->bd_part; |
619 | disk_kobj = &part_to_dev(disk)->kobj; | 624 | disk_kobj = &part_to_dev(disk)->kobj; |
620 | 625 | ||
diff --git a/fs/btrfs/sysfs.h b/fs/btrfs/sysfs.h index ac46df37504c..f7dd298b3cf6 100644 --- a/fs/btrfs/sysfs.h +++ b/fs/btrfs/sysfs.h | |||
@@ -20,16 +20,20 @@ enum btrfs_feature_set { | |||
20 | .store = _store, \ | 20 | .store = _store, \ |
21 | } | 21 | } |
22 | 22 | ||
23 | #define BTRFS_ATTR_RW(_name, _mode, _show, _store) \ | 23 | #define BTRFS_ATTR_RW(_name, _show, _store) \ |
24 | static struct kobj_attribute btrfs_attr_##_name = \ | 24 | static struct kobj_attribute btrfs_attr_##_name = \ |
25 | __INIT_KOBJ_ATTR(_name, _mode, _show, _store) | 25 | __INIT_KOBJ_ATTR(_name, 0644, _show, _store) |
26 | #define BTRFS_ATTR(_name, _mode, _show) \ | 26 | |
27 | BTRFS_ATTR_RW(_name, _mode, _show, NULL) | 27 | #define BTRFS_ATTR(_name, _show) \ |
28 | static struct kobj_attribute btrfs_attr_##_name = \ | ||
29 | __INIT_KOBJ_ATTR(_name, 0444, _show, NULL) | ||
30 | |||
28 | #define BTRFS_ATTR_PTR(_name) (&btrfs_attr_##_name.attr) | 31 | #define BTRFS_ATTR_PTR(_name) (&btrfs_attr_##_name.attr) |
29 | 32 | ||
30 | #define BTRFS_RAID_ATTR(_name, _show) \ | 33 | #define BTRFS_RAID_ATTR(_name, _show) \ |
31 | static struct kobj_attribute btrfs_raid_attr_##_name = \ | 34 | static struct kobj_attribute btrfs_raid_attr_##_name = \ |
32 | __INIT_KOBJ_ATTR(_name, 0444, _show, NULL) | 35 | __INIT_KOBJ_ATTR(_name, 0444, _show, NULL) |
36 | |||
33 | #define BTRFS_RAID_ATTR_PTR(_name) (&btrfs_raid_attr_##_name.attr) | 37 | #define BTRFS_RAID_ATTR_PTR(_name) (&btrfs_raid_attr_##_name.attr) |
34 | 38 | ||
35 | 39 | ||
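To make the macro rework concrete, this is roughly what the new forms expand to (hand-expanded for illustration):

    /* BTRFS_ATTR(nodesize, btrfs_nodesize_show) becomes approximately: */
    static struct kobj_attribute btrfs_attr_nodesize =
    	__INIT_KOBJ_ATTR(nodesize, 0444, btrfs_nodesize_show, NULL);

    /* BTRFS_ATTR_RW(label, btrfs_label_show, btrfs_label_store) becomes: */
    static struct kobj_attribute btrfs_attr_label =
    	__INIT_KOBJ_ATTR(label, 0644, btrfs_label_show, btrfs_label_store);

Baking the mode into each macro removes the need for the btrfs_no_store() dummy deleted in sysfs.c and makes it impossible to declare a read-only attribute with a store method.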
diff --git a/fs/btrfs/tests/free-space-tests.c b/fs/btrfs/tests/free-space-tests.c index c8d9ddf84c69..2299bfde39ee 100644 --- a/fs/btrfs/tests/free-space-tests.c +++ b/fs/btrfs/tests/free-space-tests.c | |||
@@ -40,11 +40,12 @@ static struct btrfs_block_group_cache *init_test_block_group(void) | |||
40 | cache->key.offset = 1024 * 1024 * 1024; | 40 | cache->key.offset = 1024 * 1024 * 1024; |
41 | cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; | 41 | cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; |
42 | cache->sectorsize = 4096; | 42 | cache->sectorsize = 4096; |
43 | cache->full_stripe_len = 4096; | ||
43 | 44 | ||
44 | spin_lock_init(&cache->lock); | 45 | spin_lock_init(&cache->lock); |
45 | INIT_LIST_HEAD(&cache->list); | 46 | INIT_LIST_HEAD(&cache->list); |
46 | INIT_LIST_HEAD(&cache->cluster_list); | 47 | INIT_LIST_HEAD(&cache->cluster_list); |
47 | INIT_LIST_HEAD(&cache->new_bg_list); | 48 | INIT_LIST_HEAD(&cache->bg_list); |
48 | 49 | ||
49 | btrfs_init_free_space_ctl(cache); | 50 | btrfs_init_free_space_ctl(cache); |
50 | 51 | ||
@@ -364,6 +365,517 @@ static int test_bitmaps_and_extents(struct btrfs_block_group_cache *cache) | |||
364 | return 0; | 365 | return 0; |
365 | } | 366 | } |
366 | 367 | ||
368 | /* Used by test_steal_space_from_bitmap_to_extent(). */ | ||
369 | static bool test_use_bitmap(struct btrfs_free_space_ctl *ctl, | ||
370 | struct btrfs_free_space *info) | ||
371 | { | ||
372 | return ctl->free_extents > 0; | ||
373 | } | ||
374 | |||
375 | /* Used by test_steal_space_from_bitmap_to_extent(). */ | ||
376 | static int | ||
377 | check_num_extents_and_bitmaps(const struct btrfs_block_group_cache *cache, | ||
378 | const int num_extents, | ||
379 | const int num_bitmaps) | ||
380 | { | ||
381 | if (cache->free_space_ctl->free_extents != num_extents) { | ||
382 | test_msg("Incorrect # of extent entries in the cache: %d, expected %d\n", | ||
383 | cache->free_space_ctl->free_extents, num_extents); | ||
384 | return -EINVAL; | ||
385 | } | ||
386 | if (cache->free_space_ctl->total_bitmaps != num_bitmaps) { | ||
387 | test_msg("Incorrect # of bitmap entries in the cache: %d, expected %d\n", | ||
388 | cache->free_space_ctl->total_bitmaps, num_bitmaps); | ||
389 | return -EINVAL; | ||
390 | } | ||
391 | return 0; | ||
392 | } | ||
393 | |||
394 | /* Used by test_steal_space_from_bitmap_to_extent(). */ | ||
395 | static int check_cache_empty(struct btrfs_block_group_cache *cache) | ||
396 | { | ||
397 | u64 offset; | ||
398 | u64 max_extent_size; | ||
399 | |||
400 | /* | ||
401 | * Now let's confirm that there's absolutely no free space left to | ||
402 | * allocate. | ||
403 | */ | ||
404 | if (cache->free_space_ctl->free_space != 0) { | ||
405 | test_msg("Cache free space is not 0\n"); | ||
406 | return -EINVAL; | ||
407 | } | ||
408 | |||
409 | /* And any allocation request, no matter how small, should fail now. */ | ||
410 | offset = btrfs_find_space_for_alloc(cache, 0, 4096, 0, | ||
411 | &max_extent_size); | ||
412 | if (offset != 0) { | ||
413 | test_msg("Space allocation did not fail, returned offset: %llu", | ||
414 | offset); | ||
415 | return -EINVAL; | ||
416 | } | ||
417 | |||
418 | /* And no extent nor bitmap entries in the cache anymore. */ | ||
419 | return check_num_extents_and_bitmaps(cache, 0, 0); | ||
420 | } | ||
421 | |||
422 | /* | ||
423 | * Before we were able to steal free space from a bitmap entry to an extent | ||
424 | * entry, we could end up with 2 entries representing a contiguous free space. | ||
425 | * One would be an extent entry and the other a bitmap entry. Since in order | ||
426 | * to allocate space to a caller we use only 1 entry, we couldn't return that | ||
427 | * whole range to the caller if it was requested. This forced the caller to | ||
428 | * either assume ENOSPC or perform several smaller space allocations, which | ||
429 | * wasn't optimal as they could be spread all over the block group while under | ||
430 | * concurrency (extra overhead and fragmentation). | ||
431 | * | ||
432 | * This stealing approach is beneficial, since we always prefer to allocate from | ||
433 | * extent entries, both for clustered and non-clustered allocation requests. | ||
434 | */ | ||
435 | static int | ||
436 | test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache) | ||
437 | { | ||
438 | int ret; | ||
439 | u64 offset; | ||
440 | u64 max_extent_size; | ||
441 | |||
442 | bool (*use_bitmap_op)(struct btrfs_free_space_ctl *, | ||
443 | struct btrfs_free_space *); | ||
444 | |||
445 | test_msg("Running space stealing from bitmap to extent\n"); | ||
446 | |||
447 | /* | ||
448 | * For this test, we want to ensure we end up with an extent entry | ||
449 | * immediately adjacent to a bitmap entry, where the bitmap starts | ||
450 | * at an offset where the extent entry ends. We keep adding and | ||
451 | * removing free space to reach this state, but to get there | ||
452 | * we need to reach a point where marking new free space doesn't | ||
453 | * result in adding new extent entries or merging the new space | ||
454 | * with existing extent entries - the space ends up being marked | ||
455 | * in an existing bitmap that covers the new free space range. | ||
456 | * | ||
457 | * To get there, we need to reach the threshold set at | ||
458 | * cache->free_space_ctl->extents_thresh, which currently is | ||
459 | * 256 extents on an x86_64 system at least, and a few other | ||
460 | * conditions (check free_space_cache.c). Instead of making the | ||
461 | * test much longer and complicated, use a "use_bitmap" operation | ||
462 | * that forces use of bitmaps as soon as we have at least 1 | ||
463 | * extent entry. | ||
464 | */ | ||
465 | use_bitmap_op = cache->free_space_ctl->op->use_bitmap; | ||
466 | cache->free_space_ctl->op->use_bitmap = test_use_bitmap; | ||
467 | |||
468 | /* | ||
469 | * Extent entry covering free space range [128Mb - 256Kb, 128Mb - 128Kb[ | ||
470 | */ | ||
471 | ret = test_add_free_space_entry(cache, 128 * 1024 * 1024 - 256 * 1024, | ||
472 | 128 * 1024, 0); | ||
473 | if (ret) { | ||
474 | test_msg("Couldn't add extent entry %d\n", ret); | ||
475 | return ret; | ||
476 | } | ||
477 | |||
478 | /* Bitmap entry covering free space range [128Mb + 512Kb, 256Mb[ */ | ||
479 | ret = test_add_free_space_entry(cache, 128 * 1024 * 1024 + 512 * 1024, | ||
480 | 128 * 1024 * 1024 - 512 * 1024, 1); | ||
481 | if (ret) { | ||
482 | test_msg("Couldn't add bitmap entry %d\n", ret); | ||
483 | return ret; | ||
484 | } | ||
485 | |||
486 | ret = check_num_extents_and_bitmaps(cache, 2, 1); | ||
487 | if (ret) | ||
488 | return ret; | ||
489 | |||
490 | /* | ||
491 | * Now make only the first 256Kb of the bitmap marked as free, so that | ||
492 | * we end up with only the following ranges marked as free space: | ||
493 | * | ||
494 | * [128Mb - 256Kb, 128Mb - 128Kb[ | ||
495 | * [128Mb + 512Kb, 128Mb + 768Kb[ | ||
496 | */ | ||
497 | ret = btrfs_remove_free_space(cache, | ||
498 | 128 * 1024 * 1024 + 768 * 1024, | ||
499 | 128 * 1024 * 1024 - 768 * 1024); | ||
500 | if (ret) { | ||
501 | test_msg("Failed to free part of bitmap space %d\n", ret); | ||
502 | return ret; | ||
503 | } | ||
504 | |||
505 | /* Confirm that only those 2 ranges are marked as free. */ | ||
506 | if (!test_check_exists(cache, 128 * 1024 * 1024 - 256 * 1024, | ||
507 | 128 * 1024)) { | ||
508 | test_msg("Free space range missing\n"); | ||
509 | return -ENOENT; | ||
510 | } | ||
511 | if (!test_check_exists(cache, 128 * 1024 * 1024 + 512 * 1024, | ||
512 | 256 * 1024)) { | ||
513 | test_msg("Free space range missing\n"); | ||
514 | return -ENOENT; | ||
515 | } | ||
516 | |||
517 | /* | ||
518 | * Confirm that the bitmap range [128Mb + 768Kb, 256Mb[ isn't marked | ||
519 | * as free anymore. | ||
520 | */ | ||
521 | if (test_check_exists(cache, 128 * 1024 * 1024 + 768 * 1024, | ||
522 | 128 * 1024 * 1024 - 768 * 1024)) { | ||
523 | test_msg("Bitmap region not removed from space cache\n"); | ||
524 | return -EINVAL; | ||
525 | } | ||
526 | |||
527 | /* | ||
528 | * Confirm that the region [128Mb + 256Kb, 128Mb + 512Kb[, which is | ||
529 | * covered by the bitmap, isn't marked as free. | ||
530 | */ | ||
531 | if (test_check_exists(cache, 128 * 1024 * 1024 + 256 * 1024, | ||
532 | 256 * 1024)) { | ||
533 | test_msg("Invalid bitmap region marked as free\n"); | ||
534 | return -EINVAL; | ||
535 | } | ||
536 | |||
537 | /* | ||
538 | * Confirm that the region [128Mb, 128Mb + 256Kb[, which is covered | ||
539 | * by the bitmap too, isn't marked as free either. | ||
540 | */ | ||
541 | if (test_check_exists(cache, 128 * 1024 * 1024, | ||
542 | 256 * 1024)) { | ||
543 | test_msg("Invalid bitmap region marked as free\n"); | ||
544 | return -EINVAL; | ||
545 | } | ||
546 | |||
547 | /* | ||
548 | * Now let's mark the region [128Mb, 128Mb + 512Kb[ as free too. But | ||
549 | * let's make sure the free space cache marks it as free in the bitmap, | ||
550 | * and doesn't insert a new extent entry to represent this region. | ||
551 | */ | ||
552 | ret = btrfs_add_free_space(cache, 128 * 1024 * 1024, 512 * 1024); | ||
553 | if (ret) { | ||
554 | test_msg("Error adding free space: %d\n", ret); | ||
555 | return ret; | ||
556 | } | ||
557 | /* Confirm the region is marked as free. */ | ||
558 | if (!test_check_exists(cache, 128 * 1024 * 1024, 512 * 1024)) { | ||
559 | test_msg("Bitmap region not marked as free\n"); | ||
560 | return -ENOENT; | ||
561 | } | ||
562 | |||
563 | /* | ||
564 | * Confirm that no new extent entries or bitmap entries were added to | ||
565 | * the cache after adding that free space region. | ||
566 | */ | ||
567 | ret = check_num_extents_and_bitmaps(cache, 2, 1); | ||
568 | if (ret) | ||
569 | return ret; | ||
570 | |||
571 | /* | ||
572 | * Now let's add a small free space region to the right of the previous | ||
573 | * one, which is not contiguous with it and is part of the bitmap too. | ||
574 | * The goal is to test that the bitmap entry space stealing doesn't | ||
575 | * steal this space region. | ||
576 | */ | ||
577 | ret = btrfs_add_free_space(cache, 128 * 1024 * 1024 + 16 * 1024 * 1024, | ||
578 | 4096); | ||
579 | if (ret) { | ||
580 | test_msg("Error adding free space: %d\n", ret); | ||
581 | return ret; | ||
582 | } | ||
583 | |||
584 | /* | ||
585 | * Confirm that no new extent entries or bitmap entries were added to | ||
586 | * the cache after adding that free space region. | ||
587 | */ | ||
588 | ret = check_num_extents_and_bitmaps(cache, 2, 1); | ||
589 | if (ret) | ||
590 | return ret; | ||
591 | |||
592 | /* | ||
593 | * Now mark the region [128Mb - 128Kb, 128Mb[ as free too. This will | ||
594 | * expand the range covered by the existing extent entry that represents | ||
595 | * the free space [128Mb - 256Kb, 128Mb - 128Kb[. | ||
596 | */ | ||
597 | ret = btrfs_add_free_space(cache, 128 * 1024 * 1024 - 128 * 1024, | ||
598 | 128 * 1024); | ||
599 | if (ret) { | ||
600 | test_msg("Error adding free space: %d\n", ret); | ||
601 | return ret; | ||
602 | } | ||
603 | /* Confirm the region is marked as free. */ | ||
604 | if (!test_check_exists(cache, 128 * 1024 * 1024 - 128 * 1024, | ||
605 | 128 * 1024)) { | ||
606 | test_msg("Extent region not marked as free\n"); | ||
607 | return -ENOENT; | ||
608 | } | ||
609 | |||
610 | /* | ||
611 | * Confirm that our extent entry didn't steal all free space from the | ||
612 | * bitmap, because of the small 4Kb free space region. | ||
613 | */ | ||
614 | ret = check_num_extents_and_bitmaps(cache, 2, 1); | ||
615 | if (ret) | ||
616 | return ret; | ||
617 | |||
618 | /* | ||
619 | * So now we have the range [128Mb - 256Kb, 128Mb + 768Kb[ as free | ||
620 | * space. Without stealing bitmap free space into extent entry space, | ||
621 | * we would have all this free space represented by 2 entries in the | ||
622 | * cache: | ||
623 | * | ||
624 | * extent entry covering range: [128Mb - 256Kb, 128Mb[ | ||
625 | * bitmap entry covering range: [128Mb, 128Mb + 768Kb[ | ||
626 | * | ||
627 | * Attempting to allocate the whole free space (1Mb) would fail, because | ||
628 | * we can't allocate from multiple entries. | ||
629 | * With the bitmap free space stealing, we get a single extent entry | ||
630 | * that represents the 1Mb free space, and therefore we're able to | ||
631 | * allocate the whole free space at once. | ||
632 | */ | ||
633 | if (!test_check_exists(cache, 128 * 1024 * 1024 - 256 * 1024, | ||
634 | 1 * 1024 * 1024)) { | ||
635 | test_msg("Expected region not marked as free\n"); | ||
636 | return -ENOENT; | ||
637 | } | ||
638 | |||
639 | if (cache->free_space_ctl->free_space != (1 * 1024 * 1024 + 4096)) { | ||
640 | test_msg("Cache free space is not 1Mb + 4Kb\n"); | ||
641 | return -EINVAL; | ||
642 | } | ||
643 | |||
644 | offset = btrfs_find_space_for_alloc(cache, | ||
645 | 0, 1 * 1024 * 1024, 0, | ||
646 | &max_extent_size); | ||
647 | if (offset != (128 * 1024 * 1024 - 256 * 1024)) { | ||
648 | test_msg("Failed to allocate 1Mb from space cache, returned offset is: %llu\n", | ||
649 | offset); | ||
650 | return -EINVAL; | ||
651 | } | ||
652 | |||
653 | /* All that remains is a 4Kb free space region in a bitmap. Confirm. */ | ||
654 | ret = check_num_extents_and_bitmaps(cache, 1, 1); | ||
655 | if (ret) | ||
656 | return ret; | ||
657 | |||
658 | if (cache->free_space_ctl->free_space != 4096) { | ||
659 | test_msg("Cache free space is not 4Kb\n"); | ||
660 | return -EINVAL; | ||
661 | } | ||
662 | |||
663 | offset = btrfs_find_space_for_alloc(cache, | ||
664 | 0, 4096, 0, | ||
665 | &max_extent_size); | ||
666 | if (offset != (128 * 1024 * 1024 + 16 * 1024 * 1024)) { | ||
667 | test_msg("Failed to allocate 4Kb from space cache, returned offset is: %llu\n", | ||
668 | offset); | ||
669 | return -EINVAL; | ||
670 | } | ||
671 | |||
672 | ret = check_cache_empty(cache); | ||
673 | if (ret) | ||
674 | return ret; | ||
675 | |||
676 | __btrfs_remove_free_space_cache(cache->free_space_ctl); | ||
677 | |||
678 | /* | ||
679 | * Now test a similar scenario, but where our extent entry is located | ||
680 | * to the right of the bitmap entry, so that we can check that stealing | ||
681 | * space from a bitmap to the front of an extent entry works. | ||
682 | */ | ||
683 | |||
684 | /* | ||
685 | * Extent entry covering free space range [128Mb + 128Kb, 128Mb + 256Kb[ | ||
686 | */ | ||
687 | ret = test_add_free_space_entry(cache, 128 * 1024 * 1024 + 128 * 1024, | ||
688 | 128 * 1024, 0); | ||
689 | if (ret) { | ||
690 | test_msg("Couldn't add extent entry %d\n", ret); | ||
691 | return ret; | ||
692 | } | ||
693 | |||
694 | /* Bitmap entry covering free space range [0, 128Mb - 512Kb[ */ | ||
695 | ret = test_add_free_space_entry(cache, 0, | ||
696 | 128 * 1024 * 1024 - 512 * 1024, 1); | ||
697 | if (ret) { | ||
698 | test_msg("Couldn't add bitmap entry %d\n", ret); | ||
699 | return ret; | ||
700 | } | ||
701 | |||
702 | ret = check_num_extents_and_bitmaps(cache, 2, 1); | ||
703 | if (ret) | ||
704 | return ret; | ||
705 | |||
706 | /* | ||
707 | * Now make only the last 256Kb of the bitmap marked as free, so that | ||
708 | * we end up with only the following ranges marked as free space: | ||
709 | * | ||
710 | * [128Mb + 128Kb, 128Mb + 256Kb[ | ||
711 | * [128Mb - 768Kb, 128Mb - 512Kb[ | ||
712 | */ | ||
713 | ret = btrfs_remove_free_space(cache, | ||
714 | 0, | ||
715 | 128 * 1024 * 1024 - 768 * 1024); | ||
716 | if (ret) { | ||
717 | test_msg("Failed to free part of bitmap space %d\n", ret); | ||
718 | return ret; | ||
719 | } | ||
720 | |||
721 | /* Confirm that only those 2 ranges are marked as free. */ | ||
722 | if (!test_check_exists(cache, 128 * 1024 * 1024 + 128 * 1024, | ||
723 | 128 * 1024)) { | ||
724 | test_msg("Free space range missing\n"); | ||
725 | return -ENOENT; | ||
726 | } | ||
727 | if (!test_check_exists(cache, 128 * 1024 * 1024 - 768 * 1024, | ||
728 | 256 * 1024)) { | ||
729 | test_msg("Free space range missing\n"); | ||
730 | return -ENOENT; | ||
731 | } | ||
732 | |||
733 | /* | ||
734 | * Confirm that the bitmap range [0, 128Mb - 768Kb[ isn't marked | ||
735 | * as free anymore. | ||
736 | */ | ||
737 | if (test_check_exists(cache, 0, | ||
738 | 128 * 1024 * 1024 - 768 * 1024)) { | ||
739 | test_msg("Bitmap region not removed from space cache\n"); | ||
740 | return -EINVAL; | ||
741 | } | ||
742 | |||
743 | /* | ||
744 | * Confirm that the region [128Mb - 512Kb, 128Mb[, which is | ||
745 | * covered by the bitmap, isn't marked as free. | ||
746 | */ | ||
747 | if (test_check_exists(cache, 128 * 1024 * 1024 - 512 * 1024, | ||
748 | 512 * 1024)) { | ||
749 | test_msg("Invalid bitmap region marked as free\n"); | ||
750 | return -EINVAL; | ||
751 | } | ||
752 | |||
753 | /* | ||
754 | * Now let's mark the region [128Mb - 512Kb, 128Mb[ as free too. But | ||
755 | * let's make sure the free space cache marks it as free in the bitmap, | ||
756 | * and doesn't insert a new extent entry to represent this region. | ||
757 | */ | ||
758 | ret = btrfs_add_free_space(cache, 128 * 1024 * 1024 - 512 * 1024, | ||
759 | 512 * 1024); | ||
760 | if (ret) { | ||
761 | test_msg("Error adding free space: %d\n", ret); | ||
762 | return ret; | ||
763 | } | ||
764 | /* Confirm the region is marked as free. */ | ||
765 | if (!test_check_exists(cache, 128 * 1024 * 1024 - 512 * 1024, | ||
766 | 512 * 1024)) { | ||
767 | test_msg("Bitmap region not marked as free\n"); | ||
768 | return -ENOENT; | ||
769 | } | ||
770 | |||
771 | /* | ||
772 | * Confirm that no new extent entries or bitmap entries were added to | ||
773 | * the cache after adding that free space region. | ||
774 | */ | ||
775 | ret = check_num_extents_and_bitmaps(cache, 2, 1); | ||
776 | if (ret) | ||
777 | return ret; | ||
778 | |||
779 | /* | ||
780 | * Now let's add a small free space region to the left of the previous | ||
781 | * one, which is not contiguous with it and is part of the bitmap too. | ||
782 | * The goal is to test that the bitmap entry space stealing doesn't | ||
783 | * steal this space region. | ||
784 | */ | ||
785 | ret = btrfs_add_free_space(cache, 32 * 1024 * 1024, 8192); | ||
786 | if (ret) { | ||
787 | test_msg("Error adding free space: %d\n", ret); | ||
788 | return ret; | ||
789 | } | ||
790 | |||
791 | /* | ||
792 | * Now mark the region [128Mb, 128Mb + 128Kb[ as free too. This will | ||
793 | * expand the range covered by the existing extent entry that represents | ||
794 | * the free space [128Mb + 128Kb, 128Mb + 256Kb[. | ||
795 | */ | ||
796 | ret = btrfs_add_free_space(cache, 128 * 1024 * 1024, 128 * 1024); | ||
797 | if (ret) { | ||
798 | test_msg("Error adding free space: %d\n", ret); | ||
799 | return ret; | ||
800 | } | ||
801 | /* Confirm the region is marked as free. */ | ||
802 | if (!test_check_exists(cache, 128 * 1024 * 1024, 128 * 1024)) { | ||
803 | test_msg("Extent region not marked as free\n"); | ||
804 | return -ENOENT; | ||
805 | } | ||
806 | |||
807 | /* | ||
808 | * Confirm that our extent entry didn't steal all free space from the | ||
809 | * bitmap, because of the small 8Kb free space region. | ||
810 | */ | ||
811 | ret = check_num_extents_and_bitmaps(cache, 2, 1); | ||
812 | if (ret) | ||
813 | return ret; | ||
814 | |||
815 | /* | ||
816 | * So now we have the range [128Mb - 768Kb, 128Mb + 256Kb[ as free | ||
817 | * space. Without stealing bitmap free space into extent entry space, | ||
818 | * we would have all this free space represented by 2 entries in the | ||
819 | * cache: | ||
820 | * | ||
821 | * extent entry covering range: [128Mb, 128Mb + 256Kb[ | ||
822 | * bitmap entry covering range: [128Mb - 768Kb, 128Mb[ | ||
823 | * | ||
824 | * Attempting to allocate the whole free space (1Mb) would fail, because | ||
825 | * we can't allocate from multiple entries. | ||
826 | * With the bitmap free space stealing, we get a single extent entry | ||
827 | * that represents the 1Mb free space, and therefore we're able to | ||
828 | * allocate the whole free space at once. | ||
829 | */ | ||
830 | if (!test_check_exists(cache, 128 * 1024 * 1024 - 768 * 1024, | ||
831 | 1 * 1024 * 1024)) { | ||
832 | test_msg("Expected region not marked as free\n"); | ||
833 | return -ENOENT; | ||
834 | } | ||
835 | |||
836 | if (cache->free_space_ctl->free_space != (1 * 1024 * 1024 + 8192)) { | ||
837 | test_msg("Cache free space is not 1Mb + 8Kb\n"); | ||
838 | return -EINVAL; | ||
839 | } | ||
840 | |||
841 | offset = btrfs_find_space_for_alloc(cache, | ||
842 | 0, 1 * 1024 * 1024, 0, | ||
843 | &max_extent_size); | ||
844 | if (offset != (128 * 1024 * 1024 - 768 * 1024)) { | ||
845 | test_msg("Failed to allocate 1Mb from space cache, returned offset is: %llu\n", | ||
846 | offset); | ||
847 | return -EINVAL; | ||
848 | } | ||
849 | |||
850 | /* All that remains is an 8Kb free space region in a bitmap. Confirm. */ | ||
851 | ret = check_num_extents_and_bitmaps(cache, 1, 1); | ||
852 | if (ret) | ||
853 | return ret; | ||
854 | |||
855 | if (cache->free_space_ctl->free_space != 8192) { | ||
856 | test_msg("Cache free space is not 8Kb\n"); | ||
857 | return -EINVAL; | ||
858 | } | ||
859 | |||
860 | offset = btrfs_find_space_for_alloc(cache, | ||
861 | 0, 8192, 0, | ||
862 | &max_extent_size); | ||
863 | if (offset != (32 * 1024 * 1024)) { | ||
864 | test_msg("Failed to allocate 8Kb from space cache, returned offset is: %llu\n", | ||
865 | offset); | ||
866 | return -EINVAL; | ||
867 | } | ||
868 | |||
869 | ret = check_cache_empty(cache); | ||
870 | if (ret) | ||
871 | return ret; | ||
872 | |||
873 | cache->free_space_ctl->op->use_bitmap = use_bitmap_op; | ||
874 | __btrfs_remove_free_space_cache(cache->free_space_ctl); | ||
875 | |||
876 | return 0; | ||
877 | } | ||
878 | |||
367 | int btrfs_test_free_space_cache(void) | 879 | int btrfs_test_free_space_cache(void) |
368 | { | 880 | { |
369 | struct btrfs_block_group_cache *cache; | 881 | struct btrfs_block_group_cache *cache; |
@@ -386,6 +898,8 @@ int btrfs_test_free_space_cache(void) | |||
386 | ret = test_bitmaps_and_extents(cache); | 898 | ret = test_bitmaps_and_extents(cache); |
387 | if (ret) | 899 | if (ret) |
388 | goto out; | 900 | goto out; |
901 | |||
902 | ret = test_steal_space_from_bitmap_to_extent(cache); | ||
389 | out: | 903 | out: |
390 | __btrfs_remove_free_space_cache(cache->free_space_ctl); | 904 | __btrfs_remove_free_space_cache(cache->free_space_ctl); |
391 | kfree(cache->free_space_ctl); | 905 | kfree(cache->free_space_ctl); |
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index d89c6d3542ca..dcaae3616728 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
@@ -386,7 +386,7 @@ start_transaction(struct btrfs_root *root, u64 num_items, unsigned int type, | |||
386 | int ret; | 386 | int ret; |
387 | 387 | ||
388 | /* Send isn't supposed to start transactions. */ | 388 | /* Send isn't supposed to start transactions. */ |
389 | ASSERT(current->journal_info != (void *)BTRFS_SEND_TRANS_STUB); | 389 | ASSERT(current->journal_info != BTRFS_SEND_TRANS_STUB); |
390 | 390 | ||
391 | if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) | 391 | if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) |
392 | return ERR_PTR(-EROFS); | 392 | return ERR_PTR(-EROFS); |
@@ -408,7 +408,7 @@ start_transaction(struct btrfs_root *root, u64 num_items, unsigned int type, | |||
408 | if (num_items > 0 && root != root->fs_info->chunk_root) { | 408 | if (num_items > 0 && root != root->fs_info->chunk_root) { |
409 | if (root->fs_info->quota_enabled && | 409 | if (root->fs_info->quota_enabled && |
410 | is_fstree(root->root_key.objectid)) { | 410 | is_fstree(root->root_key.objectid)) { |
411 | qgroup_reserved = num_items * root->leafsize; | 411 | qgroup_reserved = num_items * root->nodesize; |
412 | ret = btrfs_qgroup_reserve(root, qgroup_reserved); | 412 | ret = btrfs_qgroup_reserve(root, qgroup_reserved); |
413 | if (ret) | 413 | if (ret) |
414 | return ERR_PTR(ret); | 414 | return ERR_PTR(ret); |
@@ -418,7 +418,7 @@ start_transaction(struct btrfs_root *root, u64 num_items, unsigned int type, | |||
418 | /* | 418 | /* |
419 | * Do the reservation for the relocation root creation | 419 | * Do the reservation for the relocation root creation |
420 | */ | 420 | */ |
421 | if (unlikely(need_reserve_reloc_root(root))) { | 421 | if (need_reserve_reloc_root(root)) { |
422 | num_bytes += root->nodesize; | 422 | num_bytes += root->nodesize; |
423 | reloc_reserved = true; | 423 | reloc_reserved = true; |
424 | } | 424 | } |
@@ -609,7 +609,6 @@ int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid) | |||
609 | if (transid <= root->fs_info->last_trans_committed) | 609 | if (transid <= root->fs_info->last_trans_committed) |
610 | goto out; | 610 | goto out; |
611 | 611 | ||
612 | ret = -EINVAL; | ||
613 | /* find specified transaction */ | 612 | /* find specified transaction */ |
614 | spin_lock(&root->fs_info->trans_lock); | 613 | spin_lock(&root->fs_info->trans_lock); |
615 | list_for_each_entry(t, &root->fs_info->trans_list, list) { | 614 | list_for_each_entry(t, &root->fs_info->trans_list, list) { |
@@ -625,9 +624,16 @@ int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid) | |||
625 | } | 624 | } |
626 | } | 625 | } |
627 | spin_unlock(&root->fs_info->trans_lock); | 626 | spin_unlock(&root->fs_info->trans_lock); |
628 | /* The specified transaction doesn't exist */ | 627 | |
629 | if (!cur_trans) | 628 | /* |
629 | * The specified transaction doesn't exist, or we | ||
630 | * raced with btrfs_commit_transaction | ||
631 | */ | ||
632 | if (!cur_trans) { | ||
633 | if (transid > root->fs_info->last_trans_committed) | ||
634 | ret = -EINVAL; | ||
630 | goto out; | 635 | goto out; |
636 | } | ||
631 | } else { | 637 | } else { |
632 | /* find newest transaction that is committing | committed */ | 638 | /* find newest transaction that is committing | committed */ |
633 | spin_lock(&root->fs_info->trans_lock); | 639 | spin_lock(&root->fs_info->trans_lock); |
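
The btrfs_wait_for_commit change above distinguishes two reasons for not finding the transaction on the running list: it never existed (still an error), or it committed while we were walking the list (the race the old unconditional `ret = -EINVAL` mis-reported). A minimal userspace sketch of that decision follows; the function and values are illustrative, not kernel code, and -EINVAL is modeled as -22.

    #include <stdio.h>

    /*
     * Sketch: what btrfs_wait_for_commit should return when the wanted
     * transaction is not on the running-transactions list.
     * last_committed stands in for fs_info->last_trans_committed,
     * re-read after the list walk.
     */
    static int missing_trans_result(unsigned long long transid,
                                    unsigned long long last_committed)
    {
            if (transid > last_committed)
                    return -22;     /* -EINVAL: never existed */
            return 0;               /* raced with btrfs_commit_transaction */
    }

    int main(void)
    {
            printf("%d\n", missing_trans_result(99, 42));   /* -22 */
            printf("%d\n", missing_trans_result(40, 42));   /* 0   */
            return 0;
    }
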
@@ -851,6 +857,8 @@ int btrfs_wait_marked_extents(struct btrfs_root *root, | |||
851 | struct extent_state *cached_state = NULL; | 857 | struct extent_state *cached_state = NULL; |
852 | u64 start = 0; | 858 | u64 start = 0; |
853 | u64 end; | 859 | u64 end; |
860 | struct btrfs_inode *btree_ino = BTRFS_I(root->fs_info->btree_inode); | ||
861 | bool errors = false; | ||
854 | 862 | ||
855 | while (!find_first_extent_bit(dirty_pages, start, &start, &end, | 863 | while (!find_first_extent_bit(dirty_pages, start, &start, &end, |
856 | EXTENT_NEED_WAIT, &cached_state)) { | 864 | EXTENT_NEED_WAIT, &cached_state)) { |
@@ -864,6 +872,26 @@ int btrfs_wait_marked_extents(struct btrfs_root *root, | |||
864 | } | 872 | } |
865 | if (err) | 873 | if (err) |
866 | werr = err; | 874 | werr = err; |
875 | |||
876 | if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) { | ||
877 | if ((mark & EXTENT_DIRTY) && | ||
878 | test_and_clear_bit(BTRFS_INODE_BTREE_LOG1_ERR, | ||
879 | &btree_ino->runtime_flags)) | ||
880 | errors = true; | ||
881 | |||
882 | if ((mark & EXTENT_NEW) && | ||
883 | test_and_clear_bit(BTRFS_INODE_BTREE_LOG2_ERR, | ||
884 | &btree_ino->runtime_flags)) | ||
885 | errors = true; | ||
886 | } else { | ||
887 | if (test_and_clear_bit(BTRFS_INODE_BTREE_ERR, | ||
888 | &btree_ino->runtime_flags)) | ||
889 | errors = true; | ||
890 | } | ||
891 | |||
892 | if (errors && !werr) | ||
893 | werr = -EIO; | ||
894 | |||
867 | return werr; | 895 | return werr; |
868 | } | 896 | } |
869 | 897 | ||
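
The btrfs_wait_marked_extents additions above consume a latched error: the write-endio path is assumed to set a sticky per-inode bit (BTRFS_INODE_BTREE_LOG1_ERR and friends) when writeback fails, and the waiter claims it with test_and_clear_bit and converts it to -EIO. A hedged userspace model of that latch pattern, using C11 atomics in place of the kernel bitops:

    #include <stdatomic.h>
    #include <stdio.h>

    #define BTREE_ERR_BIT (1u << 0)

    static atomic_uint runtime_flags;  /* stands in for inode runtime_flags */

    /* modeled endio path: latch the error, never clear it here */
    static void note_write_error(void)
    {
            atomic_fetch_or(&runtime_flags, BTREE_ERR_BIT);
    }

    /* modeled wait path: claim any latched error exactly once */
    static int wait_marked_extents(void)
    {
            unsigned int old = atomic_fetch_and(&runtime_flags,
                                                ~BTREE_ERR_BIT);

            return (old & BTREE_ERR_BIT) ? -5 /* -EIO */ : 0;
    }

    int main(void)
    {
            note_write_error();
            printf("%d\n", wait_marked_extents());  /* -5: error claimed */
            printf("%d\n", wait_marked_extents());  /* 0: already claimed */
            return 0;
    }
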
@@ -1629,6 +1657,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1629 | { | 1657 | { |
1630 | struct btrfs_transaction *cur_trans = trans->transaction; | 1658 | struct btrfs_transaction *cur_trans = trans->transaction; |
1631 | struct btrfs_transaction *prev_trans = NULL; | 1659 | struct btrfs_transaction *prev_trans = NULL; |
1660 | struct btrfs_inode *btree_ino = BTRFS_I(root->fs_info->btree_inode); | ||
1632 | int ret; | 1661 | int ret; |
1633 | 1662 | ||
1634 | /* Stop the commit early if ->aborted is set */ | 1663 | /* Stop the commit early if ->aborted is set */ |
@@ -1868,6 +1897,12 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1868 | memcpy(root->fs_info->super_for_commit, root->fs_info->super_copy, | 1897 | memcpy(root->fs_info->super_for_commit, root->fs_info->super_copy, |
1869 | sizeof(*root->fs_info->super_copy)); | 1898 | sizeof(*root->fs_info->super_copy)); |
1870 | 1899 | ||
1900 | btrfs_update_commit_device_size(root->fs_info); | ||
1901 | btrfs_update_commit_device_bytes_used(root, cur_trans); | ||
1902 | |||
1903 | clear_bit(BTRFS_INODE_BTREE_LOG1_ERR, &btree_ino->runtime_flags); | ||
1904 | clear_bit(BTRFS_INODE_BTREE_LOG2_ERR, &btree_ino->runtime_flags); | ||
1905 | |||
1871 | spin_lock(&root->fs_info->trans_lock); | 1906 | spin_lock(&root->fs_info->trans_lock); |
1872 | cur_trans->state = TRANS_STATE_UNBLOCKED; | 1907 | cur_trans->state = TRANS_STATE_UNBLOCKED; |
1873 | root->fs_info->running_transaction = NULL; | 1908 | root->fs_info->running_transaction = NULL; |
@@ -1981,9 +2016,6 @@ int btrfs_clean_one_deleted_snapshot(struct btrfs_root *root) | |||
1981 | ret = btrfs_drop_snapshot(root, NULL, 0, 0); | 2016 | ret = btrfs_drop_snapshot(root, NULL, 0, 0); |
1982 | else | 2017 | else |
1983 | ret = btrfs_drop_snapshot(root, NULL, 1, 0); | 2018 | ret = btrfs_drop_snapshot(root, NULL, 1, 0); |
1984 | /* | 2019 | |
1985 | * If we encounter a transaction abort during snapshot cleaning, we | ||
1986 | * don't want to crash here | ||
1987 | */ | ||
1988 | return (ret < 0) ? 0 : 1; | 2020 | return (ret < 0) ? 0 : 1; |
1989 | } | 2021 | } |
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 579be51b27e5..d8f40e1a5d2d 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h | |||
@@ -79,7 +79,7 @@ struct btrfs_transaction { | |||
79 | #define TRANS_EXTWRITERS (__TRANS_USERSPACE | __TRANS_START | \ | 79 | #define TRANS_EXTWRITERS (__TRANS_USERSPACE | __TRANS_START | \ |
80 | __TRANS_ATTACH) | 80 | __TRANS_ATTACH) |
81 | 81 | ||
82 | #define BTRFS_SEND_TRANS_STUB 1 | 82 | #define BTRFS_SEND_TRANS_STUB ((void *)1) |
83 | 83 | ||
84 | struct btrfs_trans_handle { | 84 | struct btrfs_trans_handle { |
85 | u64 transid; | 85 | u64 transid; |
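
Typing BTRFS_SEND_TRANS_STUB as `((void *)1)` matters because current->journal_info is a `void *`; with an integer-typed stub, every comparison site needed a cast, as the transaction.c ASSERT above shows. A self-contained illustration (the local `journal_info` is a stand-in, not the real task_struct field):

    #include <assert.h>

    #define SEND_TRANS_STUB ((void *)1)

    int main(void)
    {
            void *journal_info = SEND_TRANS_STUB;

            /* no (void *) cast needed at the comparison anymore */
            assert(journal_info == SEND_TRANS_STUB);
            return 0;
    }
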
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 9e1f2cd5e67a..1475979e5718 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c | |||
@@ -94,8 +94,11 @@ | |||
94 | #define LOG_WALK_REPLAY_ALL 3 | 94 | #define LOG_WALK_REPLAY_ALL 3 |
95 | 95 | ||
96 | static int btrfs_log_inode(struct btrfs_trans_handle *trans, | 96 | static int btrfs_log_inode(struct btrfs_trans_handle *trans, |
97 | struct btrfs_root *root, struct inode *inode, | 97 | struct btrfs_root *root, struct inode *inode, |
98 | int inode_only); | 98 | int inode_only, |
99 | const loff_t start, | ||
100 | const loff_t end, | ||
101 | struct btrfs_log_ctx *ctx); | ||
99 | static int link_to_fixup_dir(struct btrfs_trans_handle *trans, | 102 | static int link_to_fixup_dir(struct btrfs_trans_handle *trans, |
100 | struct btrfs_root *root, | 103 | struct btrfs_root *root, |
101 | struct btrfs_path *path, u64 objectid); | 104 | struct btrfs_path *path, u64 objectid); |
@@ -1496,7 +1499,7 @@ static noinline int link_to_fixup_dir(struct btrfs_trans_handle *trans, | |||
1496 | return -EIO; | 1499 | return -EIO; |
1497 | 1500 | ||
1498 | key.objectid = BTRFS_TREE_LOG_FIXUP_OBJECTID; | 1501 | key.objectid = BTRFS_TREE_LOG_FIXUP_OBJECTID; |
1499 | btrfs_set_key_type(&key, BTRFS_ORPHAN_ITEM_KEY); | 1502 | key.type = BTRFS_ORPHAN_ITEM_KEY; |
1500 | key.offset = objectid; | 1503 | key.offset = objectid; |
1501 | 1504 | ||
1502 | ret = btrfs_insert_empty_item(trans, root, path, &key, 0); | 1505 | ret = btrfs_insert_empty_item(trans, root, path, &key, 0); |
@@ -1635,6 +1638,7 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans, | |||
1635 | found_key.type == log_key.type && | 1638 | found_key.type == log_key.type && |
1636 | found_key.offset == log_key.offset && | 1639 | found_key.offset == log_key.offset && |
1637 | btrfs_dir_type(path->nodes[0], dst_di) == log_type) { | 1640 | btrfs_dir_type(path->nodes[0], dst_di) == log_type) { |
1641 | update_size = false; | ||
1638 | goto out; | 1642 | goto out; |
1639 | } | 1643 | } |
1640 | 1644 | ||
@@ -2155,7 +2159,7 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, | |||
2155 | 2159 | ||
2156 | bytenr = btrfs_node_blockptr(cur, path->slots[*level]); | 2160 | bytenr = btrfs_node_blockptr(cur, path->slots[*level]); |
2157 | ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]); | 2161 | ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]); |
2158 | blocksize = btrfs_level_size(root, *level - 1); | 2162 | blocksize = root->nodesize; |
2159 | 2163 | ||
2160 | parent = path->nodes[*level]; | 2164 | parent = path->nodes[*level]; |
2161 | root_owner = btrfs_header_owner(parent); | 2165 | root_owner = btrfs_header_owner(parent); |
@@ -2981,8 +2985,6 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, | |||
2981 | min_key.type = key_type; | 2985 | min_key.type = key_type; |
2982 | min_key.offset = min_offset; | 2986 | min_key.offset = min_offset; |
2983 | 2987 | ||
2984 | path->keep_locks = 1; | ||
2985 | |||
2986 | ret = btrfs_search_forward(root, &min_key, path, trans->transid); | 2988 | ret = btrfs_search_forward(root, &min_key, path, trans->transid); |
2987 | 2989 | ||
2988 | /* | 2990 | /* |
@@ -3298,7 +3300,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, | |||
3298 | struct list_head ordered_sums; | 3300 | struct list_head ordered_sums; |
3299 | int skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; | 3301 | int skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; |
3300 | bool has_extents = false; | 3302 | bool has_extents = false; |
3301 | bool need_find_last_extent = (*last_extent == 0); | 3303 | bool need_find_last_extent = true; |
3302 | bool done = false; | 3304 | bool done = false; |
3303 | 3305 | ||
3304 | INIT_LIST_HEAD(&ordered_sums); | 3306 | INIT_LIST_HEAD(&ordered_sums); |
@@ -3352,8 +3354,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, | |||
3352 | */ | 3354 | */ |
3353 | if (ins_keys[i].type == BTRFS_EXTENT_DATA_KEY) { | 3355 | if (ins_keys[i].type == BTRFS_EXTENT_DATA_KEY) { |
3354 | has_extents = true; | 3356 | has_extents = true; |
3355 | if (need_find_last_extent && | 3357 | if (first_key.objectid == (u64)-1) |
3356 | first_key.objectid == (u64)-1) | ||
3357 | first_key = ins_keys[i]; | 3358 | first_key = ins_keys[i]; |
3358 | } else { | 3359 | } else { |
3359 | need_find_last_extent = false; | 3360 | need_find_last_extent = false; |
@@ -3363,7 +3364,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, | |||
3363 | * or deletes of this inode don't have to relog the inode | 3364 | * or deletes of this inode don't have to relog the inode |
3364 | * again | 3365 | * again |
3365 | */ | 3366 | */ |
3366 | if (btrfs_key_type(ins_keys + i) == BTRFS_EXTENT_DATA_KEY && | 3367 | if (ins_keys[i].type == BTRFS_EXTENT_DATA_KEY && |
3367 | !skip_csum) { | 3368 | !skip_csum) { |
3368 | int found_type; | 3369 | int found_type; |
3369 | extent = btrfs_item_ptr(src, start_slot + i, | 3370 | extent = btrfs_item_ptr(src, start_slot + i, |
@@ -3427,6 +3428,16 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, | |||
3427 | if (!has_extents) | 3428 | if (!has_extents) |
3428 | return ret; | 3429 | return ret; |
3429 | 3430 | ||
3431 | if (need_find_last_extent && *last_extent == first_key.offset) { | ||
3432 | /* | ||
3433 | * We don't have any leaves between our current one and the one | ||
3434 | * we processed before that can have file extent items for our | ||
3435 | * inode (and have a generation number smaller than our current | ||
3436 | * transaction id). | ||
3437 | */ | ||
3438 | need_find_last_extent = false; | ||
3439 | } | ||
3440 | |||
3430 | /* | 3441 | /* |
3431 | * Because we use btrfs_search_forward we could skip leaves that were | 3442 | * Because we use btrfs_search_forward we could skip leaves that were |
3432 | * not modified and then assume *last_extent is valid when it really | 3443 | * not modified and then assume *last_extent is valid when it really |
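
The new early-out in copy_items above rests on a simple adjacency argument: if the previously logged extent ends exactly where the first copied key begins, there can be no intermediate leaves holding file extent items for this inode, so the search for the last extent is skipped. A toy model of just that check, with made-up offsets:

    #include <stdio.h>

    int main(void)
    {
            unsigned long long last_extent = 4096;  /* end of prior data */
            unsigned long long first_key_offset = 4096;
            int need_find_last_extent = 1;

            if (need_find_last_extent && last_extent == first_key_offset)
                    need_find_last_extent = 0;  /* contiguous: skip search */

            printf("need_find_last_extent = %d\n", need_find_last_extent);
            return 0;
    }
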
@@ -3537,7 +3548,7 @@ fill_holes: | |||
3537 | 0, 0); | 3548 | 0, 0); |
3538 | if (ret) | 3549 | if (ret) |
3539 | break; | 3550 | break; |
3540 | *last_extent = offset + len; | 3551 | *last_extent = extent_end; |
3541 | } | 3552 | } |
3542 | /* | 3553 | /* |
3543 | * Need to let the callers know we dropped the path so they should | 3554 | * Need to let the callers know we dropped the path so they should |
@@ -3562,107 +3573,33 @@ static int extent_cmp(void *priv, struct list_head *a, struct list_head *b) | |||
3562 | return 0; | 3573 | return 0; |
3563 | } | 3574 | } |
3564 | 3575 | ||
3565 | static int log_one_extent(struct btrfs_trans_handle *trans, | 3576 | static int wait_ordered_extents(struct btrfs_trans_handle *trans, |
3566 | struct inode *inode, struct btrfs_root *root, | 3577 | struct inode *inode, |
3567 | struct extent_map *em, struct btrfs_path *path, | 3578 | struct btrfs_root *root, |
3568 | struct list_head *logged_list) | 3579 | const struct extent_map *em, |
3580 | const struct list_head *logged_list, | ||
3581 | bool *ordered_io_error) | ||
3569 | { | 3582 | { |
3570 | struct btrfs_root *log = root->log_root; | ||
3571 | struct btrfs_file_extent_item *fi; | ||
3572 | struct extent_buffer *leaf; | ||
3573 | struct btrfs_ordered_extent *ordered; | 3583 | struct btrfs_ordered_extent *ordered; |
3574 | struct list_head ordered_sums; | 3584 | struct btrfs_root *log = root->log_root; |
3575 | struct btrfs_map_token token; | ||
3576 | struct btrfs_key key; | ||
3577 | u64 mod_start = em->mod_start; | 3585 | u64 mod_start = em->mod_start; |
3578 | u64 mod_len = em->mod_len; | 3586 | u64 mod_len = em->mod_len; |
3587 | const bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; | ||
3579 | u64 csum_offset; | 3588 | u64 csum_offset; |
3580 | u64 csum_len; | 3589 | u64 csum_len; |
3581 | u64 extent_offset = em->start - em->orig_start; | 3590 | LIST_HEAD(ordered_sums); |
3582 | u64 block_len; | 3591 | int ret = 0; |
3583 | int ret; | ||
3584 | bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; | ||
3585 | int extent_inserted = 0; | ||
3586 | |||
3587 | INIT_LIST_HEAD(&ordered_sums); | ||
3588 | btrfs_init_map_token(&token); | ||
3589 | |||
3590 | ret = __btrfs_drop_extents(trans, log, inode, path, em->start, | ||
3591 | em->start + em->len, NULL, 0, 1, | ||
3592 | sizeof(*fi), &extent_inserted); | ||
3593 | if (ret) | ||
3594 | return ret; | ||
3595 | |||
3596 | if (!extent_inserted) { | ||
3597 | key.objectid = btrfs_ino(inode); | ||
3598 | key.type = BTRFS_EXTENT_DATA_KEY; | ||
3599 | key.offset = em->start; | ||
3600 | |||
3601 | ret = btrfs_insert_empty_item(trans, log, path, &key, | ||
3602 | sizeof(*fi)); | ||
3603 | if (ret) | ||
3604 | return ret; | ||
3605 | } | ||
3606 | leaf = path->nodes[0]; | ||
3607 | fi = btrfs_item_ptr(leaf, path->slots[0], | ||
3608 | struct btrfs_file_extent_item); | ||
3609 | |||
3610 | btrfs_set_token_file_extent_generation(leaf, fi, em->generation, | ||
3611 | &token); | ||
3612 | if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) { | ||
3613 | skip_csum = true; | ||
3614 | btrfs_set_token_file_extent_type(leaf, fi, | ||
3615 | BTRFS_FILE_EXTENT_PREALLOC, | ||
3616 | &token); | ||
3617 | } else { | ||
3618 | btrfs_set_token_file_extent_type(leaf, fi, | ||
3619 | BTRFS_FILE_EXTENT_REG, | ||
3620 | &token); | ||
3621 | if (em->block_start == EXTENT_MAP_HOLE) | ||
3622 | skip_csum = true; | ||
3623 | } | ||
3624 | |||
3625 | block_len = max(em->block_len, em->orig_block_len); | ||
3626 | if (em->compress_type != BTRFS_COMPRESS_NONE) { | ||
3627 | btrfs_set_token_file_extent_disk_bytenr(leaf, fi, | ||
3628 | em->block_start, | ||
3629 | &token); | ||
3630 | btrfs_set_token_file_extent_disk_num_bytes(leaf, fi, block_len, | ||
3631 | &token); | ||
3632 | } else if (em->block_start < EXTENT_MAP_LAST_BYTE) { | ||
3633 | btrfs_set_token_file_extent_disk_bytenr(leaf, fi, | ||
3634 | em->block_start - | ||
3635 | extent_offset, &token); | ||
3636 | btrfs_set_token_file_extent_disk_num_bytes(leaf, fi, block_len, | ||
3637 | &token); | ||
3638 | } else { | ||
3639 | btrfs_set_token_file_extent_disk_bytenr(leaf, fi, 0, &token); | ||
3640 | btrfs_set_token_file_extent_disk_num_bytes(leaf, fi, 0, | ||
3641 | &token); | ||
3642 | } | ||
3643 | 3592 | ||
3644 | btrfs_set_token_file_extent_offset(leaf, fi, | 3593 | *ordered_io_error = false; |
3645 | em->start - em->orig_start, | ||
3646 | &token); | ||
3647 | btrfs_set_token_file_extent_num_bytes(leaf, fi, em->len, &token); | ||
3648 | btrfs_set_token_file_extent_ram_bytes(leaf, fi, em->ram_bytes, &token); | ||
3649 | btrfs_set_token_file_extent_compression(leaf, fi, em->compress_type, | ||
3650 | &token); | ||
3651 | btrfs_set_token_file_extent_encryption(leaf, fi, 0, &token); | ||
3652 | btrfs_set_token_file_extent_other_encoding(leaf, fi, 0, &token); | ||
3653 | btrfs_mark_buffer_dirty(leaf); | ||
3654 | 3594 | ||
3655 | btrfs_release_path(path); | 3595 | if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) || |
3656 | if (ret) { | 3596 | em->block_start == EXTENT_MAP_HOLE) |
3657 | return ret; | ||
3658 | } | ||
3659 | |||
3660 | if (skip_csum) | ||
3661 | return 0; | 3597 | return 0; |
3662 | 3598 | ||
3663 | /* | 3599 | /* |
3664 | * First check and see if our csums are on | 3600 | * Wait for any ordered extent that covers our extent map. If it
3665 | * extents. | 3601 | * finishes without an error, first check and see if our csums are on |
3602 | * our outstanding ordered extents. | ||
3666 | */ | 3603 | */ |
3667 | list_for_each_entry(ordered, logged_list, log_list) { | 3604 | list_for_each_entry(ordered, logged_list, log_list) { |
3668 | struct btrfs_ordered_sum *sum; | 3605 | struct btrfs_ordered_sum *sum; |
@@ -3674,6 +3611,24 @@ static int log_one_extent(struct btrfs_trans_handle *trans, | |||
3674 | mod_start + mod_len <= ordered->file_offset) | 3611 | mod_start + mod_len <= ordered->file_offset) |
3675 | continue; | 3612 | continue; |
3676 | 3613 | ||
3614 | if (!test_bit(BTRFS_ORDERED_IO_DONE, &ordered->flags) && | ||
3615 | !test_bit(BTRFS_ORDERED_IOERR, &ordered->flags) && | ||
3616 | !test_bit(BTRFS_ORDERED_DIRECT, &ordered->flags)) { | ||
3617 | const u64 start = ordered->file_offset; | ||
3618 | const u64 end = ordered->file_offset + ordered->len - 1; | ||
3619 | |||
3620 | WARN_ON(ordered->inode != inode); | ||
3621 | filemap_fdatawrite_range(inode->i_mapping, start, end); | ||
3622 | } | ||
3623 | |||
3624 | wait_event(ordered->wait, | ||
3625 | (test_bit(BTRFS_ORDERED_IO_DONE, &ordered->flags) || | ||
3626 | test_bit(BTRFS_ORDERED_IOERR, &ordered->flags))); | ||
3627 | |||
3628 | if (test_bit(BTRFS_ORDERED_IOERR, &ordered->flags)) { | ||
3629 | *ordered_io_error = true; | ||
3630 | break; | ||
3631 | } | ||
3677 | /* | 3632 | /* |
3678 | * We are going to copy all the csums on this ordered extent, so | 3633 | * We are going to copy all the csums on this ordered extent, so |
3679 | * go ahead and adjust mod_start and mod_len in case this | 3634 | * go ahead and adjust mod_start and mod_len in case this |
@@ -3705,6 +3660,9 @@ static int log_one_extent(struct btrfs_trans_handle *trans, | |||
3705 | } | 3660 | } |
3706 | } | 3661 | } |
3707 | 3662 | ||
3663 | if (skip_csum) | ||
3664 | continue; | ||
3665 | |||
3708 | /* | 3666 | /* |
3709 | * To keep us from looping for the above case of an ordered | 3667 | * To keep us from looping for the above case of an ordered |
3710 | * extent that falls inside of the logged extent. | 3668 | * extent that falls inside of the logged extent. |
@@ -3722,18 +3680,16 @@ static int log_one_extent(struct btrfs_trans_handle *trans, | |||
3722 | list_for_each_entry(sum, &ordered->list, list) { | 3680 | list_for_each_entry(sum, &ordered->list, list) { |
3723 | ret = btrfs_csum_file_blocks(trans, log, sum); | 3681 | ret = btrfs_csum_file_blocks(trans, log, sum); |
3724 | if (ret) | 3682 | if (ret) |
3725 | goto unlocked; | 3683 | break; |
3726 | } | 3684 | } |
3727 | |||
3728 | } | 3685 | } |
3729 | unlocked: | ||
3730 | 3686 | ||
3731 | if (!mod_len || ret) | 3687 | if (*ordered_io_error || !mod_len || ret || skip_csum) |
3732 | return ret; | 3688 | return ret; |
3733 | 3689 | ||
3734 | if (em->compress_type) { | 3690 | if (em->compress_type) { |
3735 | csum_offset = 0; | 3691 | csum_offset = 0; |
3736 | csum_len = block_len; | 3692 | csum_len = max(em->block_len, em->orig_block_len); |
3737 | } else { | 3693 | } else { |
3738 | csum_offset = mod_start - em->start; | 3694 | csum_offset = mod_start - em->start; |
3739 | csum_len = mod_len; | 3695 | csum_len = mod_len; |
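
The wait_ordered_extents helper introduced above makes two decisions per ordered extent: one that is neither done, errored, nor direct IO still has dirty pages and needs filemap_fdatawrite_range() before the wait, and one that finished with BTRFS_ORDERED_IOERR set forces the fsync to fall back to a transaction commit via ctx->io_err. A userspace model of those two predicates (the flag names mirror the kernel's bits; the struct is illustrative):

    #include <stdbool.h>
    #include <stdio.h>

    struct ordered {
            bool io_done;   /* BTRFS_ORDERED_IO_DONE */
            bool io_err;    /* BTRFS_ORDERED_IOERR   */
            bool direct;    /* BTRFS_ORDERED_DIRECT  */
    };

    /* must writeback be kicked before waiting on this ordered extent? */
    static bool needs_writeback_kick(const struct ordered *o)
    {
            return !o->io_done && !o->io_err && !o->direct;
    }

    int main(void)
    {
            struct ordered pending = { false, false, false };
            struct ordered failed  = { false, true,  false };

            printf("pending: kick=%d err=%d\n",
                   needs_writeback_kick(&pending), pending.io_err);
            printf("failed:  kick=%d err=%d\n",
                   needs_writeback_kick(&failed), failed.io_err);
            return 0;
    }
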
@@ -3760,11 +3716,106 @@ unlocked: | |||
3760 | return ret; | 3716 | return ret; |
3761 | } | 3717 | } |
3762 | 3718 | ||
3719 | static int log_one_extent(struct btrfs_trans_handle *trans, | ||
3720 | struct inode *inode, struct btrfs_root *root, | ||
3721 | const struct extent_map *em, | ||
3722 | struct btrfs_path *path, | ||
3723 | const struct list_head *logged_list, | ||
3724 | struct btrfs_log_ctx *ctx) | ||
3725 | { | ||
3726 | struct btrfs_root *log = root->log_root; | ||
3727 | struct btrfs_file_extent_item *fi; | ||
3728 | struct extent_buffer *leaf; | ||
3729 | struct btrfs_map_token token; | ||
3730 | struct btrfs_key key; | ||
3731 | u64 extent_offset = em->start - em->orig_start; | ||
3732 | u64 block_len; | ||
3733 | int ret; | ||
3734 | int extent_inserted = 0; | ||
3735 | bool ordered_io_err = false; | ||
3736 | |||
3737 | ret = wait_ordered_extents(trans, inode, root, em, logged_list, | ||
3738 | &ordered_io_err); | ||
3739 | if (ret) | ||
3740 | return ret; | ||
3741 | |||
3742 | if (ordered_io_err) { | ||
3743 | ctx->io_err = -EIO; | ||
3744 | return 0; | ||
3745 | } | ||
3746 | |||
3747 | btrfs_init_map_token(&token); | ||
3748 | |||
3749 | ret = __btrfs_drop_extents(trans, log, inode, path, em->start, | ||
3750 | em->start + em->len, NULL, 0, 1, | ||
3751 | sizeof(*fi), &extent_inserted); | ||
3752 | if (ret) | ||
3753 | return ret; | ||
3754 | |||
3755 | if (!extent_inserted) { | ||
3756 | key.objectid = btrfs_ino(inode); | ||
3757 | key.type = BTRFS_EXTENT_DATA_KEY; | ||
3758 | key.offset = em->start; | ||
3759 | |||
3760 | ret = btrfs_insert_empty_item(trans, log, path, &key, | ||
3761 | sizeof(*fi)); | ||
3762 | if (ret) | ||
3763 | return ret; | ||
3764 | } | ||
3765 | leaf = path->nodes[0]; | ||
3766 | fi = btrfs_item_ptr(leaf, path->slots[0], | ||
3767 | struct btrfs_file_extent_item); | ||
3768 | |||
3769 | btrfs_set_token_file_extent_generation(leaf, fi, em->generation, | ||
3770 | &token); | ||
3771 | if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) | ||
3772 | btrfs_set_token_file_extent_type(leaf, fi, | ||
3773 | BTRFS_FILE_EXTENT_PREALLOC, | ||
3774 | &token); | ||
3775 | else | ||
3776 | btrfs_set_token_file_extent_type(leaf, fi, | ||
3777 | BTRFS_FILE_EXTENT_REG, | ||
3778 | &token); | ||
3779 | |||
3780 | block_len = max(em->block_len, em->orig_block_len); | ||
3781 | if (em->compress_type != BTRFS_COMPRESS_NONE) { | ||
3782 | btrfs_set_token_file_extent_disk_bytenr(leaf, fi, | ||
3783 | em->block_start, | ||
3784 | &token); | ||
3785 | btrfs_set_token_file_extent_disk_num_bytes(leaf, fi, block_len, | ||
3786 | &token); | ||
3787 | } else if (em->block_start < EXTENT_MAP_LAST_BYTE) { | ||
3788 | btrfs_set_token_file_extent_disk_bytenr(leaf, fi, | ||
3789 | em->block_start - | ||
3790 | extent_offset, &token); | ||
3791 | btrfs_set_token_file_extent_disk_num_bytes(leaf, fi, block_len, | ||
3792 | &token); | ||
3793 | } else { | ||
3794 | btrfs_set_token_file_extent_disk_bytenr(leaf, fi, 0, &token); | ||
3795 | btrfs_set_token_file_extent_disk_num_bytes(leaf, fi, 0, | ||
3796 | &token); | ||
3797 | } | ||
3798 | |||
3799 | btrfs_set_token_file_extent_offset(leaf, fi, extent_offset, &token); | ||
3800 | btrfs_set_token_file_extent_num_bytes(leaf, fi, em->len, &token); | ||
3801 | btrfs_set_token_file_extent_ram_bytes(leaf, fi, em->ram_bytes, &token); | ||
3802 | btrfs_set_token_file_extent_compression(leaf, fi, em->compress_type, | ||
3803 | &token); | ||
3804 | btrfs_set_token_file_extent_encryption(leaf, fi, 0, &token); | ||
3805 | btrfs_set_token_file_extent_other_encoding(leaf, fi, 0, &token); | ||
3806 | btrfs_mark_buffer_dirty(leaf); | ||
3807 | |||
3808 | btrfs_release_path(path); | ||
3809 | |||
3810 | return ret; | ||
3811 | } | ||
3812 | |||
3763 | static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans, | 3813 | static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans, |
3764 | struct btrfs_root *root, | 3814 | struct btrfs_root *root, |
3765 | struct inode *inode, | 3815 | struct inode *inode, |
3766 | struct btrfs_path *path, | 3816 | struct btrfs_path *path, |
3767 | struct list_head *logged_list) | 3817 | struct list_head *logged_list, |
3818 | struct btrfs_log_ctx *ctx) | ||
3768 | { | 3819 | { |
3769 | struct extent_map *em, *n; | 3820 | struct extent_map *em, *n; |
3770 | struct list_head extents; | 3821 | struct list_head extents; |
@@ -3822,7 +3873,8 @@ process: | |||
3822 | 3873 | ||
3823 | write_unlock(&tree->lock); | 3874 | write_unlock(&tree->lock); |
3824 | 3875 | ||
3825 | ret = log_one_extent(trans, inode, root, em, path, logged_list); | 3876 | ret = log_one_extent(trans, inode, root, em, path, logged_list, |
3877 | ctx); | ||
3826 | write_lock(&tree->lock); | 3878 | write_lock(&tree->lock); |
3827 | clear_em_logging(tree, em); | 3879 | clear_em_logging(tree, em); |
3828 | free_extent_map(em); | 3880 | free_extent_map(em); |
@@ -3849,8 +3901,11 @@ process: | |||
3849 | * This handles both files and directories. | 3901 | * This handles both files and directories. |
3850 | */ | 3902 | */ |
3851 | static int btrfs_log_inode(struct btrfs_trans_handle *trans, | 3903 | static int btrfs_log_inode(struct btrfs_trans_handle *trans, |
3852 | struct btrfs_root *root, struct inode *inode, | 3904 | struct btrfs_root *root, struct inode *inode, |
3853 | int inode_only) | 3905 | int inode_only, |
3906 | const loff_t start, | ||
3907 | const loff_t end, | ||
3908 | struct btrfs_log_ctx *ctx) | ||
3854 | { | 3909 | { |
3855 | struct btrfs_path *path; | 3910 | struct btrfs_path *path; |
3856 | struct btrfs_path *dst_path; | 3911 | struct btrfs_path *dst_path; |
@@ -3867,6 +3922,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, | |||
3867 | int ins_nr; | 3922 | int ins_nr; |
3868 | bool fast_search = false; | 3923 | bool fast_search = false; |
3869 | u64 ino = btrfs_ino(inode); | 3924 | u64 ino = btrfs_ino(inode); |
3925 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | ||
3870 | 3926 | ||
3871 | path = btrfs_alloc_path(); | 3927 | path = btrfs_alloc_path(); |
3872 | if (!path) | 3928 | if (!path) |
@@ -3950,7 +4006,6 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, | |||
3950 | err = ret; | 4006 | err = ret; |
3951 | goto out_unlock; | 4007 | goto out_unlock; |
3952 | } | 4008 | } |
3953 | path->keep_locks = 1; | ||
3954 | 4009 | ||
3955 | while (1) { | 4010 | while (1) { |
3956 | ins_nr = 0; | 4011 | ins_nr = 0; |
@@ -3980,7 +4035,8 @@ again: | |||
3980 | if (ret < 0) { | 4035 | if (ret < 0) { |
3981 | err = ret; | 4036 | err = ret; |
3982 | goto out_unlock; | 4037 | goto out_unlock; |
3983 | } if (ret) { | 4038 | } |
4039 | if (ret) { | ||
3984 | ins_nr = 0; | 4040 | ins_nr = 0; |
3985 | btrfs_release_path(path); | 4041 | btrfs_release_path(path); |
3986 | continue; | 4042 | continue; |
@@ -4034,19 +4090,41 @@ log_extents: | |||
4034 | btrfs_release_path(dst_path); | 4090 | btrfs_release_path(dst_path); |
4035 | if (fast_search) { | 4091 | if (fast_search) { |
4036 | ret = btrfs_log_changed_extents(trans, root, inode, dst_path, | 4092 | ret = btrfs_log_changed_extents(trans, root, inode, dst_path, |
4037 | &logged_list); | 4093 | &logged_list, ctx); |
4038 | if (ret) { | 4094 | if (ret) { |
4039 | err = ret; | 4095 | err = ret; |
4040 | goto out_unlock; | 4096 | goto out_unlock; |
4041 | } | 4097 | } |
4042 | } else if (inode_only == LOG_INODE_ALL) { | 4098 | } else if (inode_only == LOG_INODE_ALL) { |
4043 | struct extent_map_tree *tree = &BTRFS_I(inode)->extent_tree; | ||
4044 | struct extent_map *em, *n; | 4099 | struct extent_map *em, *n; |
4045 | 4100 | ||
4046 | write_lock(&tree->lock); | 4101 | write_lock(&em_tree->lock); |
4047 | list_for_each_entry_safe(em, n, &tree->modified_extents, list) | 4102 | /* |
4048 | list_del_init(&em->list); | 4103 | * We can't just remove every em if we're called for a ranged |
4049 | write_unlock(&tree->lock); | 4104 | * fsync - that is, one that doesn't cover the whole possible |
4105 | * file range (0 to LLONG_MAX). This is because we can have | ||
4106 | * em's that fall outside the range we're logging and therefore | ||
4107 | * their ordered operations haven't completed yet | ||
4108 | * (btrfs_finish_ordered_io() not invoked yet). This means we | ||
4109 | * didn't get their respective file extent item in the fs/subvol | ||
4110 | * tree yet, and need to let the next fast fsync (one which | ||
4111 | * consults the list of modified extent maps) find the em so | ||
4112 | * that it logs a matching file extent item and waits for the | ||
4113 | * respective ordered operation to complete (if it's still | ||
4114 | * running). | ||
4115 | * | ||
4116 | * Removing every em outside the range we're logging would make | ||
4117 | * the next fast fsync not log their matching file extent items, | ||
4118 | * therefore making us lose data after a log replay. | ||
4119 | */ | ||
4120 | list_for_each_entry_safe(em, n, &em_tree->modified_extents, | ||
4121 | list) { | ||
4122 | const u64 mod_end = em->mod_start + em->mod_len - 1; | ||
4123 | |||
4124 | if (em->mod_start >= start && mod_end <= end) | ||
4125 | list_del_init(&em->list); | ||
4126 | } | ||
4127 | write_unlock(&em_tree->lock); | ||
4050 | } | 4128 | } |
4051 | 4129 | ||
4052 | if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) { | 4130 | if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) { |
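
The long comment above encodes an invariant worth restating: on a ranged fsync, an extent map may be dropped from the modified list only if it lies entirely within [start, end]; anything straddling the boundary must stay so the next fast fsync still logs it. A standalone sketch of exactly that filter, with illustrative offsets:

    #include <stdio.h>

    struct em { unsigned long long mod_start, mod_len; };

    /* may this extent map be dropped from the modified list? */
    static int fully_inside(const struct em *em,
                            unsigned long long start, unsigned long long end)
    {
            unsigned long long mod_end = em->mod_start + em->mod_len - 1;

            return em->mod_start >= start && mod_end <= end;
    }

    int main(void)
    {
            struct em ems[] = { { 0, 4096 }, { 8192, 4096 },
                                { 1 << 20, 4096 } };
            unsigned long long start = 4096, end = (1 << 20) - 1;

            for (int i = 0; i < 3; i++)
                    printf("em %d: %s\n", i,
                           fully_inside(&ems[i], start, end) ?
                           "drop from modified list" : "keep for next fsync");
            return 0;
    }
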
@@ -4056,6 +4134,7 @@ log_extents: | |||
4056 | goto out_unlock; | 4134 | goto out_unlock; |
4057 | } | 4135 | } |
4058 | } | 4136 | } |
4137 | |||
4059 | BTRFS_I(inode)->logged_trans = trans->transid; | 4138 | BTRFS_I(inode)->logged_trans = trans->transid; |
4060 | BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->last_sub_trans; | 4139 | BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->last_sub_trans; |
4061 | out_unlock: | 4140 | out_unlock: |
@@ -4152,7 +4231,10 @@ out: | |||
4152 | */ | 4231 | */ |
4153 | static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, | 4232 | static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, |
4154 | struct btrfs_root *root, struct inode *inode, | 4233 | struct btrfs_root *root, struct inode *inode, |
4155 | struct dentry *parent, int exists_only, | 4234 | struct dentry *parent, |
4235 | const loff_t start, | ||
4236 | const loff_t end, | ||
4237 | int exists_only, | ||
4156 | struct btrfs_log_ctx *ctx) | 4238 | struct btrfs_log_ctx *ctx) |
4157 | { | 4239 | { |
4158 | int inode_only = exists_only ? LOG_INODE_EXISTS : LOG_INODE_ALL; | 4240 | int inode_only = exists_only ? LOG_INODE_EXISTS : LOG_INODE_ALL; |
@@ -4198,7 +4280,7 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, | |||
4198 | if (ret) | 4280 | if (ret) |
4199 | goto end_no_trans; | 4281 | goto end_no_trans; |
4200 | 4282 | ||
4201 | ret = btrfs_log_inode(trans, root, inode, inode_only); | 4283 | ret = btrfs_log_inode(trans, root, inode, inode_only, start, end, ctx); |
4202 | if (ret) | 4284 | if (ret) |
4203 | goto end_trans; | 4285 | goto end_trans; |
4204 | 4286 | ||
@@ -4226,7 +4308,8 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, | |||
4226 | 4308 | ||
4227 | if (BTRFS_I(inode)->generation > | 4309 | if (BTRFS_I(inode)->generation > |
4228 | root->fs_info->last_trans_committed) { | 4310 | root->fs_info->last_trans_committed) { |
4229 | ret = btrfs_log_inode(trans, root, inode, inode_only); | 4311 | ret = btrfs_log_inode(trans, root, inode, inode_only, |
4312 | 0, LLONG_MAX, ctx); | ||
4230 | if (ret) | 4313 | if (ret) |
4231 | goto end_trans; | 4314 | goto end_trans; |
4232 | } | 4315 | } |
@@ -4260,13 +4343,15 @@ end_no_trans: | |||
4260 | */ | 4343 | */ |
4261 | int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, | 4344 | int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, |
4262 | struct btrfs_root *root, struct dentry *dentry, | 4345 | struct btrfs_root *root, struct dentry *dentry, |
4346 | const loff_t start, | ||
4347 | const loff_t end, | ||
4263 | struct btrfs_log_ctx *ctx) | 4348 | struct btrfs_log_ctx *ctx) |
4264 | { | 4349 | { |
4265 | struct dentry *parent = dget_parent(dentry); | 4350 | struct dentry *parent = dget_parent(dentry); |
4266 | int ret; | 4351 | int ret; |
4267 | 4352 | ||
4268 | ret = btrfs_log_inode_parent(trans, root, dentry->d_inode, parent, | 4353 | ret = btrfs_log_inode_parent(trans, root, dentry->d_inode, parent, |
4269 | 0, ctx); | 4354 | start, end, 0, ctx); |
4270 | dput(parent); | 4355 | dput(parent); |
4271 | 4356 | ||
4272 | return ret; | 4357 | return ret; |
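
With the new start/end parameters, btrfs_log_dentry_safe callers are expected to pass the byte range being synced; a full-file fsync passes 0 and LLONG_MAX (as btrfs_log_new_name does below). A tiny illustration of how a ranged caller would derive the pair (the fsync plumbing itself is assumed, not shown in this diff):

    #include <limits.h>
    #include <stdio.h>

    int main(void)
    {
            long long pos = 8192, count = 4096;  /* illustrative range */

            printf("ranged fsync: [%lld, %lld]\n", pos, pos + count - 1);
            printf("full fsync:   [0, %lld]\n", (long long)LLONG_MAX);
            return 0;
    }
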
@@ -4316,7 +4401,7 @@ int btrfs_recover_log_trees(struct btrfs_root *log_root_tree) | |||
4316 | again: | 4401 | again: |
4317 | key.objectid = BTRFS_TREE_LOG_OBJECTID; | 4402 | key.objectid = BTRFS_TREE_LOG_OBJECTID; |
4318 | key.offset = (u64)-1; | 4403 | key.offset = (u64)-1; |
4319 | btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); | 4404 | key.type = BTRFS_ROOT_ITEM_KEY; |
4320 | 4405 | ||
4321 | while (1) { | 4406 | while (1) { |
4322 | ret = btrfs_search_slot(NULL, log_root_tree, &key, path, 0, 0); | 4407 | ret = btrfs_search_slot(NULL, log_root_tree, &key, path, 0, 0); |
@@ -4503,6 +4588,7 @@ int btrfs_log_new_name(struct btrfs_trans_handle *trans, | |||
4503 | root->fs_info->last_trans_committed)) | 4588 | root->fs_info->last_trans_committed)) |
4504 | return 0; | 4589 | return 0; |
4505 | 4590 | ||
4506 | return btrfs_log_inode_parent(trans, root, inode, parent, 1, NULL); | 4591 | return btrfs_log_inode_parent(trans, root, inode, parent, 0, |
4592 | LLONG_MAX, 1, NULL); | ||
4507 | } | 4593 | } |
4508 | 4594 | ||
diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h index 7f5b41bd5373..154990c26dcb 100644 --- a/fs/btrfs/tree-log.h +++ b/fs/btrfs/tree-log.h | |||
@@ -28,6 +28,7 @@ | |||
28 | struct btrfs_log_ctx { | 28 | struct btrfs_log_ctx { |
29 | int log_ret; | 29 | int log_ret; |
30 | int log_transid; | 30 | int log_transid; |
31 | int io_err; | ||
31 | struct list_head list; | 32 | struct list_head list; |
32 | }; | 33 | }; |
33 | 34 | ||
@@ -35,6 +36,7 @@ static inline void btrfs_init_log_ctx(struct btrfs_log_ctx *ctx) | |||
35 | { | 36 | { |
36 | ctx->log_ret = 0; | 37 | ctx->log_ret = 0; |
37 | ctx->log_transid = 0; | 38 | ctx->log_transid = 0; |
39 | ctx->io_err = 0; | ||
38 | INIT_LIST_HEAD(&ctx->list); | 40 | INIT_LIST_HEAD(&ctx->list); |
39 | } | 41 | } |
40 | 42 | ||
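
The new io_err field gives log_one_extent a side channel: on an ordered-extent writeback error it sets ctx->io_err = -EIO and returns 0, and the fsync caller is then assumed to check the field and fall back to a full transaction commit. A hedged standalone model of that flow (the struct and caller here are illustrative):

    #include <stdio.h>

    struct log_ctx { int log_ret; int log_transid; int io_err; };

    static void init_log_ctx(struct log_ctx *ctx)
    {
            ctx->log_ret = 0;
            ctx->log_transid = 0;
            ctx->io_err = 0;        /* the new field starts clean */
    }

    int main(void)
    {
            struct log_ctx ctx;

            init_log_ctx(&ctx);
            ctx.io_err = -5;        /* as log_one_extent would on IO error */
            if (ctx.io_err)
                    printf("writeback failed: fsync returns %d\n",
                           ctx.io_err);
            return 0;
    }
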
@@ -59,6 +61,8 @@ int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans, | |||
59 | int btrfs_recover_log_trees(struct btrfs_root *tree_root); | 61 | int btrfs_recover_log_trees(struct btrfs_root *tree_root); |
60 | int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, | 62 | int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, |
61 | struct btrfs_root *root, struct dentry *dentry, | 63 | struct btrfs_root *root, struct dentry *dentry, |
64 | const loff_t start, | ||
65 | const loff_t end, | ||
62 | struct btrfs_log_ctx *ctx); | 66 | struct btrfs_log_ctx *ctx); |
63 | int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, | 67 | int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, |
64 | struct btrfs_root *root, | 68 | struct btrfs_root *root, |
diff --git a/fs/btrfs/uuid-tree.c b/fs/btrfs/uuid-tree.c index f6a4c03ee7d8..778282944530 100644 --- a/fs/btrfs/uuid-tree.c +++ b/fs/btrfs/uuid-tree.c | |||
@@ -279,7 +279,6 @@ int btrfs_uuid_tree_iterate(struct btrfs_fs_info *fs_info, | |||
279 | key.offset = 0; | 279 | key.offset = 0; |
280 | 280 | ||
281 | again_search_slot: | 281 | again_search_slot: |
282 | path->keep_locks = 1; | ||
283 | ret = btrfs_search_forward(root, &key, path, 0); | 282 | ret = btrfs_search_forward(root, &key, path, 0); |
284 | if (ret) { | 283 | if (ret) { |
285 | if (ret > 0) | 284 | if (ret > 0) |
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 6cb82f62cb7c..d47289c715c8 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
@@ -50,7 +50,7 @@ static void __btrfs_reset_dev_stats(struct btrfs_device *dev); | |||
50 | static void btrfs_dev_stat_print_on_error(struct btrfs_device *dev); | 50 | static void btrfs_dev_stat_print_on_error(struct btrfs_device *dev); |
51 | static void btrfs_dev_stat_print_on_load(struct btrfs_device *device); | 51 | static void btrfs_dev_stat_print_on_load(struct btrfs_device *device); |
52 | 52 | ||
53 | static DEFINE_MUTEX(uuid_mutex); | 53 | DEFINE_MUTEX(uuid_mutex); |
54 | static LIST_HEAD(fs_uuids); | 54 | static LIST_HEAD(fs_uuids); |
55 | 55 | ||
56 | static void lock_chunks(struct btrfs_root *root) | 56 | static void lock_chunks(struct btrfs_root *root) |
@@ -74,6 +74,7 @@ static struct btrfs_fs_devices *__alloc_fs_devices(void) | |||
74 | mutex_init(&fs_devs->device_list_mutex); | 74 | mutex_init(&fs_devs->device_list_mutex); |
75 | 75 | ||
76 | INIT_LIST_HEAD(&fs_devs->devices); | 76 | INIT_LIST_HEAD(&fs_devs->devices); |
77 | INIT_LIST_HEAD(&fs_devs->resized_devices); | ||
77 | INIT_LIST_HEAD(&fs_devs->alloc_list); | 78 | INIT_LIST_HEAD(&fs_devs->alloc_list); |
78 | INIT_LIST_HEAD(&fs_devs->list); | 79 | INIT_LIST_HEAD(&fs_devs->list); |
79 | 80 | ||
@@ -154,11 +155,13 @@ static struct btrfs_device *__alloc_device(void) | |||
154 | 155 | ||
155 | INIT_LIST_HEAD(&dev->dev_list); | 156 | INIT_LIST_HEAD(&dev->dev_list); |
156 | INIT_LIST_HEAD(&dev->dev_alloc_list); | 157 | INIT_LIST_HEAD(&dev->dev_alloc_list); |
158 | INIT_LIST_HEAD(&dev->resized_list); | ||
157 | 159 | ||
158 | spin_lock_init(&dev->io_lock); | 160 | spin_lock_init(&dev->io_lock); |
159 | 161 | ||
160 | spin_lock_init(&dev->reada_lock); | 162 | spin_lock_init(&dev->reada_lock); |
161 | atomic_set(&dev->reada_in_flight, 0); | 163 | atomic_set(&dev->reada_in_flight, 0); |
164 | atomic_set(&dev->dev_stats_ccnt, 0); | ||
162 | INIT_RADIX_TREE(&dev->reada_zones, GFP_NOFS & ~__GFP_WAIT); | 165 | INIT_RADIX_TREE(&dev->reada_zones, GFP_NOFS & ~__GFP_WAIT); |
163 | INIT_RADIX_TREE(&dev->reada_extents, GFP_NOFS & ~__GFP_WAIT); | 166 | INIT_RADIX_TREE(&dev->reada_extents, GFP_NOFS & ~__GFP_WAIT); |
164 | 167 | ||
@@ -474,14 +477,13 @@ static noinline int device_list_add(const char *path, | |||
474 | return PTR_ERR(fs_devices); | 477 | return PTR_ERR(fs_devices); |
475 | 478 | ||
476 | list_add(&fs_devices->list, &fs_uuids); | 479 | list_add(&fs_devices->list, &fs_uuids); |
477 | fs_devices->latest_devid = devid; | ||
478 | fs_devices->latest_trans = found_transid; | ||
479 | 480 | ||
480 | device = NULL; | 481 | device = NULL; |
481 | } else { | 482 | } else { |
482 | device = __find_device(&fs_devices->devices, devid, | 483 | device = __find_device(&fs_devices->devices, devid, |
483 | disk_super->dev_item.uuid); | 484 | disk_super->dev_item.uuid); |
484 | } | 485 | } |
486 | |||
485 | if (!device) { | 487 | if (!device) { |
486 | if (fs_devices->opened) | 488 | if (fs_devices->opened) |
487 | return -EBUSY; | 489 | return -EBUSY; |
@@ -508,6 +510,43 @@ static noinline int device_list_add(const char *path, | |||
508 | ret = 1; | 510 | ret = 1; |
509 | device->fs_devices = fs_devices; | 511 | device->fs_devices = fs_devices; |
510 | } else if (!device->name || strcmp(device->name->str, path)) { | 512 | } else if (!device->name || strcmp(device->name->str, path)) { |
513 | /* | ||
514 | * When the FS is already mounted: | ||
515 | * 1. If you are here and the device->name is NULL, that | ||
516 | * means this device was missing at the time of the FS mount. | ||
517 | * 2. If you are here and the device->name is different | ||
518 | * from 'path', that means either: | ||
519 | * a. The same device disappeared and reappeared with a | ||
520 | * different name, or | ||
521 | * b. The missing-disk-which-was-replaced has | ||
522 | * reappeared now. | ||
523 | * | ||
524 | * We must allow 1 and 2a above. But 2b would be spurious | ||
525 | * and unintentional. | ||
526 | * | ||
527 | * Further, in cases 1 and 2a above, the disk at 'path' | ||
528 | * would have missed some transactions while it was away, and | ||
529 | * in case 2a the stale bdev has to be updated as well. | ||
530 | * 2b must not be allowed at any time. | ||
531 | */ | ||
532 | |||
533 | /* | ||
534 | * For now, we do allow updates to btrfs_fs_device through the | ||
535 | * btrfs dev scan CLI after the FS has been mounted. We're still | ||
536 | * tracking a problem where systems fail mount by subvolume id | ||
537 | * when we reject replacement on a mounted FS. | ||
538 | */ | ||
539 | if (!fs_devices->opened && found_transid < device->generation) { | ||
540 | /* | ||
541 | * That is, if the FS is _not_ mounted and you | ||
542 | * are here, it means there is more than one | ||
543 | * disk with the same uuid and devid. We keep the one | ||
544 | * with the larger generation number, or the last-in if | ||
545 | * the generations are equal. | ||
546 | */ | ||
547 | return -EEXIST; | ||
548 | } | ||
549 | |||
511 | name = rcu_string_strdup(path, GFP_NOFS); | 550 | name = rcu_string_strdup(path, GFP_NOFS); |
512 | if (!name) | 551 | if (!name) |
513 | return -ENOMEM; | 552 | return -ENOMEM; |
@@ -519,10 +558,15 @@ static noinline int device_list_add(const char *path, | |||
519 | } | 558 | } |
520 | } | 559 | } |
521 | 560 | ||
522 | if (found_transid > fs_devices->latest_trans) { | 561 | /* |
523 | fs_devices->latest_devid = devid; | 562 | * Unmount does not free the btrfs_device struct but would zero |
524 | fs_devices->latest_trans = found_transid; | 563 | * generation along with most of the other members. So just update |
525 | } | 564 | * it back. We need it to pick the disk with largest generation |
565 | * (as above). | ||
566 | */ | ||
567 | if (!fs_devices->opened) | ||
568 | device->generation = found_transid; | ||
569 | |||
526 | *fs_devices_ret = fs_devices; | 570 | *fs_devices_ret = fs_devices; |
527 | 571 | ||
528 | return ret; | 572 | return ret; |
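
The -EEXIST branch above implements a tie-break rule for repeated scans of an unmounted FS: among disks presenting the same uuid and devid, keep the one with the larger generation, and let the last one scanned win a tie. A minimal model of the accept/reject decision (the kernel structures are replaced by two integers):

    #include <stdio.h>

    /* returns 1 if the newly scanned disk should replace the known one */
    static int should_replace(unsigned long long known_gen,
                              unsigned long long found_transid)
    {
            return found_transid >= known_gen;  /* ties: last-in wins */
    }

    int main(void)
    {
            printf("%d\n", should_replace(10, 9));   /* 0: keep known disk */
            printf("%d\n", should_replace(10, 10));  /* 1: tie, last-in wins */
            printf("%d\n", should_replace(10, 11));  /* 1: newer generation */
            return 0;
    }
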
@@ -538,8 +582,7 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig) | |||
538 | if (IS_ERR(fs_devices)) | 582 | if (IS_ERR(fs_devices)) |
539 | return fs_devices; | 583 | return fs_devices; |
540 | 584 | ||
541 | fs_devices->latest_devid = orig->latest_devid; | 585 | mutex_lock(&orig->device_list_mutex); |
542 | fs_devices->latest_trans = orig->latest_trans; | ||
543 | fs_devices->total_devices = orig->total_devices; | 586 | fs_devices->total_devices = orig->total_devices; |
544 | 587 | ||
545 | /* We have held the volume lock, it is safe to get the devices. */ | 588 | /* We have held the volume lock, it is safe to get the devices. */ |
@@ -568,8 +611,10 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig) | |||
568 | device->fs_devices = fs_devices; | 611 | device->fs_devices = fs_devices; |
569 | fs_devices->num_devices++; | 612 | fs_devices->num_devices++; |
570 | } | 613 | } |
614 | mutex_unlock(&orig->device_list_mutex); | ||
571 | return fs_devices; | 615 | return fs_devices; |
572 | error: | 616 | error: |
617 | mutex_unlock(&orig->device_list_mutex); | ||
573 | free_fs_devices(fs_devices); | 618 | free_fs_devices(fs_devices); |
574 | return ERR_PTR(-ENOMEM); | 619 | return ERR_PTR(-ENOMEM); |
575 | } | 620 | } |
@@ -578,10 +623,7 @@ void btrfs_close_extra_devices(struct btrfs_fs_info *fs_info, | |||
578 | struct btrfs_fs_devices *fs_devices, int step) | 623 | struct btrfs_fs_devices *fs_devices, int step) |
579 | { | 624 | { |
580 | struct btrfs_device *device, *next; | 625 | struct btrfs_device *device, *next; |
581 | 626 | struct btrfs_device *latest_dev = NULL; | |
582 | struct block_device *latest_bdev = NULL; | ||
583 | u64 latest_devid = 0; | ||
584 | u64 latest_transid = 0; | ||
585 | 627 | ||
586 | mutex_lock(&uuid_mutex); | 628 | mutex_lock(&uuid_mutex); |
587 | again: | 629 | again: |
@@ -589,11 +631,9 @@ again: | |||
589 | list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) { | 631 | list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) { |
590 | if (device->in_fs_metadata) { | 632 | if (device->in_fs_metadata) { |
591 | if (!device->is_tgtdev_for_dev_replace && | 633 | if (!device->is_tgtdev_for_dev_replace && |
592 | (!latest_transid || | 634 | (!latest_dev || |
593 | device->generation > latest_transid)) { | 635 | device->generation > latest_dev->generation)) { |
594 | latest_devid = device->devid; | 636 | latest_dev = device; |
595 | latest_transid = device->generation; | ||
596 | latest_bdev = device->bdev; | ||
597 | } | 637 | } |
598 | continue; | 638 | continue; |
599 | } | 639 | } |
@@ -635,9 +675,7 @@ again: | |||
635 | goto again; | 675 | goto again; |
636 | } | 676 | } |
637 | 677 | ||
638 | fs_devices->latest_bdev = latest_bdev; | 678 | fs_devices->latest_bdev = latest_dev->bdev; |
639 | fs_devices->latest_devid = latest_devid; | ||
640 | fs_devices->latest_trans = latest_transid; | ||
641 | 679 | ||
642 | mutex_unlock(&uuid_mutex); | 680 | mutex_unlock(&uuid_mutex); |
643 | } | 681 | } |
@@ -686,8 +724,6 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices) | |||
686 | fs_devices->rw_devices--; | 724 | fs_devices->rw_devices--; |
687 | } | 725 | } |
688 | 726 | ||
689 | if (device->can_discard) | ||
690 | fs_devices->num_can_discard--; | ||
691 | if (device->missing) | 727 | if (device->missing) |
692 | fs_devices->missing_devices--; | 728 | fs_devices->missing_devices--; |
693 | 729 | ||
@@ -752,11 +788,9 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, | |||
752 | struct block_device *bdev; | 788 | struct block_device *bdev; |
753 | struct list_head *head = &fs_devices->devices; | 789 | struct list_head *head = &fs_devices->devices; |
754 | struct btrfs_device *device; | 790 | struct btrfs_device *device; |
755 | struct block_device *latest_bdev = NULL; | 791 | struct btrfs_device *latest_dev = NULL; |
756 | struct buffer_head *bh; | 792 | struct buffer_head *bh; |
757 | struct btrfs_super_block *disk_super; | 793 | struct btrfs_super_block *disk_super; |
758 | u64 latest_devid = 0; | ||
759 | u64 latest_transid = 0; | ||
760 | u64 devid; | 794 | u64 devid; |
761 | int seeding = 1; | 795 | int seeding = 1; |
762 | int ret = 0; | 796 | int ret = 0; |
@@ -784,11 +818,9 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, | |||
784 | goto error_brelse; | 818 | goto error_brelse; |
785 | 819 | ||
786 | device->generation = btrfs_super_generation(disk_super); | 820 | device->generation = btrfs_super_generation(disk_super); |
787 | if (!latest_transid || device->generation > latest_transid) { | 821 | if (!latest_dev || |
788 | latest_devid = devid; | 822 | device->generation > latest_dev->generation) |
789 | latest_transid = device->generation; | 823 | latest_dev = device; |
790 | latest_bdev = bdev; | ||
791 | } | ||
792 | 824 | ||
793 | if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_SEEDING) { | 825 | if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_SEEDING) { |
794 | device->writeable = 0; | 826 | device->writeable = 0; |
@@ -798,10 +830,8 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, | |||
798 | } | 830 | } |
799 | 831 | ||
800 | q = bdev_get_queue(bdev); | 832 | q = bdev_get_queue(bdev); |
801 | if (blk_queue_discard(q)) { | 833 | if (blk_queue_discard(q)) |
802 | device->can_discard = 1; | 834 | device->can_discard = 1; |
803 | fs_devices->num_can_discard++; | ||
804 | } | ||
805 | 835 | ||
806 | device->bdev = bdev; | 836 | device->bdev = bdev; |
807 | device->in_fs_metadata = 0; | 837 | device->in_fs_metadata = 0; |
@@ -831,9 +861,7 @@ error_brelse: | |||
831 | } | 861 | } |
832 | fs_devices->seeding = seeding; | 862 | fs_devices->seeding = seeding; |
833 | fs_devices->opened = 1; | 863 | fs_devices->opened = 1; |
834 | fs_devices->latest_bdev = latest_bdev; | 864 | fs_devices->latest_bdev = latest_dev->bdev; |
835 | fs_devices->latest_devid = latest_devid; | ||
836 | fs_devices->latest_trans = latest_transid; | ||
837 | fs_devices->total_rw_bytes = 0; | 865 | fs_devices->total_rw_bytes = 0; |
838 | out: | 866 | out: |
839 | return ret; | 867 | return ret; |
@@ -1007,7 +1035,7 @@ int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start, | |||
1007 | if (key.objectid > device->devid) | 1035 | if (key.objectid > device->devid) |
1008 | break; | 1036 | break; |
1009 | 1037 | ||
1010 | if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY) | 1038 | if (key.type != BTRFS_DEV_EXTENT_KEY) |
1011 | goto next; | 1039 | goto next; |
1012 | 1040 | ||
1013 | dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent); | 1041 | dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent); |
@@ -1159,7 +1187,7 @@ again: | |||
1159 | if (key.objectid > device->devid) | 1187 | if (key.objectid > device->devid) |
1160 | break; | 1188 | break; |
1161 | 1189 | ||
1162 | if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY) | 1190 | if (key.type != BTRFS_DEV_EXTENT_KEY) |
1163 | goto next; | 1191 | goto next; |
1164 | 1192 | ||
1165 | if (key.offset > search_start) { | 1193 | if (key.offset > search_start) { |
@@ -1238,7 +1266,7 @@ out: | |||
1238 | 1266 | ||
1239 | static int btrfs_free_dev_extent(struct btrfs_trans_handle *trans, | 1267 | static int btrfs_free_dev_extent(struct btrfs_trans_handle *trans, |
1240 | struct btrfs_device *device, | 1268 | struct btrfs_device *device, |
1241 | u64 start) | 1269 | u64 start, u64 *dev_extent_len) |
1242 | { | 1270 | { |
1243 | int ret; | 1271 | int ret; |
1244 | struct btrfs_path *path; | 1272 | struct btrfs_path *path; |
@@ -1280,13 +1308,8 @@ again: | |||
1280 | goto out; | 1308 | goto out; |
1281 | } | 1309 | } |
1282 | 1310 | ||
1283 | if (device->bytes_used > 0) { | 1311 | *dev_extent_len = btrfs_dev_extent_length(leaf, extent); |
1284 | u64 len = btrfs_dev_extent_length(leaf, extent); | 1312 | |
1285 | device->bytes_used -= len; | ||
1286 | spin_lock(&root->fs_info->free_chunk_lock); | ||
1287 | root->fs_info->free_chunk_space += len; | ||
1288 | spin_unlock(&root->fs_info->free_chunk_lock); | ||
1289 | } | ||
1290 | ret = btrfs_del_item(trans, root, path); | 1313 | ret = btrfs_del_item(trans, root, path); |
1291 | if (ret) { | 1314 | if (ret) { |
1292 | btrfs_error(root->fs_info, ret, | 1315 | btrfs_error(root->fs_info, ret, |
@@ -1436,8 +1459,10 @@ static int btrfs_add_device(struct btrfs_trans_handle *trans, | |||
1436 | btrfs_set_device_io_align(leaf, dev_item, device->io_align); | 1459 | btrfs_set_device_io_align(leaf, dev_item, device->io_align); |
1437 | btrfs_set_device_io_width(leaf, dev_item, device->io_width); | 1460 | btrfs_set_device_io_width(leaf, dev_item, device->io_width); |
1438 | btrfs_set_device_sector_size(leaf, dev_item, device->sector_size); | 1461 | btrfs_set_device_sector_size(leaf, dev_item, device->sector_size); |
1439 | btrfs_set_device_total_bytes(leaf, dev_item, device->total_bytes); | 1462 | btrfs_set_device_total_bytes(leaf, dev_item, |
1440 | btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used); | 1463 | btrfs_device_get_disk_total_bytes(device)); |
1464 | btrfs_set_device_bytes_used(leaf, dev_item, | ||
1465 | btrfs_device_get_bytes_used(device)); | ||
1441 | btrfs_set_device_group(leaf, dev_item, 0); | 1466 | btrfs_set_device_group(leaf, dev_item, 0); |
1442 | btrfs_set_device_seek_speed(leaf, dev_item, 0); | 1467 | btrfs_set_device_seek_speed(leaf, dev_item, 0); |
1443 | btrfs_set_device_bandwidth(leaf, dev_item, 0); | 1468 | btrfs_set_device_bandwidth(leaf, dev_item, 0); |
@@ -1493,7 +1518,6 @@ static int btrfs_rm_dev_item(struct btrfs_root *root, | |||
1493 | key.objectid = BTRFS_DEV_ITEMS_OBJECTID; | 1518 | key.objectid = BTRFS_DEV_ITEMS_OBJECTID; |
1494 | key.type = BTRFS_DEV_ITEM_KEY; | 1519 | key.type = BTRFS_DEV_ITEM_KEY; |
1495 | key.offset = device->devid; | 1520 | key.offset = device->devid; |
1496 | lock_chunks(root); | ||
1497 | 1521 | ||
1498 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); | 1522 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); |
1499 | if (ret < 0) | 1523 | if (ret < 0) |
@@ -1509,7 +1533,6 @@ static int btrfs_rm_dev_item(struct btrfs_root *root, | |||
1509 | goto out; | 1533 | goto out; |
1510 | out: | 1534 | out: |
1511 | btrfs_free_path(path); | 1535 | btrfs_free_path(path); |
1512 | unlock_chunks(root); | ||
1513 | btrfs_commit_transaction(trans, root); | 1536 | btrfs_commit_transaction(trans, root); |
1514 | return ret; | 1537 | return ret; |
1515 | } | 1538 | } |
@@ -1625,8 +1648,8 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) | |||
1625 | if (device->writeable) { | 1648 | if (device->writeable) { |
1626 | lock_chunks(root); | 1649 | lock_chunks(root); |
1627 | list_del_init(&device->dev_alloc_list); | 1650 | list_del_init(&device->dev_alloc_list); |
1651 | device->fs_devices->rw_devices--; | ||
1628 | unlock_chunks(root); | 1652 | unlock_chunks(root); |
1629 | root->fs_info->fs_devices->rw_devices--; | ||
1630 | clear_super = true; | 1653 | clear_super = true; |
1631 | } | 1654 | } |
1632 | 1655 | ||
@@ -1645,11 +1668,6 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) | |||
1645 | if (ret) | 1668 | if (ret) |
1646 | goto error_undo; | 1669 | goto error_undo; |
1647 | 1670 | ||
1648 | spin_lock(&root->fs_info->free_chunk_lock); | ||
1649 | root->fs_info->free_chunk_space = device->total_bytes - | ||
1650 | device->bytes_used; | ||
1651 | spin_unlock(&root->fs_info->free_chunk_lock); | ||
1652 | |||
1653 | device->in_fs_metadata = 0; | 1671 | device->in_fs_metadata = 0; |
1654 | btrfs_scrub_cancel_dev(root->fs_info, device); | 1672 | btrfs_scrub_cancel_dev(root->fs_info, device); |
1655 | 1673 | ||
@@ -1671,7 +1689,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) | |||
1671 | device->fs_devices->total_devices--; | 1689 | device->fs_devices->total_devices--; |
1672 | 1690 | ||
1673 | if (device->missing) | 1691 | if (device->missing) |
1674 | root->fs_info->fs_devices->missing_devices--; | 1692 | device->fs_devices->missing_devices--; |
1675 | 1693 | ||
1676 | next_device = list_entry(root->fs_info->fs_devices->devices.next, | 1694 | next_device = list_entry(root->fs_info->fs_devices->devices.next, |
1677 | struct btrfs_device, dev_list); | 1695 | struct btrfs_device, dev_list); |
@@ -1703,9 +1721,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) | |||
1703 | fs_devices = fs_devices->seed; | 1721 | fs_devices = fs_devices->seed; |
1704 | } | 1722 | } |
1705 | cur_devices->seed = NULL; | 1723 | cur_devices->seed = NULL; |
1706 | lock_chunks(root); | ||
1707 | __btrfs_close_devices(cur_devices); | 1724 | __btrfs_close_devices(cur_devices); |
1708 | unlock_chunks(root); | ||
1709 | free_fs_devices(cur_devices); | 1725 | free_fs_devices(cur_devices); |
1710 | } | 1726 | } |
1711 | 1727 | ||
@@ -1778,8 +1794,8 @@ error_undo: | |||
1778 | lock_chunks(root); | 1794 | lock_chunks(root); |
1779 | list_add(&device->dev_alloc_list, | 1795 | list_add(&device->dev_alloc_list, |
1780 | &root->fs_info->fs_devices->alloc_list); | 1796 | &root->fs_info->fs_devices->alloc_list); |
1797 | device->fs_devices->rw_devices++; | ||
1781 | unlock_chunks(root); | 1798 | unlock_chunks(root); |
1782 | root->fs_info->fs_devices->rw_devices++; | ||
1783 | } | 1799 | } |
1784 | goto error_brelse; | 1800 | goto error_brelse; |
1785 | } | 1801 | } |
@@ -1787,25 +1803,57 @@ error_undo: | |||
1787 | void btrfs_rm_dev_replace_srcdev(struct btrfs_fs_info *fs_info, | 1803 | void btrfs_rm_dev_replace_srcdev(struct btrfs_fs_info *fs_info, |
1788 | struct btrfs_device *srcdev) | 1804 | struct btrfs_device *srcdev) |
1789 | { | 1805 | { |
1806 | struct btrfs_fs_devices *fs_devices; | ||
1807 | |||
1790 | WARN_ON(!mutex_is_locked(&fs_info->fs_devices->device_list_mutex)); | 1808 | WARN_ON(!mutex_is_locked(&fs_info->fs_devices->device_list_mutex)); |
1791 | 1809 | ||
1810 | /* | ||
1811 | * In case of an fs with no seed, srcdev->fs_devices will point | ||
1812 | * to the fs_devices of fs_info. However, when the dev being replaced | ||
1813 | * is a seed dev, it will point to the seed's local fs_devices. In | ||
1814 | * short, srcdev will have its correct fs_devices in both cases. | ||
1815 | */ | ||
1816 | fs_devices = srcdev->fs_devices; | ||
1817 | |||
1792 | list_del_rcu(&srcdev->dev_list); | 1818 | list_del_rcu(&srcdev->dev_list); |
1793 | list_del_rcu(&srcdev->dev_alloc_list); | 1819 | list_del_rcu(&srcdev->dev_alloc_list); |
1794 | fs_info->fs_devices->num_devices--; | 1820 | fs_devices->num_devices--; |
1795 | if (srcdev->missing) { | 1821 | if (srcdev->missing) |
1796 | fs_info->fs_devices->missing_devices--; | 1822 | fs_devices->missing_devices--; |
1797 | fs_info->fs_devices->rw_devices++; | ||
1798 | } | ||
1799 | if (srcdev->can_discard) | ||
1800 | fs_info->fs_devices->num_can_discard--; | ||
1801 | if (srcdev->bdev) { | ||
1802 | fs_info->fs_devices->open_devices--; | ||
1803 | 1823 | ||
1804 | /* zero out the old super */ | 1824 | if (srcdev->writeable) { |
1825 | fs_devices->rw_devices--; | ||
1826 | /* zero out the old super if it is writable */ | ||
1805 | btrfs_scratch_superblock(srcdev); | 1827 | btrfs_scratch_superblock(srcdev); |
1806 | } | 1828 | } |
1807 | 1829 | ||
1830 | if (srcdev->bdev) | ||
1831 | fs_devices->open_devices--; | ||
1832 | |||
1808 | call_rcu(&srcdev->rcu, free_device); | 1833 | call_rcu(&srcdev->rcu, free_device); |
1834 | |||
1835 | /* | ||
1836 | * unless fs_devices is a seed fs, num_devices shouldn't go | ||
1837 | * to zero | ||
1838 | */ | ||
1839 | BUG_ON(!fs_devices->num_devices && !fs_devices->seeding); | ||
1840 | |||
1841 | /* if there are no devs left we delete the fs_devices */ | ||
1842 | if (!fs_devices->num_devices) { | ||
1843 | struct btrfs_fs_devices *tmp_fs_devices; | ||
1844 | |||
1845 | tmp_fs_devices = fs_info->fs_devices; | ||
1846 | while (tmp_fs_devices) { | ||
1847 | if (tmp_fs_devices->seed == fs_devices) { | ||
1848 | tmp_fs_devices->seed = fs_devices->seed; | ||
1849 | break; | ||
1850 | } | ||
1851 | tmp_fs_devices = tmp_fs_devices->seed; | ||
1852 | } | ||
1853 | fs_devices->seed = NULL; | ||
1854 | __btrfs_close_devices(fs_devices); | ||
1855 | free_fs_devices(fs_devices); | ||
1856 | } | ||
1809 | } | 1857 | } |
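
The cleanup path added above walks the singly-linked seed chain hanging off fs_info->fs_devices to splice out a seed fs_devices whose last device is gone. A minimal sketch of that unlink, with a simplified stand-in type (the real struct btrfs_fs_devices carries far more state):

    /* Sketch only: simplified stand-in for struct btrfs_fs_devices. */
    struct fsdevs {
            struct fsdevs *seed;    /* next seed filesystem in the chain */
            unsigned int num_devices;
    };

    /* Splice 'victim' out of the seed chain rooted at 'head', as the
     * hunk above does once victim->num_devices has dropped to zero. */
    static void unlink_seed(struct fsdevs *head, struct fsdevs *victim)
    {
            struct fsdevs *cur = head;

            while (cur) {
                    if (cur->seed == victim) {
                            cur->seed = victim->seed;       /* splice out */
                            break;
                    }
                    cur = cur->seed;
            }
            victim->seed = NULL;    /* detach before close/free */
    }
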
1810 | 1858 | ||
1811 | void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, | 1859 | void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, |
@@ -1813,6 +1861,7 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, | |||
1813 | { | 1861 | { |
1814 | struct btrfs_device *next_device; | 1862 | struct btrfs_device *next_device; |
1815 | 1863 | ||
1864 | mutex_lock(&uuid_mutex); | ||
1816 | WARN_ON(!tgtdev); | 1865 | WARN_ON(!tgtdev); |
1817 | mutex_lock(&fs_info->fs_devices->device_list_mutex); | 1866 | mutex_lock(&fs_info->fs_devices->device_list_mutex); |
1818 | if (tgtdev->bdev) { | 1867 | if (tgtdev->bdev) { |
@@ -1820,8 +1869,6 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, | |||
1820 | fs_info->fs_devices->open_devices--; | 1869 | fs_info->fs_devices->open_devices--; |
1821 | } | 1870 | } |
1822 | fs_info->fs_devices->num_devices--; | 1871 | fs_info->fs_devices->num_devices--; |
1823 | if (tgtdev->can_discard) | ||
1824 | fs_info->fs_devices->num_can_discard++; | ||
1825 | 1872 | ||
1826 | next_device = list_entry(fs_info->fs_devices->devices.next, | 1873 | next_device = list_entry(fs_info->fs_devices->devices.next, |
1827 | struct btrfs_device, dev_list); | 1874 | struct btrfs_device, dev_list); |
@@ -1834,6 +1881,7 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, | |||
1834 | call_rcu(&tgtdev->rcu, free_device); | 1881 | call_rcu(&tgtdev->rcu, free_device); |
1835 | 1882 | ||
1836 | mutex_unlock(&fs_info->fs_devices->device_list_mutex); | 1883 | mutex_unlock(&fs_info->fs_devices->device_list_mutex); |
1884 | mutex_unlock(&uuid_mutex); | ||
1837 | } | 1885 | } |
1838 | 1886 | ||
1839 | static int btrfs_find_device_by_path(struct btrfs_root *root, char *device_path, | 1887 | static int btrfs_find_device_by_path(struct btrfs_root *root, char *device_path, |
@@ -1932,15 +1980,18 @@ static int btrfs_prepare_sprout(struct btrfs_root *root) | |||
1932 | mutex_lock(&root->fs_info->fs_devices->device_list_mutex); | 1980 | mutex_lock(&root->fs_info->fs_devices->device_list_mutex); |
1933 | list_splice_init_rcu(&fs_devices->devices, &seed_devices->devices, | 1981 | list_splice_init_rcu(&fs_devices->devices, &seed_devices->devices, |
1934 | synchronize_rcu); | 1982 | synchronize_rcu); |
1983 | list_for_each_entry(device, &seed_devices->devices, dev_list) | ||
1984 | device->fs_devices = seed_devices; | ||
1935 | 1985 | ||
1986 | lock_chunks(root); | ||
1936 | list_splice_init(&fs_devices->alloc_list, &seed_devices->alloc_list); | 1987 | list_splice_init(&fs_devices->alloc_list, &seed_devices->alloc_list); |
1937 | list_for_each_entry(device, &seed_devices->devices, dev_list) { | 1988 | unlock_chunks(root); |
1938 | device->fs_devices = seed_devices; | ||
1939 | } | ||
1940 | 1989 | ||
1941 | fs_devices->seeding = 0; | 1990 | fs_devices->seeding = 0; |
1942 | fs_devices->num_devices = 0; | 1991 | fs_devices->num_devices = 0; |
1943 | fs_devices->open_devices = 0; | 1992 | fs_devices->open_devices = 0; |
1993 | fs_devices->missing_devices = 0; | ||
1994 | fs_devices->rotating = 0; | ||
1944 | fs_devices->seed = seed_devices; | 1995 | fs_devices->seed = seed_devices; |
1945 | 1996 | ||
1946 | generate_random_uuid(fs_devices->fsid); | 1997 | generate_random_uuid(fs_devices->fsid); |
@@ -2039,7 +2090,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) | |||
2039 | struct list_head *devices; | 2090 | struct list_head *devices; |
2040 | struct super_block *sb = root->fs_info->sb; | 2091 | struct super_block *sb = root->fs_info->sb; |
2041 | struct rcu_string *name; | 2092 | struct rcu_string *name; |
2042 | u64 total_bytes; | 2093 | u64 tmp; |
2043 | int seeding_dev = 0; | 2094 | int seeding_dev = 0; |
2044 | int ret = 0; | 2095 | int ret = 0; |
2045 | 2096 | ||
@@ -2095,8 +2146,6 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) | |||
2095 | goto error; | 2146 | goto error; |
2096 | } | 2147 | } |
2097 | 2148 | ||
2098 | lock_chunks(root); | ||
2099 | |||
2100 | q = bdev_get_queue(bdev); | 2149 | q = bdev_get_queue(bdev); |
2101 | if (blk_queue_discard(q)) | 2150 | if (blk_queue_discard(q)) |
2102 | device->can_discard = 1; | 2151 | device->can_discard = 1; |
@@ -2107,6 +2156,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) | |||
2107 | device->sector_size = root->sectorsize; | 2156 | device->sector_size = root->sectorsize; |
2108 | device->total_bytes = i_size_read(bdev->bd_inode); | 2157 | device->total_bytes = i_size_read(bdev->bd_inode); |
2109 | device->disk_total_bytes = device->total_bytes; | 2158 | device->disk_total_bytes = device->total_bytes; |
2159 | device->commit_total_bytes = device->total_bytes; | ||
2110 | device->dev_root = root->fs_info->dev_root; | 2160 | device->dev_root = root->fs_info->dev_root; |
2111 | device->bdev = bdev; | 2161 | device->bdev = bdev; |
2112 | device->in_fs_metadata = 1; | 2162 | device->in_fs_metadata = 1; |
@@ -2124,6 +2174,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) | |||
2124 | device->fs_devices = root->fs_info->fs_devices; | 2174 | device->fs_devices = root->fs_info->fs_devices; |
2125 | 2175 | ||
2126 | mutex_lock(&root->fs_info->fs_devices->device_list_mutex); | 2176 | mutex_lock(&root->fs_info->fs_devices->device_list_mutex); |
2177 | lock_chunks(root); | ||
2127 | list_add_rcu(&device->dev_list, &root->fs_info->fs_devices->devices); | 2178 | list_add_rcu(&device->dev_list, &root->fs_info->fs_devices->devices); |
2128 | list_add(&device->dev_alloc_list, | 2179 | list_add(&device->dev_alloc_list, |
2129 | &root->fs_info->fs_devices->alloc_list); | 2180 | &root->fs_info->fs_devices->alloc_list); |
@@ -2131,8 +2182,6 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) | |||
2131 | root->fs_info->fs_devices->open_devices++; | 2182 | root->fs_info->fs_devices->open_devices++; |
2132 | root->fs_info->fs_devices->rw_devices++; | 2183 | root->fs_info->fs_devices->rw_devices++; |
2133 | root->fs_info->fs_devices->total_devices++; | 2184 | root->fs_info->fs_devices->total_devices++; |
2134 | if (device->can_discard) | ||
2135 | root->fs_info->fs_devices->num_can_discard++; | ||
2136 | root->fs_info->fs_devices->total_rw_bytes += device->total_bytes; | 2185 | root->fs_info->fs_devices->total_rw_bytes += device->total_bytes; |
2137 | 2186 | ||
2138 | spin_lock(&root->fs_info->free_chunk_lock); | 2187 | spin_lock(&root->fs_info->free_chunk_lock); |
@@ -2142,26 +2191,45 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) | |||
2142 | if (!blk_queue_nonrot(bdev_get_queue(bdev))) | 2191 | if (!blk_queue_nonrot(bdev_get_queue(bdev))) |
2143 | root->fs_info->fs_devices->rotating = 1; | 2192 | root->fs_info->fs_devices->rotating = 1; |
2144 | 2193 | ||
2145 | total_bytes = btrfs_super_total_bytes(root->fs_info->super_copy); | 2194 | tmp = btrfs_super_total_bytes(root->fs_info->super_copy); |
2146 | btrfs_set_super_total_bytes(root->fs_info->super_copy, | 2195 | btrfs_set_super_total_bytes(root->fs_info->super_copy, |
2147 | total_bytes + device->total_bytes); | 2196 | tmp + device->total_bytes); |
2148 | 2197 | ||
2149 | total_bytes = btrfs_super_num_devices(root->fs_info->super_copy); | 2198 | tmp = btrfs_super_num_devices(root->fs_info->super_copy); |
2150 | btrfs_set_super_num_devices(root->fs_info->super_copy, | 2199 | btrfs_set_super_num_devices(root->fs_info->super_copy, |
2151 | total_bytes + 1); | 2200 | tmp + 1); |
2152 | 2201 | ||
2153 | /* add sysfs device entry */ | 2202 | /* add sysfs device entry */ |
2154 | btrfs_kobj_add_device(root->fs_info, device); | 2203 | btrfs_kobj_add_device(root->fs_info, device); |
2155 | 2204 | ||
2205 | /* | ||
2206 | * we've got more storage, clear any full flags on the space | ||
2207 | * infos | ||
2208 | */ | ||
2209 | btrfs_clear_space_info_full(root->fs_info); | ||
2210 | |||
2211 | unlock_chunks(root); | ||
2156 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); | 2212 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); |
2157 | 2213 | ||
2158 | if (seeding_dev) { | 2214 | if (seeding_dev) { |
2159 | char fsid_buf[BTRFS_UUID_UNPARSED_SIZE]; | 2215 | lock_chunks(root); |
2160 | ret = init_first_rw_device(trans, root, device); | 2216 | ret = init_first_rw_device(trans, root, device); |
2217 | unlock_chunks(root); | ||
2161 | if (ret) { | 2218 | if (ret) { |
2162 | btrfs_abort_transaction(trans, root, ret); | 2219 | btrfs_abort_transaction(trans, root, ret); |
2163 | goto error_trans; | 2220 | goto error_trans; |
2164 | } | 2221 | } |
2222 | } | ||
2223 | |||
2224 | ret = btrfs_add_device(trans, root, device); | ||
2225 | if (ret) { | ||
2226 | btrfs_abort_transaction(trans, root, ret); | ||
2227 | goto error_trans; | ||
2228 | } | ||
2229 | |||
2230 | if (seeding_dev) { | ||
2231 | char fsid_buf[BTRFS_UUID_UNPARSED_SIZE]; | ||
2232 | |||
2165 | ret = btrfs_finish_sprout(trans, root); | 2233 | ret = btrfs_finish_sprout(trans, root); |
2166 | if (ret) { | 2234 | if (ret) { |
2167 | btrfs_abort_transaction(trans, root, ret); | 2235 | btrfs_abort_transaction(trans, root, ret); |
@@ -2175,21 +2243,8 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) | |||
2175 | root->fs_info->fsid); | 2243 | root->fs_info->fsid); |
2176 | if (kobject_rename(&root->fs_info->super_kobj, fsid_buf)) | 2244 | if (kobject_rename(&root->fs_info->super_kobj, fsid_buf)) |
2177 | goto error_trans; | 2245 | goto error_trans; |
2178 | } else { | ||
2179 | ret = btrfs_add_device(trans, root, device); | ||
2180 | if (ret) { | ||
2181 | btrfs_abort_transaction(trans, root, ret); | ||
2182 | goto error_trans; | ||
2183 | } | ||
2184 | } | 2246 | } |
2185 | 2247 | ||
2186 | /* | ||
2187 | * we've got more storage, clear any full flags on the space | ||
2188 | * infos | ||
2189 | */ | ||
2190 | btrfs_clear_space_info_full(root->fs_info); | ||
2191 | |||
2192 | unlock_chunks(root); | ||
2193 | root->fs_info->num_tolerated_disk_barrier_failures = | 2248 | root->fs_info->num_tolerated_disk_barrier_failures = |
2194 | btrfs_calc_num_tolerated_disk_barrier_failures(root->fs_info); | 2249 | btrfs_calc_num_tolerated_disk_barrier_failures(root->fs_info); |
2195 | ret = btrfs_commit_transaction(trans, root); | 2250 | ret = btrfs_commit_transaction(trans, root); |
@@ -2221,7 +2276,6 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) | |||
2221 | return ret; | 2276 | return ret; |
2222 | 2277 | ||
2223 | error_trans: | 2278 | error_trans: |
2224 | unlock_chunks(root); | ||
2225 | btrfs_end_transaction(trans, root); | 2279 | btrfs_end_transaction(trans, root); |
2226 | rcu_string_free(device->name); | 2280 | rcu_string_free(device->name); |
2227 | btrfs_kobj_rm_device(root->fs_info, device); | 2281 | btrfs_kobj_rm_device(root->fs_info, device); |
@@ -2236,6 +2290,7 @@ error: | |||
2236 | } | 2290 | } |
2237 | 2291 | ||
2238 | int btrfs_init_dev_replace_tgtdev(struct btrfs_root *root, char *device_path, | 2292 | int btrfs_init_dev_replace_tgtdev(struct btrfs_root *root, char *device_path, |
2293 | struct btrfs_device *srcdev, | ||
2239 | struct btrfs_device **device_out) | 2294 | struct btrfs_device **device_out) |
2240 | { | 2295 | { |
2241 | struct request_queue *q; | 2296 | struct request_queue *q; |
@@ -2248,24 +2303,38 @@ int btrfs_init_dev_replace_tgtdev(struct btrfs_root *root, char *device_path, | |||
2248 | int ret = 0; | 2303 | int ret = 0; |
2249 | 2304 | ||
2250 | *device_out = NULL; | 2305 | *device_out = NULL; |
2251 | if (fs_info->fs_devices->seeding) | 2306 | if (fs_info->fs_devices->seeding) { |
2307 | btrfs_err(fs_info, "the filesystem is a seed filesystem!"); | ||
2252 | return -EINVAL; | 2308 | return -EINVAL; |
2309 | } | ||
2253 | 2310 | ||
2254 | bdev = blkdev_get_by_path(device_path, FMODE_WRITE | FMODE_EXCL, | 2311 | bdev = blkdev_get_by_path(device_path, FMODE_WRITE | FMODE_EXCL, |
2255 | fs_info->bdev_holder); | 2312 | fs_info->bdev_holder); |
2256 | if (IS_ERR(bdev)) | 2313 | if (IS_ERR(bdev)) { |
2314 | btrfs_err(fs_info, "target device %s is invalid!", device_path); | ||
2257 | return PTR_ERR(bdev); | 2315 | return PTR_ERR(bdev); |
2316 | } | ||
2258 | 2317 | ||
2259 | filemap_write_and_wait(bdev->bd_inode->i_mapping); | 2318 | filemap_write_and_wait(bdev->bd_inode->i_mapping); |
2260 | 2319 | ||
2261 | devices = &fs_info->fs_devices->devices; | 2320 | devices = &fs_info->fs_devices->devices; |
2262 | list_for_each_entry(device, devices, dev_list) { | 2321 | list_for_each_entry(device, devices, dev_list) { |
2263 | if (device->bdev == bdev) { | 2322 | if (device->bdev == bdev) { |
2323 | btrfs_err(fs_info, "target device is in the filesystem!"); | ||
2264 | ret = -EEXIST; | 2324 | ret = -EEXIST; |
2265 | goto error; | 2325 | goto error; |
2266 | } | 2326 | } |
2267 | } | 2327 | } |
2268 | 2328 | ||
2329 | |||
2330 | if (i_size_read(bdev->bd_inode) < | ||
2331 | btrfs_device_get_total_bytes(srcdev)) { | ||
2332 | btrfs_err(fs_info, "target device is smaller than source device!"); | ||
2333 | ret = -EINVAL; | ||
2334 | goto error; | ||
2335 | } | ||
2336 | |||
2337 | |||
2269 | device = btrfs_alloc_device(NULL, &devid, NULL); | 2338 | device = btrfs_alloc_device(NULL, &devid, NULL); |
2270 | if (IS_ERR(device)) { | 2339 | if (IS_ERR(device)) { |
2271 | ret = PTR_ERR(device); | 2340 | ret = PTR_ERR(device); |
@@ -2289,8 +2358,12 @@ int btrfs_init_dev_replace_tgtdev(struct btrfs_root *root, char *device_path, | |||
2289 | device->io_width = root->sectorsize; | 2358 | device->io_width = root->sectorsize; |
2290 | device->io_align = root->sectorsize; | 2359 | device->io_align = root->sectorsize; |
2291 | device->sector_size = root->sectorsize; | 2360 | device->sector_size = root->sectorsize; |
2292 | device->total_bytes = i_size_read(bdev->bd_inode); | 2361 | device->total_bytes = btrfs_device_get_total_bytes(srcdev); |
2293 | device->disk_total_bytes = device->total_bytes; | 2362 | device->disk_total_bytes = btrfs_device_get_disk_total_bytes(srcdev); |
2363 | device->bytes_used = btrfs_device_get_bytes_used(srcdev); | ||
2364 | ASSERT(list_empty(&srcdev->resized_list)); | ||
2365 | device->commit_total_bytes = srcdev->commit_total_bytes; | ||
2366 | device->commit_bytes_used = device->bytes_used; | ||
2294 | device->dev_root = fs_info->dev_root; | 2367 | device->dev_root = fs_info->dev_root; |
2295 | device->bdev = bdev; | 2368 | device->bdev = bdev; |
2296 | device->in_fs_metadata = 1; | 2369 | device->in_fs_metadata = 1; |
@@ -2302,8 +2375,6 @@ int btrfs_init_dev_replace_tgtdev(struct btrfs_root *root, char *device_path, | |||
2302 | list_add(&device->dev_list, &fs_info->fs_devices->devices); | 2375 | list_add(&device->dev_list, &fs_info->fs_devices->devices); |
2303 | fs_info->fs_devices->num_devices++; | 2376 | fs_info->fs_devices->num_devices++; |
2304 | fs_info->fs_devices->open_devices++; | 2377 | fs_info->fs_devices->open_devices++; |
2305 | if (device->can_discard) | ||
2306 | fs_info->fs_devices->num_can_discard++; | ||
2307 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); | 2378 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); |
2308 | 2379 | ||
2309 | *device_out = device; | 2380 | *device_out = device; |
@@ -2362,8 +2433,10 @@ static noinline int btrfs_update_device(struct btrfs_trans_handle *trans, | |||
2362 | btrfs_set_device_io_align(leaf, dev_item, device->io_align); | 2433 | btrfs_set_device_io_align(leaf, dev_item, device->io_align); |
2363 | btrfs_set_device_io_width(leaf, dev_item, device->io_width); | 2434 | btrfs_set_device_io_width(leaf, dev_item, device->io_width); |
2364 | btrfs_set_device_sector_size(leaf, dev_item, device->sector_size); | 2435 | btrfs_set_device_sector_size(leaf, dev_item, device->sector_size); |
2365 | btrfs_set_device_total_bytes(leaf, dev_item, device->disk_total_bytes); | 2436 | btrfs_set_device_total_bytes(leaf, dev_item, |
2366 | btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used); | 2437 | btrfs_device_get_disk_total_bytes(device)); |
2438 | btrfs_set_device_bytes_used(leaf, dev_item, | ||
2439 | btrfs_device_get_bytes_used(device)); | ||
2367 | btrfs_mark_buffer_dirty(leaf); | 2440 | btrfs_mark_buffer_dirty(leaf); |
2368 | 2441 | ||
2369 | out: | 2442 | out: |
@@ -2371,40 +2444,44 @@ out: | |||
2371 | return ret; | 2444 | return ret; |
2372 | } | 2445 | } |
2373 | 2446 | ||
2374 | static int __btrfs_grow_device(struct btrfs_trans_handle *trans, | 2447 | int btrfs_grow_device(struct btrfs_trans_handle *trans, |
2375 | struct btrfs_device *device, u64 new_size) | 2448 | struct btrfs_device *device, u64 new_size) |
2376 | { | 2449 | { |
2377 | struct btrfs_super_block *super_copy = | 2450 | struct btrfs_super_block *super_copy = |
2378 | device->dev_root->fs_info->super_copy; | 2451 | device->dev_root->fs_info->super_copy; |
2379 | u64 old_total = btrfs_super_total_bytes(super_copy); | 2452 | struct btrfs_fs_devices *fs_devices; |
2380 | u64 diff = new_size - device->total_bytes; | 2453 | u64 old_total; |
2454 | u64 diff; | ||
2381 | 2455 | ||
2382 | if (!device->writeable) | 2456 | if (!device->writeable) |
2383 | return -EACCES; | 2457 | return -EACCES; |
2458 | |||
2459 | lock_chunks(device->dev_root); | ||
2460 | old_total = btrfs_super_total_bytes(super_copy); | ||
2461 | diff = new_size - device->total_bytes; | ||
2462 | |||
2384 | if (new_size <= device->total_bytes || | 2463 | if (new_size <= device->total_bytes || |
2385 | device->is_tgtdev_for_dev_replace) | 2464 | device->is_tgtdev_for_dev_replace) { |
2465 | unlock_chunks(device->dev_root); | ||
2386 | return -EINVAL; | 2466 | return -EINVAL; |
2467 | } | ||
2468 | |||
2469 | fs_devices = device->dev_root->fs_info->fs_devices; | ||
2387 | 2470 | ||
2388 | btrfs_set_super_total_bytes(super_copy, old_total + diff); | 2471 | btrfs_set_super_total_bytes(super_copy, old_total + diff); |
2389 | device->fs_devices->total_rw_bytes += diff; | 2472 | device->fs_devices->total_rw_bytes += diff; |
2390 | 2473 | ||
2391 | device->total_bytes = new_size; | 2474 | btrfs_device_set_total_bytes(device, new_size); |
2392 | device->disk_total_bytes = new_size; | 2475 | btrfs_device_set_disk_total_bytes(device, new_size); |
2393 | btrfs_clear_space_info_full(device->dev_root->fs_info); | 2476 | btrfs_clear_space_info_full(device->dev_root->fs_info); |
2477 | if (list_empty(&device->resized_list)) | ||
2478 | list_add_tail(&device->resized_list, | ||
2479 | &fs_devices->resized_devices); | ||
2480 | unlock_chunks(device->dev_root); | ||
2394 | 2481 | ||
2395 | return btrfs_update_device(trans, device); | 2482 | return btrfs_update_device(trans, device); |
2396 | } | 2483 | } |
2397 | 2484 | ||
2398 | int btrfs_grow_device(struct btrfs_trans_handle *trans, | ||
2399 | struct btrfs_device *device, u64 new_size) | ||
2400 | { | ||
2401 | int ret; | ||
2402 | lock_chunks(device->dev_root); | ||
2403 | ret = __btrfs_grow_device(trans, device, new_size); | ||
2404 | unlock_chunks(device->dev_root); | ||
2405 | return ret; | ||
2406 | } | ||
2407 | |||
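
With the wrapper gone, btrfs_grow_device() itself takes the chunk mutex and reads old_total and diff under it. A hedged sketch of the grow-side accounting, with simplified parameters standing in for the superblock total, fs_devices->total_rw_bytes, and the per-device size:

    #include <linux/errno.h>
    #include <linux/types.h>

    /* Hedged sketch: grow-side accounting done in one critical section.
     * All four parameters are simplified stand-ins, not the real fields. */
    static int grow_accounting(u64 *super_total, u64 *total_rw,
                               u64 *dev_total, u64 new_size)
    {
            u64 diff;

            if (new_size <= *dev_total)     /* shrinking is a separate path */
                    return -EINVAL;

            diff = new_size - *dev_total;
            *super_total += diff;           /* filesystem-wide total grows */
            *total_rw += diff;              /* writable capacity grows */
            *dev_total = new_size;          /* per-device size follows */
            return 0;
    }
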
2408 | static int btrfs_free_chunk(struct btrfs_trans_handle *trans, | 2485 | static int btrfs_free_chunk(struct btrfs_trans_handle *trans, |
2409 | struct btrfs_root *root, | 2486 | struct btrfs_root *root, |
2410 | u64 chunk_tree, u64 chunk_objectid, | 2487 | u64 chunk_tree, u64 chunk_objectid, |
@@ -2456,6 +2533,7 @@ static int btrfs_del_sys_chunk(struct btrfs_root *root, u64 chunk_objectid, u64 | |||
2456 | u32 cur; | 2533 | u32 cur; |
2457 | struct btrfs_key key; | 2534 | struct btrfs_key key; |
2458 | 2535 | ||
2536 | lock_chunks(root); | ||
2459 | array_size = btrfs_super_sys_array_size(super_copy); | 2537 | array_size = btrfs_super_sys_array_size(super_copy); |
2460 | 2538 | ||
2461 | ptr = super_copy->sys_chunk_array; | 2539 | ptr = super_copy->sys_chunk_array; |
@@ -2485,79 +2563,95 @@ static int btrfs_del_sys_chunk(struct btrfs_root *root, u64 chunk_objectid, u64 | |||
2485 | cur += len; | 2563 | cur += len; |
2486 | } | 2564 | } |
2487 | } | 2565 | } |
2566 | unlock_chunks(root); | ||
2488 | return ret; | 2567 | return ret; |
2489 | } | 2568 | } |
2490 | 2569 | ||
2491 | static int btrfs_relocate_chunk(struct btrfs_root *root, | 2570 | int btrfs_remove_chunk(struct btrfs_trans_handle *trans, |
2492 | u64 chunk_tree, u64 chunk_objectid, | 2571 | struct btrfs_root *root, u64 chunk_offset) |
2493 | u64 chunk_offset) | ||
2494 | { | 2572 | { |
2495 | struct extent_map_tree *em_tree; | 2573 | struct extent_map_tree *em_tree; |
2496 | struct btrfs_root *extent_root; | ||
2497 | struct btrfs_trans_handle *trans; | ||
2498 | struct extent_map *em; | 2574 | struct extent_map *em; |
2575 | struct btrfs_root *extent_root = root->fs_info->extent_root; | ||
2499 | struct map_lookup *map; | 2576 | struct map_lookup *map; |
2500 | int ret; | 2577 | u64 dev_extent_len = 0; |
2501 | int i; | 2578 | u64 chunk_objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; |
2579 | u64 chunk_tree = root->fs_info->chunk_root->objectid; | ||
2580 | int i, ret = 0; | ||
2502 | 2581 | ||
2582 | /* Just in case */ | ||
2503 | root = root->fs_info->chunk_root; | 2583 | root = root->fs_info->chunk_root; |
2504 | extent_root = root->fs_info->extent_root; | ||
2505 | em_tree = &root->fs_info->mapping_tree.map_tree; | 2584 | em_tree = &root->fs_info->mapping_tree.map_tree; |
2506 | 2585 | ||
2507 | ret = btrfs_can_relocate(extent_root, chunk_offset); | ||
2508 | if (ret) | ||
2509 | return -ENOSPC; | ||
2510 | |||
2511 | /* step one, relocate all the extents inside this chunk */ | ||
2512 | ret = btrfs_relocate_block_group(extent_root, chunk_offset); | ||
2513 | if (ret) | ||
2514 | return ret; | ||
2515 | |||
2516 | trans = btrfs_start_transaction(root, 0); | ||
2517 | if (IS_ERR(trans)) { | ||
2518 | ret = PTR_ERR(trans); | ||
2519 | btrfs_std_error(root->fs_info, ret); | ||
2520 | return ret; | ||
2521 | } | ||
2522 | |||
2523 | lock_chunks(root); | ||
2524 | |||
2525 | /* | ||
2526 | * step two, delete the device extents and the | ||
2527 | * chunk tree entries | ||
2528 | */ | ||
2529 | read_lock(&em_tree->lock); | 2586 | read_lock(&em_tree->lock); |
2530 | em = lookup_extent_mapping(em_tree, chunk_offset, 1); | 2587 | em = lookup_extent_mapping(em_tree, chunk_offset, 1); |
2531 | read_unlock(&em_tree->lock); | 2588 | read_unlock(&em_tree->lock); |
2532 | 2589 | ||
2533 | BUG_ON(!em || em->start > chunk_offset || | 2590 | if (!em || em->start > chunk_offset || |
2534 | em->start + em->len < chunk_offset); | 2591 | em->start + em->len < chunk_offset) { |
2592 | /* | ||
2593 | * This is a logic error, but we don't want to just rely on the | ||
2594 | * user having built with ASSERT enabled, so if ASSERT doesn't | ||
2595 | * do anything we still error out. | ||
2596 | */ | ||
2597 | ASSERT(0); | ||
2598 | if (em) | ||
2599 | free_extent_map(em); | ||
2600 | return -EINVAL; | ||
2601 | } | ||
2535 | map = (struct map_lookup *)em->bdev; | 2602 | map = (struct map_lookup *)em->bdev; |
2536 | 2603 | ||
2537 | for (i = 0; i < map->num_stripes; i++) { | 2604 | for (i = 0; i < map->num_stripes; i++) { |
2538 | ret = btrfs_free_dev_extent(trans, map->stripes[i].dev, | 2605 | struct btrfs_device *device = map->stripes[i].dev; |
2539 | map->stripes[i].physical); | 2606 | ret = btrfs_free_dev_extent(trans, device, |
2540 | BUG_ON(ret); | 2607 | map->stripes[i].physical, |
2608 | &dev_extent_len); | ||
2609 | if (ret) { | ||
2610 | btrfs_abort_transaction(trans, root, ret); | ||
2611 | goto out; | ||
2612 | } | ||
2613 | |||
2614 | if (device->bytes_used > 0) { | ||
2615 | lock_chunks(root); | ||
2616 | btrfs_device_set_bytes_used(device, | ||
2617 | device->bytes_used - dev_extent_len); | ||
2618 | spin_lock(&root->fs_info->free_chunk_lock); | ||
2619 | root->fs_info->free_chunk_space += dev_extent_len; | ||
2620 | spin_unlock(&root->fs_info->free_chunk_lock); | ||
2621 | btrfs_clear_space_info_full(root->fs_info); | ||
2622 | unlock_chunks(root); | ||
2623 | } | ||
2541 | 2624 | ||
2542 | if (map->stripes[i].dev) { | 2625 | if (map->stripes[i].dev) { |
2543 | ret = btrfs_update_device(trans, map->stripes[i].dev); | 2626 | ret = btrfs_update_device(trans, map->stripes[i].dev); |
2544 | BUG_ON(ret); | 2627 | if (ret) { |
2628 | btrfs_abort_transaction(trans, root, ret); | ||
2629 | goto out; | ||
2630 | } | ||
2545 | } | 2631 | } |
2546 | } | 2632 | } |
2547 | ret = btrfs_free_chunk(trans, root, chunk_tree, chunk_objectid, | 2633 | ret = btrfs_free_chunk(trans, root, chunk_tree, chunk_objectid, |
2548 | chunk_offset); | 2634 | chunk_offset); |
2549 | 2635 | if (ret) { | |
2550 | BUG_ON(ret); | 2636 | btrfs_abort_transaction(trans, root, ret); |
2637 | goto out; | ||
2638 | } | ||
2551 | 2639 | ||
2552 | trace_btrfs_chunk_free(root, map, chunk_offset, em->len); | 2640 | trace_btrfs_chunk_free(root, map, chunk_offset, em->len); |
2553 | 2641 | ||
2554 | if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) { | 2642 | if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) { |
2555 | ret = btrfs_del_sys_chunk(root, chunk_objectid, chunk_offset); | 2643 | ret = btrfs_del_sys_chunk(root, chunk_objectid, chunk_offset); |
2556 | BUG_ON(ret); | 2644 | if (ret) { |
2645 | btrfs_abort_transaction(trans, root, ret); | ||
2646 | goto out; | ||
2647 | } | ||
2557 | } | 2648 | } |
2558 | 2649 | ||
2559 | ret = btrfs_remove_block_group(trans, extent_root, chunk_offset); | 2650 | ret = btrfs_remove_block_group(trans, extent_root, chunk_offset); |
2560 | BUG_ON(ret); | 2651 | if (ret) { |
2652 | btrfs_abort_transaction(trans, extent_root, ret); | ||
2653 | goto out; | ||
2654 | } | ||
2561 | 2655 | ||
2562 | write_lock(&em_tree->lock); | 2656 | write_lock(&em_tree->lock); |
2563 | remove_extent_mapping(em_tree, em); | 2657 | remove_extent_mapping(em_tree, em); |
@@ -2565,12 +2659,46 @@ static int btrfs_relocate_chunk(struct btrfs_root *root, | |||
2565 | 2659 | ||
2566 | /* once for the tree */ | 2660 | /* once for the tree */ |
2567 | free_extent_map(em); | 2661 | free_extent_map(em); |
2662 | out: | ||
2568 | /* once for us */ | 2663 | /* once for us */ |
2569 | free_extent_map(em); | 2664 | free_extent_map(em); |
2665 | return ret; | ||
2666 | } | ||
2570 | 2667 | ||
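
The lookup check at the top of btrfs_remove_chunk() pairs ASSERT() with a real error return: ASSERT() only fires on builds with assertions enabled, so production kernels still bail out cleanly. A minimal sketch of the pattern:

    /* Sketch: catch logic errors loudly on debug builds, yet still
     * fail gracefully when assertions compile away to nothing. */
    static int lookup_or_fail(struct extent_map *em)
    {
            if (!em) {
                    ASSERT(0);      /* no-op unless assertions are built in */
                    return -EINVAL;
            }
            return 0;
    }
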
2571 | unlock_chunks(root); | 2668 | static int btrfs_relocate_chunk(struct btrfs_root *root, |
2669 | u64 chunk_tree, u64 chunk_objectid, | ||
2670 | u64 chunk_offset) | ||
2671 | { | ||
2672 | struct btrfs_root *extent_root; | ||
2673 | struct btrfs_trans_handle *trans; | ||
2674 | int ret; | ||
2675 | |||
2676 | root = root->fs_info->chunk_root; | ||
2677 | extent_root = root->fs_info->extent_root; | ||
2678 | |||
2679 | ret = btrfs_can_relocate(extent_root, chunk_offset); | ||
2680 | if (ret) | ||
2681 | return -ENOSPC; | ||
2682 | |||
2683 | /* step one, relocate all the extents inside this chunk */ | ||
2684 | ret = btrfs_relocate_block_group(extent_root, chunk_offset); | ||
2685 | if (ret) | ||
2686 | return ret; | ||
2687 | |||
2688 | trans = btrfs_start_transaction(root, 0); | ||
2689 | if (IS_ERR(trans)) { | ||
2690 | ret = PTR_ERR(trans); | ||
2691 | btrfs_std_error(root->fs_info, ret); | ||
2692 | return ret; | ||
2693 | } | ||
2694 | |||
2695 | /* | ||
2696 | * step two, delete the device extents and the | ||
2697 | * chunk tree entries | ||
2698 | */ | ||
2699 | ret = btrfs_remove_chunk(trans, root, chunk_offset); | ||
2572 | btrfs_end_transaction(trans, root); | 2700 | btrfs_end_transaction(trans, root); |
2573 | return 0; | 2701 | return ret; |
2574 | } | 2702 | } |
2575 | 2703 | ||
2576 | static int btrfs_relocate_sys_chunks(struct btrfs_root *root) | 2704 | static int btrfs_relocate_sys_chunks(struct btrfs_root *root) |
@@ -2623,8 +2751,8 @@ again: | |||
2623 | found_key.offset); | 2751 | found_key.offset); |
2624 | if (ret == -ENOSPC) | 2752 | if (ret == -ENOSPC) |
2625 | failed++; | 2753 | failed++; |
2626 | else if (ret) | 2754 | else |
2627 | BUG(); | 2755 | BUG_ON(ret); |
2628 | } | 2756 | } |
2629 | 2757 | ||
2630 | if (found_key.offset == 0) | 2758 | if (found_key.offset == 0) |
@@ -3031,11 +3159,12 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info) | |||
3031 | /* step one make some room on all the devices */ | 3159 | /* step one make some room on all the devices */ |
3032 | devices = &fs_info->fs_devices->devices; | 3160 | devices = &fs_info->fs_devices->devices; |
3033 | list_for_each_entry(device, devices, dev_list) { | 3161 | list_for_each_entry(device, devices, dev_list) { |
3034 | old_size = device->total_bytes; | 3162 | old_size = btrfs_device_get_total_bytes(device); |
3035 | size_to_free = div_factor(old_size, 1); | 3163 | size_to_free = div_factor(old_size, 1); |
3036 | size_to_free = min(size_to_free, (u64)1 * 1024 * 1024); | 3164 | size_to_free = min(size_to_free, (u64)1 * 1024 * 1024); |
3037 | if (!device->writeable || | 3165 | if (!device->writeable || |
3038 | device->total_bytes - device->bytes_used > size_to_free || | 3166 | btrfs_device_get_total_bytes(device) - |
3167 | btrfs_device_get_bytes_used(device) > size_to_free || | ||
3039 | device->is_tgtdev_for_dev_replace) | 3168 | device->is_tgtdev_for_dev_replace) |
3040 | continue; | 3169 | continue; |
3041 | 3170 | ||
@@ -3590,8 +3719,6 @@ static int btrfs_uuid_scan_kthread(void *data) | |||
3590 | max_key.type = BTRFS_ROOT_ITEM_KEY; | 3719 | max_key.type = BTRFS_ROOT_ITEM_KEY; |
3591 | max_key.offset = (u64)-1; | 3720 | max_key.offset = (u64)-1; |
3592 | 3721 | ||
3593 | path->keep_locks = 1; | ||
3594 | |||
3595 | while (1) { | 3722 | while (1) { |
3596 | ret = btrfs_search_forward(root, &key, path, 0); | 3723 | ret = btrfs_search_forward(root, &key, path, 0); |
3597 | if (ret) { | 3724 | if (ret) { |
@@ -3843,8 +3970,8 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) | |||
3843 | struct btrfs_key key; | 3970 | struct btrfs_key key; |
3844 | struct btrfs_super_block *super_copy = root->fs_info->super_copy; | 3971 | struct btrfs_super_block *super_copy = root->fs_info->super_copy; |
3845 | u64 old_total = btrfs_super_total_bytes(super_copy); | 3972 | u64 old_total = btrfs_super_total_bytes(super_copy); |
3846 | u64 old_size = device->total_bytes; | 3973 | u64 old_size = btrfs_device_get_total_bytes(device); |
3847 | u64 diff = device->total_bytes - new_size; | 3974 | u64 diff = old_size - new_size; |
3848 | 3975 | ||
3849 | if (device->is_tgtdev_for_dev_replace) | 3976 | if (device->is_tgtdev_for_dev_replace) |
3850 | return -EINVAL; | 3977 | return -EINVAL; |
@@ -3857,7 +3984,7 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) | |||
3857 | 3984 | ||
3858 | lock_chunks(root); | 3985 | lock_chunks(root); |
3859 | 3986 | ||
3860 | device->total_bytes = new_size; | 3987 | btrfs_device_set_total_bytes(device, new_size); |
3861 | if (device->writeable) { | 3988 | if (device->writeable) { |
3862 | device->fs_devices->total_rw_bytes -= diff; | 3989 | device->fs_devices->total_rw_bytes -= diff; |
3863 | spin_lock(&root->fs_info->free_chunk_lock); | 3990 | spin_lock(&root->fs_info->free_chunk_lock); |
@@ -3923,7 +4050,7 @@ again: | |||
3923 | ret = -ENOSPC; | 4050 | ret = -ENOSPC; |
3924 | lock_chunks(root); | 4051 | lock_chunks(root); |
3925 | 4052 | ||
3926 | device->total_bytes = old_size; | 4053 | btrfs_device_set_total_bytes(device, old_size); |
3927 | if (device->writeable) | 4054 | if (device->writeable) |
3928 | device->fs_devices->total_rw_bytes += diff; | 4055 | device->fs_devices->total_rw_bytes += diff; |
3929 | spin_lock(&root->fs_info->free_chunk_lock); | 4056 | spin_lock(&root->fs_info->free_chunk_lock); |
@@ -3941,18 +4068,17 @@ again: | |||
3941 | } | 4068 | } |
3942 | 4069 | ||
3943 | lock_chunks(root); | 4070 | lock_chunks(root); |
4071 | btrfs_device_set_disk_total_bytes(device, new_size); | ||
4072 | if (list_empty(&device->resized_list)) | ||
4073 | list_add_tail(&device->resized_list, | ||
4074 | &root->fs_info->fs_devices->resized_devices); | ||
3944 | 4075 | ||
3945 | device->disk_total_bytes = new_size; | ||
3946 | /* Now btrfs_update_device() will change the on-disk size. */ | ||
3947 | ret = btrfs_update_device(trans, device); | ||
3948 | if (ret) { | ||
3949 | unlock_chunks(root); | ||
3950 | btrfs_end_transaction(trans, root); | ||
3951 | goto done; | ||
3952 | } | ||
3953 | WARN_ON(diff > old_total); | 4076 | WARN_ON(diff > old_total); |
3954 | btrfs_set_super_total_bytes(super_copy, old_total - diff); | 4077 | btrfs_set_super_total_bytes(super_copy, old_total - diff); |
3955 | unlock_chunks(root); | 4078 | unlock_chunks(root); |
4079 | |||
4080 | /* Now btrfs_update_device() will change the on-disk size. */ | ||
4081 | ret = btrfs_update_device(trans, device); | ||
3956 | btrfs_end_transaction(trans, root); | 4082 | btrfs_end_transaction(trans, root); |
3957 | done: | 4083 | done: |
3958 | btrfs_free_path(path); | 4084 | btrfs_free_path(path); |
@@ -3968,10 +4094,13 @@ static int btrfs_add_system_chunk(struct btrfs_root *root, | |||
3968 | u32 array_size; | 4094 | u32 array_size; |
3969 | u8 *ptr; | 4095 | u8 *ptr; |
3970 | 4096 | ||
4097 | lock_chunks(root); | ||
3971 | array_size = btrfs_super_sys_array_size(super_copy); | 4098 | array_size = btrfs_super_sys_array_size(super_copy); |
3972 | if (array_size + item_size + sizeof(disk_key) | 4099 | if (array_size + item_size + sizeof(disk_key) |
3973 | > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE) | 4100 | > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE) { |
4101 | unlock_chunks(root); | ||
3974 | return -EFBIG; | 4102 | return -EFBIG; |
4103 | } | ||
3975 | 4104 | ||
3976 | ptr = super_copy->sys_chunk_array + array_size; | 4105 | ptr = super_copy->sys_chunk_array + array_size; |
3977 | btrfs_cpu_key_to_disk(&disk_key, key); | 4106 | btrfs_cpu_key_to_disk(&disk_key, key); |
@@ -3980,6 +4109,8 @@ static int btrfs_add_system_chunk(struct btrfs_root *root, | |||
3980 | memcpy(ptr, chunk, item_size); | 4109 | memcpy(ptr, chunk, item_size); |
3981 | item_size += sizeof(disk_key); | 4110 | item_size += sizeof(disk_key); |
3982 | btrfs_set_super_sys_array_size(super_copy, array_size + item_size); | 4111 | btrfs_set_super_sys_array_size(super_copy, array_size + item_size); |
4112 | unlock_chunks(root); | ||
4113 | |||
3983 | return 0; | 4114 | return 0; |
3984 | } | 4115 | } |
3985 | 4116 | ||
@@ -4349,6 +4480,16 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, | |||
4349 | if (ret) | 4480 | if (ret) |
4350 | goto error_del_extent; | 4481 | goto error_del_extent; |
4351 | 4482 | ||
4483 | for (i = 0; i < map->num_stripes; i++) { | ||
4484 | num_bytes = map->stripes[i].dev->bytes_used + stripe_size; | ||
4485 | btrfs_device_set_bytes_used(map->stripes[i].dev, num_bytes); | ||
4486 | } | ||
4487 | |||
4488 | spin_lock(&extent_root->fs_info->free_chunk_lock); | ||
4489 | extent_root->fs_info->free_chunk_space -= (stripe_size * | ||
4490 | map->num_stripes); | ||
4491 | spin_unlock(&extent_root->fs_info->free_chunk_lock); | ||
4492 | |||
4352 | free_extent_map(em); | 4493 | free_extent_map(em); |
4353 | check_raid56_incompat_flag(extent_root->fs_info, type); | 4494 | check_raid56_incompat_flag(extent_root->fs_info, type); |
4354 | 4495 | ||
@@ -4420,7 +4561,6 @@ int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans, | |||
4420 | device = map->stripes[i].dev; | 4561 | device = map->stripes[i].dev; |
4421 | dev_offset = map->stripes[i].physical; | 4562 | dev_offset = map->stripes[i].physical; |
4422 | 4563 | ||
4423 | device->bytes_used += stripe_size; | ||
4424 | ret = btrfs_update_device(trans, device); | 4564 | ret = btrfs_update_device(trans, device); |
4425 | if (ret) | 4565 | if (ret) |
4426 | goto out; | 4566 | goto out; |
@@ -4433,11 +4573,6 @@ int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans, | |||
4433 | goto out; | 4573 | goto out; |
4434 | } | 4574 | } |
4435 | 4575 | ||
4436 | spin_lock(&extent_root->fs_info->free_chunk_lock); | ||
4437 | extent_root->fs_info->free_chunk_space -= (stripe_size * | ||
4438 | map->num_stripes); | ||
4439 | spin_unlock(&extent_root->fs_info->free_chunk_lock); | ||
4440 | |||
4441 | stripe = &chunk->stripe; | 4576 | stripe = &chunk->stripe; |
4442 | for (i = 0; i < map->num_stripes; i++) { | 4577 | for (i = 0; i < map->num_stripes; i++) { |
4443 | device = map->stripes[i].dev; | 4578 | device = map->stripes[i].dev; |
@@ -4517,16 +4652,25 @@ static noinline int init_first_rw_device(struct btrfs_trans_handle *trans, | |||
4517 | alloc_profile = btrfs_get_alloc_profile(fs_info->chunk_root, 0); | 4652 | alloc_profile = btrfs_get_alloc_profile(fs_info->chunk_root, 0); |
4518 | ret = __btrfs_alloc_chunk(trans, extent_root, sys_chunk_offset, | 4653 | ret = __btrfs_alloc_chunk(trans, extent_root, sys_chunk_offset, |
4519 | alloc_profile); | 4654 | alloc_profile); |
4520 | if (ret) { | 4655 | return ret; |
4521 | btrfs_abort_transaction(trans, root, ret); | 4656 | } |
4522 | goto out; | 4657 | |
4658 | static inline int btrfs_chunk_max_errors(struct map_lookup *map) | ||
4659 | { | ||
4660 | int max_errors; | ||
4661 | |||
4662 | if (map->type & (BTRFS_BLOCK_GROUP_RAID1 | | ||
4663 | BTRFS_BLOCK_GROUP_RAID10 | | ||
4664 | BTRFS_BLOCK_GROUP_RAID5 | | ||
4665 | BTRFS_BLOCK_GROUP_DUP)) { | ||
4666 | max_errors = 1; | ||
4667 | } else if (map->type & BTRFS_BLOCK_GROUP_RAID6) { | ||
4668 | max_errors = 2; | ||
4669 | } else { | ||
4670 | max_errors = 0; | ||
4523 | } | 4671 | } |
4524 | 4672 | ||
4525 | ret = btrfs_add_device(trans, fs_info->chunk_root, device); | 4673 | return max_errors; |
4526 | if (ret) | ||
4527 | btrfs_abort_transaction(trans, root, ret); | ||
4528 | out: | ||
4529 | return ret; | ||
4530 | } | 4674 | } |
4531 | 4675 | ||
4532 | int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset) | 4676 | int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset) |
@@ -4535,6 +4679,7 @@ int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset) | |||
4535 | struct map_lookup *map; | 4679 | struct map_lookup *map; |
4536 | struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree; | 4680 | struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree; |
4537 | int readonly = 0; | 4681 | int readonly = 0; |
4682 | int miss_ndevs = 0; | ||
4538 | int i; | 4683 | int i; |
4539 | 4684 | ||
4540 | read_lock(&map_tree->map_tree.lock); | 4685 | read_lock(&map_tree->map_tree.lock); |
@@ -4543,18 +4688,27 @@ int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset) | |||
4543 | if (!em) | 4688 | if (!em) |
4544 | return 1; | 4689 | return 1; |
4545 | 4690 | ||
4546 | if (btrfs_test_opt(root, DEGRADED)) { | ||
4547 | free_extent_map(em); | ||
4548 | return 0; | ||
4549 | } | ||
4550 | |||
4551 | map = (struct map_lookup *)em->bdev; | 4691 | map = (struct map_lookup *)em->bdev; |
4552 | for (i = 0; i < map->num_stripes; i++) { | 4692 | for (i = 0; i < map->num_stripes; i++) { |
4693 | if (map->stripes[i].dev->missing) { | ||
4694 | miss_ndevs++; | ||
4695 | continue; | ||
4696 | } | ||
4697 | |||
4553 | if (!map->stripes[i].dev->writeable) { | 4698 | if (!map->stripes[i].dev->writeable) { |
4554 | readonly = 1; | 4699 | readonly = 1; |
4555 | break; | 4700 | goto end; |
4556 | } | 4701 | } |
4557 | } | 4702 | } |
4703 | |||
4704 | /* | ||
4705 | * If the number of missing devices is larger than max errors, | ||
4706 | * we cannot write the data into that chunk successfully, so | ||
4707 | * set it readonly. | ||
4708 | */ | ||
4709 | if (miss_ndevs > btrfs_chunk_max_errors(map)) | ||
4710 | readonly = 1; | ||
4711 | end: | ||
4558 | free_extent_map(em); | 4712 | free_extent_map(em); |
4559 | return readonly; | 4713 | return readonly; |
4560 | } | 4714 | } |
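
To make the new readonly rule concrete: btrfs_chunk_max_errors() above tolerates one lost device for RAID1/RAID10/RAID5/DUP and two for RAID6, so a RAID1 chunk with both stripes on missing devices has miss_ndevs = 2 > 1 and goes readonly. A sketch of the decision with the profile reduced to a plain enum (not the real block-group flag bits):

    enum profile { P_SINGLE, P_RAID1, P_RAID6 };    /* simplified */

    static int max_errors(enum profile p)
    {
            switch (p) {
            case P_RAID1:   return 1;       /* one mirror may be lost */
            case P_RAID6:   return 2;       /* two parity stripes */
            default:        return 0;       /* no redundancy */
            }
    }

    /* More missing devices than the profile tolerates means writes
     * to the chunk cannot fully succeed, so treat it as readonly. */
    static int chunk_readonly(enum profile p, int miss_ndevs)
    {
            return miss_ndevs > max_errors(p);
    }
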
@@ -4955,6 +5109,8 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, | |||
4955 | num_stripes = min_t(u64, map->num_stripes, | 5109 | num_stripes = min_t(u64, map->num_stripes, |
4956 | stripe_nr_end - stripe_nr_orig); | 5110 | stripe_nr_end - stripe_nr_orig); |
4957 | stripe_index = do_div(stripe_nr, map->num_stripes); | 5111 | stripe_index = do_div(stripe_nr, map->num_stripes); |
5112 | if (!(rw & (REQ_WRITE | REQ_DISCARD | REQ_GET_READ_MIRRORS))) | ||
5113 | mirror_num = 1; | ||
4958 | } else if (map->type & BTRFS_BLOCK_GROUP_RAID1) { | 5114 | } else if (map->type & BTRFS_BLOCK_GROUP_RAID1) { |
4959 | if (rw & (REQ_WRITE | REQ_DISCARD | REQ_GET_READ_MIRRORS)) | 5115 | if (rw & (REQ_WRITE | REQ_DISCARD | REQ_GET_READ_MIRRORS)) |
4960 | num_stripes = map->num_stripes; | 5116 | num_stripes = map->num_stripes; |
@@ -5058,6 +5214,9 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, | |||
5058 | /* We distribute the parity blocks across stripes */ | 5214 | /* We distribute the parity blocks across stripes */ |
5059 | tmp = stripe_nr + stripe_index; | 5215 | tmp = stripe_nr + stripe_index; |
5060 | stripe_index = do_div(tmp, map->num_stripes); | 5216 | stripe_index = do_div(tmp, map->num_stripes); |
5217 | if (!(rw & (REQ_WRITE | REQ_DISCARD | | ||
5218 | REQ_GET_READ_MIRRORS)) && mirror_num <= 1) | ||
5219 | mirror_num = 1; | ||
5061 | } | 5220 | } |
5062 | } else { | 5221 | } else { |
5063 | /* | 5222 | /* |
@@ -5165,16 +5324,8 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, | |||
5165 | } | 5324 | } |
5166 | } | 5325 | } |
5167 | 5326 | ||
5168 | if (rw & (REQ_WRITE | REQ_GET_READ_MIRRORS)) { | 5327 | if (rw & (REQ_WRITE | REQ_GET_READ_MIRRORS)) |
5169 | if (map->type & (BTRFS_BLOCK_GROUP_RAID1 | | 5328 | max_errors = btrfs_chunk_max_errors(map); |
5170 | BTRFS_BLOCK_GROUP_RAID10 | | ||
5171 | BTRFS_BLOCK_GROUP_RAID5 | | ||
5172 | BTRFS_BLOCK_GROUP_DUP)) { | ||
5173 | max_errors = 1; | ||
5174 | } else if (map->type & BTRFS_BLOCK_GROUP_RAID6) { | ||
5175 | max_errors = 2; | ||
5176 | } | ||
5177 | } | ||
5178 | 5329 | ||
5179 | if (dev_replace_is_ongoing && (rw & (REQ_WRITE | REQ_DISCARD)) && | 5330 | if (dev_replace_is_ongoing && (rw & (REQ_WRITE | REQ_DISCARD)) && |
5180 | dev_replace->tgtdev != NULL) { | 5331 | dev_replace->tgtdev != NULL) { |
@@ -5557,8 +5708,8 @@ static void submit_stripe_bio(struct btrfs_root *root, struct btrfs_bio *bbio, | |||
5557 | name = rcu_dereference(dev->name); | 5708 | name = rcu_dereference(dev->name); |
5558 | pr_debug("btrfs_map_bio: rw %d, sector=%llu, dev=%lu " | 5709 | pr_debug("btrfs_map_bio: rw %d, sector=%llu, dev=%lu " |
5559 | "(%s id %llu), size=%u\n", rw, | 5710 | "(%s id %llu), size=%u\n", rw, |
5560 | (u64)bio->bi_sector, (u_long)dev->bdev->bd_dev, | 5711 | (u64)bio->bi_iter.bi_sector, (u_long)dev->bdev->bd_dev, |
5561 | name->str, dev->devid, bio->bi_size); | 5712 | name->str, dev->devid, bio->bi_iter.bi_size); |
5562 | rcu_read_unlock(); | 5713 | rcu_read_unlock(); |
5563 | } | 5714 | } |
5564 | #endif | 5715 | #endif |
@@ -5736,10 +5887,10 @@ struct btrfs_device *btrfs_find_device(struct btrfs_fs_info *fs_info, u64 devid, | |||
5736 | } | 5887 | } |
5737 | 5888 | ||
5738 | static struct btrfs_device *add_missing_dev(struct btrfs_root *root, | 5889 | static struct btrfs_device *add_missing_dev(struct btrfs_root *root, |
5890 | struct btrfs_fs_devices *fs_devices, | ||
5739 | u64 devid, u8 *dev_uuid) | 5891 | u64 devid, u8 *dev_uuid) |
5740 | { | 5892 | { |
5741 | struct btrfs_device *device; | 5893 | struct btrfs_device *device; |
5742 | struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; | ||
5743 | 5894 | ||
5744 | device = btrfs_alloc_device(NULL, &devid, dev_uuid); | 5895 | device = btrfs_alloc_device(NULL, &devid, dev_uuid); |
5745 | if (IS_ERR(device)) | 5896 | if (IS_ERR(device)) |
@@ -5800,7 +5951,8 @@ struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info, | |||
5800 | else | 5951 | else |
5801 | generate_random_uuid(dev->uuid); | 5952 | generate_random_uuid(dev->uuid); |
5802 | 5953 | ||
5803 | btrfs_init_work(&dev->work, pending_bios_fn, NULL, NULL); | 5954 | btrfs_init_work(&dev->work, btrfs_submit_helper, |
5955 | pending_bios_fn, NULL, NULL); | ||
5804 | 5956 | ||
5805 | return dev; | 5957 | return dev; |
5806 | } | 5958 | } |
@@ -5875,7 +6027,8 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, | |||
5875 | } | 6027 | } |
5876 | if (!map->stripes[i].dev) { | 6028 | if (!map->stripes[i].dev) { |
5877 | map->stripes[i].dev = | 6029 | map->stripes[i].dev = |
5878 | add_missing_dev(root, devid, uuid); | 6030 | add_missing_dev(root, root->fs_info->fs_devices, |
6031 | devid, uuid); | ||
5879 | if (!map->stripes[i].dev) { | 6032 | if (!map->stripes[i].dev) { |
5880 | free_extent_map(em); | 6033 | free_extent_map(em); |
5881 | return -EIO; | 6034 | return -EIO; |
@@ -5902,7 +6055,9 @@ static void fill_device_from_item(struct extent_buffer *leaf, | |||
5902 | device->devid = btrfs_device_id(leaf, dev_item); | 6055 | device->devid = btrfs_device_id(leaf, dev_item); |
5903 | device->disk_total_bytes = btrfs_device_total_bytes(leaf, dev_item); | 6056 | device->disk_total_bytes = btrfs_device_total_bytes(leaf, dev_item); |
5904 | device->total_bytes = device->disk_total_bytes; | 6057 | device->total_bytes = device->disk_total_bytes; |
6058 | device->commit_total_bytes = device->disk_total_bytes; | ||
5905 | device->bytes_used = btrfs_device_bytes_used(leaf, dev_item); | 6059 | device->bytes_used = btrfs_device_bytes_used(leaf, dev_item); |
6060 | device->commit_bytes_used = device->bytes_used; | ||
5906 | device->type = btrfs_device_type(leaf, dev_item); | 6061 | device->type = btrfs_device_type(leaf, dev_item); |
5907 | device->io_align = btrfs_device_io_align(leaf, dev_item); | 6062 | device->io_align = btrfs_device_io_align(leaf, dev_item); |
5908 | device->io_width = btrfs_device_io_width(leaf, dev_item); | 6063 | device->io_width = btrfs_device_io_width(leaf, dev_item); |
@@ -5914,7 +6069,8 @@ static void fill_device_from_item(struct extent_buffer *leaf, | |||
5914 | read_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE); | 6069 | read_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE); |
5915 | } | 6070 | } |
5916 | 6071 | ||
5917 | static int open_seed_devices(struct btrfs_root *root, u8 *fsid) | 6072 | static struct btrfs_fs_devices *open_seed_devices(struct btrfs_root *root, |
6073 | u8 *fsid) | ||
5918 | { | 6074 | { |
5919 | struct btrfs_fs_devices *fs_devices; | 6075 | struct btrfs_fs_devices *fs_devices; |
5920 | int ret; | 6076 | int ret; |
@@ -5923,49 +6079,56 @@ static int open_seed_devices(struct btrfs_root *root, u8 *fsid) | |||
5923 | 6079 | ||
5924 | fs_devices = root->fs_info->fs_devices->seed; | 6080 | fs_devices = root->fs_info->fs_devices->seed; |
5925 | while (fs_devices) { | 6081 | while (fs_devices) { |
5926 | if (!memcmp(fs_devices->fsid, fsid, BTRFS_UUID_SIZE)) { | 6082 | if (!memcmp(fs_devices->fsid, fsid, BTRFS_UUID_SIZE)) |
5927 | ret = 0; | 6083 | return fs_devices; |
5928 | goto out; | 6084 | |
5929 | } | ||
5930 | fs_devices = fs_devices->seed; | 6085 | fs_devices = fs_devices->seed; |
5931 | } | 6086 | } |
5932 | 6087 | ||
5933 | fs_devices = find_fsid(fsid); | 6088 | fs_devices = find_fsid(fsid); |
5934 | if (!fs_devices) { | 6089 | if (!fs_devices) { |
5935 | ret = -ENOENT; | 6090 | if (!btrfs_test_opt(root, DEGRADED)) |
5936 | goto out; | 6091 | return ERR_PTR(-ENOENT); |
6092 | |||
6093 | fs_devices = alloc_fs_devices(fsid); | ||
6094 | if (IS_ERR(fs_devices)) | ||
6095 | return fs_devices; | ||
6096 | |||
6097 | fs_devices->seeding = 1; | ||
6098 | fs_devices->opened = 1; | ||
6099 | return fs_devices; | ||
5937 | } | 6100 | } |
5938 | 6101 | ||
5939 | fs_devices = clone_fs_devices(fs_devices); | 6102 | fs_devices = clone_fs_devices(fs_devices); |
5940 | if (IS_ERR(fs_devices)) { | 6103 | if (IS_ERR(fs_devices)) |
5941 | ret = PTR_ERR(fs_devices); | 6104 | return fs_devices; |
5942 | goto out; | ||
5943 | } | ||
5944 | 6105 | ||
5945 | ret = __btrfs_open_devices(fs_devices, FMODE_READ, | 6106 | ret = __btrfs_open_devices(fs_devices, FMODE_READ, |
5946 | root->fs_info->bdev_holder); | 6107 | root->fs_info->bdev_holder); |
5947 | if (ret) { | 6108 | if (ret) { |
5948 | free_fs_devices(fs_devices); | 6109 | free_fs_devices(fs_devices); |
6110 | fs_devices = ERR_PTR(ret); | ||
5949 | goto out; | 6111 | goto out; |
5950 | } | 6112 | } |
5951 | 6113 | ||
5952 | if (!fs_devices->seeding) { | 6114 | if (!fs_devices->seeding) { |
5953 | __btrfs_close_devices(fs_devices); | 6115 | __btrfs_close_devices(fs_devices); |
5954 | free_fs_devices(fs_devices); | 6116 | free_fs_devices(fs_devices); |
5955 | ret = -EINVAL; | 6117 | fs_devices = ERR_PTR(-EINVAL); |
5956 | goto out; | 6118 | goto out; |
5957 | } | 6119 | } |
5958 | 6120 | ||
5959 | fs_devices->seed = root->fs_info->fs_devices->seed; | 6121 | fs_devices->seed = root->fs_info->fs_devices->seed; |
5960 | root->fs_info->fs_devices->seed = fs_devices; | 6122 | root->fs_info->fs_devices->seed = fs_devices; |
5961 | out: | 6123 | out: |
5962 | return ret; | 6124 | return fs_devices; |
5963 | } | 6125 | } |
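
open_seed_devices() now returns its result through a single pointer that either points at a valid fs_devices or encodes an errno, the kernel's usual ERR_PTR convention. A minimal sketch of the calling side (the consume() wrapper is hypothetical):

    #include <linux/err.h>

    /* Sketch of unpacking an ERR_PTR-style return, as read_one_dev()
     * does with open_seed_devices() further down. */
    static int consume(struct btrfs_root *root, u8 *fsid)
    {
            struct btrfs_fs_devices *fs_devices;

            fs_devices = open_seed_devices(root, fsid);
            if (IS_ERR(fs_devices))
                    return PTR_ERR(fs_devices);     /* decode the errno */

            /* ... use fs_devices ... */
            return 0;
    }
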
5964 | 6126 | ||
5965 | static int read_one_dev(struct btrfs_root *root, | 6127 | static int read_one_dev(struct btrfs_root *root, |
5966 | struct extent_buffer *leaf, | 6128 | struct extent_buffer *leaf, |
5967 | struct btrfs_dev_item *dev_item) | 6129 | struct btrfs_dev_item *dev_item) |
5968 | { | 6130 | { |
6131 | struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; | ||
5969 | struct btrfs_device *device; | 6132 | struct btrfs_device *device; |
5970 | u64 devid; | 6133 | u64 devid; |
5971 | int ret; | 6134 | int ret; |
@@ -5979,31 +6142,48 @@ static int read_one_dev(struct btrfs_root *root, | |||
5979 | BTRFS_UUID_SIZE); | 6142 | BTRFS_UUID_SIZE); |
5980 | 6143 | ||
5981 | if (memcmp(fs_uuid, root->fs_info->fsid, BTRFS_UUID_SIZE)) { | 6144 | if (memcmp(fs_uuid, root->fs_info->fsid, BTRFS_UUID_SIZE)) { |
5982 | ret = open_seed_devices(root, fs_uuid); | 6145 | fs_devices = open_seed_devices(root, fs_uuid); |
5983 | if (ret && !btrfs_test_opt(root, DEGRADED)) | 6146 | if (IS_ERR(fs_devices)) |
5984 | return ret; | 6147 | return PTR_ERR(fs_devices); |
5985 | } | 6148 | } |
5986 | 6149 | ||
5987 | device = btrfs_find_device(root->fs_info, devid, dev_uuid, fs_uuid); | 6150 | device = btrfs_find_device(root->fs_info, devid, dev_uuid, fs_uuid); |
5988 | if (!device || !device->bdev) { | 6151 | if (!device) { |
5989 | if (!btrfs_test_opt(root, DEGRADED)) | 6152 | if (!btrfs_test_opt(root, DEGRADED)) |
5990 | return -EIO; | 6153 | return -EIO; |
5991 | 6154 | ||
5992 | if (!device) { | 6155 | btrfs_warn(root->fs_info, "devid %llu missing", devid); |
5993 | btrfs_warn(root->fs_info, "devid %llu missing", devid); | 6156 | device = add_missing_dev(root, fs_devices, devid, dev_uuid); |
5994 | device = add_missing_dev(root, devid, dev_uuid); | 6157 | if (!device) |
5995 | if (!device) | 6158 | return -ENOMEM; |
5996 | return -ENOMEM; | 6159 | } else { |
5997 | } else if (!device->missing) { | 6160 | if (!device->bdev && !btrfs_test_opt(root, DEGRADED)) |
6161 | return -EIO; | ||
6162 | |||
6163 | if (!device->bdev && !device->missing) { | ||
5998 | /* | 6164 | /* |
5999 | * this happens when a device that was properly setup | 6165 | * this happens when a device that was properly setup |
6000 | * in the device info lists suddenly goes bad. | 6166 | * in the device info lists suddenly goes bad. |
6001 | * device->bdev is NULL, and so we have to set | 6167 | * device->bdev is NULL, and so we have to set |
6002 | * device->missing to one here | 6168 | * device->missing to one here |
6003 | */ | 6169 | */ |
6004 | root->fs_info->fs_devices->missing_devices++; | 6170 | device->fs_devices->missing_devices++; |
6005 | device->missing = 1; | 6171 | device->missing = 1; |
6006 | } | 6172 | } |
6173 | |||
6174 | /* Move the device to its own fs_devices */ | ||
6175 | if (device->fs_devices != fs_devices) { | ||
6176 | ASSERT(device->missing); | ||
6177 | |||
6178 | list_move(&device->dev_list, &fs_devices->devices); | ||
6179 | device->fs_devices->num_devices--; | ||
6180 | fs_devices->num_devices++; | ||
6181 | |||
6182 | device->fs_devices->missing_devices--; | ||
6183 | fs_devices->missing_devices++; | ||
6184 | |||
6185 | device->fs_devices = fs_devices; | ||
6186 | } | ||
6007 | } | 6187 | } |
6008 | 6188 | ||
6009 | if (device->fs_devices != root->fs_info->fs_devices) { | 6189 | if (device->fs_devices != root->fs_info->fs_devices) { |
@@ -6319,16 +6499,18 @@ int btrfs_run_dev_stats(struct btrfs_trans_handle *trans, | |||
6319 | struct btrfs_root *dev_root = fs_info->dev_root; | 6499 | struct btrfs_root *dev_root = fs_info->dev_root; |
6320 | struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; | 6500 | struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; |
6321 | struct btrfs_device *device; | 6501 | struct btrfs_device *device; |
6502 | int stats_cnt; | ||
6322 | int ret = 0; | 6503 | int ret = 0; |
6323 | 6504 | ||
6324 | mutex_lock(&fs_devices->device_list_mutex); | 6505 | mutex_lock(&fs_devices->device_list_mutex); |
6325 | list_for_each_entry(device, &fs_devices->devices, dev_list) { | 6506 | list_for_each_entry(device, &fs_devices->devices, dev_list) { |
6326 | if (!device->dev_stats_valid || !device->dev_stats_dirty) | 6507 | if (!device->dev_stats_valid || !btrfs_dev_stats_dirty(device)) |
6327 | continue; | 6508 | continue; |
6328 | 6509 | ||
6510 | stats_cnt = atomic_read(&device->dev_stats_ccnt); | ||
6329 | ret = update_dev_stat_item(trans, dev_root, device); | 6511 | ret = update_dev_stat_item(trans, dev_root, device); |
6330 | if (!ret) | 6512 | if (!ret) |
6331 | device->dev_stats_dirty = 0; | 6513 | atomic_sub(stats_cnt, &device->dev_stats_ccnt); |
6332 | } | 6514 | } |
6333 | mutex_unlock(&fs_devices->device_list_mutex); | 6515 | mutex_unlock(&fs_devices->device_list_mutex); |
6334 | 6516 | ||
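
The snapshot-then-subtract pattern above keeps stat updates that race with the commit: dev_stats_ccnt is read before the item is written and only that snapshot is subtracted afterwards, so bumps arriving in between leave the device dirty for the next commit. A sketch of the same idea on a bare atomic counter (write_out() stands in for update_dev_stat_item()):

    #include <linux/atomic.h>

    /* Sketch: flush a dirty counter without losing concurrent bumps.
     * write_out() returns 0 on success, like update_dev_stat_item(). */
    static void flush_stats(atomic_t *dirty_cnt, int (*write_out)(void))
    {
            int snapshot = atomic_read(dirty_cnt); /* bumps before this point */

            if (!write_out())
                    atomic_sub(snapshot, dirty_cnt); /* later bumps stay dirty */
    }
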
@@ -6427,3 +6609,51 @@ int btrfs_scratch_superblock(struct btrfs_device *device) | |||
6427 | 6609 | ||
6428 | return 0; | 6610 | return 0; |
6429 | } | 6611 | } |
6612 | |||
6613 | /* | ||
6614 | * Update the commit size of all resized devices; this is the size | ||
6615 | * used when writing out the super blocks. | ||
6616 | */ | ||
6617 | void btrfs_update_commit_device_size(struct btrfs_fs_info *fs_info) | ||
6618 | { | ||
6619 | struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; | ||
6620 | struct btrfs_device *curr, *next; | ||
6621 | |||
6622 | if (list_empty(&fs_devices->resized_devices)) | ||
6623 | return; | ||
6624 | |||
6625 | mutex_lock(&fs_devices->device_list_mutex); | ||
6626 | lock_chunks(fs_info->dev_root); | ||
6627 | list_for_each_entry_safe(curr, next, &fs_devices->resized_devices, | ||
6628 | resized_list) { | ||
6629 | list_del_init(&curr->resized_list); | ||
6630 | curr->commit_total_bytes = curr->disk_total_bytes; | ||
6631 | } | ||
6632 | unlock_chunks(fs_info->dev_root); | ||
6633 | mutex_unlock(&fs_devices->device_list_mutex); | ||
6634 | } | ||
6635 | |||
6636 | /* Must be invoked during the transaction commit */ | ||
6637 | void btrfs_update_commit_device_bytes_used(struct btrfs_root *root, | ||
6638 | struct btrfs_transaction *transaction) | ||
6639 | { | ||
6640 | struct extent_map *em; | ||
6641 | struct map_lookup *map; | ||
6642 | struct btrfs_device *dev; | ||
6643 | int i; | ||
6644 | |||
6645 | if (list_empty(&transaction->pending_chunks)) | ||
6646 | return; | ||
6647 | |||
6648 | /* In order to kick the device replace finish process */ | ||
6649 | lock_chunks(root); | ||
6650 | list_for_each_entry(em, &transaction->pending_chunks, list) { | ||
6651 | map = (struct map_lookup *)em->bdev; | ||
6652 | |||
6653 | for (i = 0; i < map->num_stripes; i++) { | ||
6654 | dev = map->stripes[i].dev; | ||
6655 | dev->commit_bytes_used = dev->bytes_used; | ||
6656 | } | ||
6657 | } | ||
6658 | unlock_chunks(root); | ||
6659 | } | ||
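Both helpers exist so that superblock writeout sees sizes frozen at commit time rather than live values that may still be changing under it. A sketch of the assumed consumer, following the stack-setter naming convention from ctree.h:

    /* Assumed consumer when filling the on-disk dev item. */
    btrfs_set_stack_device_total_bytes(dev_item,
                                       device->commit_total_bytes);
    btrfs_set_stack_device_bytes_used(dev_item,
                                      device->commit_bytes_used);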
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 2aaa00c47816..08980fa23039 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h | |||
@@ -24,6 +24,8 @@ | |||
24 | #include <linux/btrfs.h> | 24 | #include <linux/btrfs.h> |
25 | #include "async-thread.h" | 25 | #include "async-thread.h" |
26 | 26 | ||
27 | extern struct mutex uuid_mutex; | ||
28 | |||
27 | #define BTRFS_STRIPE_LEN (64 * 1024) | 29 | #define BTRFS_STRIPE_LEN (64 * 1024) |
28 | 30 | ||
29 | struct buffer_head; | 31 | struct buffer_head; |
@@ -32,41 +34,59 @@ struct btrfs_pending_bios { | |||
32 | struct bio *tail; | 34 | struct bio *tail; |
33 | }; | 35 | }; |
34 | 36 | ||
37 | /* | ||
38 | * Use sequence counter to get consistent device stat data on | ||
39 | * 32-bit processors. | ||
40 | */ | ||
41 | #if BITS_PER_LONG==32 && defined(CONFIG_SMP) | ||
42 | #include <linux/seqlock.h> | ||
43 | #define __BTRFS_NEED_DEVICE_DATA_ORDERED | ||
44 | #define btrfs_device_data_ordered_init(device) \ | ||
45 | seqcount_init(&device->data_seqcount) | ||
46 | #else | ||
47 | #define btrfs_device_data_ordered_init(device) do { } while (0) | ||
48 | #endif | ||
49 | |||
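The conditional above exists because a u64 load is not atomic on 32-bit CPUs: without the seqcount, a reader could pair the low half of a new size with the high half of an old one. The init macro is expected to run once when a device is allocated; a hypothetical call site:

    /* Hypothetical allocation path: arm the seqcount once. */
    struct btrfs_device *dev = kzalloc(sizeof(*dev), GFP_NOFS);
    if (!dev)
            return ERR_PTR(-ENOMEM);
    btrfs_device_data_ordered_init(dev);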
35 | struct btrfs_device { | 50 | struct btrfs_device { |
36 | struct list_head dev_list; | 51 | struct list_head dev_list; |
37 | struct list_head dev_alloc_list; | 52 | struct list_head dev_alloc_list; |
38 | struct btrfs_fs_devices *fs_devices; | 53 | struct btrfs_fs_devices *fs_devices; |
54 | |||
39 | struct btrfs_root *dev_root; | 55 | struct btrfs_root *dev_root; |
40 | 56 | ||
57 | struct rcu_string *name; | ||
58 | |||
59 | u64 generation; | ||
60 | |||
61 | spinlock_t io_lock ____cacheline_aligned; | ||
62 | int running_pending; | ||
41 | /* regular prio bios */ | 63 | /* regular prio bios */ |
42 | struct btrfs_pending_bios pending_bios; | 64 | struct btrfs_pending_bios pending_bios; |
43 | /* WRITE_SYNC bios */ | 65 | /* WRITE_SYNC bios */ |
44 | struct btrfs_pending_bios pending_sync_bios; | 66 | struct btrfs_pending_bios pending_sync_bios; |
45 | 67 | ||
46 | u64 generation; | 68 | struct block_device *bdev; |
47 | int running_pending; | 69 | |
70 | /* the mode sent to blkdev_get */ | ||
71 | fmode_t mode; | ||
72 | |||
48 | int writeable; | 73 | int writeable; |
49 | int in_fs_metadata; | 74 | int in_fs_metadata; |
50 | int missing; | 75 | int missing; |
51 | int can_discard; | 76 | int can_discard; |
52 | int is_tgtdev_for_dev_replace; | 77 | int is_tgtdev_for_dev_replace; |
53 | 78 | ||
54 | spinlock_t io_lock; | 79 | #ifdef __BTRFS_NEED_DEVICE_DATA_ORDERED |
55 | /* the mode sent to blkdev_get */ | 80 | seqcount_t data_seqcount; |
56 | fmode_t mode; | 81 | #endif |
57 | |||
58 | struct block_device *bdev; | ||
59 | |||
60 | |||
61 | struct rcu_string *name; | ||
62 | 82 | ||
63 | /* the internal btrfs device id */ | 83 | /* the internal btrfs device id */ |
64 | u64 devid; | 84 | u64 devid; |
65 | 85 | ||
66 | /* size of the device */ | 86 | /* size of the device in memory */ |
67 | u64 total_bytes; | 87 | u64 total_bytes; |
68 | 88 | ||
69 | /* size of the disk */ | 89 | /* size of the device on disk */ |
70 | u64 disk_total_bytes; | 90 | u64 disk_total_bytes; |
71 | 91 | ||
72 | /* bytes used */ | 92 | /* bytes used */ |
@@ -83,10 +103,26 @@ struct btrfs_device { | |||
83 | /* minimal io size for this device */ | 103 | /* minimal io size for this device */ |
84 | u32 sector_size; | 104 | u32 sector_size; |
85 | 105 | ||
86 | |||
87 | /* physical drive uuid (or lvm uuid) */ | 106 | /* physical drive uuid (or lvm uuid) */ |
88 | u8 uuid[BTRFS_UUID_SIZE]; | 107 | u8 uuid[BTRFS_UUID_SIZE]; |
89 | 108 | ||
109 | /* | ||
110 | * size of the device in the current transaction | ||
111 | * | ||
112 | * This variable is updated when committing the transaction, | ||
113 | * and is protected by device_list_mutex | ||
114 | */ | ||
115 | u64 commit_total_bytes; | ||
116 | |||
117 | /* bytes used in the current transaction */ | ||
118 | u64 commit_bytes_used; | ||
119 | /* | ||
120 | * used to track devices that have been resized | ||
121 | * | ||
122 | * It is protected by chunk_lock. | ||
123 | */ | ||
124 | struct list_head resized_list; | ||
125 | |||
90 | /* for sending down flush barriers */ | 126 | /* for sending down flush barriers */ |
91 | int nobarriers; | 127 | int nobarriers; |
92 | struct bio *flush_bio; | 128 | struct bio *flush_bio; |
@@ -107,26 +143,90 @@ struct btrfs_device { | |||
107 | struct radix_tree_root reada_zones; | 143 | struct radix_tree_root reada_zones; |
108 | struct radix_tree_root reada_extents; | 144 | struct radix_tree_root reada_extents; |
109 | 145 | ||
110 | |||
111 | /* disk I/O failure stats. For detailed description refer to | 146 | /* disk I/O failure stats. For detailed description refer to |
112 | * enum btrfs_dev_stat_values in ioctl.h */ | 147 | * enum btrfs_dev_stat_values in ioctl.h */ |
113 | int dev_stats_valid; | 148 | int dev_stats_valid; |
114 | int dev_stats_dirty; /* counters need to be written to disk */ | 149 | |
150 | /* Counter recording changes to the device stats */ | ||
151 | atomic_t dev_stats_ccnt; | ||
115 | atomic_t dev_stat_values[BTRFS_DEV_STAT_VALUES_MAX]; | 152 | atomic_t dev_stat_values[BTRFS_DEV_STAT_VALUES_MAX]; |
116 | }; | 153 | }; |
117 | 154 | ||
155 | /* | ||
156 | * If we read those variables while holding their own lock, we needn't | ||
157 | * use the following helpers; reading them directly is safe. | ||
158 | */ | ||
159 | #if BITS_PER_LONG==32 && defined(CONFIG_SMP) | ||
160 | #define BTRFS_DEVICE_GETSET_FUNCS(name) \ | ||
161 | static inline u64 \ | ||
162 | btrfs_device_get_##name(const struct btrfs_device *dev) \ | ||
163 | { \ | ||
164 | u64 size; \ | ||
165 | unsigned int seq; \ | ||
166 | \ | ||
167 | do { \ | ||
168 | seq = read_seqcount_begin(&dev->data_seqcount); \ | ||
169 | size = dev->name; \ | ||
170 | } while (read_seqcount_retry(&dev->data_seqcount, seq)); \ | ||
171 | return size; \ | ||
172 | } \ | ||
173 | \ | ||
174 | static inline void \ | ||
175 | btrfs_device_set_##name(struct btrfs_device *dev, u64 size) \ | ||
176 | { \ | ||
177 | preempt_disable(); \ | ||
178 | write_seqcount_begin(&dev->data_seqcount); \ | ||
179 | dev->name = size; \ | ||
180 | write_seqcount_end(&dev->data_seqcount); \ | ||
181 | preempt_enable(); \ | ||
182 | } | ||
183 | #elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPT) | ||
184 | #define BTRFS_DEVICE_GETSET_FUNCS(name) \ | ||
185 | static inline u64 \ | ||
186 | btrfs_device_get_##name(const struct btrfs_device *dev) \ | ||
187 | { \ | ||
188 | u64 size; \ | ||
189 | \ | ||
190 | preempt_disable(); \ | ||
191 | size = dev->name; \ | ||
192 | preempt_enable(); \ | ||
193 | return size; \ | ||
194 | } \ | ||
195 | \ | ||
196 | static inline void \ | ||
197 | btrfs_device_set_##name(struct btrfs_device *dev, u64 size) \ | ||
198 | { \ | ||
199 | preempt_disable(); \ | ||
200 | dev->name = size; \ | ||
201 | preempt_enable(); \ | ||
202 | } | ||
203 | #else | ||
204 | #define BTRFS_DEVICE_GETSET_FUNCS(name) \ | ||
205 | static inline u64 \ | ||
206 | btrfs_device_get_##name(const struct btrfs_device *dev) \ | ||
207 | { \ | ||
208 | return dev->name; \ | ||
209 | } \ | ||
210 | \ | ||
211 | static inline void \ | ||
212 | btrfs_device_set_##name(struct btrfs_device *dev, u64 size) \ | ||
213 | { \ | ||
214 | dev->name = size; \ | ||
215 | } | ||
216 | #endif | ||
217 | |||
218 | BTRFS_DEVICE_GETSET_FUNCS(total_bytes); | ||
219 | BTRFS_DEVICE_GETSET_FUNCS(disk_total_bytes); | ||
220 | BTRFS_DEVICE_GETSET_FUNCS(bytes_used); | ||
221 | |||
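Whichever variant gets compiled in, callers use the same generated accessors and never touch the seqcount directly. For example (a usage sketch with an assumed one-GiB grow):

    /* 32-bit-safe read-modify-write of a mirrored size field. */
    u64 cur = btrfs_device_get_total_bytes(device);
    btrfs_device_set_total_bytes(device, cur + (1ULL << 30));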
118 | struct btrfs_fs_devices { | 222 | struct btrfs_fs_devices { |
119 | u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */ | 223 | u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */ |
120 | 224 | ||
121 | /* the device with this id has the most recent copy of the super */ | ||
122 | u64 latest_devid; | ||
123 | u64 latest_trans; | ||
124 | u64 num_devices; | 225 | u64 num_devices; |
125 | u64 open_devices; | 226 | u64 open_devices; |
126 | u64 rw_devices; | 227 | u64 rw_devices; |
127 | u64 missing_devices; | 228 | u64 missing_devices; |
128 | u64 total_rw_bytes; | 229 | u64 total_rw_bytes; |
129 | u64 num_can_discard; | ||
130 | u64 total_devices; | 230 | u64 total_devices; |
131 | struct block_device *latest_bdev; | 231 | struct block_device *latest_bdev; |
132 | 232 | ||
@@ -139,6 +239,7 @@ struct btrfs_fs_devices { | |||
139 | struct mutex device_list_mutex; | 239 | struct mutex device_list_mutex; |
140 | struct list_head devices; | 240 | struct list_head devices; |
141 | 241 | ||
242 | struct list_head resized_devices; | ||
142 | /* devices not currently being allocated */ | 243 | /* devices not currently being allocated */ |
143 | struct list_head alloc_list; | 244 | struct list_head alloc_list; |
144 | struct list_head list; | 245 | struct list_head list; |
@@ -167,8 +268,9 @@ struct btrfs_fs_devices { | |||
167 | */ | 268 | */ |
168 | typedef void (btrfs_io_bio_end_io_t) (struct btrfs_io_bio *bio, int err); | 269 | typedef void (btrfs_io_bio_end_io_t) (struct btrfs_io_bio *bio, int err); |
169 | struct btrfs_io_bio { | 270 | struct btrfs_io_bio { |
170 | unsigned long mirror_num; | 271 | unsigned int mirror_num; |
171 | unsigned long stripe_index; | 272 | unsigned int stripe_index; |
273 | u64 logical; | ||
172 | u8 *csum; | 274 | u8 *csum; |
173 | u8 csum_inline[BTRFS_BIO_INLINE_CSUM_SIZE]; | 275 | u8 csum_inline[BTRFS_BIO_INLINE_CSUM_SIZE]; |
174 | u8 *csum_allocated; | 276 | u8 *csum_allocated; |
@@ -325,6 +427,7 @@ struct btrfs_device *btrfs_find_device(struct btrfs_fs_info *fs_info, u64 devid, | |||
325 | int btrfs_shrink_device(struct btrfs_device *device, u64 new_size); | 427 | int btrfs_shrink_device(struct btrfs_device *device, u64 new_size); |
326 | int btrfs_init_new_device(struct btrfs_root *root, char *path); | 428 | int btrfs_init_new_device(struct btrfs_root *root, char *path); |
327 | int btrfs_init_dev_replace_tgtdev(struct btrfs_root *root, char *device_path, | 429 | int btrfs_init_dev_replace_tgtdev(struct btrfs_root *root, char *device_path, |
430 | struct btrfs_device *srcdev, | ||
328 | struct btrfs_device **device_out); | 431 | struct btrfs_device **device_out); |
329 | int btrfs_balance(struct btrfs_balance_control *bctl, | 432 | int btrfs_balance(struct btrfs_balance_control *bctl, |
330 | struct btrfs_ioctl_balance_args *bargs); | 433 | struct btrfs_ioctl_balance_args *bargs); |
@@ -360,11 +463,20 @@ unsigned long btrfs_full_stripe_len(struct btrfs_root *root, | |||
360 | int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans, | 463 | int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans, |
361 | struct btrfs_root *extent_root, | 464 | struct btrfs_root *extent_root, |
362 | u64 chunk_offset, u64 chunk_size); | 465 | u64 chunk_offset, u64 chunk_size); |
466 | int btrfs_remove_chunk(struct btrfs_trans_handle *trans, | ||
467 | struct btrfs_root *root, u64 chunk_offset); | ||
468 | |||
469 | static inline int btrfs_dev_stats_dirty(struct btrfs_device *dev) | ||
470 | { | ||
471 | return atomic_read(&dev->dev_stats_ccnt); | ||
472 | } | ||
473 | |||
363 | static inline void btrfs_dev_stat_inc(struct btrfs_device *dev, | 474 | static inline void btrfs_dev_stat_inc(struct btrfs_device *dev, |
364 | int index) | 475 | int index) |
365 | { | 476 | { |
366 | atomic_inc(dev->dev_stat_values + index); | 477 | atomic_inc(dev->dev_stat_values + index); |
367 | dev->dev_stats_dirty = 1; | 478 | smp_mb__before_atomic(); |
479 | atomic_inc(&dev->dev_stats_ccnt); | ||
368 | } | 480 | } |
369 | 481 | ||
370 | static inline int btrfs_dev_stat_read(struct btrfs_device *dev, | 482 | static inline int btrfs_dev_stat_read(struct btrfs_device *dev, |
@@ -379,7 +491,8 @@ static inline int btrfs_dev_stat_read_and_reset(struct btrfs_device *dev, | |||
379 | int ret; | 491 | int ret; |
380 | 492 | ||
381 | ret = atomic_xchg(dev->dev_stat_values + index, 0); | 493 | ret = atomic_xchg(dev->dev_stat_values + index, 0); |
382 | dev->dev_stats_dirty = 1; | 494 | smp_mb__before_atomic(); |
495 | atomic_inc(&dev->dev_stats_ccnt); | ||
383 | return ret; | 496 | return ret; |
384 | } | 497 | } |
385 | 498 | ||
@@ -387,7 +500,8 @@ static inline void btrfs_dev_stat_set(struct btrfs_device *dev, | |||
387 | int index, unsigned long val) | 500 | int index, unsigned long val) |
388 | { | 501 | { |
389 | atomic_set(dev->dev_stat_values + index, val); | 502 | atomic_set(dev->dev_stat_values + index, val); |
390 | dev->dev_stats_dirty = 1; | 503 | smp_mb__before_atomic(); |
504 | atomic_inc(&dev->dev_stats_ccnt); | ||
391 | } | 505 | } |
392 | 506 | ||
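The smp_mb__before_atomic() calls added above order each stat-value update before the matching ccnt increment, so a flusher that observes a non-zero counter is guaranteed to also observe the new value. The contract, restated as a sketch:

    /* Writer: publish the value, then advertise the change. */
    atomic_set(dev->dev_stat_values + index, val);
    smp_mb__before_atomic();        /* value visible before counter */
    atomic_inc(&dev->dev_stats_ccnt);

    /* Flusher: a non-zero counter implies the value is visible. */
    if (btrfs_dev_stats_dirty(dev))
            update_dev_stat_item(trans, dev_root, dev);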
393 | static inline void btrfs_dev_stat_reset(struct btrfs_device *dev, | 507 | static inline void btrfs_dev_stat_reset(struct btrfs_device *dev, |
@@ -395,4 +509,8 @@ static inline void btrfs_dev_stat_reset(struct btrfs_device *dev, | |||
395 | { | 509 | { |
396 | btrfs_dev_stat_set(dev, index, 0); | 510 | btrfs_dev_stat_set(dev, index, 0); |
397 | } | 511 | } |
512 | |||
513 | void btrfs_update_commit_device_size(struct btrfs_fs_info *fs_info); | ||
514 | void btrfs_update_commit_device_bytes_used(struct btrfs_root *root, | ||
515 | struct btrfs_transaction *transaction); | ||
398 | #endif | 516 | #endif |
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index ad8328d797ea..dcf20131fbe4 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c | |||
@@ -237,7 +237,7 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size) | |||
237 | * first xattr that we find and walk forward | 237 | * first xattr that we find and walk forward |
238 | */ | 238 | */ |
239 | key.objectid = btrfs_ino(inode); | 239 | key.objectid = btrfs_ino(inode); |
240 | btrfs_set_key_type(&key, BTRFS_XATTR_ITEM_KEY); | 240 | key.type = BTRFS_XATTR_ITEM_KEY; |
241 | key.offset = 0; | 241 | key.offset = 0; |
242 | 242 | ||
243 | path = btrfs_alloc_path(); | 243 | path = btrfs_alloc_path(); |
@@ -273,7 +273,7 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size) | |||
273 | /* check to make sure this item is what we want */ | 273 | /* check to make sure this item is what we want */ |
274 | if (found_key.objectid != key.objectid) | 274 | if (found_key.objectid != key.objectid) |
275 | break; | 275 | break; |
276 | if (btrfs_key_type(&found_key) != BTRFS_XATTR_ITEM_KEY) | 276 | if (found_key.type != BTRFS_XATTR_ITEM_KEY) |
277 | break; | 277 | break; |
278 | 278 | ||
279 | di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); | 279 | di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); |
diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c index b67d8fc81277..759fa4e2de8f 100644 --- a/fs/btrfs/zlib.c +++ b/fs/btrfs/zlib.c | |||
@@ -33,8 +33,7 @@ | |||
33 | #include "compression.h" | 33 | #include "compression.h" |
34 | 34 | ||
35 | struct workspace { | 35 | struct workspace { |
36 | z_stream inf_strm; | 36 | z_stream strm; |
37 | z_stream def_strm; | ||
38 | char *buf; | 37 | char *buf; |
39 | struct list_head list; | 38 | struct list_head list; |
40 | }; | 39 | }; |
@@ -43,8 +42,7 @@ static void zlib_free_workspace(struct list_head *ws) | |||
43 | { | 42 | { |
44 | struct workspace *workspace = list_entry(ws, struct workspace, list); | 43 | struct workspace *workspace = list_entry(ws, struct workspace, list); |
45 | 44 | ||
46 | vfree(workspace->def_strm.workspace); | 45 | vfree(workspace->strm.workspace); |
47 | vfree(workspace->inf_strm.workspace); | ||
48 | kfree(workspace->buf); | 46 | kfree(workspace->buf); |
49 | kfree(workspace); | 47 | kfree(workspace); |
50 | } | 48 | } |
@@ -52,17 +50,17 @@ static void zlib_free_workspace(struct list_head *ws) | |||
52 | static struct list_head *zlib_alloc_workspace(void) | 50 | static struct list_head *zlib_alloc_workspace(void) |
53 | { | 51 | { |
54 | struct workspace *workspace; | 52 | struct workspace *workspace; |
53 | int workspacesize; | ||
55 | 54 | ||
56 | workspace = kzalloc(sizeof(*workspace), GFP_NOFS); | 55 | workspace = kzalloc(sizeof(*workspace), GFP_NOFS); |
57 | if (!workspace) | 56 | if (!workspace) |
58 | return ERR_PTR(-ENOMEM); | 57 | return ERR_PTR(-ENOMEM); |
59 | 58 | ||
60 | workspace->def_strm.workspace = vmalloc(zlib_deflate_workspacesize( | 59 | workspacesize = max(zlib_deflate_workspacesize(MAX_WBITS, MAX_MEM_LEVEL), |
61 | MAX_WBITS, MAX_MEM_LEVEL)); | 60 | zlib_inflate_workspacesize()); |
62 | workspace->inf_strm.workspace = vmalloc(zlib_inflate_workspacesize()); | 61 | workspace->strm.workspace = vmalloc(workspacesize); |
63 | workspace->buf = kmalloc(PAGE_CACHE_SIZE, GFP_NOFS); | 62 | workspace->buf = kmalloc(PAGE_CACHE_SIZE, GFP_NOFS); |
64 | if (!workspace->def_strm.workspace || | 63 | if (!workspace->strm.workspace || !workspace->buf) |
65 | !workspace->inf_strm.workspace || !workspace->buf) | ||
66 | goto fail; | 64 | goto fail; |
67 | 65 | ||
68 | INIT_LIST_HEAD(&workspace->list); | 66 | INIT_LIST_HEAD(&workspace->list); |
@@ -96,14 +94,14 @@ static int zlib_compress_pages(struct list_head *ws, | |||
96 | *total_out = 0; | 94 | *total_out = 0; |
97 | *total_in = 0; | 95 | *total_in = 0; |
98 | 96 | ||
99 | if (Z_OK != zlib_deflateInit(&workspace->def_strm, 3)) { | 97 | if (Z_OK != zlib_deflateInit(&workspace->strm, 3)) { |
100 | printk(KERN_WARNING "BTRFS: deflateInit failed\n"); | 98 | printk(KERN_WARNING "BTRFS: deflateInit failed\n"); |
101 | ret = -EIO; | 99 | ret = -EIO; |
102 | goto out; | 100 | goto out; |
103 | } | 101 | } |
104 | 102 | ||
105 | workspace->def_strm.total_in = 0; | 103 | workspace->strm.total_in = 0; |
106 | workspace->def_strm.total_out = 0; | 104 | workspace->strm.total_out = 0; |
107 | 105 | ||
108 | in_page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT); | 106 | in_page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT); |
109 | data_in = kmap(in_page); | 107 | data_in = kmap(in_page); |
@@ -117,25 +115,25 @@ static int zlib_compress_pages(struct list_head *ws, | |||
117 | pages[0] = out_page; | 115 | pages[0] = out_page; |
118 | nr_pages = 1; | 116 | nr_pages = 1; |
119 | 117 | ||
120 | workspace->def_strm.next_in = data_in; | 118 | workspace->strm.next_in = data_in; |
121 | workspace->def_strm.next_out = cpage_out; | 119 | workspace->strm.next_out = cpage_out; |
122 | workspace->def_strm.avail_out = PAGE_CACHE_SIZE; | 120 | workspace->strm.avail_out = PAGE_CACHE_SIZE; |
123 | workspace->def_strm.avail_in = min(len, PAGE_CACHE_SIZE); | 121 | workspace->strm.avail_in = min(len, PAGE_CACHE_SIZE); |
124 | 122 | ||
125 | while (workspace->def_strm.total_in < len) { | 123 | while (workspace->strm.total_in < len) { |
126 | ret = zlib_deflate(&workspace->def_strm, Z_SYNC_FLUSH); | 124 | ret = zlib_deflate(&workspace->strm, Z_SYNC_FLUSH); |
127 | if (ret != Z_OK) { | 125 | if (ret != Z_OK) { |
128 | printk(KERN_DEBUG "BTRFS: deflate in loop returned %d\n", | 126 | printk(KERN_DEBUG "BTRFS: deflate in loop returned %d\n", |
129 | ret); | 127 | ret); |
130 | zlib_deflateEnd(&workspace->def_strm); | 128 | zlib_deflateEnd(&workspace->strm); |
131 | ret = -EIO; | 129 | ret = -EIO; |
132 | goto out; | 130 | goto out; |
133 | } | 131 | } |
134 | 132 | ||
135 | /* we're making it bigger, give up */ | 133 | /* we're making it bigger, give up */ |
136 | if (workspace->def_strm.total_in > 8192 && | 134 | if (workspace->strm.total_in > 8192 && |
137 | workspace->def_strm.total_in < | 135 | workspace->strm.total_in < |
138 | workspace->def_strm.total_out) { | 136 | workspace->strm.total_out) { |
139 | ret = -E2BIG; | 137 | ret = -E2BIG; |
140 | goto out; | 138 | goto out; |
141 | } | 139 | } |
@@ -143,7 +141,7 @@ static int zlib_compress_pages(struct list_head *ws, | |||
143 | * before the total_in so we will pull in a new page for | 141 | * before the total_in so we will pull in a new page for |
144 | * the stream end if required | 142 | * the stream end if required |
145 | */ | 143 | */ |
146 | if (workspace->def_strm.avail_out == 0) { | 144 | if (workspace->strm.avail_out == 0) { |
147 | kunmap(out_page); | 145 | kunmap(out_page); |
148 | if (nr_pages == nr_dest_pages) { | 146 | if (nr_pages == nr_dest_pages) { |
149 | out_page = NULL; | 147 | out_page = NULL; |
@@ -158,19 +156,19 @@ static int zlib_compress_pages(struct list_head *ws, | |||
158 | cpage_out = kmap(out_page); | 156 | cpage_out = kmap(out_page); |
159 | pages[nr_pages] = out_page; | 157 | pages[nr_pages] = out_page; |
160 | nr_pages++; | 158 | nr_pages++; |
161 | workspace->def_strm.avail_out = PAGE_CACHE_SIZE; | 159 | workspace->strm.avail_out = PAGE_CACHE_SIZE; |
162 | workspace->def_strm.next_out = cpage_out; | 160 | workspace->strm.next_out = cpage_out; |
163 | } | 161 | } |
164 | /* we're all done */ | 162 | /* we're all done */ |
165 | if (workspace->def_strm.total_in >= len) | 163 | if (workspace->strm.total_in >= len) |
166 | break; | 164 | break; |
167 | 165 | ||
168 | /* we've read in a full page, get a new one */ | 166 | /* we've read in a full page, get a new one */ |
169 | if (workspace->def_strm.avail_in == 0) { | 167 | if (workspace->strm.avail_in == 0) { |
170 | if (workspace->def_strm.total_out > max_out) | 168 | if (workspace->strm.total_out > max_out) |
171 | break; | 169 | break; |
172 | 170 | ||
173 | bytes_left = len - workspace->def_strm.total_in; | 171 | bytes_left = len - workspace->strm.total_in; |
174 | kunmap(in_page); | 172 | kunmap(in_page); |
175 | page_cache_release(in_page); | 173 | page_cache_release(in_page); |
176 | 174 | ||
@@ -178,28 +176,28 @@ static int zlib_compress_pages(struct list_head *ws, | |||
178 | in_page = find_get_page(mapping, | 176 | in_page = find_get_page(mapping, |
179 | start >> PAGE_CACHE_SHIFT); | 177 | start >> PAGE_CACHE_SHIFT); |
180 | data_in = kmap(in_page); | 178 | data_in = kmap(in_page); |
181 | workspace->def_strm.avail_in = min(bytes_left, | 179 | workspace->strm.avail_in = min(bytes_left, |
182 | PAGE_CACHE_SIZE); | 180 | PAGE_CACHE_SIZE); |
183 | workspace->def_strm.next_in = data_in; | 181 | workspace->strm.next_in = data_in; |
184 | } | 182 | } |
185 | } | 183 | } |
186 | workspace->def_strm.avail_in = 0; | 184 | workspace->strm.avail_in = 0; |
187 | ret = zlib_deflate(&workspace->def_strm, Z_FINISH); | 185 | ret = zlib_deflate(&workspace->strm, Z_FINISH); |
188 | zlib_deflateEnd(&workspace->def_strm); | 186 | zlib_deflateEnd(&workspace->strm); |
189 | 187 | ||
190 | if (ret != Z_STREAM_END) { | 188 | if (ret != Z_STREAM_END) { |
191 | ret = -EIO; | 189 | ret = -EIO; |
192 | goto out; | 190 | goto out; |
193 | } | 191 | } |
194 | 192 | ||
195 | if (workspace->def_strm.total_out >= workspace->def_strm.total_in) { | 193 | if (workspace->strm.total_out >= workspace->strm.total_in) { |
196 | ret = -E2BIG; | 194 | ret = -E2BIG; |
197 | goto out; | 195 | goto out; |
198 | } | 196 | } |
199 | 197 | ||
200 | ret = 0; | 198 | ret = 0; |
201 | *total_out = workspace->def_strm.total_out; | 199 | *total_out = workspace->strm.total_out; |
202 | *total_in = workspace->def_strm.total_in; | 200 | *total_in = workspace->strm.total_in; |
203 | out: | 201 | out: |
204 | *out_pages = nr_pages; | 202 | *out_pages = nr_pages; |
205 | if (out_page) | 203 | if (out_page) |
@@ -225,19 +223,18 @@ static int zlib_decompress_biovec(struct list_head *ws, struct page **pages_in, | |||
225 | size_t total_out = 0; | 223 | size_t total_out = 0; |
226 | unsigned long page_in_index = 0; | 224 | unsigned long page_in_index = 0; |
227 | unsigned long page_out_index = 0; | 225 | unsigned long page_out_index = 0; |
228 | unsigned long total_pages_in = (srclen + PAGE_CACHE_SIZE - 1) / | 226 | unsigned long total_pages_in = DIV_ROUND_UP(srclen, PAGE_CACHE_SIZE); |
229 | PAGE_CACHE_SIZE; | ||
230 | unsigned long buf_start; | 227 | unsigned long buf_start; |
231 | unsigned long pg_offset; | 228 | unsigned long pg_offset; |
232 | 229 | ||
233 | data_in = kmap(pages_in[page_in_index]); | 230 | data_in = kmap(pages_in[page_in_index]); |
234 | workspace->inf_strm.next_in = data_in; | 231 | workspace->strm.next_in = data_in; |
235 | workspace->inf_strm.avail_in = min_t(size_t, srclen, PAGE_CACHE_SIZE); | 232 | workspace->strm.avail_in = min_t(size_t, srclen, PAGE_CACHE_SIZE); |
236 | workspace->inf_strm.total_in = 0; | 233 | workspace->strm.total_in = 0; |
237 | 234 | ||
238 | workspace->inf_strm.total_out = 0; | 235 | workspace->strm.total_out = 0; |
239 | workspace->inf_strm.next_out = workspace->buf; | 236 | workspace->strm.next_out = workspace->buf; |
240 | workspace->inf_strm.avail_out = PAGE_CACHE_SIZE; | 237 | workspace->strm.avail_out = PAGE_CACHE_SIZE; |
241 | pg_offset = 0; | 238 | pg_offset = 0; |
242 | 239 | ||
243 | /* If it's deflate, and it's got no preset dictionary, then | 240 | /* If it's deflate, and it's got no preset dictionary, then |
@@ -247,21 +244,21 @@ static int zlib_decompress_biovec(struct list_head *ws, struct page **pages_in, | |||
247 | !(((data_in[0]<<8) + data_in[1]) % 31)) { | 244 | !(((data_in[0]<<8) + data_in[1]) % 31)) { |
248 | 245 | ||
249 | wbits = -((data_in[0] >> 4) + 8); | 246 | wbits = -((data_in[0] >> 4) + 8); |
250 | workspace->inf_strm.next_in += 2; | 247 | workspace->strm.next_in += 2; |
251 | workspace->inf_strm.avail_in -= 2; | 248 | workspace->strm.avail_in -= 2; |
252 | } | 249 | } |
253 | 250 | ||
254 | if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) { | 251 | if (Z_OK != zlib_inflateInit2(&workspace->strm, wbits)) { |
255 | printk(KERN_WARNING "BTRFS: inflateInit failed\n"); | 252 | printk(KERN_WARNING "BTRFS: inflateInit failed\n"); |
256 | return -EIO; | 253 | return -EIO; |
257 | } | 254 | } |
258 | while (workspace->inf_strm.total_in < srclen) { | 255 | while (workspace->strm.total_in < srclen) { |
259 | ret = zlib_inflate(&workspace->inf_strm, Z_NO_FLUSH); | 256 | ret = zlib_inflate(&workspace->strm, Z_NO_FLUSH); |
260 | if (ret != Z_OK && ret != Z_STREAM_END) | 257 | if (ret != Z_OK && ret != Z_STREAM_END) |
261 | break; | 258 | break; |
262 | 259 | ||
263 | buf_start = total_out; | 260 | buf_start = total_out; |
264 | total_out = workspace->inf_strm.total_out; | 261 | total_out = workspace->strm.total_out; |
265 | 262 | ||
266 | /* we didn't make progress in this inflate call, we're done */ | 263 | /* we didn't make progress in this inflate call, we're done */ |
267 | if (buf_start == total_out) | 264 | if (buf_start == total_out) |
@@ -276,10 +273,10 @@ static int zlib_decompress_biovec(struct list_head *ws, struct page **pages_in, | |||
276 | goto done; | 273 | goto done; |
277 | } | 274 | } |
278 | 275 | ||
279 | workspace->inf_strm.next_out = workspace->buf; | 276 | workspace->strm.next_out = workspace->buf; |
280 | workspace->inf_strm.avail_out = PAGE_CACHE_SIZE; | 277 | workspace->strm.avail_out = PAGE_CACHE_SIZE; |
281 | 278 | ||
282 | if (workspace->inf_strm.avail_in == 0) { | 279 | if (workspace->strm.avail_in == 0) { |
283 | unsigned long tmp; | 280 | unsigned long tmp; |
284 | kunmap(pages_in[page_in_index]); | 281 | kunmap(pages_in[page_in_index]); |
285 | page_in_index++; | 282 | page_in_index++; |
@@ -288,9 +285,9 @@ static int zlib_decompress_biovec(struct list_head *ws, struct page **pages_in, | |||
288 | break; | 285 | break; |
289 | } | 286 | } |
290 | data_in = kmap(pages_in[page_in_index]); | 287 | data_in = kmap(pages_in[page_in_index]); |
291 | workspace->inf_strm.next_in = data_in; | 288 | workspace->strm.next_in = data_in; |
292 | tmp = srclen - workspace->inf_strm.total_in; | 289 | tmp = srclen - workspace->strm.total_in; |
293 | workspace->inf_strm.avail_in = min(tmp, | 290 | workspace->strm.avail_in = min(tmp, |
294 | PAGE_CACHE_SIZE); | 291 | PAGE_CACHE_SIZE); |
295 | } | 292 | } |
296 | } | 293 | } |
@@ -299,7 +296,7 @@ static int zlib_decompress_biovec(struct list_head *ws, struct page **pages_in, | |||
299 | else | 296 | else |
300 | ret = 0; | 297 | ret = 0; |
301 | done: | 298 | done: |
302 | zlib_inflateEnd(&workspace->inf_strm); | 299 | zlib_inflateEnd(&workspace->strm); |
303 | if (data_in) | 300 | if (data_in) |
304 | kunmap(pages_in[page_in_index]); | 301 | kunmap(pages_in[page_in_index]); |
305 | return ret; | 302 | return ret; |
@@ -317,13 +314,13 @@ static int zlib_decompress(struct list_head *ws, unsigned char *data_in, | |||
317 | unsigned long total_out = 0; | 314 | unsigned long total_out = 0; |
318 | char *kaddr; | 315 | char *kaddr; |
319 | 316 | ||
320 | workspace->inf_strm.next_in = data_in; | 317 | workspace->strm.next_in = data_in; |
321 | workspace->inf_strm.avail_in = srclen; | 318 | workspace->strm.avail_in = srclen; |
322 | workspace->inf_strm.total_in = 0; | 319 | workspace->strm.total_in = 0; |
323 | 320 | ||
324 | workspace->inf_strm.next_out = workspace->buf; | 321 | workspace->strm.next_out = workspace->buf; |
325 | workspace->inf_strm.avail_out = PAGE_CACHE_SIZE; | 322 | workspace->strm.avail_out = PAGE_CACHE_SIZE; |
326 | workspace->inf_strm.total_out = 0; | 323 | workspace->strm.total_out = 0; |
327 | /* If it's deflate, and it's got no preset dictionary, then | 324 | /* If it's deflate, and it's got no preset dictionary, then |
328 | we can tell zlib to skip the adler32 check. */ | 325 | we can tell zlib to skip the adler32 check. */ |
329 | if (srclen > 2 && !(data_in[1] & PRESET_DICT) && | 326 | if (srclen > 2 && !(data_in[1] & PRESET_DICT) && |
@@ -331,11 +328,11 @@ static int zlib_decompress(struct list_head *ws, unsigned char *data_in, | |||
331 | !(((data_in[0]<<8) + data_in[1]) % 31)) { | 328 | !(((data_in[0]<<8) + data_in[1]) % 31)) { |
332 | 329 | ||
333 | wbits = -((data_in[0] >> 4) + 8); | 330 | wbits = -((data_in[0] >> 4) + 8); |
334 | workspace->inf_strm.next_in += 2; | 331 | workspace->strm.next_in += 2; |
335 | workspace->inf_strm.avail_in -= 2; | 332 | workspace->strm.avail_in -= 2; |
336 | } | 333 | } |
337 | 334 | ||
338 | if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) { | 335 | if (Z_OK != zlib_inflateInit2(&workspace->strm, wbits)) { |
339 | printk(KERN_WARNING "BTRFS: inflateInit failed\n"); | 336 | printk(KERN_WARNING "BTRFS: inflateInit failed\n"); |
340 | return -EIO; | 337 | return -EIO; |
341 | } | 338 | } |
@@ -346,12 +343,12 @@ static int zlib_decompress(struct list_head *ws, unsigned char *data_in, | |||
346 | unsigned long bytes; | 343 | unsigned long bytes; |
347 | unsigned long pg_offset = 0; | 344 | unsigned long pg_offset = 0; |
348 | 345 | ||
349 | ret = zlib_inflate(&workspace->inf_strm, Z_NO_FLUSH); | 346 | ret = zlib_inflate(&workspace->strm, Z_NO_FLUSH); |
350 | if (ret != Z_OK && ret != Z_STREAM_END) | 347 | if (ret != Z_OK && ret != Z_STREAM_END) |
351 | break; | 348 | break; |
352 | 349 | ||
353 | buf_start = total_out; | 350 | buf_start = total_out; |
354 | total_out = workspace->inf_strm.total_out; | 351 | total_out = workspace->strm.total_out; |
355 | 352 | ||
356 | if (total_out == buf_start) { | 353 | if (total_out == buf_start) { |
357 | ret = -EIO; | 354 | ret = -EIO; |
@@ -377,8 +374,8 @@ static int zlib_decompress(struct list_head *ws, unsigned char *data_in, | |||
377 | pg_offset += bytes; | 374 | pg_offset += bytes; |
378 | bytes_left -= bytes; | 375 | bytes_left -= bytes; |
379 | next: | 376 | next: |
380 | workspace->inf_strm.next_out = workspace->buf; | 377 | workspace->strm.next_out = workspace->buf; |
381 | workspace->inf_strm.avail_out = PAGE_CACHE_SIZE; | 378 | workspace->strm.avail_out = PAGE_CACHE_SIZE; |
382 | } | 379 | } |
383 | 380 | ||
384 | if (ret != Z_STREAM_END && bytes_left != 0) | 381 | if (ret != Z_STREAM_END && bytes_left != 0) |
@@ -386,7 +383,7 @@ next: | |||
386 | else | 383 | else |
387 | ret = 0; | 384 | ret = 0; |
388 | 385 | ||
389 | zlib_inflateEnd(&workspace->inf_strm); | 386 | zlib_inflateEnd(&workspace->strm); |
390 | return ret; | 387 | return ret; |
391 | } | 388 | } |
392 | 389 | ||
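One subtlety shared by both decompress paths above: if the first two bytes form a valid zlib header (compression method 8, FDICT clear, 16-bit value divisible by 31), the header is skipped by hand and the body is inflated with negative window bits, which tells zlib to treat it as a raw stream and skip the adler32 check. For the common header 0x78 0x9c: 0x789c = 30876 = 31 * 996, and wbits = -((0x78 >> 4) + 8) = -15. Restated on its own:

    /* Valid zlib header with no preset dictionary? */
    if ((data_in[0] & 0x0f) == Z_DEFLATED &&
        !(data_in[1] & PRESET_DICT) &&
        !(((data_in[0] << 8) + data_in[1]) % 31)) {
            wbits = -((data_in[0] >> 4) + 8); /* raw inflate, no adler32 */
            /* ...then consume the two header bytes by hand... */
    }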
diff --git a/fs/buffer.c b/fs/buffer.c index 8f05111bbb8b..44c14a87750e 100644 --- a/fs/buffer.c +++ b/fs/buffer.c | |||
@@ -1022,7 +1022,8 @@ grow_dev_page(struct block_device *bdev, sector_t block, | |||
1022 | bh = page_buffers(page); | 1022 | bh = page_buffers(page); |
1023 | if (bh->b_size == size) { | 1023 | if (bh->b_size == size) { |
1024 | end_block = init_page_buffers(page, bdev, | 1024 | end_block = init_page_buffers(page, bdev, |
1025 | index << sizebits, size); | 1025 | (sector_t)index << sizebits, |
1026 | size); | ||
1026 | goto done; | 1027 | goto done; |
1027 | } | 1028 | } |
1028 | if (!try_to_free_buffers(page)) | 1029 | if (!try_to_free_buffers(page)) |
@@ -1043,7 +1044,8 @@ grow_dev_page(struct block_device *bdev, sector_t block, | |||
1043 | */ | 1044 | */ |
1044 | spin_lock(&inode->i_mapping->private_lock); | 1045 | spin_lock(&inode->i_mapping->private_lock); |
1045 | link_dev_buffers(page, bh); | 1046 | link_dev_buffers(page, bh); |
1046 | end_block = init_page_buffers(page, bdev, index << sizebits, size); | 1047 | end_block = init_page_buffers(page, bdev, (sector_t)index << sizebits, |
1048 | size); | ||
1047 | spin_unlock(&inode->i_mapping->private_lock); | 1049 | spin_unlock(&inode->i_mapping->private_lock); |
1048 | done: | 1050 | done: |
1049 | ret = (block < end_block) ? 1 : -ENXIO; | 1051 | ret = (block < end_block) ? 1 : -ENXIO; |
@@ -1251,7 +1253,7 @@ static struct buffer_head *__bread_slow(struct buffer_head *bh) | |||
1251 | * a local interrupt disable for that. | 1253 | * a local interrupt disable for that. |
1252 | */ | 1254 | */ |
1253 | 1255 | ||
1254 | #define BH_LRU_SIZE 8 | 1256 | #define BH_LRU_SIZE 16 |
1255 | 1257 | ||
1256 | struct bh_lru { | 1258 | struct bh_lru { |
1257 | struct buffer_head *bhs[BH_LRU_SIZE]; | 1259 | struct buffer_head *bhs[BH_LRU_SIZE]; |
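Raising BH_LRU_SIZE doubles the per-CPU cache of recently used buffer heads that is consulted before falling back to the page-cache lookup. A simplified sketch of the fast path this enlarges (modelled loosely on lookup_bh_lru()):

    /* Per-CPU fast path, simplified: scan the small LRU first. */
    struct bh_lru *lru = this_cpu_ptr(&bh_lrus);
    int i;

    for (i = 0; i < BH_LRU_SIZE; i++) {
            struct buffer_head *bh = lru->bhs[i];
            if (bh && bh->b_blocknr == block &&
                bh->b_bdev == bdev && bh->b_size == size)
                    return bh; /* hit: skip __find_get_block_slow() */
    }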
@@ -2954,7 +2956,7 @@ static void end_bio_bh_io_sync(struct bio *bio, int err) | |||
2954 | 2956 | ||
2955 | /* | 2957 | /* |
2956 | * This allows us to do IO even on the odd last sectors | 2958 | * This allows us to do IO even on the odd last sectors |
2957 | * of a device, even if the bh block size is some multiple | 2959 | * of a device, even if the block size is some multiple |
2958 | * of the physical sector size. | 2960 | * of the physical sector size. |
2959 | * | 2961 | * |
2960 | * We'll just truncate the bio to the size of the device, | 2962 | * We'll just truncate the bio to the size of the device, |
@@ -2964,10 +2966,11 @@ static void end_bio_bh_io_sync(struct bio *bio, int err) | |||
2964 | * errors, this only handles the "we need to be able to | 2966 | * errors, this only handles the "we need to be able to |
2965 | * do IO at the final sector" case. | 2967 | * do IO at the final sector" case. |
2966 | */ | 2968 | */ |
2967 | static void guard_bh_eod(int rw, struct bio *bio, struct buffer_head *bh) | 2969 | void guard_bio_eod(int rw, struct bio *bio) |
2968 | { | 2970 | { |
2969 | sector_t maxsector; | 2971 | sector_t maxsector; |
2970 | unsigned bytes; | 2972 | struct bio_vec *bvec = &bio->bi_io_vec[bio->bi_vcnt - 1]; |
2973 | unsigned truncated_bytes; | ||
2971 | 2974 | ||
2972 | maxsector = i_size_read(bio->bi_bdev->bd_inode) >> 9; | 2975 | maxsector = i_size_read(bio->bi_bdev->bd_inode) >> 9; |
2973 | if (!maxsector) | 2976 | if (!maxsector) |
@@ -2982,23 +2985,20 @@ static void guard_bh_eod(int rw, struct bio *bio, struct buffer_head *bh) | |||
2982 | return; | 2985 | return; |
2983 | 2986 | ||
2984 | maxsector -= bio->bi_iter.bi_sector; | 2987 | maxsector -= bio->bi_iter.bi_sector; |
2985 | bytes = bio->bi_iter.bi_size; | 2988 | if (likely((bio->bi_iter.bi_size >> 9) <= maxsector)) |
2986 | if (likely((bytes >> 9) <= maxsector)) | ||
2987 | return; | 2989 | return; |
2988 | 2990 | ||
2989 | /* Uhhuh. We've got a bh that straddles the device size! */ | 2991 | /* Uhhuh. We've got a bio that straddles the device size! */ |
2990 | bytes = maxsector << 9; | 2992 | truncated_bytes = bio->bi_iter.bi_size - (maxsector << 9); |
2991 | 2993 | ||
2992 | /* Truncate the bio.. */ | 2994 | /* Truncate the bio.. */ |
2993 | bio->bi_iter.bi_size = bytes; | 2995 | bio->bi_iter.bi_size -= truncated_bytes; |
2994 | bio->bi_io_vec[0].bv_len = bytes; | 2996 | bvec->bv_len -= truncated_bytes; |
2995 | 2997 | ||
2996 | /* ..and clear the end of the buffer for reads */ | 2998 | /* ..and clear the end of the buffer for reads */ |
2997 | if ((rw & RW_MASK) == READ) { | 2999 | if ((rw & RW_MASK) == READ) { |
2998 | void *kaddr = kmap_atomic(bh->b_page); | 3000 | zero_user(bvec->bv_page, bvec->bv_offset + bvec->bv_len, |
2999 | memset(kaddr + bh_offset(bh) + bytes, 0, bh->b_size - bytes); | 3001 | truncated_bytes); |
3000 | kunmap_atomic(kaddr); | ||
3001 | flush_dcache_page(bh->b_page); | ||
3002 | } | 3002 | } |
3003 | } | 3003 | } |
3004 | 3004 | ||
@@ -3039,7 +3039,7 @@ int _submit_bh(int rw, struct buffer_head *bh, unsigned long bio_flags) | |||
3039 | bio->bi_flags |= bio_flags; | 3039 | bio->bi_flags |= bio_flags; |
3040 | 3040 | ||
3041 | /* Take care of bh's that straddle the end of the device */ | 3041 | /* Take care of bh's that straddle the end of the device */ |
3042 | guard_bh_eod(rw, bio, bh); | 3042 | guard_bio_eod(rw, bio); |
3043 | 3043 | ||
3044 | if (buffer_meta(bh)) | 3044 | if (buffer_meta(bh)) |
3045 | rw |= REQ_META; | 3045 | rw |= REQ_META; |
diff --git a/fs/cachefiles/bind.c b/fs/cachefiles/bind.c index d749731dc0ee..fbb08e97438d 100644 --- a/fs/cachefiles/bind.c +++ b/fs/cachefiles/bind.c | |||
@@ -50,18 +50,18 @@ int cachefiles_daemon_bind(struct cachefiles_cache *cache, char *args) | |||
50 | cache->brun_percent < 100); | 50 | cache->brun_percent < 100); |
51 | 51 | ||
52 | if (*args) { | 52 | if (*args) { |
53 | pr_err("'bind' command doesn't take an argument"); | 53 | pr_err("'bind' command doesn't take an argument\n"); |
54 | return -EINVAL; | 54 | return -EINVAL; |
55 | } | 55 | } |
56 | 56 | ||
57 | if (!cache->rootdirname) { | 57 | if (!cache->rootdirname) { |
58 | pr_err("No cache directory specified"); | 58 | pr_err("No cache directory specified\n"); |
59 | return -EINVAL; | 59 | return -EINVAL; |
60 | } | 60 | } |
61 | 61 | ||
62 | /* don't permit already bound caches to be re-bound */ | 62 | /* don't permit already bound caches to be re-bound */ |
63 | if (test_bit(CACHEFILES_READY, &cache->flags)) { | 63 | if (test_bit(CACHEFILES_READY, &cache->flags)) { |
64 | pr_err("Cache already bound"); | 64 | pr_err("Cache already bound\n"); |
65 | return -EBUSY; | 65 | return -EBUSY; |
66 | } | 66 | } |
67 | 67 | ||
@@ -248,7 +248,7 @@ error_open_root: | |||
248 | kmem_cache_free(cachefiles_object_jar, fsdef); | 248 | kmem_cache_free(cachefiles_object_jar, fsdef); |
249 | error_root_object: | 249 | error_root_object: |
250 | cachefiles_end_secure(cache, saved_cred); | 250 | cachefiles_end_secure(cache, saved_cred); |
251 | pr_err("Failed to register: %d", ret); | 251 | pr_err("Failed to register: %d\n", ret); |
252 | return ret; | 252 | return ret; |
253 | } | 253 | } |
254 | 254 | ||
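All of the cachefiles pr_err() strings gain an explicit "\n"; the subsystem prefix comes from pr_fmt, but the newline does not, and without it output from consecutive printk calls can run together on one console line. Roughly (the prefix definition is assumed from cachefiles' internal.h):

    /* Assumed prefix; each call site must supply its own newline. */
    #define pr_fmt(fmt) "CacheFiles: " fmt

    pr_err("Cache already bound\n"); /* "CacheFiles: Cache already bound" */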
diff --git a/fs/cachefiles/daemon.c b/fs/cachefiles/daemon.c index b078d3081d6c..ce1b115dcc28 100644 --- a/fs/cachefiles/daemon.c +++ b/fs/cachefiles/daemon.c | |||
@@ -315,7 +315,7 @@ static unsigned int cachefiles_daemon_poll(struct file *file, | |||
315 | static int cachefiles_daemon_range_error(struct cachefiles_cache *cache, | 315 | static int cachefiles_daemon_range_error(struct cachefiles_cache *cache, |
316 | char *args) | 316 | char *args) |
317 | { | 317 | { |
318 | pr_err("Free space limits must be in range 0%%<=stop<cull<run<100%%"); | 318 | pr_err("Free space limits must be in range 0%%<=stop<cull<run<100%%\n"); |
319 | 319 | ||
320 | return -EINVAL; | 320 | return -EINVAL; |
321 | } | 321 | } |
@@ -475,12 +475,12 @@ static int cachefiles_daemon_dir(struct cachefiles_cache *cache, char *args) | |||
475 | _enter(",%s", args); | 475 | _enter(",%s", args); |
476 | 476 | ||
477 | if (!*args) { | 477 | if (!*args) { |
478 | pr_err("Empty directory specified"); | 478 | pr_err("Empty directory specified\n"); |
479 | return -EINVAL; | 479 | return -EINVAL; |
480 | } | 480 | } |
481 | 481 | ||
482 | if (cache->rootdirname) { | 482 | if (cache->rootdirname) { |
483 | pr_err("Second cache directory specified"); | 483 | pr_err("Second cache directory specified\n"); |
484 | return -EEXIST; | 484 | return -EEXIST; |
485 | } | 485 | } |
486 | 486 | ||
@@ -503,12 +503,12 @@ static int cachefiles_daemon_secctx(struct cachefiles_cache *cache, char *args) | |||
503 | _enter(",%s", args); | 503 | _enter(",%s", args); |
504 | 504 | ||
505 | if (!*args) { | 505 | if (!*args) { |
506 | pr_err("Empty security context specified"); | 506 | pr_err("Empty security context specified\n"); |
507 | return -EINVAL; | 507 | return -EINVAL; |
508 | } | 508 | } |
509 | 509 | ||
510 | if (cache->secctx) { | 510 | if (cache->secctx) { |
511 | pr_err("Second security context specified"); | 511 | pr_err("Second security context specified\n"); |
512 | return -EINVAL; | 512 | return -EINVAL; |
513 | } | 513 | } |
514 | 514 | ||
@@ -531,7 +531,7 @@ static int cachefiles_daemon_tag(struct cachefiles_cache *cache, char *args) | |||
531 | _enter(",%s", args); | 531 | _enter(",%s", args); |
532 | 532 | ||
533 | if (!*args) { | 533 | if (!*args) { |
534 | pr_err("Empty tag specified"); | 534 | pr_err("Empty tag specified\n"); |
535 | return -EINVAL; | 535 | return -EINVAL; |
536 | } | 536 | } |
537 | 537 | ||
@@ -562,12 +562,12 @@ static int cachefiles_daemon_cull(struct cachefiles_cache *cache, char *args) | |||
562 | goto inval; | 562 | goto inval; |
563 | 563 | ||
564 | if (!test_bit(CACHEFILES_READY, &cache->flags)) { | 564 | if (!test_bit(CACHEFILES_READY, &cache->flags)) { |
565 | pr_err("cull applied to unready cache"); | 565 | pr_err("cull applied to unready cache\n"); |
566 | return -EIO; | 566 | return -EIO; |
567 | } | 567 | } |
568 | 568 | ||
569 | if (test_bit(CACHEFILES_DEAD, &cache->flags)) { | 569 | if (test_bit(CACHEFILES_DEAD, &cache->flags)) { |
570 | pr_err("cull applied to dead cache"); | 570 | pr_err("cull applied to dead cache\n"); |
571 | return -EIO; | 571 | return -EIO; |
572 | } | 572 | } |
573 | 573 | ||
@@ -587,11 +587,11 @@ static int cachefiles_daemon_cull(struct cachefiles_cache *cache, char *args) | |||
587 | 587 | ||
588 | notdir: | 588 | notdir: |
589 | path_put(&path); | 589 | path_put(&path); |
590 | pr_err("cull command requires dirfd to be a directory"); | 590 | pr_err("cull command requires dirfd to be a directory\n"); |
591 | return -ENOTDIR; | 591 | return -ENOTDIR; |
592 | 592 | ||
593 | inval: | 593 | inval: |
594 | pr_err("cull command requires dirfd and filename"); | 594 | pr_err("cull command requires dirfd and filename\n"); |
595 | return -EINVAL; | 595 | return -EINVAL; |
596 | } | 596 | } |
597 | 597 | ||
@@ -614,7 +614,7 @@ static int cachefiles_daemon_debug(struct cachefiles_cache *cache, char *args) | |||
614 | return 0; | 614 | return 0; |
615 | 615 | ||
616 | inval: | 616 | inval: |
617 | pr_err("debug command requires mask"); | 617 | pr_err("debug command requires mask\n"); |
618 | return -EINVAL; | 618 | return -EINVAL; |
619 | } | 619 | } |
620 | 620 | ||
@@ -634,12 +634,12 @@ static int cachefiles_daemon_inuse(struct cachefiles_cache *cache, char *args) | |||
634 | goto inval; | 634 | goto inval; |
635 | 635 | ||
636 | if (!test_bit(CACHEFILES_READY, &cache->flags)) { | 636 | if (!test_bit(CACHEFILES_READY, &cache->flags)) { |
637 | pr_err("inuse applied to unready cache"); | 637 | pr_err("inuse applied to unready cache\n"); |
638 | return -EIO; | 638 | return -EIO; |
639 | } | 639 | } |
640 | 640 | ||
641 | if (test_bit(CACHEFILES_DEAD, &cache->flags)) { | 641 | if (test_bit(CACHEFILES_DEAD, &cache->flags)) { |
642 | pr_err("inuse applied to dead cache"); | 642 | pr_err("inuse applied to dead cache\n"); |
643 | return -EIO; | 643 | return -EIO; |
644 | } | 644 | } |
645 | 645 | ||
@@ -659,11 +659,11 @@ static int cachefiles_daemon_inuse(struct cachefiles_cache *cache, char *args) | |||
659 | 659 | ||
660 | notdir: | 660 | notdir: |
661 | path_put(&path); | 661 | path_put(&path); |
662 | pr_err("inuse command requires dirfd to be a directory"); | 662 | pr_err("inuse command requires dirfd to be a directory\n"); |
663 | return -ENOTDIR; | 663 | return -ENOTDIR; |
664 | 664 | ||
665 | inval: | 665 | inval: |
666 | pr_err("inuse command requires dirfd and filename"); | 666 | pr_err("inuse command requires dirfd and filename\n"); |
667 | return -EINVAL; | 667 | return -EINVAL; |
668 | } | 668 | } |
669 | 669 | ||
diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h index 3d50998abf57..8c52472d2efa 100644 --- a/fs/cachefiles/internal.h +++ b/fs/cachefiles/internal.h | |||
@@ -255,7 +255,7 @@ extern int cachefiles_remove_object_xattr(struct cachefiles_cache *cache, | |||
255 | 255 | ||
256 | #define cachefiles_io_error(___cache, FMT, ...) \ | 256 | #define cachefiles_io_error(___cache, FMT, ...) \ |
257 | do { \ | 257 | do { \ |
258 | pr_err("I/O Error: " FMT, ##__VA_ARGS__); \ | 258 | pr_err("I/O Error: " FMT"\n", ##__VA_ARGS__); \ |
259 | fscache_io_error(&(___cache)->cache); \ | 259 | fscache_io_error(&(___cache)->cache); \ |
260 | set_bit(CACHEFILES_DEAD, &(___cache)->flags); \ | 260 | set_bit(CACHEFILES_DEAD, &(___cache)->flags); \ |
261 | } while (0) | 261 | } while (0) |
diff --git a/fs/cachefiles/main.c b/fs/cachefiles/main.c index 180edfb45f66..711f13d8c2de 100644 --- a/fs/cachefiles/main.c +++ b/fs/cachefiles/main.c | |||
@@ -84,7 +84,7 @@ error_proc: | |||
84 | error_object_jar: | 84 | error_object_jar: |
85 | misc_deregister(&cachefiles_dev); | 85 | misc_deregister(&cachefiles_dev); |
86 | error_dev: | 86 | error_dev: |
87 | pr_err("failed to register: %d", ret); | 87 | pr_err("failed to register: %d\n", ret); |
88 | return ret; | 88 | return ret; |
89 | } | 89 | } |
90 | 90 | ||
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index 5bf2b41e66d3..dad7d9542a24 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c | |||
@@ -543,7 +543,7 @@ lookup_again: | |||
543 | next, next->d_inode, next->d_inode->i_ino); | 543 | next, next->d_inode, next->d_inode->i_ino); |
544 | 544 | ||
545 | } else if (!S_ISDIR(next->d_inode->i_mode)) { | 545 | } else if (!S_ISDIR(next->d_inode->i_mode)) { |
546 | pr_err("inode %lu is not a directory", | 546 | pr_err("inode %lu is not a directory\n", |
547 | next->d_inode->i_ino); | 547 | next->d_inode->i_ino); |
548 | ret = -ENOBUFS; | 548 | ret = -ENOBUFS; |
549 | goto error; | 549 | goto error; |
@@ -574,7 +574,7 @@ lookup_again: | |||
574 | } else if (!S_ISDIR(next->d_inode->i_mode) && | 574 | } else if (!S_ISDIR(next->d_inode->i_mode) && |
575 | !S_ISREG(next->d_inode->i_mode) | 575 | !S_ISREG(next->d_inode->i_mode) |
576 | ) { | 576 | ) { |
577 | pr_err("inode %lu is not a file or directory", | 577 | pr_err("inode %lu is not a file or directory\n", |
578 | next->d_inode->i_ino); | 578 | next->d_inode->i_ino); |
579 | ret = -ENOBUFS; | 579 | ret = -ENOBUFS; |
580 | goto error; | 580 | goto error; |
@@ -768,7 +768,7 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache, | |||
768 | ASSERT(subdir->d_inode); | 768 | ASSERT(subdir->d_inode); |
769 | 769 | ||
770 | if (!S_ISDIR(subdir->d_inode->i_mode)) { | 770 | if (!S_ISDIR(subdir->d_inode->i_mode)) { |
771 | pr_err("%s is not a directory", dirname); | 771 | pr_err("%s is not a directory\n", dirname); |
772 | ret = -EIO; | 772 | ret = -EIO; |
773 | goto check_error; | 773 | goto check_error; |
774 | } | 774 | } |
@@ -779,7 +779,8 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache, | |||
779 | !subdir->d_inode->i_op->lookup || | 779 | !subdir->d_inode->i_op->lookup || |
780 | !subdir->d_inode->i_op->mkdir || | 780 | !subdir->d_inode->i_op->mkdir || |
781 | !subdir->d_inode->i_op->create || | 781 | !subdir->d_inode->i_op->create || |
782 | !subdir->d_inode->i_op->rename || | 782 | (!subdir->d_inode->i_op->rename && |
783 | !subdir->d_inode->i_op->rename2) || | ||
783 | !subdir->d_inode->i_op->rmdir || | 784 | !subdir->d_inode->i_op->rmdir || |
784 | !subdir->d_inode->i_op->unlink) | 785 | !subdir->d_inode->i_op->unlink) |
785 | goto check_error; | 786 | goto check_error; |
@@ -795,13 +796,13 @@ check_error: | |||
795 | mkdir_error: | 796 | mkdir_error: |
796 | mutex_unlock(&dir->d_inode->i_mutex); | 797 | mutex_unlock(&dir->d_inode->i_mutex); |
797 | dput(subdir); | 798 | dput(subdir); |
798 | pr_err("mkdir %s failed with error %d", dirname, ret); | 799 | pr_err("mkdir %s failed with error %d\n", dirname, ret); |
799 | return ERR_PTR(ret); | 800 | return ERR_PTR(ret); |
800 | 801 | ||
801 | lookup_error: | 802 | lookup_error: |
802 | mutex_unlock(&dir->d_inode->i_mutex); | 803 | mutex_unlock(&dir->d_inode->i_mutex); |
803 | ret = PTR_ERR(subdir); | 804 | ret = PTR_ERR(subdir); |
804 | pr_err("Lookup %s failed with error %d", dirname, ret); | 805 | pr_err("Lookup %s failed with error %d\n", dirname, ret); |
805 | return ERR_PTR(ret); | 806 | return ERR_PTR(ret); |
806 | 807 | ||
807 | nomem_d_alloc: | 808 | nomem_d_alloc: |
@@ -891,7 +892,7 @@ lookup_error: | |||
891 | if (ret == -EIO) { | 892 | if (ret == -EIO) { |
892 | cachefiles_io_error(cache, "Lookup failed"); | 893 | cachefiles_io_error(cache, "Lookup failed"); |
893 | } else if (ret != -ENOMEM) { | 894 | } else if (ret != -ENOMEM) { |
894 | pr_err("Internal error: %d", ret); | 895 | pr_err("Internal error: %d\n", ret); |
895 | ret = -EIO; | 896 | ret = -EIO; |
896 | } | 897 | } |
897 | 898 | ||
@@ -950,7 +951,7 @@ error: | |||
950 | } | 951 | } |
951 | 952 | ||
952 | if (ret != -ENOMEM) { | 953 | if (ret != -ENOMEM) { |
953 | pr_err("Internal error: %d", ret); | 954 | pr_err("Internal error: %d\n", ret); |
954 | ret = -EIO; | 955 | ret = -EIO; |
955 | } | 956 | } |
956 | 957 | ||
diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c index 4b1fb5ca65b8..25e745b8eb1b 100644 --- a/fs/cachefiles/rdwr.c +++ b/fs/cachefiles/rdwr.c | |||
@@ -151,7 +151,6 @@ static void cachefiles_read_copier(struct fscache_operation *_op) | |||
151 | struct cachefiles_one_read *monitor; | 151 | struct cachefiles_one_read *monitor; |
152 | struct cachefiles_object *object; | 152 | struct cachefiles_object *object; |
153 | struct fscache_retrieval *op; | 153 | struct fscache_retrieval *op; |
154 | struct pagevec pagevec; | ||
155 | int error, max; | 154 | int error, max; |
156 | 155 | ||
157 | op = container_of(_op, struct fscache_retrieval, op); | 156 | op = container_of(_op, struct fscache_retrieval, op); |
@@ -160,8 +159,6 @@ static void cachefiles_read_copier(struct fscache_operation *_op) | |||
160 | 159 | ||
161 | _enter("{ino=%lu}", object->backer->d_inode->i_ino); | 160 | _enter("{ino=%lu}", object->backer->d_inode->i_ino); |
162 | 161 | ||
163 | pagevec_init(&pagevec, 0); | ||
164 | |||
165 | max = 8; | 162 | max = 8; |
166 | spin_lock_irq(&object->work_lock); | 163 | spin_lock_irq(&object->work_lock); |
167 | 164 | ||
@@ -396,7 +393,6 @@ int cachefiles_read_or_alloc_page(struct fscache_retrieval *op, | |||
396 | { | 393 | { |
397 | struct cachefiles_object *object; | 394 | struct cachefiles_object *object; |
398 | struct cachefiles_cache *cache; | 395 | struct cachefiles_cache *cache; |
399 | struct pagevec pagevec; | ||
400 | struct inode *inode; | 396 | struct inode *inode; |
401 | sector_t block0, block; | 397 | sector_t block0, block; |
402 | unsigned shift; | 398 | unsigned shift; |
@@ -427,8 +423,6 @@ int cachefiles_read_or_alloc_page(struct fscache_retrieval *op, | |||
427 | op->op.flags |= FSCACHE_OP_ASYNC; | 423 | op->op.flags |= FSCACHE_OP_ASYNC; |
428 | op->op.processor = cachefiles_read_copier; | 424 | op->op.processor = cachefiles_read_copier; |
429 | 425 | ||
430 | pagevec_init(&pagevec, 0); | ||
431 | |||
432 | /* we assume the absence or presence of the first block is a good | 426 | /* we assume the absence or presence of the first block is a good |
433 | * enough indication for the page as a whole | 427 | * enough indication for the page as a whole |
434 | * - TODO: don't use bmap() for this as it is _not_ actually good | 428 | * - TODO: don't use bmap() for this as it is _not_ actually good |
diff --git a/fs/cachefiles/xattr.c b/fs/cachefiles/xattr.c index 1ad51ffbb275..acbc1f094fb1 100644 --- a/fs/cachefiles/xattr.c +++ b/fs/cachefiles/xattr.c | |||
@@ -51,7 +51,7 @@ int cachefiles_check_object_type(struct cachefiles_object *object) | |||
51 | } | 51 | } |
52 | 52 | ||
53 | if (ret != -EEXIST) { | 53 | if (ret != -EEXIST) { |
54 | pr_err("Can't set xattr on %*.*s [%lu] (err %d)", | 54 | pr_err("Can't set xattr on %*.*s [%lu] (err %d)\n", |
55 | dentry->d_name.len, dentry->d_name.len, | 55 | dentry->d_name.len, dentry->d_name.len, |
56 | dentry->d_name.name, dentry->d_inode->i_ino, | 56 | dentry->d_name.name, dentry->d_inode->i_ino, |
57 | -ret); | 57 | -ret); |
@@ -64,7 +64,7 @@ int cachefiles_check_object_type(struct cachefiles_object *object) | |||
64 | if (ret == -ERANGE) | 64 | if (ret == -ERANGE) |
65 | goto bad_type_length; | 65 | goto bad_type_length; |
66 | 66 | ||
67 | pr_err("Can't read xattr on %*.*s [%lu] (err %d)", | 67 | pr_err("Can't read xattr on %*.*s [%lu] (err %d)\n", |
68 | dentry->d_name.len, dentry->d_name.len, | 68 | dentry->d_name.len, dentry->d_name.len, |
69 | dentry->d_name.name, dentry->d_inode->i_ino, | 69 | dentry->d_name.name, dentry->d_inode->i_ino, |
70 | -ret); | 70 | -ret); |
@@ -85,14 +85,14 @@ error: | |||
85 | return ret; | 85 | return ret; |
86 | 86 | ||
87 | bad_type_length: | 87 | bad_type_length: |
88 | pr_err("Cache object %lu type xattr length incorrect", | 88 | pr_err("Cache object %lu type xattr length incorrect\n", |
89 | dentry->d_inode->i_ino); | 89 | dentry->d_inode->i_ino); |
90 | ret = -EIO; | 90 | ret = -EIO; |
91 | goto error; | 91 | goto error; |
92 | 92 | ||
93 | bad_type: | 93 | bad_type: |
94 | xtype[2] = 0; | 94 | xtype[2] = 0; |
95 | pr_err("Cache object %*.*s [%lu] type %s not %s", | 95 | pr_err("Cache object %*.*s [%lu] type %s not %s\n", |
96 | dentry->d_name.len, dentry->d_name.len, | 96 | dentry->d_name.len, dentry->d_name.len, |
97 | dentry->d_name.name, dentry->d_inode->i_ino, | 97 | dentry->d_name.name, dentry->d_inode->i_ino, |
98 | xtype, type); | 98 | xtype, type); |
@@ -293,7 +293,7 @@ error: | |||
293 | return ret; | 293 | return ret; |
294 | 294 | ||
295 | bad_type_length: | 295 | bad_type_length: |
296 | pr_err("Cache object %lu xattr length incorrect", | 296 | pr_err("Cache object %lu xattr length incorrect\n", |
297 | dentry->d_inode->i_ino); | 297 | dentry->d_inode->i_ino); |
298 | ret = -EIO; | 298 | ret = -EIO; |
299 | goto error; | 299 | goto error; |
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig index 603f18a65c12..a2172f3f69e3 100644 --- a/fs/cifs/Kconfig +++ b/fs/cifs/Kconfig | |||
@@ -22,6 +22,11 @@ config CIFS | |||
22 | support for OS/2 and Windows ME and similar servers is provided as | 22 | support for OS/2 and Windows ME and similar servers is provided as |
23 | well. | 23 | well. |
24 | 24 | ||
25 | The module also provides optional support for the follow-on | ||
26 | protocols for CIFS including SMB3, which enables | ||
27 | useful performance and security features (see the description | ||
28 | of CONFIG_CIFS_SMB2). | ||
29 | |||
25 | The cifs module provides an advanced network file system | 30 | The cifs module provides an advanced network file system |
26 | client for mounting to CIFS compliant servers. It includes | 31 | client for mounting to CIFS compliant servers. It includes |
27 | support for DFS (hierarchical name space), secure per-user | 32 | support for DFS (hierarchical name space), secure per-user |
@@ -121,7 +126,8 @@ config CIFS_ACL | |||
121 | depends on CIFS_XATTR && KEYS | 126 | depends on CIFS_XATTR && KEYS |
122 | help | 127 | help |
123 | Allows fetching CIFS/NTFS ACL from the server. The DACL blob | 128 | Allows fetching CIFS/NTFS ACL from the server. The DACL blob |
124 | is handed over to the application/caller. | 129 | is handed over to the application/caller. See the man |
130 | page for getcifsacl for more information. | ||
125 | 131 | ||
126 | config CIFS_DEBUG | 132 | config CIFS_DEBUG |
127 | bool "Enable CIFS debugging routines" | 133 | bool "Enable CIFS debugging routines" |
@@ -162,7 +168,7 @@ config CIFS_NFSD_EXPORT | |||
162 | Allows NFS server to export a CIFS mounted share (nfsd over cifs) | 168 | Allows NFS server to export a CIFS mounted share (nfsd over cifs) |
163 | 169 | ||
164 | config CIFS_SMB2 | 170 | config CIFS_SMB2 |
165 | bool "SMB2 network file system support" | 171 | bool "SMB2 and SMB3 network file system support" |
166 | depends on CIFS && INET | 172 | depends on CIFS && INET |
167 | select NLS | 173 | select NLS |
168 | select KEYS | 174 | select KEYS |
@@ -170,16 +176,21 @@ config CIFS_SMB2 | |||
170 | select DNS_RESOLVER | 176 | select DNS_RESOLVER |
171 | 177 | ||
172 | help | 178 | help |
173 | This enables experimental support for the SMB2 (Server Message Block | 179 | This enables support for the Server Message Block version 2 |
174 | version 2) protocol. The SMB2 protocol is the successor to the | 180 | family of protocols, including SMB3. SMB3 support is |
175 | popular CIFS and SMB network file sharing protocols. SMB2 is the | 181 | enabled on mount by specifying "vers=3.0" in the mount |
176 | native file sharing mechanism for recent versions of Windows | 182 | options. These protocols are the successors to the popular |
177 | operating systems (since Vista). SMB2 enablement will eventually | 183 | CIFS and SMB network file sharing protocols. SMB3 is the |
178 | allow users better performance, security and features, than would be | 184 | native file sharing mechanism for the more recent |
179 | possible with cifs. Note that smb2 mount options also are simpler | 185 | versions of Windows (Windows 8 and Windows 2012 and |
180 | (compared to cifs) due to protocol improvements. | 186 | later) and Samba server and many others support SMB3 well. |
181 | | 187 | In general SMB3 enables better performance, security |
182 | Unless you are a developer or tester, say N. | 188 | and features, than would be possible with CIFS (Note that |
189 | when mounting to Samba, due to the CIFS POSIX extensions, | ||
190 | CIFS mounts can provide slightly better POSIX compatibility | ||
191 | than SMB3 mounts do). Note that SMB2/SMB3 mount | ||
192 | options are also slightly simpler (compared to CIFS) due | ||
193 | to protocol improvements. | ||
183 | 194 | ||
184 | config CIFS_FSCACHE | 195 | config CIFS_FSCACHE |
185 | bool "Provide CIFS client caching support" | 196 | bool "Provide CIFS client caching support" |
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 85c70d5969ac..9d7996e8e793 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c | |||
@@ -207,6 +207,19 @@ cifs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
207 | return 0; | 207 | return 0; |
208 | } | 208 | } |
209 | 209 | ||
210 | static long cifs_fallocate(struct file *file, int mode, loff_t off, loff_t len) | ||
211 | { | ||
212 | struct super_block *sb = file->f_path.dentry->d_sb; | ||
213 | struct cifs_sb_info *cifs_sb = CIFS_SB(sb); | ||
214 | struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb); | ||
215 | struct TCP_Server_Info *server = tcon->ses->server; | ||
216 | |||
217 | if (server->ops->fallocate) | ||
218 | return server->ops->fallocate(file, tcon, mode, off, len); | ||
219 | |||
220 | return -EOPNOTSUPP; | ||
221 | } | ||
222 | |||
210 | static int cifs_permission(struct inode *inode, int mask) | 223 | static int cifs_permission(struct inode *inode, int mask) |
211 | { | 224 | { |
212 | struct cifs_sb_info *cifs_sb; | 225 | struct cifs_sb_info *cifs_sb; |
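[Editor's illustration — the new ->fallocate hook is reached from user space through the ordinary fallocate(2) system call; a sketch (the path is a placeholder, and per the code above the call returns EOPNOTSUPP unless the negotiated dialect provides the hook).]

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <stdio.h>
    #include <errno.h>
    #include <string.h>
    #include <unistd.h>

    int main(void)
    {
            int fd = open("/mnt/smb3/file", O_RDWR | O_CREAT, 0644);

            if (fd < 0)
                    return 1;
            /* Punch a 64KiB hole at offset 0; cifs_fallocate() forwards
             * this to server->ops->fallocate or fails with EOPNOTSUPP. */
            if (fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
                          0, 65536) != 0)
                    fprintf(stderr, "fallocate: %s\n", strerror(errno));
            close(fd);
            return 0;
    }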
@@ -813,8 +826,9 @@ cifs_setlease(struct file *file, long arg, struct file_lock **lease, void **priv | |||
813 | if (!(S_ISREG(inode->i_mode))) | 826 | if (!(S_ISREG(inode->i_mode))) |
814 | return -EINVAL; | 827 | return -EINVAL; |
815 | 828 | ||
816 | /* check if file is oplocked */ | 829 | /* Check if file is oplocked if this is request for new lease */ |
817 | if (((arg == F_RDLCK) && CIFS_CACHE_READ(CIFS_I(inode))) || | 830 | if (arg == F_UNLCK || |
831 | ((arg == F_RDLCK) && CIFS_CACHE_READ(CIFS_I(inode))) || | ||
818 | ((arg == F_WRLCK) && CIFS_CACHE_WRITE(CIFS_I(inode)))) | 832 | ((arg == F_WRLCK) && CIFS_CACHE_WRITE(CIFS_I(inode)))) |
819 | return generic_setlease(file, arg, lease, priv); | 833 | return generic_setlease(file, arg, lease, priv); |
820 | else if (tlink_tcon(cfile->tlink)->local_lease && | 834 | else if (tlink_tcon(cfile->tlink)->local_lease && |
@@ -909,6 +923,7 @@ const struct file_operations cifs_file_ops = { | |||
909 | .unlocked_ioctl = cifs_ioctl, | 923 | .unlocked_ioctl = cifs_ioctl, |
910 | #endif /* CONFIG_CIFS_POSIX */ | 924 | #endif /* CONFIG_CIFS_POSIX */ |
911 | .setlease = cifs_setlease, | 925 | .setlease = cifs_setlease, |
926 | .fallocate = cifs_fallocate, | ||
912 | }; | 927 | }; |
913 | 928 | ||
914 | const struct file_operations cifs_file_strict_ops = { | 929 | const struct file_operations cifs_file_strict_ops = { |
@@ -928,6 +943,7 @@ const struct file_operations cifs_file_strict_ops = { | |||
928 | .unlocked_ioctl = cifs_ioctl, | 943 | .unlocked_ioctl = cifs_ioctl, |
929 | #endif /* CONFIG_CIFS_POSIX */ | 944 | #endif /* CONFIG_CIFS_POSIX */ |
930 | .setlease = cifs_setlease, | 945 | .setlease = cifs_setlease, |
946 | .fallocate = cifs_fallocate, | ||
931 | }; | 947 | }; |
932 | 948 | ||
933 | const struct file_operations cifs_file_direct_ops = { | 949 | const struct file_operations cifs_file_direct_ops = { |
@@ -948,6 +964,7 @@ const struct file_operations cifs_file_direct_ops = { | |||
948 | #endif /* CONFIG_CIFS_POSIX */ | 964 | #endif /* CONFIG_CIFS_POSIX */ |
949 | .llseek = cifs_llseek, | 965 | .llseek = cifs_llseek, |
950 | .setlease = cifs_setlease, | 966 | .setlease = cifs_setlease, |
967 | .fallocate = cifs_fallocate, | ||
951 | }; | 968 | }; |
952 | 969 | ||
953 | const struct file_operations cifs_file_nobrl_ops = { | 970 | const struct file_operations cifs_file_nobrl_ops = { |
@@ -966,6 +983,7 @@ const struct file_operations cifs_file_nobrl_ops = { | |||
966 | .unlocked_ioctl = cifs_ioctl, | 983 | .unlocked_ioctl = cifs_ioctl, |
967 | #endif /* CONFIG_CIFS_POSIX */ | 984 | #endif /* CONFIG_CIFS_POSIX */ |
968 | .setlease = cifs_setlease, | 985 | .setlease = cifs_setlease, |
986 | .fallocate = cifs_fallocate, | ||
969 | }; | 987 | }; |
970 | 988 | ||
971 | const struct file_operations cifs_file_strict_nobrl_ops = { | 989 | const struct file_operations cifs_file_strict_nobrl_ops = { |
@@ -984,6 +1002,7 @@ const struct file_operations cifs_file_strict_nobrl_ops = { | |||
984 | .unlocked_ioctl = cifs_ioctl, | 1002 | .unlocked_ioctl = cifs_ioctl, |
985 | #endif /* CONFIG_CIFS_POSIX */ | 1003 | #endif /* CONFIG_CIFS_POSIX */ |
986 | .setlease = cifs_setlease, | 1004 | .setlease = cifs_setlease, |
1005 | .fallocate = cifs_fallocate, | ||
987 | }; | 1006 | }; |
988 | 1007 | ||
989 | const struct file_operations cifs_file_direct_nobrl_ops = { | 1008 | const struct file_operations cifs_file_direct_nobrl_ops = { |
@@ -1003,6 +1022,7 @@ const struct file_operations cifs_file_direct_nobrl_ops = { | |||
1003 | #endif /* CONFIG_CIFS_POSIX */ | 1022 | #endif /* CONFIG_CIFS_POSIX */ |
1004 | .llseek = cifs_llseek, | 1023 | .llseek = cifs_llseek, |
1005 | .setlease = cifs_setlease, | 1024 | .setlease = cifs_setlease, |
1025 | .fallocate = cifs_fallocate, | ||
1006 | }; | 1026 | }; |
1007 | 1027 | ||
1008 | const struct file_operations cifs_dir_ops = { | 1028 | const struct file_operations cifs_dir_ops = { |
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index b0fafa499505..002e0c173939 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h | |||
@@ -136,5 +136,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg); | |||
136 | extern const struct export_operations cifs_export_ops; | 136 | extern const struct export_operations cifs_export_ops; |
137 | #endif /* CONFIG_CIFS_NFSD_EXPORT */ | 137 | #endif /* CONFIG_CIFS_NFSD_EXPORT */ |
138 | 138 | ||
139 | #define CIFS_VERSION "2.04" | 139 | #define CIFS_VERSION "2.05" |
140 | #endif /* _CIFSFS_H */ | 140 | #endif /* _CIFSFS_H */ |
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 0012e1e291d4..25b8392bfdd2 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h | |||
@@ -70,11 +70,6 @@ | |||
70 | #define SERVER_NAME_LENGTH 40 | 70 | #define SERVER_NAME_LENGTH 40 |
71 | #define SERVER_NAME_LEN_WITH_NULL (SERVER_NAME_LENGTH + 1) | 71 | #define SERVER_NAME_LEN_WITH_NULL (SERVER_NAME_LENGTH + 1) |
72 | 72 | ||
73 | /* used to define string lengths for reversing unicode strings */ | ||
74 | /* (256+1)*2 = 514 */ | ||
75 | /* (max path length + 1 for null) * 2 for unicode */ | ||
76 | #define MAX_NAME 514 | ||
77 | |||
78 | /* SMB echo "timeout" -- FIXME: tunable? */ | 73 | /* SMB echo "timeout" -- FIXME: tunable? */ |
79 | #define SMB_ECHO_INTERVAL (60 * HZ) | 74 | #define SMB_ECHO_INTERVAL (60 * HZ) |
80 | 75 | ||
@@ -409,6 +404,10 @@ struct smb_version_operations { | |||
409 | /* get mtu credits */ | 404 | /* get mtu credits */ |
410 | int (*wait_mtu_credits)(struct TCP_Server_Info *, unsigned int, | 405 | int (*wait_mtu_credits)(struct TCP_Server_Info *, unsigned int, |
411 | unsigned int *, unsigned int *); | 406 | unsigned int *, unsigned int *); |
407 | /* check if we need to issue closedir */ | ||
408 | bool (*dir_needs_close)(struct cifsFileInfo *); | ||
409 | long (*fallocate)(struct file *, struct cifs_tcon *, int, loff_t, | ||
410 | loff_t); | ||
412 | }; | 411 | }; |
413 | 412 | ||
414 | struct smb_version_values { | 413 | struct smb_version_values { |
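[Editor's illustration — the two new members (dir_needs_close, fallocate) follow the existing smb_version_operations pattern: per-dialect optional hooks that callers probe for NULL before dispatching. A reduced toy model, not the kernel structs:]

    struct ops {
            /* optional hook; NULL for dialects without support */
            long (*fallocate)(int mode, long long off, long long len);
    };

    static long dispatch_fallocate(const struct ops *o, int mode,
                                   long long off, long long len)
    {
            if (o->fallocate)
                    return o->fallocate(mode, off, len);
            return -95;     /* -EOPNOTSUPP, as cifs_fallocate() returns */
    }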
@@ -883,6 +882,7 @@ struct cifs_tcon { | |||
883 | for this mount even if server would support */ | 882 | for this mount even if server would support */ |
884 | bool local_lease:1; /* check leases (only) on local system not remote */ | 883 | bool local_lease:1; /* check leases (only) on local system not remote */ |
885 | bool broken_posix_open; /* e.g. Samba server versions < 3.3.2, 3.2.9 */ | 884 | bool broken_posix_open; /* e.g. Samba server versions < 3.3.2, 3.2.9 */ |
885 | bool broken_sparse_sup; /* if server or share does not support sparse */ | ||
886 | bool need_reconnect:1; /* connection reset, tid now invalid */ | 886 | bool need_reconnect:1; /* connection reset, tid now invalid */ |
887 | #ifdef CONFIG_CIFS_SMB2 | 887 | #ifdef CONFIG_CIFS_SMB2 |
888 | bool print:1; /* set if connection to printer share */ | 888 | bool print:1; /* set if connection to printer share */ |
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h index 33df36ef9d52..5f9822ac0245 100644 --- a/fs/cifs/cifspdu.h +++ b/fs/cifs/cifspdu.h | |||
@@ -2253,6 +2253,29 @@ typedef struct { | |||
2253 | /* minimum includes first three fields, and empty FS Name */ | 2253 | /* minimum includes first three fields, and empty FS Name */ |
2254 | #define MIN_FS_ATTR_INFO_SIZE 12 | 2254 | #define MIN_FS_ATTR_INFO_SIZE 12 |
2255 | 2255 | ||
2256 | |||
2257 | /* List of FileSystemAttributes - see 2.5.1 of MS-FSCC */ | ||
2258 | #define FILE_SUPPORT_INTEGRITY_STREAMS 0x04000000 | ||
2259 | #define FILE_SUPPORTS_USN_JOURNAL 0x02000000 | ||
2260 | #define FILE_SUPPORTS_OPEN_BY_FILE_ID 0x01000000 | ||
2261 | #define FILE_SUPPORTS_EXTENDED_ATTRIBUTES 0x00800000 | ||
2262 | #define FILE_SUPPORTS_HARD_LINKS 0x00400000 | ||
2263 | #define FILE_SUPPORTS_TRANSACTIONS 0x00200000 | ||
2264 | #define FILE_SEQUENTIAL_WRITE_ONCE 0x00100000 | ||
2265 | #define FILE_READ_ONLY_VOLUME 0x00080000 | ||
2266 | #define FILE_NAMED_STREAMS 0x00040000 | ||
2267 | #define FILE_SUPPORTS_ENCRYPTION 0x00020000 | ||
2268 | #define FILE_SUPPORTS_OBJECT_IDS 0x00010000 | ||
2269 | #define FILE_VOLUME_IS_COMPRESSED 0x00008000 | ||
2270 | #define FILE_SUPPORTS_REMOTE_STORAGE 0x00000100 | ||
2271 | #define FILE_SUPPORTS_REPARSE_POINTS 0x00000080 | ||
2272 | #define FILE_SUPPORTS_SPARSE_FILES 0x00000040 | ||
2273 | #define FILE_VOLUME_QUOTAS 0x00000020 | ||
2274 | #define FILE_FILE_COMPRESSION 0x00000010 | ||
2275 | #define FILE_PERSISTENT_ACLS 0x00000008 | ||
2276 | #define FILE_UNICODE_ON_DISK 0x00000004 | ||
2277 | #define FILE_CASE_PRESERVED_NAMES 0x00000002 | ||
2278 | #define FILE_CASE_SENSITIVE_SEARCH 0x00000001 | ||
2256 | typedef struct { | 2279 | typedef struct { |
2257 | __le32 Attributes; | 2280 | __le32 Attributes; |
2258 | __le32 MaxPathNameComponentLength; | 2281 | __le32 MaxPathNameComponentLength; |
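[Editor's illustration — these MS-FSCC bits are tested against the Attributes word of the FS attribute info after byte-order conversion; e.g. a sparse-support probe would look roughly like this sketch:]

    #include <stdint.h>
    #include <stdbool.h>

    #define FILE_SUPPORTS_SPARSE_FILES 0x00000040

    /* attrs is the Attributes field after le32_to_cpu() conversion */
    static bool share_supports_sparse(uint32_t attrs)
    {
            return (attrs & FILE_SUPPORTS_SPARSE_FILES) != 0;
    }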
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 03ed8a09581c..36ca2045009b 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c | |||
@@ -1600,6 +1600,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, | |||
1600 | tmp_end++; | 1600 | tmp_end++; |
1601 | if (!(tmp_end < end && tmp_end[1] == delim)) { | 1601 | if (!(tmp_end < end && tmp_end[1] == delim)) { |
1602 | /* No it is not. Set the password to NULL */ | 1602 | /* No it is not. Set the password to NULL */ |
1603 | kfree(vol->password); | ||
1603 | vol->password = NULL; | 1604 | vol->password = NULL; |
1604 | break; | 1605 | break; |
1605 | } | 1606 | } |
@@ -1637,6 +1638,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, | |||
1637 | options = end; | 1638 | options = end; |
1638 | } | 1639 | } |
1639 | 1640 | ||
1641 | kfree(vol->password); | ||
1640 | /* Now build new password string */ | 1642 | /* Now build new password string */ |
1641 | temp_len = strlen(value); | 1643 | temp_len = strlen(value); |
1642 | vol->password = kzalloc(temp_len+1, GFP_KERNEL); | 1644 | vol->password = kzalloc(temp_len+1, GFP_KERNEL); |
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 3db0c5fd9a11..6cbd9c688cfe 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c | |||
@@ -497,6 +497,14 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry, | |||
497 | goto out; | 497 | goto out; |
498 | } | 498 | } |
499 | 499 | ||
500 | if (file->f_flags & O_DIRECT && | ||
501 | CIFS_SB(inode->i_sb)->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) { | ||
502 | if (CIFS_SB(inode->i_sb)->mnt_cifs_flags & CIFS_MOUNT_NO_BRL) | ||
503 | file->f_op = &cifs_file_direct_nobrl_ops; | ||
504 | else | ||
505 | file->f_op = &cifs_file_direct_ops; | ||
506 | } | ||
507 | |||
500 | file_info = cifs_new_fileinfo(&fid, file, tlink, oplock); | 508 | file_info = cifs_new_fileinfo(&fid, file, tlink, oplock); |
501 | if (file_info == NULL) { | 509 | if (file_info == NULL) { |
502 | if (server->ops->close) | 510 | if (server->ops->close) |
diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 4ab2f79ffa7a..5f29354b072a 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c | |||
@@ -467,6 +467,14 @@ int cifs_open(struct inode *inode, struct file *file) | |||
467 | cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n", | 467 | cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n", |
468 | inode, file->f_flags, full_path); | 468 | inode, file->f_flags, full_path); |
469 | 469 | ||
470 | if (file->f_flags & O_DIRECT && | ||
471 | cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) { | ||
472 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL) | ||
473 | file->f_op = &cifs_file_direct_nobrl_ops; | ||
474 | else | ||
475 | file->f_op = &cifs_file_direct_ops; | ||
476 | } | ||
477 | |||
470 | if (server->oplocks) | 478 | if (server->oplocks) |
471 | oplock = REQ_OPLOCK; | 479 | oplock = REQ_OPLOCK; |
472 | else | 480 | else |
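[Editor's illustration — with this change a strict-IO mount honors O_DIRECT chosen per open, not just a mount-wide directio option. From user space that is simply (path hypothetical):]

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <unistd.h>

    int main(void)
    {
            /* On a cifs strictcache mount, this open now switches the
             * file to the uncached (direct) file operations. */
            int fd = open("/mnt/smb3/file", O_RDWR | O_DIRECT);

            if (fd < 0)
                    return 1;
            close(fd);
            return 0;
    }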
@@ -762,7 +770,7 @@ int cifs_closedir(struct inode *inode, struct file *file) | |||
762 | 770 | ||
763 | cifs_dbg(FYI, "Freeing private data in close dir\n"); | 771 | cifs_dbg(FYI, "Freeing private data in close dir\n"); |
764 | spin_lock(&cifs_file_list_lock); | 772 | spin_lock(&cifs_file_list_lock); |
765 | if (!cfile->srch_inf.endOfSearch && !cfile->invalidHandle) { | 773 | if (server->ops->dir_needs_close(cfile)) { |
766 | cfile->invalidHandle = true; | 774 | cfile->invalidHandle = true; |
767 | spin_unlock(&cifs_file_list_lock); | 775 | spin_unlock(&cifs_file_list_lock); |
768 | if (server->ops->close_dir) | 776 | if (server->ops->close_dir) |
@@ -3560,15 +3568,9 @@ static int cifs_readpages(struct file *file, struct address_space *mapping, | |||
3560 | lru_cache_add_file(page); | 3568 | lru_cache_add_file(page); |
3561 | unlock_page(page); | 3569 | unlock_page(page); |
3562 | page_cache_release(page); | 3570 | page_cache_release(page); |
3563 | if (rc == -EAGAIN) | ||
3564 | list_add_tail(&page->lru, &tmplist); | ||
3565 | } | 3571 | } |
3572 | /* Fallback to the readpage in error/reconnect cases */ | ||
3566 | kref_put(&rdata->refcount, cifs_readdata_release); | 3573 | kref_put(&rdata->refcount, cifs_readdata_release); |
3567 | if (rc == -EAGAIN) { | ||
3568 | /* Re-add pages to the page_list and retry */ | ||
3569 | list_splice(&tmplist, page_list); | ||
3570 | continue; | ||
3571 | } | ||
3572 | break; | 3574 | break; |
3573 | } | 3575 | } |
3574 | 3576 | ||
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 426d6c6ad8bf..7899a40465b3 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c | |||
@@ -1720,13 +1720,22 @@ cifs_rename2(struct inode *source_dir, struct dentry *source_dentry, | |||
1720 | unlink_target: | 1720 | unlink_target: |
1721 | /* Try unlinking the target dentry if it's not negative */ | 1721 | /* Try unlinking the target dentry if it's not negative */ |
1722 | if (target_dentry->d_inode && (rc == -EACCES || rc == -EEXIST)) { | 1722 | if (target_dentry->d_inode && (rc == -EACCES || rc == -EEXIST)) { |
1723 | tmprc = cifs_unlink(target_dir, target_dentry); | 1723 | if (d_is_dir(target_dentry)) |
1724 | tmprc = cifs_rmdir(target_dir, target_dentry); | ||
1725 | else | ||
1726 | tmprc = cifs_unlink(target_dir, target_dentry); | ||
1724 | if (tmprc) | 1727 | if (tmprc) |
1725 | goto cifs_rename_exit; | 1728 | goto cifs_rename_exit; |
1726 | rc = cifs_do_rename(xid, source_dentry, from_name, | 1729 | rc = cifs_do_rename(xid, source_dentry, from_name, |
1727 | target_dentry, to_name); | 1730 | target_dentry, to_name); |
1728 | } | 1731 | } |
1729 | 1732 | ||
1733 | /* force revalidate to go get info when needed */ | ||
1734 | CIFS_I(source_dir)->time = CIFS_I(target_dir)->time = 0; | ||
1735 | |||
1736 | source_dir->i_ctime = source_dir->i_mtime = target_dir->i_ctime = | ||
1737 | target_dir->i_mtime = current_fs_time(source_dir->i_sb); | ||
1738 | |||
1730 | cifs_rename_exit: | 1739 | cifs_rename_exit: |
1731 | kfree(info_buf_source); | 1740 | kfree(info_buf_source); |
1732 | kfree(from_name); | 1741 | kfree(from_name); |
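[Editor's illustration — the fix mirrors what remove(3) does in user space: directories need rmdir semantics, everything else unlink. A sketch of the same dispatch outside the kernel:]

    #include <sys/stat.h>
    #include <unistd.h>

    static int remove_target(const char *path)
    {
            struct stat st;

            if (lstat(path, &st) != 0)
                    return -1;
            /* cifs_rename2() now makes the same distinction via d_is_dir() */
            return S_ISDIR(st.st_mode) ? rmdir(path) : unlink(path);
    }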
diff --git a/fs/cifs/link.c b/fs/cifs/link.c index 68559fd557fb..5657416d3483 100644 --- a/fs/cifs/link.c +++ b/fs/cifs/link.c | |||
@@ -213,8 +213,12 @@ create_mf_symlink(const unsigned int xid, struct cifs_tcon *tcon, | |||
213 | if (rc) | 213 | if (rc) |
214 | goto out; | 214 | goto out; |
215 | 215 | ||
216 | rc = tcon->ses->server->ops->create_mf_symlink(xid, tcon, cifs_sb, | 216 | if (tcon->ses->server->ops->create_mf_symlink) |
217 | fromName, buf, &bytes_written); | 217 | rc = tcon->ses->server->ops->create_mf_symlink(xid, tcon, |
218 | cifs_sb, fromName, buf, &bytes_written); | ||
219 | else | ||
220 | rc = -EOPNOTSUPP; | ||
221 | |||
218 | if (rc) | 222 | if (rc) |
219 | goto out; | 223 | goto out; |
220 | 224 | ||
@@ -339,9 +343,11 @@ cifs_query_mf_symlink(unsigned int xid, struct cifs_tcon *tcon, | |||
339 | if (rc) | 343 | if (rc) |
340 | return rc; | 344 | return rc; |
341 | 345 | ||
342 | if (file_info.EndOfFile != cpu_to_le64(CIFS_MF_SYMLINK_FILE_SIZE)) | 346 | if (file_info.EndOfFile != cpu_to_le64(CIFS_MF_SYMLINK_FILE_SIZE)) { |
347 | rc = -ENOENT; | ||
343 | /* it's not a symlink */ | 348 | /* it's not a symlink */ |
344 | goto out; | 349 | goto out; |
350 | } | ||
345 | 351 | ||
346 | io_parms.netfid = fid.netfid; | 352 | io_parms.netfid = fid.netfid; |
347 | io_parms.pid = current->tgid; | 353 | io_parms.pid = current->tgid; |
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index 81340c6253eb..b7415d596dbd 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c | |||
@@ -574,13 +574,6 @@ void cifs_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock) | |||
574 | cinode->oplock = 0; | 574 | cinode->oplock = 0; |
575 | } | 575 | } |
576 | 576 | ||
577 | static int | ||
578 | cifs_oplock_break_wait(void *unused) | ||
579 | { | ||
580 | schedule(); | ||
581 | return signal_pending(current) ? -ERESTARTSYS : 0; | ||
582 | } | ||
583 | |||
584 | /* | 577 | /* |
585 | * We wait for oplock breaks to be processed before we attempt to perform | 578 | * We wait for oplock breaks to be processed before we attempt to perform |
586 | * writes. | 579 | * writes. |
diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c index 6834b9c3bec1..b333ff60781d 100644 --- a/fs/cifs/netmisc.c +++ b/fs/cifs/netmisc.c | |||
@@ -925,11 +925,23 @@ cifs_NTtimeToUnix(__le64 ntutc) | |||
925 | /* BB what about the timezone? BB */ | 925 | /* BB what about the timezone? BB */ |
926 | 926 | ||
927 | /* Subtract the NTFS time offset, then convert to 1s intervals. */ | 927 | /* Subtract the NTFS time offset, then convert to 1s intervals. */ |
928 | u64 t; | 928 | s64 t = le64_to_cpu(ntutc) - NTFS_TIME_OFFSET; |
929 | |||
930 | /* | ||
931 | * Unfortunately can not use normal 64 bit division on 32 bit arch, but | ||
932 | * the alternative, do_div, does not work with negative numbers so have | ||
933 | * to special case them | ||
934 | */ | ||
935 | if (t < 0) { | ||
936 | t = -t; | ||
937 | ts.tv_nsec = (long)(do_div(t, 10000000) * 100); | ||
938 | ts.tv_nsec = -ts.tv_nsec; | ||
939 | ts.tv_sec = -t; | ||
940 | } else { | ||
941 | ts.tv_nsec = (long)do_div(t, 10000000) * 100; | ||
942 | ts.tv_sec = t; | ||
943 | } | ||
929 | 944 | ||
930 | t = le64_to_cpu(ntutc) - NTFS_TIME_OFFSET; | ||
931 | ts.tv_nsec = do_div(t, 10000000) * 100; | ||
932 | ts.tv_sec = t; | ||
933 | return ts; | 945 | return ts; |
934 | } | 946 | } |
935 | 947 | ||
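[Editor's illustration — the NT epoch (1601) predates the Unix epoch, so timestamps before 1970 go negative after subtracting NTFS_TIME_OFFSET, and the unsigned do_div() needs the sign-flip dance above. A plain 64-bit version of the conversion, using native division (which a 32-bit kernel cannot):]

    #include <stdint.h>

    /* 100ns intervals between 1601-01-01 and 1970-01-01 */
    #define NTFS_TIME_OFFSET (((int64_t)369 * 365 + 89) * 24 * 3600 * 10000000LL)

    struct ts { int64_t tv_sec; long tv_nsec; };

    static struct ts nt_time_to_unix(uint64_t ntutc)
    {
            int64_t t = (int64_t)ntutc - NTFS_TIME_OFFSET;
            struct ts ts;

            /* C99 division truncates toward zero, so the remainder keeps
             * the sign of t -- matching the kernel's special-cased result */
            ts.tv_sec  = t / 10000000;
            ts.tv_nsec = (long)(t % 10000000) * 100;
            return ts;
    }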
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index b15862e0f68c..b334a89d6a66 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c | |||
@@ -593,11 +593,11 @@ find_cifs_entry(const unsigned int xid, struct cifs_tcon *tcon, loff_t pos, | |||
593 | /* close and restart search */ | 593 | /* close and restart search */ |
594 | cifs_dbg(FYI, "search backing up - close and restart search\n"); | 594 | cifs_dbg(FYI, "search backing up - close and restart search\n"); |
595 | spin_lock(&cifs_file_list_lock); | 595 | spin_lock(&cifs_file_list_lock); |
596 | if (!cfile->srch_inf.endOfSearch && !cfile->invalidHandle) { | 596 | if (server->ops->dir_needs_close(cfile)) { |
597 | cfile->invalidHandle = true; | 597 | cfile->invalidHandle = true; |
598 | spin_unlock(&cifs_file_list_lock); | 598 | spin_unlock(&cifs_file_list_lock); |
599 | if (server->ops->close) | 599 | if (server->ops->close_dir) |
600 | server->ops->close(xid, tcon, &cfile->fid); | 600 | server->ops->close_dir(xid, tcon, &cfile->fid); |
601 | } else | 601 | } else |
602 | spin_unlock(&cifs_file_list_lock); | 602 | spin_unlock(&cifs_file_list_lock); |
603 | if (cfile->srch_inf.ntwrk_buf_start) { | 603 | if (cfile->srch_inf.ntwrk_buf_start) { |
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index 39ee32688eac..57db63ff88da 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c | |||
@@ -243,10 +243,11 @@ static void decode_ascii_ssetup(char **pbcc_area, __u16 bleft, | |||
243 | kfree(ses->serverOS); | 243 | kfree(ses->serverOS); |
244 | 244 | ||
245 | ses->serverOS = kzalloc(len + 1, GFP_KERNEL); | 245 | ses->serverOS = kzalloc(len + 1, GFP_KERNEL); |
246 | if (ses->serverOS) | 246 | if (ses->serverOS) { |
247 | strncpy(ses->serverOS, bcc_ptr, len); | 247 | strncpy(ses->serverOS, bcc_ptr, len); |
248 | if (strncmp(ses->serverOS, "OS/2", 4) == 0) | 248 | if (strncmp(ses->serverOS, "OS/2", 4) == 0) |
249 | cifs_dbg(FYI, "OS/2 server\n"); | 249 | cifs_dbg(FYI, "OS/2 server\n"); |
250 | } | ||
250 | 251 | ||
251 | bcc_ptr += len + 1; | 252 | bcc_ptr += len + 1; |
252 | bleft -= len + 1; | 253 | bleft -= len + 1; |
@@ -744,14 +745,6 @@ out: | |||
744 | sess_free_buffer(sess_data); | 745 | sess_free_buffer(sess_data); |
745 | } | 746 | } |
746 | 747 | ||
747 | #else | ||
748 | |||
749 | static void | ||
750 | sess_auth_lanman(struct sess_data *sess_data) | ||
751 | { | ||
752 | sess_data->result = -EOPNOTSUPP; | ||
753 | sess_data->func = NULL; | ||
754 | } | ||
755 | #endif | 748 | #endif |
756 | 749 | ||
757 | static void | 750 | static void |
@@ -1102,15 +1095,6 @@ out: | |||
1102 | ses->auth_key.response = NULL; | 1095 | ses->auth_key.response = NULL; |
1103 | } | 1096 | } |
1104 | 1097 | ||
1105 | #else | ||
1106 | |||
1107 | static void | ||
1108 | sess_auth_kerberos(struct sess_data *sess_data) | ||
1109 | { | ||
1110 | cifs_dbg(VFS, "Kerberos negotiated but upcall support disabled!\n"); | ||
1111 | sess_data->result = -ENOSYS; | ||
1112 | sess_data->func = NULL; | ||
1113 | } | ||
1114 | #endif /* ! CONFIG_CIFS_UPCALL */ | 1098 | #endif /* ! CONFIG_CIFS_UPCALL */ |
1115 | 1099 | ||
1116 | /* | 1100 | /* |
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index 5e8c22d6c7b9..52131d8cb4d5 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c | |||
@@ -586,7 +586,7 @@ cifs_query_path_info(const unsigned int xid, struct cifs_tcon *tcon, | |||
586 | tmprc = CIFS_open(xid, &oparms, &oplock, NULL); | 586 | tmprc = CIFS_open(xid, &oparms, &oplock, NULL); |
587 | if (tmprc == -EOPNOTSUPP) | 587 | if (tmprc == -EOPNOTSUPP) |
588 | *symlink = true; | 588 | *symlink = true; |
589 | else | 589 | else if (tmprc == 0) |
590 | CIFSSMBClose(xid, tcon, fid.netfid); | 590 | CIFSSMBClose(xid, tcon, fid.netfid); |
591 | } | 591 | } |
592 | 592 | ||
@@ -1015,6 +1015,12 @@ cifs_wp_retry_size(struct inode *inode) | |||
1015 | return CIFS_SB(inode->i_sb)->wsize; | 1015 | return CIFS_SB(inode->i_sb)->wsize; |
1016 | } | 1016 | } |
1017 | 1017 | ||
1018 | static bool | ||
1019 | cifs_dir_needs_close(struct cifsFileInfo *cfile) | ||
1020 | { | ||
1021 | return !cfile->srch_inf.endOfSearch && !cfile->invalidHandle; | ||
1022 | } | ||
1023 | |||
1018 | struct smb_version_operations smb1_operations = { | 1024 | struct smb_version_operations smb1_operations = { |
1019 | .send_cancel = send_nt_cancel, | 1025 | .send_cancel = send_nt_cancel, |
1020 | .compare_fids = cifs_compare_fids, | 1026 | .compare_fids = cifs_compare_fids, |
@@ -1086,6 +1092,7 @@ struct smb_version_operations smb1_operations = { | |||
1086 | .create_mf_symlink = cifs_create_mf_symlink, | 1092 | .create_mf_symlink = cifs_create_mf_symlink, |
1087 | .is_read_op = cifs_is_read_op, | 1093 | .is_read_op = cifs_is_read_op, |
1088 | .wp_retry_size = cifs_wp_retry_size, | 1094 | .wp_retry_size = cifs_wp_retry_size, |
1095 | .dir_needs_close = cifs_dir_needs_close, | ||
1089 | #ifdef CONFIG_CIFS_XATTR | 1096 | #ifdef CONFIG_CIFS_XATTR |
1090 | .query_all_EAs = CIFSSMBQAllEAs, | 1097 | .query_all_EAs = CIFSSMBQAllEAs, |
1091 | .set_EA = CIFSSMBSetEA, | 1098 | .set_EA = CIFSSMBSetEA, |
diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c index 3f17b4550831..45992944e238 100644 --- a/fs/cifs/smb2file.c +++ b/fs/cifs/smb2file.c | |||
@@ -50,7 +50,7 @@ smb2_open_file(const unsigned int xid, struct cifs_open_parms *oparms, | |||
50 | goto out; | 50 | goto out; |
51 | } | 51 | } |
52 | 52 | ||
53 | smb2_data = kzalloc(sizeof(struct smb2_file_all_info) + MAX_NAME * 2, | 53 | smb2_data = kzalloc(sizeof(struct smb2_file_all_info) + PATH_MAX * 2, |
54 | GFP_KERNEL); | 54 | GFP_KERNEL); |
55 | if (smb2_data == NULL) { | 55 | if (smb2_data == NULL) { |
56 | rc = -ENOMEM; | 56 | rc = -ENOMEM; |
diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c index 0150182a4494..899bbc86f73e 100644 --- a/fs/cifs/smb2inode.c +++ b/fs/cifs/smb2inode.c | |||
@@ -131,7 +131,7 @@ smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon, | |||
131 | *adjust_tz = false; | 131 | *adjust_tz = false; |
132 | *symlink = false; | 132 | *symlink = false; |
133 | 133 | ||
134 | smb2_data = kzalloc(sizeof(struct smb2_file_all_info) + MAX_NAME * 2, | 134 | smb2_data = kzalloc(sizeof(struct smb2_file_all_info) + PATH_MAX * 2, |
135 | GFP_KERNEL); | 135 | GFP_KERNEL); |
136 | if (smb2_data == NULL) | 136 | if (smb2_data == NULL) |
137 | return -ENOMEM; | 137 | return -ENOMEM; |
diff --git a/fs/cifs/smb2maperror.c b/fs/cifs/smb2maperror.c index e31a9dfdcd39..8257a5a97cc0 100644 --- a/fs/cifs/smb2maperror.c +++ b/fs/cifs/smb2maperror.c | |||
@@ -214,7 +214,7 @@ static const struct status_to_posix_error smb2_error_map_table[] = { | |||
214 | {STATUS_BREAKPOINT, -EIO, "STATUS_BREAKPOINT"}, | 214 | {STATUS_BREAKPOINT, -EIO, "STATUS_BREAKPOINT"}, |
215 | {STATUS_SINGLE_STEP, -EIO, "STATUS_SINGLE_STEP"}, | 215 | {STATUS_SINGLE_STEP, -EIO, "STATUS_SINGLE_STEP"}, |
216 | {STATUS_BUFFER_OVERFLOW, -EIO, "STATUS_BUFFER_OVERFLOW"}, | 216 | {STATUS_BUFFER_OVERFLOW, -EIO, "STATUS_BUFFER_OVERFLOW"}, |
217 | {STATUS_NO_MORE_FILES, -EIO, "STATUS_NO_MORE_FILES"}, | 217 | {STATUS_NO_MORE_FILES, -ENODATA, "STATUS_NO_MORE_FILES"}, |
218 | {STATUS_WAKE_SYSTEM_DEBUGGER, -EIO, "STATUS_WAKE_SYSTEM_DEBUGGER"}, | 218 | {STATUS_WAKE_SYSTEM_DEBUGGER, -EIO, "STATUS_WAKE_SYSTEM_DEBUGGER"}, |
219 | {STATUS_HANDLES_CLOSED, -EIO, "STATUS_HANDLES_CLOSED"}, | 219 | {STATUS_HANDLES_CLOSED, -EIO, "STATUS_HANDLES_CLOSED"}, |
220 | {STATUS_NO_INHERITANCE, -EIO, "STATUS_NO_INHERITANCE"}, | 220 | {STATUS_NO_INHERITANCE, -EIO, "STATUS_NO_INHERITANCE"}, |
@@ -256,6 +256,8 @@ static const struct status_to_posix_error smb2_error_map_table[] = { | |||
256 | {STATUS_DLL_MIGHT_BE_INCOMPATIBLE, -EIO, | 256 | {STATUS_DLL_MIGHT_BE_INCOMPATIBLE, -EIO, |
257 | "STATUS_DLL_MIGHT_BE_INCOMPATIBLE"}, | 257 | "STATUS_DLL_MIGHT_BE_INCOMPATIBLE"}, |
258 | {STATUS_STOPPED_ON_SYMLINK, -EOPNOTSUPP, "STATUS_STOPPED_ON_SYMLINK"}, | 258 | {STATUS_STOPPED_ON_SYMLINK, -EOPNOTSUPP, "STATUS_STOPPED_ON_SYMLINK"}, |
259 | {STATUS_IO_REPARSE_TAG_NOT_HANDLED, -EOPNOTSUPP, | ||
260 | "STATUS_REPARSE_NOT_HANDLED"}, | ||
259 | {STATUS_DEVICE_REQUIRES_CLEANING, -EIO, | 261 | {STATUS_DEVICE_REQUIRES_CLEANING, -EIO, |
260 | "STATUS_DEVICE_REQUIRES_CLEANING"}, | 262 | "STATUS_DEVICE_REQUIRES_CLEANING"}, |
261 | {STATUS_DEVICE_DOOR_OPEN, -EIO, "STATUS_DEVICE_DOOR_OPEN"}, | 263 | {STATUS_DEVICE_DOOR_OPEN, -EIO, "STATUS_DEVICE_DOOR_OPEN"}, |
@@ -298,7 +300,7 @@ static const struct status_to_posix_error smb2_error_map_table[] = { | |||
298 | {STATUS_INVALID_PARAMETER, -EINVAL, "STATUS_INVALID_PARAMETER"}, | 300 | {STATUS_INVALID_PARAMETER, -EINVAL, "STATUS_INVALID_PARAMETER"}, |
299 | {STATUS_NO_SUCH_DEVICE, -ENODEV, "STATUS_NO_SUCH_DEVICE"}, | 301 | {STATUS_NO_SUCH_DEVICE, -ENODEV, "STATUS_NO_SUCH_DEVICE"}, |
300 | {STATUS_NO_SUCH_FILE, -ENOENT, "STATUS_NO_SUCH_FILE"}, | 302 | {STATUS_NO_SUCH_FILE, -ENOENT, "STATUS_NO_SUCH_FILE"}, |
301 | {STATUS_INVALID_DEVICE_REQUEST, -EIO, "STATUS_INVALID_DEVICE_REQUEST"}, | 303 | {STATUS_INVALID_DEVICE_REQUEST, -EOPNOTSUPP, "STATUS_INVALID_DEVICE_REQUEST"}, |
302 | {STATUS_END_OF_FILE, -ENODATA, "STATUS_END_OF_FILE"}, | 304 | {STATUS_END_OF_FILE, -ENODATA, "STATUS_END_OF_FILE"}, |
303 | {STATUS_WRONG_VOLUME, -EIO, "STATUS_WRONG_VOLUME"}, | 305 | {STATUS_WRONG_VOLUME, -EIO, "STATUS_WRONG_VOLUME"}, |
304 | {STATUS_NO_MEDIA_IN_DEVICE, -EIO, "STATUS_NO_MEDIA_IN_DEVICE"}, | 306 | {STATUS_NO_MEDIA_IN_DEVICE, -EIO, "STATUS_NO_MEDIA_IN_DEVICE"}, |
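[Editor's illustration — these entries live in an NTSTATUS-to-errno table; conceptually the mapping is a lookup keyed on the 32-bit status, defaulting to EIO. A toy sketch with two of the values changed above (not the kernel's actual lookup helper):]

    #include <stdint.h>
    #include <stddef.h>

    struct status_map {
            uint32_t status;
            int posix_error;
            const char *name;
    };

    static const struct status_map map[] = {
            { 0x80000006, -61, "STATUS_NO_MORE_FILES" },          /* -ENODATA */
            { 0xC0000010, -95, "STATUS_INVALID_DEVICE_REQUEST" }, /* -EOPNOTSUPP */
    };

    static int map_status(uint32_t status)
    {
            for (size_t i = 0; i < sizeof(map) / sizeof(map[0]); i++)
                    if (map[i].status == status)
                            return map[i].posix_error;
            return -5;      /* default: -EIO */
    }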
diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c index f2e6ac29a8d6..4aa7a0f07d6e 100644 --- a/fs/cifs/smb2misc.c +++ b/fs/cifs/smb2misc.c | |||
@@ -178,9 +178,24 @@ smb2_check_message(char *buf, unsigned int length) | |||
178 | /* Windows 7 server returns 24 bytes more */ | 178 | /* Windows 7 server returns 24 bytes more */ |
179 | if (clc_len + 20 == len && command == SMB2_OPLOCK_BREAK_HE) | 179 | if (clc_len + 20 == len && command == SMB2_OPLOCK_BREAK_HE) |
180 | return 0; | 180 | return 0; |
181 | /* server can return one byte more */ | 181 | /* server can return one byte more due to implied bcc[0] */ |
182 | if (clc_len == 4 + len + 1) | 182 | if (clc_len == 4 + len + 1) |
183 | return 0; | 183 | return 0; |
184 | |||
185 | /* | ||
186 | * MacOS server pads after SMB2.1 write response with 3 bytes | ||
187 | * of junk. Other servers match RFC1001 len to actual | ||
188 | * SMB2/SMB3 frame length (header + smb2 response specific data) | ||
189 | * Log the server error (once), but allow it and continue | ||
190 | * since the frame is parseable. | ||
191 | */ | ||
192 | if (clc_len < 4 /* RFC1001 header size */ + len) { | ||
193 | printk_once(KERN_WARNING | ||
194 | "SMB2 server sent bad RFC1001 len %d not %d\n", | ||
195 | len, clc_len - 4); | ||
196 | return 0; | ||
197 | } | ||
198 | |||
184 | return 1; | 199 | return 1; |
185 | } | 200 | } |
186 | return 0; | 201 | return 0; |
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 77f8aeb9c2fc..f522193b7184 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c | |||
@@ -389,7 +389,7 @@ smb2_query_file_info(const unsigned int xid, struct cifs_tcon *tcon, | |||
389 | int rc; | 389 | int rc; |
390 | struct smb2_file_all_info *smb2_data; | 390 | struct smb2_file_all_info *smb2_data; |
391 | 391 | ||
392 | smb2_data = kzalloc(sizeof(struct smb2_file_all_info) + MAX_NAME * 2, | 392 | smb2_data = kzalloc(sizeof(struct smb2_file_all_info) + PATH_MAX * 2, |
393 | GFP_KERNEL); | 393 | GFP_KERNEL); |
394 | if (smb2_data == NULL) | 394 | if (smb2_data == NULL) |
395 | return -ENOMEM; | 395 | return -ENOMEM; |
@@ -731,11 +731,72 @@ smb2_sync_write(const unsigned int xid, struct cifsFileInfo *cfile, | |||
731 | return SMB2_write(xid, parms, written, iov, nr_segs); | 731 | return SMB2_write(xid, parms, written, iov, nr_segs); |
732 | } | 732 | } |
733 | 733 | ||
734 | /* Set or clear the SPARSE_FILE attribute based on value passed in setsparse */ | ||
735 | static bool smb2_set_sparse(const unsigned int xid, struct cifs_tcon *tcon, | ||
736 | struct cifsFileInfo *cfile, struct inode *inode, __u8 setsparse) | ||
737 | { | ||
738 | struct cifsInodeInfo *cifsi; | ||
739 | int rc; | ||
740 | |||
741 | cifsi = CIFS_I(inode); | ||
742 | |||
743 | /* if file already sparse don't bother setting sparse again */ | ||
744 | if ((cifsi->cifsAttrs & FILE_ATTRIBUTE_SPARSE_FILE) && setsparse) | ||
745 | return true; /* already sparse */ | ||
746 | |||
747 | if (!(cifsi->cifsAttrs & FILE_ATTRIBUTE_SPARSE_FILE) && !setsparse) | ||
748 | return true; /* already not sparse */ | ||
749 | |||
750 | /* | ||
751 | * Can't check for sparse support on share the usual way via the | ||
752 | * FS attribute info (FILE_SUPPORTS_SPARSE_FILES) on the share | ||
753 | * since Samba server doesn't set the flag on the share, yet | ||
754 | * supports the set sparse FSCTL and returns sparse correctly | ||
755 | * in the file attributes. If we fail setting sparse though we | ||
756 | * mark that server does not support sparse files for this share | ||
757 | * to avoid repeatedly sending the unsupported fsctl to server | ||
758 | * if the file is repeatedly extended. | ||
759 | */ | ||
760 | if (tcon->broken_sparse_sup) | ||
761 | return false; | ||
762 | |||
763 | rc = SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid, | ||
764 | cfile->fid.volatile_fid, FSCTL_SET_SPARSE, | ||
765 | true /* is_fctl */, &setsparse, 1, NULL, NULL); | ||
766 | if (rc) { | ||
767 | tcon->broken_sparse_sup = true; | ||
768 | cifs_dbg(FYI, "set sparse rc = %d\n", rc); | ||
769 | return false; | ||
770 | } | ||
771 | |||
772 | if (setsparse) | ||
773 | cifsi->cifsAttrs |= FILE_ATTRIBUTE_SPARSE_FILE; | ||
774 | else | ||
775 | cifsi->cifsAttrs &= (~FILE_ATTRIBUTE_SPARSE_FILE); | ||
776 | |||
777 | return true; | ||
778 | } | ||
779 | |||
734 | static int | 780 | static int |
735 | smb2_set_file_size(const unsigned int xid, struct cifs_tcon *tcon, | 781 | smb2_set_file_size(const unsigned int xid, struct cifs_tcon *tcon, |
736 | struct cifsFileInfo *cfile, __u64 size, bool set_alloc) | 782 | struct cifsFileInfo *cfile, __u64 size, bool set_alloc) |
737 | { | 783 | { |
738 | __le64 eof = cpu_to_le64(size); | 784 | __le64 eof = cpu_to_le64(size); |
785 | struct inode *inode; | ||
786 | |||
787 | /* | ||
788 | * If extending file more than one page make sparse. Many Linux fs | ||
789 | * make files sparse by default when extending via ftruncate | ||
790 | */ | ||
791 | inode = cfile->dentry->d_inode; | ||
792 | |||
793 | if (!set_alloc && (size > inode->i_size + 8192)) { | ||
794 | __u8 set_sparse = 1; | ||
795 | |||
796 | /* whether set sparse succeeds or not, extend the file */ | ||
797 | smb2_set_sparse(xid, tcon, cfile, inode, set_sparse); | ||
798 | } | ||
799 | |||
739 | return SMB2_set_eof(xid, tcon, cfile->fid.persistent_fid, | 800 | return SMB2_set_eof(xid, tcon, cfile->fid.persistent_fid, |
740 | cfile->fid.volatile_fid, cfile->pid, &eof, false); | 801 | cfile->fid.volatile_fid, cfile->pid, &eof, false); |
741 | } | 802 | } |
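[Editor's illustration — the rationale in the comment above (Linux filesystems make ftruncate-extended files sparse) is easy to observe from user space: st_size grows while st_blocks stays near zero. A sketch:]

    #include <stdio.h>
    #include <fcntl.h>
    #include <unistd.h>
    #include <sys/stat.h>

    int main(void)
    {
            struct stat st;
            int fd = open("sparse.bin", O_RDWR | O_CREAT | O_TRUNC, 0644);

            if (fd < 0)
                    return 1;
            if (ftruncate(fd, 1 << 20) != 0)  /* extend well past one page */
                    return 1;
            fstat(fd, &st);
            /* on a typical local fs: size 1048576, blocks ~0 -- sparse */
            printf("size=%lld blocks=%lld\n",
                   (long long)st.st_size, (long long)st.st_blocks);
            close(fd);
            return 0;
    }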
@@ -954,6 +1015,105 @@ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon, | |||
954 | return rc; | 1015 | return rc; |
955 | } | 1016 | } |
956 | 1017 | ||
1018 | static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon, | ||
1019 | loff_t offset, loff_t len, bool keep_size) | ||
1020 | { | ||
1021 | struct inode *inode; | ||
1022 | struct cifsInodeInfo *cifsi; | ||
1023 | struct cifsFileInfo *cfile = file->private_data; | ||
1024 | struct file_zero_data_information fsctl_buf; | ||
1025 | long rc; | ||
1026 | unsigned int xid; | ||
1027 | |||
1028 | xid = get_xid(); | ||
1029 | |||
1030 | inode = cfile->dentry->d_inode; | ||
1031 | cifsi = CIFS_I(inode); | ||
1032 | |||
1033 | /* if file not oplocked can't be sure whether asking to extend size */ | ||
1034 | if (!CIFS_CACHE_READ(cifsi)) | ||
1035 | if (keep_size == false) | ||
1036 | return -EOPNOTSUPP; | ||
1037 | |||
1038 | /* | ||
1039 | * Must check if file sparse since fallocate -z (zero range) assumes | ||
1040 | * non-sparse allocation | ||
1041 | */ | ||
1042 | if (!(cifsi->cifsAttrs & FILE_ATTRIBUTE_SPARSE_FILE)) | ||
1043 | return -EOPNOTSUPP; | ||
1044 | |||
1045 | /* | ||
1046 | * need to make sure we are not asked to extend the file since the SMB3 | ||
1047 | * fsctl does not change the file size. In the future we could change | ||
1048 | * this to zero the first part of the range then set the file size | ||
1049 | * which for a non sparse file would zero the newly extended range | ||
1050 | */ | ||
1051 | if (keep_size == false) | ||
1052 | if (i_size_read(inode) < offset + len) | ||
1053 | return -EOPNOTSUPP; | ||
1054 | |||
1055 | cifs_dbg(FYI, "offset %lld len %lld", offset, len); | ||
1056 | |||
1057 | fsctl_buf.FileOffset = cpu_to_le64(offset); | ||
1058 | fsctl_buf.BeyondFinalZero = cpu_to_le64(offset + len); | ||
1059 | |||
1060 | rc = SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid, | ||
1061 | cfile->fid.volatile_fid, FSCTL_SET_ZERO_DATA, | ||
1062 | true /* is_fctl */, (char *)&fsctl_buf, | ||
1063 | sizeof(struct file_zero_data_information), NULL, NULL); | ||
1064 | free_xid(xid); | ||
1065 | return rc; | ||
1066 | } | ||
1067 | |||
1068 | static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon, | ||
1069 | loff_t offset, loff_t len) | ||
1070 | { | ||
1071 | struct inode *inode; | ||
1072 | struct cifsInodeInfo *cifsi; | ||
1073 | struct cifsFileInfo *cfile = file->private_data; | ||
1074 | struct file_zero_data_information fsctl_buf; | ||
1075 | long rc; | ||
1076 | unsigned int xid; | ||
1077 | __u8 set_sparse = 1; | ||
1078 | |||
1079 | xid = get_xid(); | ||
1080 | |||
1081 | inode = cfile->dentry->d_inode; | ||
1082 | cifsi = CIFS_I(inode); | ||
1083 | |||
1084 | /* Need to make file sparse, if not already, before freeing range. */ | ||
1085 | /* Consider adding equivalent for compressed since it could also work */ | ||
1086 | if (!smb2_set_sparse(xid, tcon, cfile, inode, set_sparse)) | ||
1087 | return -EOPNOTSUPP; | ||
1088 | |||
1089 | cifs_dbg(FYI, "offset %lld len %lld", offset, len); | ||
1090 | |||
1091 | fsctl_buf.FileOffset = cpu_to_le64(offset); | ||
1092 | fsctl_buf.BeyondFinalZero = cpu_to_le64(offset + len); | ||
1093 | |||
1094 | rc = SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid, | ||
1095 | cfile->fid.volatile_fid, FSCTL_SET_ZERO_DATA, | ||
1096 | true /* is_fctl */, (char *)&fsctl_buf, | ||
1097 | sizeof(struct file_zero_data_information), NULL, NULL); | ||
1098 | free_xid(xid); | ||
1099 | return rc; | ||
1100 | } | ||
1101 | |||
1102 | static long smb3_fallocate(struct file *file, struct cifs_tcon *tcon, int mode, | ||
1103 | loff_t off, loff_t len) | ||
1104 | { | ||
1105 | /* KEEP_SIZE already checked for by do_fallocate */ | ||
1106 | if (mode & FALLOC_FL_PUNCH_HOLE) | ||
1107 | return smb3_punch_hole(file, tcon, off, len); | ||
1108 | else if (mode & FALLOC_FL_ZERO_RANGE) { | ||
1109 | if (mode & FALLOC_FL_KEEP_SIZE) | ||
1110 | return smb3_zero_range(file, tcon, off, len, true); | ||
1111 | return smb3_zero_range(file, tcon, off, len, false); | ||
1112 | } | ||
1113 | |||
1114 | return -EOPNOTSUPP; | ||
1115 | } | ||
1116 | |||
957 | static void | 1117 | static void |
958 | smb2_downgrade_oplock(struct TCP_Server_Info *server, | 1118 | smb2_downgrade_oplock(struct TCP_Server_Info *server, |
959 | struct cifsInodeInfo *cinode, bool set_level2) | 1119 | struct cifsInodeInfo *cinode, bool set_level2) |
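[Editor's illustration — the FSCTL_SET_ZERO_DATA payload is just two little-endian 64-bit offsets, and BeyondFinalZero is exclusive, which is why both helpers above pass offset + len. A sketch of building the buffer outside the kernel, using endian.h in place of cpu_to_le64():]

    #include <stdint.h>
    #include <endian.h>

    struct file_zero_data_information {
            uint64_t FileOffset;        /* little-endian on the wire */
            uint64_t BeyondFinalZero;   /* first byte NOT zeroed */
    } __attribute__((packed));

    static void fill_zero_data(struct file_zero_data_information *buf,
                               uint64_t offset, uint64_t len)
    {
            buf->FileOffset = htole64(offset);
            buf->BeyondFinalZero = htole64(offset + len);  /* exclusive end */
    }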
@@ -1161,6 +1321,12 @@ smb2_wp_retry_size(struct inode *inode) | |||
1161 | SMB2_MAX_BUFFER_SIZE); | 1321 | SMB2_MAX_BUFFER_SIZE); |
1162 | } | 1322 | } |
1163 | 1323 | ||
1324 | static bool | ||
1325 | smb2_dir_needs_close(struct cifsFileInfo *cfile) | ||
1326 | { | ||
1327 | return !cfile->invalidHandle; | ||
1328 | } | ||
1329 | |||
1164 | struct smb_version_operations smb20_operations = { | 1330 | struct smb_version_operations smb20_operations = { |
1165 | .compare_fids = smb2_compare_fids, | 1331 | .compare_fids = smb2_compare_fids, |
1166 | .setup_request = smb2_setup_request, | 1332 | .setup_request = smb2_setup_request, |
@@ -1236,6 +1402,7 @@ struct smb_version_operations smb20_operations = { | |||
1236 | .parse_lease_buf = smb2_parse_lease_buf, | 1402 | .parse_lease_buf = smb2_parse_lease_buf, |
1237 | .clone_range = smb2_clone_range, | 1403 | .clone_range = smb2_clone_range, |
1238 | .wp_retry_size = smb2_wp_retry_size, | 1404 | .wp_retry_size = smb2_wp_retry_size, |
1405 | .dir_needs_close = smb2_dir_needs_close, | ||
1239 | }; | 1406 | }; |
1240 | 1407 | ||
1241 | struct smb_version_operations smb21_operations = { | 1408 | struct smb_version_operations smb21_operations = { |
@@ -1313,6 +1480,7 @@ struct smb_version_operations smb21_operations = { | |||
1313 | .parse_lease_buf = smb2_parse_lease_buf, | 1480 | .parse_lease_buf = smb2_parse_lease_buf, |
1314 | .clone_range = smb2_clone_range, | 1481 | .clone_range = smb2_clone_range, |
1315 | .wp_retry_size = smb2_wp_retry_size, | 1482 | .wp_retry_size = smb2_wp_retry_size, |
1483 | .dir_needs_close = smb2_dir_needs_close, | ||
1316 | }; | 1484 | }; |
1317 | 1485 | ||
1318 | struct smb_version_operations smb30_operations = { | 1486 | struct smb_version_operations smb30_operations = { |
@@ -1393,6 +1561,8 @@ struct smb_version_operations smb30_operations = { | |||
1393 | .clone_range = smb2_clone_range, | 1561 | .clone_range = smb2_clone_range, |
1394 | .validate_negotiate = smb3_validate_negotiate, | 1562 | .validate_negotiate = smb3_validate_negotiate, |
1395 | .wp_retry_size = smb2_wp_retry_size, | 1563 | .wp_retry_size = smb2_wp_retry_size, |
1564 | .dir_needs_close = smb2_dir_needs_close, | ||
1565 | .fallocate = smb3_fallocate, | ||
1396 | }; | 1566 | }; |
1397 | 1567 | ||
1398 | struct smb_version_values smb20_values = { | 1568 | struct smb_version_values smb20_values = { |
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 42ebc1a8be6c..74b3a6684383 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c | |||
@@ -530,7 +530,7 @@ SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses, | |||
530 | struct smb2_sess_setup_rsp *rsp = NULL; | 530 | struct smb2_sess_setup_rsp *rsp = NULL; |
531 | struct kvec iov[2]; | 531 | struct kvec iov[2]; |
532 | int rc = 0; | 532 | int rc = 0; |
533 | int resp_buftype; | 533 | int resp_buftype = CIFS_NO_BUFFER; |
534 | __le32 phase = NtLmNegotiate; /* NTLMSSP, if needed, is multistage */ | 534 | __le32 phase = NtLmNegotiate; /* NTLMSSP, if needed, is multistage */ |
535 | struct TCP_Server_Info *server = ses->server; | 535 | struct TCP_Server_Info *server = ses->server; |
536 | u16 blob_length = 0; | 536 | u16 blob_length = 0; |
@@ -907,7 +907,8 @@ tcon_exit: | |||
907 | tcon_error_exit: | 907 | tcon_error_exit: |
908 | if (rsp->hdr.Status == STATUS_BAD_NETWORK_NAME) { | 908 | if (rsp->hdr.Status == STATUS_BAD_NETWORK_NAME) { |
909 | cifs_dbg(VFS, "BAD_NETWORK_NAME: %s\n", tree); | 909 | cifs_dbg(VFS, "BAD_NETWORK_NAME: %s\n", tree); |
910 | tcon->bad_network_name = true; | 910 | if (tcon) |
911 | tcon->bad_network_name = true; | ||
911 | } | 912 | } |
912 | goto tcon_exit; | 913 | goto tcon_exit; |
913 | } | 914 | } |
@@ -1224,7 +1225,9 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, | |||
1224 | 1225 | ||
1225 | cifs_dbg(FYI, "SMB2 IOCTL\n"); | 1226 | cifs_dbg(FYI, "SMB2 IOCTL\n"); |
1226 | 1227 | ||
1227 | *out_data = NULL; | 1228 | if (out_data != NULL) |
1229 | *out_data = NULL; | ||
1230 | |||
1228 | /* zero out returned data len, in case of error */ | 1231 | /* zero out returned data len, in case of error */ |
1229 | if (plen) | 1232 | if (plen) |
1230 | *plen = 0; | 1233 | *plen = 0; |
@@ -1400,8 +1403,7 @@ SMB2_close(const unsigned int xid, struct cifs_tcon *tcon, | |||
1400 | rsp = (struct smb2_close_rsp *)iov[0].iov_base; | 1403 | rsp = (struct smb2_close_rsp *)iov[0].iov_base; |
1401 | 1404 | ||
1402 | if (rc != 0) { | 1405 | if (rc != 0) { |
1403 | if (tcon) | 1406 | cifs_stats_fail_inc(tcon, SMB2_CLOSE_HE); |
1404 | cifs_stats_fail_inc(tcon, SMB2_CLOSE_HE); | ||
1405 | goto close_exit; | 1407 | goto close_exit; |
1406 | } | 1408 | } |
1407 | 1409 | ||
@@ -1530,7 +1532,7 @@ SMB2_query_info(const unsigned int xid, struct cifs_tcon *tcon, | |||
1530 | { | 1532 | { |
1531 | return query_info(xid, tcon, persistent_fid, volatile_fid, | 1533 | return query_info(xid, tcon, persistent_fid, volatile_fid, |
1532 | FILE_ALL_INFORMATION, | 1534 | FILE_ALL_INFORMATION, |
1533 | sizeof(struct smb2_file_all_info) + MAX_NAME * 2, | 1535 | sizeof(struct smb2_file_all_info) + PATH_MAX * 2, |
1534 | sizeof(struct smb2_file_all_info), data); | 1536 | sizeof(struct smb2_file_all_info), data); |
1535 | } | 1537 | } |
1536 | 1538 | ||
@@ -2177,6 +2179,10 @@ SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon, | |||
2177 | rsp = (struct smb2_query_directory_rsp *)iov[0].iov_base; | 2179 | rsp = (struct smb2_query_directory_rsp *)iov[0].iov_base; |
2178 | 2180 | ||
2179 | if (rc) { | 2181 | if (rc) { |
2182 | if (rc == -ENODATA && rsp->hdr.Status == STATUS_NO_MORE_FILES) { | ||
2183 | srch_inf->endOfSearch = true; | ||
2184 | rc = 0; | ||
2185 | } | ||
2180 | cifs_stats_fail_inc(tcon, SMB2_QUERY_DIRECTORY_HE); | 2186 | cifs_stats_fail_inc(tcon, SMB2_QUERY_DIRECTORY_HE); |
2181 | goto qdir_exit; | 2187 | goto qdir_exit; |
2182 | } | 2188 | } |
@@ -2214,11 +2220,6 @@ SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon, | |||
2214 | else | 2220 | else |
2215 | cifs_dbg(VFS, "illegal search buffer type\n"); | 2221 | cifs_dbg(VFS, "illegal search buffer type\n"); |
2216 | 2222 | ||
2217 | if (rsp->hdr.Status == STATUS_NO_MORE_FILES) | ||
2218 | srch_inf->endOfSearch = 1; | ||
2219 | else | ||
2220 | srch_inf->endOfSearch = 0; | ||
2221 | |||
2222 | return rc; | 2223 | return rc; |
2223 | 2224 | ||
2224 | qdir_exit: | 2225 | qdir_exit: |
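[Editor's illustration — net effect: STATUS_NO_MORE_FILES (now mapped to -ENODATA in the error table above) is converted back into a clean end-of-search on the error path rather than being checked on the success path. A toy model of the new control flow:]

    #include <stdbool.h>

    #define ENODATA 61
    #define STATUS_NO_MORE_FILES 0x80000006u

    /* Toy model of SMB2_query_directory(): -ENODATA together with
     * STATUS_NO_MORE_FILES is normal end-of-listing, not a failure. */
    static int finish_query_dir(int rc, unsigned int hdr_status,
                                bool *end_of_search)
    {
            if (rc == -ENODATA && hdr_status == STATUS_NO_MORE_FILES) {
                    *end_of_search = true;
                    return 0;
            }
            return rc;
    }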
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index 69f3595d3952..fbe486c285a9 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h | |||
@@ -573,6 +573,12 @@ struct copychunk_ioctl { | |||
573 | __u32 Reserved2; | 573 | __u32 Reserved2; |
574 | } __packed; | 574 | } __packed; |
575 | 575 | ||
576 | /* this goes in the ioctl buffer when doing FSCTL_SET_ZERO_DATA */ | ||
577 | struct file_zero_data_information { | ||
578 | __le64 FileOffset; | ||
579 | __le64 BeyondFinalZero; | ||
580 | } __packed; | ||
581 | |||
576 | struct copychunk_ioctl_rsp { | 582 | struct copychunk_ioctl_rsp { |
577 | __le32 ChunksWritten; | 583 | __le32 ChunksWritten; |
578 | __le32 ChunkBytesWritten; | 584 | __le32 ChunkBytesWritten; |
diff --git a/fs/cifs/smbfsctl.h b/fs/cifs/smbfsctl.h index 0e538b5c9622..83efa59535be 100644 --- a/fs/cifs/smbfsctl.h +++ b/fs/cifs/smbfsctl.h | |||
@@ -63,7 +63,7 @@ | |||
63 | #define FSCTL_SET_OBJECT_ID_EXTENDED 0x000900BC /* BB add struct */ | 63 | #define FSCTL_SET_OBJECT_ID_EXTENDED 0x000900BC /* BB add struct */ |
64 | #define FSCTL_CREATE_OR_GET_OBJECT_ID 0x000900C0 /* BB add struct */ | 64 | #define FSCTL_CREATE_OR_GET_OBJECT_ID 0x000900C0 /* BB add struct */ |
65 | #define FSCTL_SET_SPARSE 0x000900C4 /* BB add struct */ | 65 | #define FSCTL_SET_SPARSE 0x000900C4 /* BB add struct */ |
66 | #define FSCTL_SET_ZERO_DATA 0x000900C8 /* BB add struct */ | 66 | #define FSCTL_SET_ZERO_DATA 0x000980C8 |
67 | #define FSCTL_SET_ENCRYPTION 0x000900D7 /* BB add struct */ | 67 | #define FSCTL_SET_ENCRYPTION 0x000900D7 /* BB add struct */ |
68 | #define FSCTL_ENCRYPTION_FSCTL_IO 0x000900DB /* BB add struct */ | 68 | #define FSCTL_ENCRYPTION_FSCTL_IO 0x000900DB /* BB add struct */ |
69 | #define FSCTL_WRITE_RAW_ENCRYPTED 0x000900DF /* BB add struct */ | 69 | #define FSCTL_WRITE_RAW_ENCRYPTED 0x000900DF /* BB add struct */ |
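[Editor's illustration — the corrected value decodes with the Windows CTL_CODE layout (device type in the top 16 bits, required access in bits 14-15, function in bits 2-13); the old 0x000900C8 appears to have lacked the FILE_WRITE_ACCESS bits. Constants per MS-FSCC, shown for illustration:]

    #include <assert.h>

    #define CTL_CODE(dev, func, method, access) \
            (((dev) << 16) | ((access) << 14) | ((func) << 2) | (method))

    #define FILE_DEVICE_FILE_SYSTEM 0x0009
    #define METHOD_BUFFERED         0
    #define FILE_WRITE_ACCESS       0x0002

    int main(void)
    {
            /* function 50 with write access -> 0x000980C8, not 0x000900C8 */
            assert(CTL_CODE(FILE_DEVICE_FILE_SYSTEM, 50, METHOD_BUFFERED,
                            FILE_WRITE_ACCESS) == 0x000980C8);
            return 0;
    }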
diff --git a/fs/dcache.c b/fs/dcache.c index d30ce699ae4b..cb25a1a5e307 100644 --- a/fs/dcache.c +++ b/fs/dcache.c | |||
@@ -106,8 +106,7 @@ static inline struct hlist_bl_head *d_hash(const struct dentry *parent, | |||
106 | unsigned int hash) | 106 | unsigned int hash) |
107 | { | 107 | { |
108 | hash += (unsigned long) parent / L1_CACHE_BYTES; | 108 | hash += (unsigned long) parent / L1_CACHE_BYTES; |
109 | hash = hash + (hash >> d_hash_shift); | 109 | return dentry_hashtable + hash_32(hash, d_hash_shift); |
110 | return dentry_hashtable + (hash & d_hash_mask); | ||
111 | } | 110 | } |
112 | 111 | ||
113 | /* Statistics gathering. */ | 112 | /* Statistics gathering. */ |
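[Editor's illustration — hash_32() replaces the fold-and-mask with a single multiplicative hash: multiply by a golden-ratio-derived constant and keep the top d_hash_shift bits. A sketch; 0x9e370001 is assumed here as the GOLDEN_RATIO_PRIME_32 of this kernel era:]

    #include <stdint.h>

    #define GOLDEN_RATIO_PRIME_32 0x9e370001UL

    /* Keep the top 'bits' bits of the 32-bit product -- the high bits
     * mix best, which is why this beats (hash + (hash >> shift)) & mask. */
    static inline uint32_t hash_32(uint32_t val, unsigned int bits)
    {
            return (uint32_t)(val * GOLDEN_RATIO_PRIME_32) >> (32 - bits);
    }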
@@ -2373,7 +2372,8 @@ void dentry_update_name_case(struct dentry *dentry, struct qstr *name) | |||
2373 | } | 2372 | } |
2374 | EXPORT_SYMBOL(dentry_update_name_case); | 2373 | EXPORT_SYMBOL(dentry_update_name_case); |
2375 | 2374 | ||
2376 | static void switch_names(struct dentry *dentry, struct dentry *target) | 2375 | static void switch_names(struct dentry *dentry, struct dentry *target, |
2376 | bool exchange) | ||
2377 | { | 2377 | { |
2378 | if (dname_external(target)) { | 2378 | if (dname_external(target)) { |
2379 | if (dname_external(dentry)) { | 2379 | if (dname_external(dentry)) { |
@@ -2407,13 +2407,19 @@ static void switch_names(struct dentry *dentry, struct dentry *target) | |||
2407 | */ | 2407 | */ |
2408 | unsigned int i; | 2408 | unsigned int i; |
2409 | BUILD_BUG_ON(!IS_ALIGNED(DNAME_INLINE_LEN, sizeof(long))); | 2409 | BUILD_BUG_ON(!IS_ALIGNED(DNAME_INLINE_LEN, sizeof(long))); |
2410 | if (!exchange) { | ||
2411 | memcpy(dentry->d_iname, target->d_name.name, | ||
2412 | target->d_name.len + 1); | ||
2413 | dentry->d_name.hash_len = target->d_name.hash_len; | ||
2414 | return; | ||
2415 | } | ||
2410 | for (i = 0; i < DNAME_INLINE_LEN / sizeof(long); i++) { | 2416 | for (i = 0; i < DNAME_INLINE_LEN / sizeof(long); i++) { |
2411 | swap(((long *) &dentry->d_iname)[i], | 2417 | swap(((long *) &dentry->d_iname)[i], |
2412 | ((long *) &target->d_iname)[i]); | 2418 | ((long *) &target->d_iname)[i]); |
2413 | } | 2419 | } |
2414 | } | 2420 | } |
2415 | } | 2421 | } |
2416 | swap(dentry->d_name.len, target->d_name.len); | 2422 | swap(dentry->d_name.hash_len, target->d_name.hash_len); |
2417 | } | 2423 | } |
2418 | 2424 | ||
2419 | static void dentry_lock_for_move(struct dentry *dentry, struct dentry *target) | 2425 | static void dentry_lock_for_move(struct dentry *dentry, struct dentry *target) |
@@ -2443,25 +2449,29 @@ static void dentry_lock_for_move(struct dentry *dentry, struct dentry *target) | |||
2443 | } | 2449 | } |
2444 | } | 2450 | } |
2445 | 2451 | ||
2446 | static void dentry_unlock_parents_for_move(struct dentry *dentry, | 2452 | static void dentry_unlock_for_move(struct dentry *dentry, struct dentry *target) |
2447 | struct dentry *target) | ||
2448 | { | 2453 | { |
2449 | if (target->d_parent != dentry->d_parent) | 2454 | if (target->d_parent != dentry->d_parent) |
2450 | spin_unlock(&dentry->d_parent->d_lock); | 2455 | spin_unlock(&dentry->d_parent->d_lock); |
2451 | if (target->d_parent != target) | 2456 | if (target->d_parent != target) |
2452 | spin_unlock(&target->d_parent->d_lock); | 2457 | spin_unlock(&target->d_parent->d_lock); |
2458 | spin_unlock(&target->d_lock); | ||
2459 | spin_unlock(&dentry->d_lock); | ||
2453 | } | 2460 | } |
2454 | 2461 | ||
2455 | /* | 2462 | /* |
2456 | * When switching names, the actual string doesn't strictly have to | 2463 | * When switching names, the actual string doesn't strictly have to |
2457 | * be preserved in the target - because we're dropping the target | 2464 | * be preserved in the target - because we're dropping the target |
2458 | * anyway. As such, we can just do a simple memcpy() to copy over | 2465 | * anyway. As such, we can just do a simple memcpy() to copy over |
2459 | * the new name before we switch. | 2466 | * the new name before we switch, unless we are going to rehash |
2460 | * | 2467 | * it. Note that if we *do* unhash the target, we are not allowed |
2461 | * Note that we have to be a lot more careful about getting the hash | 2468 | * to rehash it without giving it a new name/hash key - whether |
2462 | * switched - we have to switch the hash value properly even if it | 2469 | * we swap or overwrite the names here, resulting name won't match |
2463 | * then no longer matches the actual (corrupted) string of the target. | 2470 | * the reality in filesystem; it's only there for d_path() purposes. |
2464 | * The hash value has to match the hash queue that the dentry is on.. | 2471 | * Note that all of this is happening under rename_lock, so the |
2472 | * any hash lookup seeing it in the middle of manipulations will | ||
2473 | * be discarded anyway. So we do not care what happens to the hash | ||
2474 | * key in that case. | ||
2465 | */ | 2475 | */ |
2466 | /* | 2476 | /* |
2467 | * __d_move - move a dentry | 2477 | * __d_move - move a dentry |
@@ -2507,36 +2517,30 @@ static void __d_move(struct dentry *dentry, struct dentry *target, | |||
2507 | d_hash(dentry->d_parent, dentry->d_name.hash)); | 2517 | d_hash(dentry->d_parent, dentry->d_name.hash)); |
2508 | } | 2518 | } |
2509 | 2519 | ||
2510 | list_del(&dentry->d_u.d_child); | ||
2511 | list_del(&target->d_u.d_child); | ||
2512 | |||
2513 | /* Switch the names.. */ | 2520 | /* Switch the names.. */ |
2514 | switch_names(dentry, target); | 2521 | switch_names(dentry, target, exchange); |
2515 | swap(dentry->d_name.hash, target->d_name.hash); | ||
2516 | 2522 | ||
2517 | /* ... and switch the parents */ | 2523 | /* ... and switch them in the tree */ |
2518 | if (IS_ROOT(dentry)) { | 2524 | if (IS_ROOT(dentry)) { |
2525 | /* splicing a tree */ | ||
2519 | dentry->d_parent = target->d_parent; | 2526 | dentry->d_parent = target->d_parent; |
2520 | target->d_parent = target; | 2527 | target->d_parent = target; |
2521 | INIT_LIST_HEAD(&target->d_u.d_child); | 2528 | list_del_init(&target->d_u.d_child); |
2529 | list_move(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs); | ||
2522 | } else { | 2530 | } else { |
2531 | /* swapping two dentries */ | ||
2523 | swap(dentry->d_parent, target->d_parent); | 2532 | swap(dentry->d_parent, target->d_parent); |
2524 | 2533 | list_move(&target->d_u.d_child, &target->d_parent->d_subdirs); | |
2525 | /* And add them back to the (new) parent lists */ | 2534 | list_move(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs); |
2526 | list_add(&target->d_u.d_child, &target->d_parent->d_subdirs); | 2535 | if (exchange) |
2536 | fsnotify_d_move(target); | ||
2537 | fsnotify_d_move(dentry); | ||
2527 | } | 2538 | } |
2528 | 2539 | ||
2529 | list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs); | ||
2530 | |||
2531 | write_seqcount_end(&target->d_seq); | 2540 | write_seqcount_end(&target->d_seq); |
2532 | write_seqcount_end(&dentry->d_seq); | 2541 | write_seqcount_end(&dentry->d_seq); |
2533 | 2542 | ||
2534 | dentry_unlock_parents_for_move(dentry, target); | 2543 | dentry_unlock_for_move(dentry, target); |
2535 | if (exchange) | ||
2536 | fsnotify_d_move(target); | ||
2537 | spin_unlock(&target->d_lock); | ||
2538 | fsnotify_d_move(dentry); | ||
2539 | spin_unlock(&dentry->d_lock); | ||
2540 | } | 2544 | } |
2541 | 2545 | ||
2542 | /* | 2546 | /* |
@@ -2634,39 +2638,6 @@ out_err: | |||
2634 | return ret; | 2638 | return ret; |
2635 | } | 2639 | } |
2636 | 2640 | ||
2637 | /* | ||
2638 | * Prepare an anonymous dentry for life in the superblock's dentry tree as a | ||
2639 | * named dentry in place of the dentry to be replaced. | ||
2640 | * returns with anon->d_lock held! | ||
2641 | */ | ||
2642 | static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon) | ||
2643 | { | ||
2644 | struct dentry *dparent; | ||
2645 | |||
2646 | dentry_lock_for_move(anon, dentry); | ||
2647 | |||
2648 | write_seqcount_begin(&dentry->d_seq); | ||
2649 | write_seqcount_begin_nested(&anon->d_seq, DENTRY_D_LOCK_NESTED); | ||
2650 | |||
2651 | dparent = dentry->d_parent; | ||
2652 | |||
2653 | switch_names(dentry, anon); | ||
2654 | swap(dentry->d_name.hash, anon->d_name.hash); | ||
2655 | |||
2656 | dentry->d_parent = dentry; | ||
2657 | list_del_init(&dentry->d_u.d_child); | ||
2658 | anon->d_parent = dparent; | ||
2659 | list_move(&anon->d_u.d_child, &dparent->d_subdirs); | ||
2660 | |||
2661 | write_seqcount_end(&dentry->d_seq); | ||
2662 | write_seqcount_end(&anon->d_seq); | ||
2663 | |||
2664 | dentry_unlock_parents_for_move(anon, dentry); | ||
2665 | spin_unlock(&dentry->d_lock); | ||
2666 | |||
2667 | /* anon->d_lock still locked, returns locked */ | ||
2668 | } | ||
2669 | |||
2670 | /** | 2641 | /** |
2671 | * d_splice_alias - splice a disconnected dentry into the tree if one exists | 2642 | * d_splice_alias - splice a disconnected dentry into the tree if one exists |
2672 | * @inode: the inode which may have a disconnected dentry | 2643 | * @inode: the inode which may have a disconnected dentry |
@@ -2712,11 +2683,8 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry) | |||
2712 | return ERR_PTR(-EIO); | 2683 | return ERR_PTR(-EIO); |
2713 | } | 2684 | } |
2714 | write_seqlock(&rename_lock); | 2685 | write_seqlock(&rename_lock); |
2715 | __d_materialise_dentry(dentry, new); | 2686 | __d_move(new, dentry, false); |
2716 | write_sequnlock(&rename_lock); | 2687 | write_sequnlock(&rename_lock); |
2717 | __d_drop(new); | ||
2718 | _d_rehash(new); | ||
2719 | spin_unlock(&new->d_lock); | ||
2720 | spin_unlock(&inode->i_lock); | 2688 | spin_unlock(&inode->i_lock); |
2721 | security_d_instantiate(new, inode); | 2689 | security_d_instantiate(new, inode); |
2722 | iput(inode); | 2690 | iput(inode); |
@@ -2776,9 +2744,8 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode) | |||
2776 | } else if (IS_ROOT(alias)) { | 2744 | } else if (IS_ROOT(alias)) { |
2777 | /* Is this an anonymous mountpoint that we | 2745 | /* Is this an anonymous mountpoint that we |
2778 | * could splice into our tree? */ | 2746 | * could splice into our tree? */ |
2779 | __d_materialise_dentry(dentry, alias); | 2747 | __d_move(alias, dentry, false); |
2780 | write_sequnlock(&rename_lock); | 2748 | write_sequnlock(&rename_lock); |
2781 | __d_drop(alias); | ||
2782 | goto found; | 2749 | goto found; |
2783 | } else { | 2750 | } else { |
2784 | /* Nope, but we must(!) avoid directory | 2751 | /* Nope, but we must(!) avoid directory |
@@ -2804,13 +2771,9 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode) | |||
2804 | actual = __d_instantiate_unique(dentry, inode); | 2771 | actual = __d_instantiate_unique(dentry, inode); |
2805 | if (!actual) | 2772 | if (!actual) |
2806 | actual = dentry; | 2773 | actual = dentry; |
2807 | else | ||
2808 | BUG_ON(!d_unhashed(actual)); | ||
2809 | 2774 | ||
2810 | spin_lock(&actual->d_lock); | 2775 | d_rehash(actual); |
2811 | found: | 2776 | found: |
2812 | _d_rehash(actual); | ||
2813 | spin_unlock(&actual->d_lock); | ||
2814 | spin_unlock(&inode->i_lock); | 2777 | spin_unlock(&inode->i_lock); |
2815 | out_nolock: | 2778 | out_nolock: |
2816 | if (actual == dentry) { | 2779 | if (actual == dentry) { |
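Editor's note: both branches of the reworked __d_move() above lean on list_move() and list_del_init() instead of separate unlink/relink steps, so a child sits on exactly one d_subdirs list at every point while rename_lock is held. A minimal userspace sketch of those primitives, modeled on the kernel's <linux/list.h> (simplified: no debug poisoning):

#include <stdio.h>

/* Minimal circular doubly-linked list, after <linux/list.h>. */
struct list_head {
        struct list_head *next, *prev;
};

#define LIST_HEAD_INIT(name) { &(name), &(name) }

static void list_del(struct list_head *e)
{
        e->prev->next = e->next;
        e->next->prev = e->prev;
}

static void list_add(struct list_head *e, struct list_head *head)
{
        e->next = head->next;
        e->prev = head;
        head->next->prev = e;
        head->next = e;
}

/* list_move() = list_del() + list_add() in one call: the entry leaves
 * whatever list it is on and is relinked at @head, with no window in
 * which it sits on neither list. */
static void list_move(struct list_head *e, struct list_head *head)
{
        list_del(e);
        list_add(e, head);
}

int main(void)
{
        struct list_head old_dir = LIST_HEAD_INIT(old_dir);
        struct list_head new_dir = LIST_HEAD_INIT(new_dir);
        struct list_head child  = LIST_HEAD_INIT(child);

        list_add(&child, &old_dir);   /* child under the old parent */
        list_move(&child, &new_dir);  /* reparent in a single call */
        printf("on new list: %d, old list empty: %d\n",
               new_dir.next == &child, old_dir.next == &old_dir);
        return 0;
}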
diff --git a/fs/direct-io.c b/fs/direct-io.c index c3116404ab49..e181b6b2e297 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c | |||
@@ -158,7 +158,7 @@ static inline int dio_refill_pages(struct dio *dio, struct dio_submit *sdio) | |||
158 | { | 158 | { |
159 | ssize_t ret; | 159 | ssize_t ret; |
160 | 160 | ||
161 | ret = iov_iter_get_pages(sdio->iter, dio->pages, DIO_PAGES, | 161 | ret = iov_iter_get_pages(sdio->iter, dio->pages, LONG_MAX, DIO_PAGES, |
162 | &sdio->from); | 162 | &sdio->from); |
163 | 163 | ||
164 | if (ret < 0 && sdio->blocks_available && (dio->rw & WRITE)) { | 164 | if (ret < 0 && sdio->blocks_available && (dio->rw & WRITE)) { |
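Editor's note: the new LONG_MAX argument corresponds to a byte-size cap added to iov_iter_get_pages(). The prototype below is inferred from this call site, not quoted from the header:

/*
 * Assumed post-change prototype:
 *
 *      ssize_t iov_iter_get_pages(struct iov_iter *i, struct page **pages,
 *                                 size_t maxsize, unsigned maxpages,
 *                                 size_t *start);
 *
 * Direct I/O imposes no byte cap of its own, so it passes LONG_MAX and
 * lets maxpages (DIO_PAGES, the capacity of dio->pages[]) bound how
 * much gets pinned per refill.
 */
ret = iov_iter_get_pages(sdio->iter, dio->pages, LONG_MAX, DIO_PAGES,
                         &sdio->from);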
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index db0fad3269c0..b4b6ab9873ae 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c | |||
@@ -229,8 +229,8 @@ static int ecryptfs_open(struct inode *inode, struct file *file) | |||
229 | if (rc) { | 229 | if (rc) { |
230 | printk(KERN_ERR "%s: Error attempting to initialize " | 230 | printk(KERN_ERR "%s: Error attempting to initialize " |
231 | "the lower file for the dentry with name " | 231 | "the lower file for the dentry with name " |
232 | "[%s]; rc = [%d]\n", __func__, | 232 | "[%pd]; rc = [%d]\n", __func__, |
233 | ecryptfs_dentry->d_name.name, rc); | 233 | ecryptfs_dentry, rc); |
234 | goto out_free; | 234 | goto out_free; |
235 | } | 235 | } |
236 | if ((ecryptfs_inode_to_private(inode)->lower_file->f_flags & O_ACCMODE) | 236 | if ((ecryptfs_inode_to_private(inode)->lower_file->f_flags & O_ACCMODE) |
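Editor's note: every eCryptfs printk conversion in this series has the same shape. A hedged sketch (dentry and rc stand in for the locals above): with %pd the formatter is handed the dentry itself and resolves the name at print time, instead of the caller dereferencing d_name.name, a pointer a concurrent rename can swap out from under it.

printk(KERN_ERR "%s: failed for dentry [%pd]; rc = [%d]\n",
       __func__, dentry, rc);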
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index d4a9431ec73c..1686dc2da9fd 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c | |||
@@ -53,9 +53,7 @@ static void unlock_dir(struct dentry *dir) | |||
53 | 53 | ||
54 | static int ecryptfs_inode_test(struct inode *inode, void *lower_inode) | 54 | static int ecryptfs_inode_test(struct inode *inode, void *lower_inode) |
55 | { | 55 | { |
56 | if (ecryptfs_inode_to_lower(inode) == (struct inode *)lower_inode) | 56 | return ecryptfs_inode_to_lower(inode) == lower_inode; |
57 | return 1; | ||
58 | return 0; | ||
59 | } | 57 | } |
60 | 58 | ||
61 | static int ecryptfs_inode_set(struct inode *inode, void *opaque) | 59 | static int ecryptfs_inode_set(struct inode *inode, void *opaque) |
@@ -192,12 +190,6 @@ ecryptfs_do_create(struct inode *directory_inode, | |||
192 | 190 | ||
193 | lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry); | 191 | lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry); |
194 | lower_dir_dentry = lock_parent(lower_dentry); | 192 | lower_dir_dentry = lock_parent(lower_dentry); |
195 | if (IS_ERR(lower_dir_dentry)) { | ||
196 | ecryptfs_printk(KERN_ERR, "Error locking directory of " | ||
197 | "dentry\n"); | ||
198 | inode = ERR_CAST(lower_dir_dentry); | ||
199 | goto out; | ||
200 | } | ||
201 | rc = vfs_create(lower_dir_dentry->d_inode, lower_dentry, mode, true); | 193 | rc = vfs_create(lower_dir_dentry->d_inode, lower_dentry, mode, true); |
202 | if (rc) { | 194 | if (rc) { |
203 | printk(KERN_ERR "%s: Failure to create dentry in lower fs; " | 195 | printk(KERN_ERR "%s: Failure to create dentry in lower fs; " |
@@ -215,7 +207,6 @@ ecryptfs_do_create(struct inode *directory_inode, | |||
215 | fsstack_copy_inode_size(directory_inode, lower_dir_dentry->d_inode); | 207 | fsstack_copy_inode_size(directory_inode, lower_dir_dentry->d_inode); |
216 | out_lock: | 208 | out_lock: |
217 | unlock_dir(lower_dir_dentry); | 209 | unlock_dir(lower_dir_dentry); |
218 | out: | ||
219 | return inode; | 210 | return inode; |
220 | } | 211 | } |
221 | 212 | ||
@@ -250,8 +241,8 @@ int ecryptfs_initialize_file(struct dentry *ecryptfs_dentry, | |||
250 | if (rc) { | 241 | if (rc) { |
251 | printk(KERN_ERR "%s: Error attempting to initialize " | 242 | printk(KERN_ERR "%s: Error attempting to initialize " |
252 | "the lower file for the dentry with name " | 243 | "the lower file for the dentry with name " |
253 | "[%s]; rc = [%d]\n", __func__, | 244 | "[%pd]; rc = [%d]\n", __func__, |
254 | ecryptfs_dentry->d_name.name, rc); | 245 | ecryptfs_dentry, rc); |
255 | goto out; | 246 | goto out; |
256 | } | 247 | } |
257 | rc = ecryptfs_write_metadata(ecryptfs_dentry, ecryptfs_inode); | 248 | rc = ecryptfs_write_metadata(ecryptfs_dentry, ecryptfs_inode); |
@@ -313,8 +304,8 @@ static int ecryptfs_i_size_read(struct dentry *dentry, struct inode *inode) | |||
313 | if (rc) { | 304 | if (rc) { |
314 | printk(KERN_ERR "%s: Error attempting to initialize " | 305 | printk(KERN_ERR "%s: Error attempting to initialize " |
315 | "the lower file for the dentry with name " | 306 | "the lower file for the dentry with name " |
316 | "[%s]; rc = [%d]\n", __func__, | 307 | "[%pd]; rc = [%d]\n", __func__, |
317 | dentry->d_name.name, rc); | 308 | dentry, rc); |
318 | return rc; | 309 | return rc; |
319 | } | 310 | } |
320 | 311 | ||
@@ -418,8 +409,8 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode, | |||
418 | if (IS_ERR(lower_dentry)) { | 409 | if (IS_ERR(lower_dentry)) { |
419 | rc = PTR_ERR(lower_dentry); | 410 | rc = PTR_ERR(lower_dentry); |
420 | ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_len() returned " | 411 | ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_len() returned " |
421 | "[%d] on lower_dentry = [%s]\n", __func__, rc, | 412 | "[%d] on lower_dentry = [%pd]\n", __func__, rc, |
422 | ecryptfs_dentry->d_name.name); | 413 | ecryptfs_dentry); |
423 | goto out; | 414 | goto out; |
424 | } | 415 | } |
425 | if (lower_dentry->d_inode) | 416 | if (lower_dentry->d_inode) |
@@ -1039,7 +1030,7 @@ ecryptfs_setxattr(struct dentry *dentry, const char *name, const void *value, | |||
1039 | } | 1030 | } |
1040 | 1031 | ||
1041 | rc = vfs_setxattr(lower_dentry, name, value, size, flags); | 1032 | rc = vfs_setxattr(lower_dentry, name, value, size, flags); |
1042 | if (!rc) | 1033 | if (!rc && dentry->d_inode) |
1043 | fsstack_copy_attr_all(dentry->d_inode, lower_dentry->d_inode); | 1034 | fsstack_copy_attr_all(dentry->d_inode, lower_dentry->d_inode); |
1044 | out: | 1035 | out: |
1045 | return rc; | 1036 | return rc; |
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c index 4725a07f003c..635e8e16a5b7 100644 --- a/fs/ecryptfs/keystore.c +++ b/fs/ecryptfs/keystore.c | |||
@@ -26,7 +26,6 @@ | |||
26 | */ | 26 | */ |
27 | 27 | ||
28 | #include <linux/string.h> | 28 | #include <linux/string.h> |
29 | #include <linux/syscalls.h> | ||
30 | #include <linux/pagemap.h> | 29 | #include <linux/pagemap.h> |
31 | #include <linux/key.h> | 30 | #include <linux/key.h> |
32 | #include <linux/random.h> | 31 | #include <linux/random.h> |
@@ -1846,7 +1845,6 @@ int ecryptfs_parse_packet_set(struct ecryptfs_crypt_stat *crypt_stat, | |||
1846 | "(Tag 11 not allowed by itself)\n"); | 1845 | "(Tag 11 not allowed by itself)\n"); |
1847 | rc = -EIO; | 1846 | rc = -EIO; |
1848 | goto out_wipe_list; | 1847 | goto out_wipe_list; |
1849 | break; | ||
1850 | default: | 1848 | default: |
1851 | ecryptfs_printk(KERN_DEBUG, "No packet at offset [%zd] " | 1849 | ecryptfs_printk(KERN_DEBUG, "No packet at offset [%zd] " |
1852 | "of the file header; hex value of " | 1850 | "of the file header; hex value of " |
diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c index e57380e5f6bd..286f10b0363b 100644 --- a/fs/ecryptfs/messaging.c +++ b/fs/ecryptfs/messaging.c | |||
@@ -434,8 +434,7 @@ void ecryptfs_release_messaging(void) | |||
434 | mutex_lock(&ecryptfs_msg_ctx_lists_mux); | 434 | mutex_lock(&ecryptfs_msg_ctx_lists_mux); |
435 | for (i = 0; i < ecryptfs_message_buf_len; i++) { | 435 | for (i = 0; i < ecryptfs_message_buf_len; i++) { |
436 | mutex_lock(&ecryptfs_msg_ctx_arr[i].mux); | 436 | mutex_lock(&ecryptfs_msg_ctx_arr[i].mux); |
437 | if (ecryptfs_msg_ctx_arr[i].msg) | 437 | kfree(ecryptfs_msg_ctx_arr[i].msg); |
438 | kfree(ecryptfs_msg_ctx_arr[i].msg); | ||
439 | mutex_unlock(&ecryptfs_msg_ctx_arr[i].mux); | 438 | mutex_unlock(&ecryptfs_msg_ctx_arr[i].mux); |
440 | } | 439 | } |
441 | kfree(ecryptfs_msg_ctx_arr); | 440 | kfree(ecryptfs_msg_ctx_arr); |
diff --git a/fs/eventpoll.c b/fs/eventpoll.c index b10b48c2a7af..7bcfff900f05 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c | |||
@@ -1852,7 +1852,8 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd, | |||
1852 | goto error_tgt_fput; | 1852 | goto error_tgt_fput; |
1853 | 1853 | ||
1854 | /* Check if EPOLLWAKEUP is allowed */ | 1854 | /* Check if EPOLLWAKEUP is allowed */ |
1855 | ep_take_care_of_epollwakeup(&epds); | 1855 | if (ep_op_has_event(op)) |
1856 | ep_take_care_of_epollwakeup(&epds); | ||
1856 | 1857 | ||
1857 | /* | 1858 | /* |
1858 | * We have to check that the file structure underneath the file descriptor | 1859 | * We have to check that the file structure underneath the file descriptor |
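Editor's note: the guard matters because EPOLL_CTL_DEL is the one op for which no event is copied in from userspace, so epds is uninitialized stack on that path; ep_op_has_event(op) is, per this reader's understanding, simply op != EPOLL_CTL_DEL. A small userspace probe of the two paths (illustrative only; an unprivileged EPOLLWAKEUP is silently dropped without CAP_BLOCK_SUSPEND):

#include <stdio.h>
#include <unistd.h>
#include <sys/epoll.h>

#ifndef EPOLLWAKEUP
#define EPOLLWAKEUP (1u << 29)
#endif

int main(void)
{
        int epfd = epoll_create1(0);
        int pfd[2];
        struct epoll_event ev = { .events = EPOLLIN | EPOLLWAKEUP };

        if (epfd < 0 || pipe(pfd) < 0) {
                perror("setup");
                return 1;
        }

        /* ADD carries an event, so the kernel must inspect it — the
         * ep_op_has_event(op) == true path, where sanitizing
         * EPOLLWAKEUP is still required. */
        if (epoll_ctl(epfd, EPOLL_CTL_ADD, pfd[0], &ev) < 0)
                perror("EPOLL_CTL_ADD");

        /* DEL carries no event; NULL is legal here and nothing is
         * copied in — exactly why epds was uninitialized on this path
         * before the fix. */
        if (epoll_ctl(epfd, EPOLL_CTL_DEL, pfd[0], NULL) < 0)
                perror("EPOLL_CTL_DEL");

        close(pfd[0]);
        close(pfd[1]);
        close(epfd);
        return 0;
}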
diff --git a/fs/ext2/super.c b/fs/ext2/super.c index b88edc05c230..170dc41e8bf4 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c | |||
@@ -1067,14 +1067,14 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) | |||
1067 | ext2_rsv_window_add(sb, &sbi->s_rsv_window_head); | 1067 | ext2_rsv_window_add(sb, &sbi->s_rsv_window_head); |
1068 | 1068 | ||
1069 | err = percpu_counter_init(&sbi->s_freeblocks_counter, | 1069 | err = percpu_counter_init(&sbi->s_freeblocks_counter, |
1070 | ext2_count_free_blocks(sb)); | 1070 | ext2_count_free_blocks(sb), GFP_KERNEL); |
1071 | if (!err) { | 1071 | if (!err) { |
1072 | err = percpu_counter_init(&sbi->s_freeinodes_counter, | 1072 | err = percpu_counter_init(&sbi->s_freeinodes_counter, |
1073 | ext2_count_free_inodes(sb)); | 1073 | ext2_count_free_inodes(sb), GFP_KERNEL); |
1074 | } | 1074 | } |
1075 | if (!err) { | 1075 | if (!err) { |
1076 | err = percpu_counter_init(&sbi->s_dirs_counter, | 1076 | err = percpu_counter_init(&sbi->s_dirs_counter, |
1077 | ext2_count_dirs(sb)); | 1077 | ext2_count_dirs(sb), GFP_KERNEL); |
1078 | } | 1078 | } |
1079 | if (err) { | 1079 | if (err) { |
1080 | ext2_msg(sb, KERN_ERR, "error: insufficient memory"); | 1080 | ext2_msg(sb, KERN_ERR, "error: insufficient memory"); |
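Editor's note: the same three-argument form recurs in the ext3 and ext4 hunks below. The prototype sketched here is inferred from the call sites, not quoted from <linux/percpu_counter.h>:

/*
 * Assumed post-change prototype:
 *
 *      int percpu_counter_init(struct percpu_counter *fbc, s64 amount,
 *                              gfp_t gfp);
 *
 * The per-CPU storage is allocated inside _init(), so the caller now
 * names the allocation context explicitly: a sleepable mount path like
 * this one passes GFP_KERNEL, while atomic callers can pass GFP_NOWAIT.
 */
err = percpu_counter_init(&sbi->s_freeblocks_counter,
                          ext2_count_free_blocks(sb), GFP_KERNEL);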
diff --git a/fs/ext3/ext3.h b/fs/ext3/ext3.h index e85ff15a060e..fc3cdcf24aed 100644 --- a/fs/ext3/ext3.h +++ b/fs/ext3/ext3.h | |||
@@ -237,6 +237,8 @@ struct ext3_new_group_data { | |||
237 | #define EXT3_IOC32_GETVERSION_OLD FS_IOC32_GETVERSION | 237 | #define EXT3_IOC32_GETVERSION_OLD FS_IOC32_GETVERSION |
238 | #define EXT3_IOC32_SETVERSION_OLD FS_IOC32_SETVERSION | 238 | #define EXT3_IOC32_SETVERSION_OLD FS_IOC32_SETVERSION |
239 | 239 | ||
240 | /* Number of supported quota types */ | ||
241 | #define EXT3_MAXQUOTAS 2 | ||
240 | 242 | ||
241 | /* | 243 | /* |
242 | * Mount options | 244 | * Mount options |
@@ -248,7 +250,7 @@ struct ext3_mount_options { | |||
248 | unsigned long s_commit_interval; | 250 | unsigned long s_commit_interval; |
249 | #ifdef CONFIG_QUOTA | 251 | #ifdef CONFIG_QUOTA |
250 | int s_jquota_fmt; | 252 | int s_jquota_fmt; |
251 | char *s_qf_names[MAXQUOTAS]; | 253 | char *s_qf_names[EXT3_MAXQUOTAS]; |
252 | #endif | 254 | #endif |
253 | }; | 255 | }; |
254 | 256 | ||
@@ -669,7 +671,7 @@ struct ext3_sb_info { | |||
669 | unsigned long s_commit_interval; | 671 | unsigned long s_commit_interval; |
670 | struct block_device *journal_bdev; | 672 | struct block_device *journal_bdev; |
671 | #ifdef CONFIG_QUOTA | 673 | #ifdef CONFIG_QUOTA |
672 | char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */ | 674 | char *s_qf_names[EXT3_MAXQUOTAS]; /* Names of quota files with journalled quota */ |
673 | int s_jquota_fmt; /* Format of quota to use */ | 675 | int s_jquota_fmt; /* Format of quota to use */ |
674 | #endif | 676 | #endif |
675 | }; | 677 | }; |
@@ -1183,9 +1185,9 @@ extern const struct inode_operations ext3_fast_symlink_inode_operations; | |||
1183 | #define EXT3_QUOTA_INIT_BLOCKS(sb) 0 | 1185 | #define EXT3_QUOTA_INIT_BLOCKS(sb) 0 |
1184 | #define EXT3_QUOTA_DEL_BLOCKS(sb) 0 | 1186 | #define EXT3_QUOTA_DEL_BLOCKS(sb) 0 |
1185 | #endif | 1187 | #endif |
1186 | #define EXT3_MAXQUOTAS_TRANS_BLOCKS(sb) (MAXQUOTAS*EXT3_QUOTA_TRANS_BLOCKS(sb)) | 1188 | #define EXT3_MAXQUOTAS_TRANS_BLOCKS(sb) (EXT3_MAXQUOTAS*EXT3_QUOTA_TRANS_BLOCKS(sb)) |
1187 | #define EXT3_MAXQUOTAS_INIT_BLOCKS(sb) (MAXQUOTAS*EXT3_QUOTA_INIT_BLOCKS(sb)) | 1189 | #define EXT3_MAXQUOTAS_INIT_BLOCKS(sb) (EXT3_MAXQUOTAS*EXT3_QUOTA_INIT_BLOCKS(sb)) |
1188 | #define EXT3_MAXQUOTAS_DEL_BLOCKS(sb) (MAXQUOTAS*EXT3_QUOTA_DEL_BLOCKS(sb)) | 1190 | #define EXT3_MAXQUOTAS_DEL_BLOCKS(sb) (EXT3_MAXQUOTAS*EXT3_QUOTA_DEL_BLOCKS(sb)) |
1189 | 1191 | ||
1190 | int | 1192 | int |
1191 | ext3_mark_iloc_dirty(handle_t *handle, | 1193 | ext3_mark_iloc_dirty(handle_t *handle, |
diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 08cdfe5461e3..7015db0bafd1 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c | |||
@@ -441,7 +441,7 @@ static void ext3_put_super (struct super_block * sb) | |||
441 | percpu_counter_destroy(&sbi->s_dirs_counter); | 441 | percpu_counter_destroy(&sbi->s_dirs_counter); |
442 | brelse(sbi->s_sbh); | 442 | brelse(sbi->s_sbh); |
443 | #ifdef CONFIG_QUOTA | 443 | #ifdef CONFIG_QUOTA |
444 | for (i = 0; i < MAXQUOTAS; i++) | 444 | for (i = 0; i < EXT3_MAXQUOTAS; i++) |
445 | kfree(sbi->s_qf_names[i]); | 445 | kfree(sbi->s_qf_names[i]); |
446 | #endif | 446 | #endif |
447 | 447 | ||
@@ -1555,7 +1555,7 @@ static void ext3_orphan_cleanup (struct super_block * sb, | |||
1555 | /* Needed for iput() to work correctly and not trash data */ | 1555 | /* Needed for iput() to work correctly and not trash data */ |
1556 | sb->s_flags |= MS_ACTIVE; | 1556 | sb->s_flags |= MS_ACTIVE; |
1557 | /* Turn on quotas so that they are updated correctly */ | 1557 | /* Turn on quotas so that they are updated correctly */ |
1558 | for (i = 0; i < MAXQUOTAS; i++) { | 1558 | for (i = 0; i < EXT3_MAXQUOTAS; i++) { |
1559 | if (EXT3_SB(sb)->s_qf_names[i]) { | 1559 | if (EXT3_SB(sb)->s_qf_names[i]) { |
1560 | int ret = ext3_quota_on_mount(sb, i); | 1560 | int ret = ext3_quota_on_mount(sb, i); |
1561 | if (ret < 0) | 1561 | if (ret < 0) |
@@ -1606,7 +1606,7 @@ static void ext3_orphan_cleanup (struct super_block * sb, | |||
1606 | PLURAL(nr_truncates)); | 1606 | PLURAL(nr_truncates)); |
1607 | #ifdef CONFIG_QUOTA | 1607 | #ifdef CONFIG_QUOTA |
1608 | /* Turn quotas off */ | 1608 | /* Turn quotas off */ |
1609 | for (i = 0; i < MAXQUOTAS; i++) { | 1609 | for (i = 0; i < EXT3_MAXQUOTAS; i++) { |
1610 | if (sb_dqopt(sb)->files[i]) | 1610 | if (sb_dqopt(sb)->files[i]) |
1611 | dquot_quota_off(sb, i); | 1611 | dquot_quota_off(sb, i); |
1612 | } | 1612 | } |
@@ -2039,14 +2039,14 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) | |||
2039 | goto failed_mount2; | 2039 | goto failed_mount2; |
2040 | } | 2040 | } |
2041 | err = percpu_counter_init(&sbi->s_freeblocks_counter, | 2041 | err = percpu_counter_init(&sbi->s_freeblocks_counter, |
2042 | ext3_count_free_blocks(sb)); | 2042 | ext3_count_free_blocks(sb), GFP_KERNEL); |
2043 | if (!err) { | 2043 | if (!err) { |
2044 | err = percpu_counter_init(&sbi->s_freeinodes_counter, | 2044 | err = percpu_counter_init(&sbi->s_freeinodes_counter, |
2045 | ext3_count_free_inodes(sb)); | 2045 | ext3_count_free_inodes(sb), GFP_KERNEL); |
2046 | } | 2046 | } |
2047 | if (!err) { | 2047 | if (!err) { |
2048 | err = percpu_counter_init(&sbi->s_dirs_counter, | 2048 | err = percpu_counter_init(&sbi->s_dirs_counter, |
2049 | ext3_count_dirs(sb)); | 2049 | ext3_count_dirs(sb), GFP_KERNEL); |
2050 | } | 2050 | } |
2051 | if (err) { | 2051 | if (err) { |
2052 | ext3_msg(sb, KERN_ERR, "error: insufficient memory"); | 2052 | ext3_msg(sb, KERN_ERR, "error: insufficient memory"); |
@@ -2139,7 +2139,7 @@ failed_mount2: | |||
2139 | kfree(sbi->s_group_desc); | 2139 | kfree(sbi->s_group_desc); |
2140 | failed_mount: | 2140 | failed_mount: |
2141 | #ifdef CONFIG_QUOTA | 2141 | #ifdef CONFIG_QUOTA |
2142 | for (i = 0; i < MAXQUOTAS; i++) | 2142 | for (i = 0; i < EXT3_MAXQUOTAS; i++) |
2143 | kfree(sbi->s_qf_names[i]); | 2143 | kfree(sbi->s_qf_names[i]); |
2144 | #endif | 2144 | #endif |
2145 | ext3_blkdev_remove(sbi); | 2145 | ext3_blkdev_remove(sbi); |
@@ -2659,7 +2659,7 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data) | |||
2659 | old_opts.s_commit_interval = sbi->s_commit_interval; | 2659 | old_opts.s_commit_interval = sbi->s_commit_interval; |
2660 | #ifdef CONFIG_QUOTA | 2660 | #ifdef CONFIG_QUOTA |
2661 | old_opts.s_jquota_fmt = sbi->s_jquota_fmt; | 2661 | old_opts.s_jquota_fmt = sbi->s_jquota_fmt; |
2662 | for (i = 0; i < MAXQUOTAS; i++) | 2662 | for (i = 0; i < EXT3_MAXQUOTAS; i++) |
2663 | if (sbi->s_qf_names[i]) { | 2663 | if (sbi->s_qf_names[i]) { |
2664 | old_opts.s_qf_names[i] = kstrdup(sbi->s_qf_names[i], | 2664 | old_opts.s_qf_names[i] = kstrdup(sbi->s_qf_names[i], |
2665 | GFP_KERNEL); | 2665 | GFP_KERNEL); |
@@ -2763,7 +2763,7 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data) | |||
2763 | } | 2763 | } |
2764 | #ifdef CONFIG_QUOTA | 2764 | #ifdef CONFIG_QUOTA |
2765 | /* Release old quota file names */ | 2765 | /* Release old quota file names */ |
2766 | for (i = 0; i < MAXQUOTAS; i++) | 2766 | for (i = 0; i < EXT3_MAXQUOTAS; i++) |
2767 | kfree(old_opts.s_qf_names[i]); | 2767 | kfree(old_opts.s_qf_names[i]); |
2768 | #endif | 2768 | #endif |
2769 | if (enable_quota) | 2769 | if (enable_quota) |
@@ -2777,7 +2777,7 @@ restore_opts: | |||
2777 | sbi->s_commit_interval = old_opts.s_commit_interval; | 2777 | sbi->s_commit_interval = old_opts.s_commit_interval; |
2778 | #ifdef CONFIG_QUOTA | 2778 | #ifdef CONFIG_QUOTA |
2779 | sbi->s_jquota_fmt = old_opts.s_jquota_fmt; | 2779 | sbi->s_jquota_fmt = old_opts.s_jquota_fmt; |
2780 | for (i = 0; i < MAXQUOTAS; i++) { | 2780 | for (i = 0; i < EXT3_MAXQUOTAS; i++) { |
2781 | kfree(sbi->s_qf_names[i]); | 2781 | kfree(sbi->s_qf_names[i]); |
2782 | sbi->s_qf_names[i] = old_opts.s_qf_names[i]; | 2782 | sbi->s_qf_names[i] = old_opts.s_qf_names[i]; |
2783 | } | 2783 | } |
@@ -2828,8 +2828,9 @@ static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf) | |||
2828 | */ | 2828 | */ |
2829 | overhead += ngroups * (2 + sbi->s_itb_per_group); | 2829 | overhead += ngroups * (2 + sbi->s_itb_per_group); |
2830 | 2830 | ||
2831 | /* Add the journal blocks as well */ | 2831 | /* Add the internal journal blocks as well */ |
2832 | overhead += sbi->s_journal->j_maxlen; | 2832 | if (sbi->s_journal && !sbi->journal_bdev) |
2833 | overhead += sbi->s_journal->j_maxlen; | ||
2833 | 2834 | ||
2834 | sbi->s_overhead_last = overhead; | 2835 | sbi->s_overhead_last = overhead; |
2835 | smp_wmb(); | 2836 | smp_wmb(); |
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 5b19760b1de5..b0c225cdb52c 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h | |||
@@ -1825,7 +1825,7 @@ ext4_group_first_block_no(struct super_block *sb, ext4_group_t group_no) | |||
1825 | /* | 1825 | /* |
1826 | * Special error return code only used by dx_probe() and its callers. | 1826 | * Special error return code only used by dx_probe() and its callers. |
1827 | */ | 1827 | */ |
1828 | #define ERR_BAD_DX_DIR -75000 | 1828 | #define ERR_BAD_DX_DIR (-(MAX_ERRNO - 1)) |
1829 | 1829 | ||
1830 | /* | 1830 | /* |
1831 | * Timeout and state flag for lazy initialization inode thread. | 1831 | * Timeout and state flag for lazy initialization inode thread. |
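Editor's note: this renumbering is what lets the namei.c hunks below return ERR_BAD_DX_DIR through a buffer_head pointer. IS_ERR() only recognizes the top MAX_ERRNO (4095) values of the address space, so the old -75000 could never travel inside an ERR_PTR(), while -(MAX_ERRNO - 1) can, and still stays clear of every real errno. A standalone demonstration, with the kernel's encoding re-implemented in userspace:

#include <stdio.h>

#define MAX_ERRNO 4095

/* Userspace re-implementation of the kernel's pointer/errno encoding. */
static inline void *ERR_PTR(long error)
{
        return (void *)error;
}

static inline int IS_ERR(const void *ptr)
{
        return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

int main(void)
{
        /* The old value lies far outside the encodable window: */
        printf("old  -75000         -> IS_ERR = %d\n",
               IS_ERR(ERR_PTR(-75000)));              /* prints 0 */
        /* The new value sits just inside it, above all real errnos: */
        printf("new  -(MAX_ERRNO-1) -> IS_ERR = %d\n",
               IS_ERR(ERR_PTR(-(MAX_ERRNO - 1))));    /* prints 1 */
        return 0;
}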
@@ -2454,6 +2454,22 @@ static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize) | |||
2454 | up_write(&EXT4_I(inode)->i_data_sem); | 2454 | up_write(&EXT4_I(inode)->i_data_sem); |
2455 | } | 2455 | } |
2456 | 2456 | ||
2457 | /* Update i_size, i_disksize. Requires i_mutex to avoid races with truncate */ | ||
2458 | static inline int ext4_update_inode_size(struct inode *inode, loff_t newsize) | ||
2459 | { | ||
2460 | int changed = 0; | ||
2461 | |||
2462 | if (newsize > inode->i_size) { | ||
2463 | i_size_write(inode, newsize); | ||
2464 | changed = 1; | ||
2465 | } | ||
2466 | if (newsize > EXT4_I(inode)->i_disksize) { | ||
2467 | ext4_update_i_disksize(inode, newsize); | ||
2468 | changed |= 2; | ||
2469 | } | ||
2470 | return changed; | ||
2471 | } | ||
2472 | |||
2457 | struct ext4_group_info { | 2473 | struct ext4_group_info { |
2458 | unsigned long bb_state; | 2474 | unsigned long bb_state; |
2459 | struct rb_root bb_free_root; | 2475 | struct rb_root bb_free_root; |
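Editor's note: a hypothetical write-end style caller, not from the patch, to spell out the helper's bitmask contract — bit 0 reports that the in-core i_size was raised, bit 1 that the on-disk i_disksize was:

/* Illustrative only; names follow the surrounding ext4 code. */
int changed = ext4_update_inode_size(inode, pos + copied);

if (changed & 0x1)      /* userspace-visible size grew: timestamps move */
        inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
if (changed)            /* either size moved: journal the inode */
        ext4_mark_inode_dirty(handle, inode);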
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 76c2df382b7d..74292a71b384 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c | |||
@@ -4665,7 +4665,8 @@ retry: | |||
4665 | } | 4665 | } |
4666 | 4666 | ||
4667 | static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset, | 4667 | static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset, |
4668 | ext4_lblk_t len, int flags, int mode) | 4668 | ext4_lblk_t len, loff_t new_size, |
4669 | int flags, int mode) | ||
4669 | { | 4670 | { |
4670 | struct inode *inode = file_inode(file); | 4671 | struct inode *inode = file_inode(file); |
4671 | handle_t *handle; | 4672 | handle_t *handle; |
@@ -4674,8 +4675,10 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset, | |||
4674 | int retries = 0; | 4675 | int retries = 0; |
4675 | struct ext4_map_blocks map; | 4676 | struct ext4_map_blocks map; |
4676 | unsigned int credits; | 4677 | unsigned int credits; |
4678 | loff_t epos; | ||
4677 | 4679 | ||
4678 | map.m_lblk = offset; | 4680 | map.m_lblk = offset; |
4681 | map.m_len = len; | ||
4679 | /* | 4682 | /* |
4680 | * Don't normalize the request if it can fit in one extent so | 4683 | * Don't normalize the request if it can fit in one extent so |
4681 | * that it doesn't get unnecessarily split into multiple | 4684 | * that it doesn't get unnecessarily split into multiple |
@@ -4690,9 +4693,7 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset, | |||
4690 | credits = ext4_chunk_trans_blocks(inode, len); | 4693 | credits = ext4_chunk_trans_blocks(inode, len); |
4691 | 4694 | ||
4692 | retry: | 4695 | retry: |
4693 | while (ret >= 0 && ret < len) { | 4696 | while (ret >= 0 && len) { |
4694 | map.m_lblk = map.m_lblk + ret; | ||
4695 | map.m_len = len = len - ret; | ||
4696 | handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, | 4697 | handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, |
4697 | credits); | 4698 | credits); |
4698 | if (IS_ERR(handle)) { | 4699 | if (IS_ERR(handle)) { |
@@ -4709,6 +4710,21 @@ retry: | |||
4709 | ret2 = ext4_journal_stop(handle); | 4710 | ret2 = ext4_journal_stop(handle); |
4710 | break; | 4711 | break; |
4711 | } | 4712 | } |
4713 | map.m_lblk += ret; | ||
4714 | map.m_len = len = len - ret; | ||
4715 | epos = (loff_t)map.m_lblk << inode->i_blkbits; | ||
4716 | inode->i_ctime = ext4_current_time(inode); | ||
4717 | if (new_size) { | ||
4718 | if (epos > new_size) | ||
4719 | epos = new_size; | ||
4720 | if (ext4_update_inode_size(inode, epos) & 0x1) | ||
4721 | inode->i_mtime = inode->i_ctime; | ||
4722 | } else { | ||
4723 | if (epos > inode->i_size) | ||
4724 | ext4_set_inode_flag(inode, | ||
4725 | EXT4_INODE_EOFBLOCKS); | ||
4726 | } | ||
4727 | ext4_mark_inode_dirty(handle, inode); | ||
4712 | ret2 = ext4_journal_stop(handle); | 4728 | ret2 = ext4_journal_stop(handle); |
4713 | if (ret2) | 4729 | if (ret2) |
4714 | break; | 4730 | break; |
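Editor's note: the new epos bookkeeping converts the loop's logical-block cursor into a byte offset once per iteration. The arithmetic, checked standalone:

#include <stdio.h>

int main(void)
{
        unsigned int blkbits = 12;      /* 4 KiB blocks */
        unsigned long long lblk = 10;   /* cursor after mapping 10 blocks */

        /* Mirrors epos = (loff_t)map.m_lblk << inode->i_blkbits; the
         * widening cast comes first so a 32-bit block number shifted by
         * blkbits cannot overflow before landing in the 64-bit offset. */
        unsigned long long epos = lblk << blkbits;

        printf("%llu blocks of %u bytes -> size %llu\n",
               lblk, 1u << blkbits, epos);             /* 40960 */
        return 0;
}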
@@ -4731,7 +4747,8 @@ static long ext4_zero_range(struct file *file, loff_t offset, | |||
4731 | loff_t new_size = 0; | 4747 | loff_t new_size = 0; |
4732 | int ret = 0; | 4748 | int ret = 0; |
4733 | int flags; | 4749 | int flags; |
4734 | int partial; | 4750 | int credits; |
4751 | int partial_begin, partial_end; | ||
4735 | loff_t start, end; | 4752 | loff_t start, end; |
4736 | ext4_lblk_t lblk; | 4753 | ext4_lblk_t lblk; |
4737 | struct address_space *mapping = inode->i_mapping; | 4754 | struct address_space *mapping = inode->i_mapping; |
@@ -4771,7 +4788,8 @@ static long ext4_zero_range(struct file *file, loff_t offset, | |||
4771 | 4788 | ||
4772 | if (start < offset || end > offset + len) | 4789 | if (start < offset || end > offset + len) |
4773 | return -EINVAL; | 4790 | return -EINVAL; |
4774 | partial = (offset + len) & ((1 << blkbits) - 1); | 4791 | partial_begin = offset & ((1 << blkbits) - 1); |
4792 | partial_end = (offset + len) & ((1 << blkbits) - 1); | ||
4775 | 4793 | ||
4776 | lblk = start >> blkbits; | 4794 | lblk = start >> blkbits; |
4777 | max_blocks = (end >> blkbits); | 4795 | max_blocks = (end >> blkbits); |
@@ -4805,7 +4823,7 @@ static long ext4_zero_range(struct file *file, loff_t offset, | |||
4805 | * If we have a partial block after EOF we have to allocate | 4823 | * If we have a partial block after EOF we have to allocate |
4806 | * the entire block. | 4824 | * the entire block. |
4807 | */ | 4825 | */ |
4808 | if (partial) | 4826 | if (partial_end) |
4809 | max_blocks += 1; | 4827 | max_blocks += 1; |
4810 | } | 4828 | } |
4811 | 4829 | ||
@@ -4813,6 +4831,7 @@ static long ext4_zero_range(struct file *file, loff_t offset, | |||
4813 | 4831 | ||
4814 | /* Now release the pages and zero block aligned part of pages*/ | 4832 | /* Now release the pages and zero block aligned part of pages*/ |
4815 | truncate_pagecache_range(inode, start, end - 1); | 4833 | truncate_pagecache_range(inode, start, end - 1); |
4834 | inode->i_mtime = inode->i_ctime = ext4_current_time(inode); | ||
4816 | 4835 | ||
4817 | /* Wait all existing dio workers, newcomers will block on i_mutex */ | 4836 | /* Wait all existing dio workers, newcomers will block on i_mutex */ |
4818 | ext4_inode_block_unlocked_dio(inode); | 4837 | ext4_inode_block_unlocked_dio(inode); |
@@ -4825,13 +4844,22 @@ static long ext4_zero_range(struct file *file, loff_t offset, | |||
4825 | if (ret) | 4844 | if (ret) |
4826 | goto out_dio; | 4845 | goto out_dio; |
4827 | 4846 | ||
4828 | ret = ext4_alloc_file_blocks(file, lblk, max_blocks, flags, | 4847 | ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size, |
4829 | mode); | 4848 | flags, mode); |
4830 | if (ret) | 4849 | if (ret) |
4831 | goto out_dio; | 4850 | goto out_dio; |
4832 | } | 4851 | } |
4852 | if (!partial_begin && !partial_end) | ||
4853 | goto out_dio; | ||
4833 | 4854 | ||
4834 | handle = ext4_journal_start(inode, EXT4_HT_MISC, 4); | 4855 | /* |
4856 | * In worst case we have to writeout two nonadjacent unwritten | ||
4857 | * blocks and update the inode | ||
4858 | */ | ||
4859 | credits = (2 * ext4_ext_index_trans_blocks(inode, 2)) + 1; | ||
4860 | if (ext4_should_journal_data(inode)) | ||
4861 | credits += 2; | ||
4862 | handle = ext4_journal_start(inode, EXT4_HT_MISC, credits); | ||
4835 | if (IS_ERR(handle)) { | 4863 | if (IS_ERR(handle)) { |
4836 | ret = PTR_ERR(handle); | 4864 | ret = PTR_ERR(handle); |
4837 | ext4_std_error(inode->i_sb, ret); | 4865 | ext4_std_error(inode->i_sb, ret); |
@@ -4839,12 +4867,8 @@ static long ext4_zero_range(struct file *file, loff_t offset, | |||
4839 | } | 4867 | } |
4840 | 4868 | ||
4841 | inode->i_mtime = inode->i_ctime = ext4_current_time(inode); | 4869 | inode->i_mtime = inode->i_ctime = ext4_current_time(inode); |
4842 | |||
4843 | if (new_size) { | 4870 | if (new_size) { |
4844 | if (new_size > i_size_read(inode)) | 4871 | ext4_update_inode_size(inode, new_size); |
4845 | i_size_write(inode, new_size); | ||
4846 | if (new_size > EXT4_I(inode)->i_disksize) | ||
4847 | ext4_update_i_disksize(inode, new_size); | ||
4848 | } else { | 4872 | } else { |
4849 | /* | 4873 | /* |
4850 | * Mark that we allocate beyond EOF so the subsequent truncate | 4874 | * Mark that we allocate beyond EOF so the subsequent truncate |
@@ -4853,7 +4877,6 @@ static long ext4_zero_range(struct file *file, loff_t offset, | |||
4853 | if ((offset + len) > i_size_read(inode)) | 4877 | if ((offset + len) > i_size_read(inode)) |
4854 | ext4_set_inode_flag(inode, EXT4_INODE_EOFBLOCKS); | 4878 | ext4_set_inode_flag(inode, EXT4_INODE_EOFBLOCKS); |
4855 | } | 4879 | } |
4856 | |||
4857 | ext4_mark_inode_dirty(handle, inode); | 4880 | ext4_mark_inode_dirty(handle, inode); |
4858 | 4881 | ||
4859 | /* Zero out partial block at the edges of the range */ | 4882 | /* Zero out partial block at the edges of the range */ |
@@ -4880,13 +4903,11 @@ out_mutex: | |||
4880 | long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) | 4903 | long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) |
4881 | { | 4904 | { |
4882 | struct inode *inode = file_inode(file); | 4905 | struct inode *inode = file_inode(file); |
4883 | handle_t *handle; | ||
4884 | loff_t new_size = 0; | 4906 | loff_t new_size = 0; |
4885 | unsigned int max_blocks; | 4907 | unsigned int max_blocks; |
4886 | int ret = 0; | 4908 | int ret = 0; |
4887 | int flags; | 4909 | int flags; |
4888 | ext4_lblk_t lblk; | 4910 | ext4_lblk_t lblk; |
4889 | struct timespec tv; | ||
4890 | unsigned int blkbits = inode->i_blkbits; | 4911 | unsigned int blkbits = inode->i_blkbits; |
4891 | 4912 | ||
4892 | /* Return error if mode is not supported */ | 4913 | /* Return error if mode is not supported */ |
@@ -4937,36 +4958,15 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) | |||
4937 | goto out; | 4958 | goto out; |
4938 | } | 4959 | } |
4939 | 4960 | ||
4940 | ret = ext4_alloc_file_blocks(file, lblk, max_blocks, flags, mode); | 4961 | ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size, |
4962 | flags, mode); | ||
4941 | if (ret) | 4963 | if (ret) |
4942 | goto out; | 4964 | goto out; |
4943 | 4965 | ||
4944 | handle = ext4_journal_start(inode, EXT4_HT_INODE, 2); | 4966 | if (file->f_flags & O_SYNC && EXT4_SB(inode->i_sb)->s_journal) { |
4945 | if (IS_ERR(handle)) | 4967 | ret = jbd2_complete_transaction(EXT4_SB(inode->i_sb)->s_journal, |
4946 | goto out; | 4968 | EXT4_I(inode)->i_sync_tid); |
4947 | |||
4948 | tv = inode->i_ctime = ext4_current_time(inode); | ||
4949 | |||
4950 | if (new_size) { | ||
4951 | if (new_size > i_size_read(inode)) { | ||
4952 | i_size_write(inode, new_size); | ||
4953 | inode->i_mtime = tv; | ||
4954 | } | ||
4955 | if (new_size > EXT4_I(inode)->i_disksize) | ||
4956 | ext4_update_i_disksize(inode, new_size); | ||
4957 | } else { | ||
4958 | /* | ||
4959 | * Mark that we allocate beyond EOF so the subsequent truncate | ||
4960 | * can proceed even if the new size is the same as i_size. | ||
4961 | */ | ||
4962 | if ((offset + len) > i_size_read(inode)) | ||
4963 | ext4_set_inode_flag(inode, EXT4_INODE_EOFBLOCKS); | ||
4964 | } | 4969 | } |
4965 | ext4_mark_inode_dirty(handle, inode); | ||
4966 | if (file->f_flags & O_SYNC) | ||
4967 | ext4_handle_sync(handle); | ||
4968 | |||
4969 | ext4_journal_stop(handle); | ||
4970 | out: | 4970 | out: |
4971 | mutex_unlock(&inode->i_mutex); | 4971 | mutex_unlock(&inode->i_mutex); |
4972 | trace_ext4_fallocate_exit(inode, offset, max_blocks, ret); | 4972 | trace_ext4_fallocate_exit(inode, offset, max_blocks, ret); |
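Editor's note: from userspace the rewritten paths are reachable as below. A best-effort probe, assuming an ext4-backed working directory for the zero-range branch (other filesystems may return EOPNOTSUPP):

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

#ifndef FALLOC_FL_ZERO_RANGE
#define FALLOC_FL_ZERO_RANGE 0x10
#endif

int main(void)
{
        /* O_SYNC is the interesting flag: the rewritten ext4_fallocate()
         * no longer opens a journal handle of its own and instead waits
         * for the inode's last transaction to commit. */
        int fd = open("testfile", O_RDWR | O_CREAT | O_SYNC, 0600);
        if (fd < 0) {
                perror("open");
                return 1;
        }

        /* Plain preallocation: drives ext4_alloc_file_blocks(), which
         * now updates i_size/i_disksize inside its allocation loop. */
        if (fallocate(fd, 0, 0, 1 << 20) < 0)
                perror("fallocate");

        /* Zero range: the path whose partial_begin/partial_end split is
         * fixed above. */
        if (fallocate(fd, FALLOC_FL_ZERO_RANGE, 4096, 8192) < 0)
                perror("zero range");

        close(fd);
        return 0;
}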
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 367a60c07cf0..3aa26e9117c4 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -1055,27 +1055,11 @@ static int ext4_write_end(struct file *file, | |||
1055 | } else | 1055 | } else |
1056 | copied = block_write_end(file, mapping, pos, | 1056 | copied = block_write_end(file, mapping, pos, |
1057 | len, copied, page, fsdata); | 1057 | len, copied, page, fsdata); |
1058 | |||
1059 | /* | 1058 | /* |
1060 | * No need to use i_size_read() here, the i_size | 1059 | * it's important to update i_size while still holding page lock: |
1061 | * cannot change under us because we hole i_mutex. | ||
1062 | * | ||
1063 | * But it's important to update i_size while still holding page lock: | ||
1064 | * page writeout could otherwise come in and zero beyond i_size. | 1060 | * page writeout could otherwise come in and zero beyond i_size. |
1065 | */ | 1061 | */ |
1066 | if (pos + copied > inode->i_size) { | 1062 | i_size_changed = ext4_update_inode_size(inode, pos + copied); |
1067 | i_size_write(inode, pos + copied); | ||
1068 | i_size_changed = 1; | ||
1069 | } | ||
1070 | |||
1071 | if (pos + copied > EXT4_I(inode)->i_disksize) { | ||
1072 | /* We need to mark inode dirty even if | ||
1073 | * new_i_size is less that inode->i_size | ||
1074 | * but greater than i_disksize. (hint delalloc) | ||
1075 | */ | ||
1076 | ext4_update_i_disksize(inode, (pos + copied)); | ||
1077 | i_size_changed = 1; | ||
1078 | } | ||
1079 | unlock_page(page); | 1063 | unlock_page(page); |
1080 | page_cache_release(page); | 1064 | page_cache_release(page); |
1081 | 1065 | ||
@@ -1123,7 +1107,7 @@ static int ext4_journalled_write_end(struct file *file, | |||
1123 | int ret = 0, ret2; | 1107 | int ret = 0, ret2; |
1124 | int partial = 0; | 1108 | int partial = 0; |
1125 | unsigned from, to; | 1109 | unsigned from, to; |
1126 | loff_t new_i_size; | 1110 | int size_changed = 0; |
1127 | 1111 | ||
1128 | trace_ext4_journalled_write_end(inode, pos, len, copied); | 1112 | trace_ext4_journalled_write_end(inode, pos, len, copied); |
1129 | from = pos & (PAGE_CACHE_SIZE - 1); | 1113 | from = pos & (PAGE_CACHE_SIZE - 1); |
@@ -1146,20 +1130,18 @@ static int ext4_journalled_write_end(struct file *file, | |||
1146 | if (!partial) | 1130 | if (!partial) |
1147 | SetPageUptodate(page); | 1131 | SetPageUptodate(page); |
1148 | } | 1132 | } |
1149 | new_i_size = pos + copied; | 1133 | size_changed = ext4_update_inode_size(inode, pos + copied); |
1150 | if (new_i_size > inode->i_size) | ||
1151 | i_size_write(inode, pos+copied); | ||
1152 | ext4_set_inode_state(inode, EXT4_STATE_JDATA); | 1134 | ext4_set_inode_state(inode, EXT4_STATE_JDATA); |
1153 | EXT4_I(inode)->i_datasync_tid = handle->h_transaction->t_tid; | 1135 | EXT4_I(inode)->i_datasync_tid = handle->h_transaction->t_tid; |
1154 | if (new_i_size > EXT4_I(inode)->i_disksize) { | 1136 | unlock_page(page); |
1155 | ext4_update_i_disksize(inode, new_i_size); | 1137 | page_cache_release(page); |
1138 | |||
1139 | if (size_changed) { | ||
1156 | ret2 = ext4_mark_inode_dirty(handle, inode); | 1140 | ret2 = ext4_mark_inode_dirty(handle, inode); |
1157 | if (!ret) | 1141 | if (!ret) |
1158 | ret = ret2; | 1142 | ret = ret2; |
1159 | } | 1143 | } |
1160 | 1144 | ||
1161 | unlock_page(page); | ||
1162 | page_cache_release(page); | ||
1163 | if (pos + len > inode->i_size && ext4_can_truncate(inode)) | 1145 | if (pos + len > inode->i_size && ext4_can_truncate(inode)) |
1164 | /* if we have allocated more blocks and copied | 1146 | /* if we have allocated more blocks and copied |
1165 | * less. We will have blocks allocated outside | 1147 | * less. We will have blocks allocated outside |
@@ -2095,6 +2077,7 @@ static int mpage_map_and_submit_extent(handle_t *handle, | |||
2095 | struct ext4_map_blocks *map = &mpd->map; | 2077 | struct ext4_map_blocks *map = &mpd->map; |
2096 | int err; | 2078 | int err; |
2097 | loff_t disksize; | 2079 | loff_t disksize; |
2080 | int progress = 0; | ||
2098 | 2081 | ||
2099 | mpd->io_submit.io_end->offset = | 2082 | mpd->io_submit.io_end->offset = |
2100 | ((loff_t)map->m_lblk) << inode->i_blkbits; | 2083 | ((loff_t)map->m_lblk) << inode->i_blkbits; |
@@ -2111,8 +2094,11 @@ static int mpage_map_and_submit_extent(handle_t *handle, | |||
2111 | * is non-zero, a commit should free up blocks. | 2094 | * is non-zero, a commit should free up blocks. |
2112 | */ | 2095 | */ |
2113 | if ((err == -ENOMEM) || | 2096 | if ((err == -ENOMEM) || |
2114 | (err == -ENOSPC && ext4_count_free_clusters(sb))) | 2097 | (err == -ENOSPC && ext4_count_free_clusters(sb))) { |
2098 | if (progress) | ||
2099 | goto update_disksize; | ||
2115 | return err; | 2100 | return err; |
2101 | } | ||
2116 | ext4_msg(sb, KERN_CRIT, | 2102 | ext4_msg(sb, KERN_CRIT, |
2117 | "Delayed block allocation failed for " | 2103 | "Delayed block allocation failed for " |
2118 | "inode %lu at logical offset %llu with" | 2104 | "inode %lu at logical offset %llu with" |
@@ -2129,15 +2115,17 @@ static int mpage_map_and_submit_extent(handle_t *handle, | |||
2129 | *give_up_on_write = true; | 2115 | *give_up_on_write = true; |
2130 | return err; | 2116 | return err; |
2131 | } | 2117 | } |
2118 | progress = 1; | ||
2132 | /* | 2119 | /* |
2133 | * Update buffer state, submit mapped pages, and get us new | 2120 | * Update buffer state, submit mapped pages, and get us new |
2134 | * extent to map | 2121 | * extent to map |
2135 | */ | 2122 | */ |
2136 | err = mpage_map_and_submit_buffers(mpd); | 2123 | err = mpage_map_and_submit_buffers(mpd); |
2137 | if (err < 0) | 2124 | if (err < 0) |
2138 | return err; | 2125 | goto update_disksize; |
2139 | } while (map->m_len); | 2126 | } while (map->m_len); |
2140 | 2127 | ||
2128 | update_disksize: | ||
2141 | /* | 2129 | /* |
2142 | * Update on-disk size after IO is submitted. Races with | 2130 | * Update on-disk size after IO is submitted. Races with |
2143 | * truncate are avoided by checking i_size under i_data_sem. | 2131 | * truncate are avoided by checking i_size under i_data_sem. |
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 956027711faf..8b0f9ef517d6 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c | |||
@@ -1412,6 +1412,8 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, | |||
1412 | int last = first + count - 1; | 1412 | int last = first + count - 1; |
1413 | struct super_block *sb = e4b->bd_sb; | 1413 | struct super_block *sb = e4b->bd_sb; |
1414 | 1414 | ||
1415 | if (WARN_ON(count == 0)) | ||
1416 | return; | ||
1415 | BUG_ON(last >= (sb->s_blocksize << 3)); | 1417 | BUG_ON(last >= (sb->s_blocksize << 3)); |
1416 | assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group)); | 1418 | assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group)); |
1417 | /* Don't bother if the block group is corrupt. */ | 1419 | /* Don't bother if the block group is corrupt. */ |
@@ -3221,6 +3223,8 @@ static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac) | |||
3221 | int err; | 3223 | int err; |
3222 | 3224 | ||
3223 | if (pa == NULL) { | 3225 | if (pa == NULL) { |
3226 | if (ac->ac_f_ex.fe_len == 0) | ||
3227 | return; | ||
3224 | err = ext4_mb_load_buddy(ac->ac_sb, ac->ac_f_ex.fe_group, &e4b); | 3228 | err = ext4_mb_load_buddy(ac->ac_sb, ac->ac_f_ex.fe_group, &e4b); |
3225 | if (err) { | 3229 | if (err) { |
3226 | /* | 3230 | /* |
@@ -3235,6 +3239,7 @@ static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac) | |||
3235 | mb_free_blocks(ac->ac_inode, &e4b, ac->ac_f_ex.fe_start, | 3239 | mb_free_blocks(ac->ac_inode, &e4b, ac->ac_f_ex.fe_start, |
3236 | ac->ac_f_ex.fe_len); | 3240 | ac->ac_f_ex.fe_len); |
3237 | ext4_unlock_group(ac->ac_sb, ac->ac_f_ex.fe_group); | 3241 | ext4_unlock_group(ac->ac_sb, ac->ac_f_ex.fe_group); |
3242 | ext4_mb_unload_buddy(&e4b); | ||
3238 | return; | 3243 | return; |
3239 | } | 3244 | } |
3240 | if (pa->pa_type == MB_INODE_PA) | 3245 | if (pa->pa_type == MB_INODE_PA) |
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index b147a67baa0d..603e4ebbd0ac 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c | |||
@@ -1227,7 +1227,7 @@ static struct buffer_head * ext4_find_entry (struct inode *dir, | |||
1227 | buffer */ | 1227 | buffer */ |
1228 | int num = 0; | 1228 | int num = 0; |
1229 | ext4_lblk_t nblocks; | 1229 | ext4_lblk_t nblocks; |
1230 | int i, err; | 1230 | int i, err = 0; |
1231 | int namelen; | 1231 | int namelen; |
1232 | 1232 | ||
1233 | *res_dir = NULL; | 1233 | *res_dir = NULL; |
@@ -1264,7 +1264,11 @@ static struct buffer_head * ext4_find_entry (struct inode *dir, | |||
1264 | * return. Otherwise, fall back to doing a search the | 1264 | * return. Otherwise, fall back to doing a search the |
1265 | * old fashioned way. | 1265 | * old fashioned way. |
1266 | */ | 1266 | */ |
1267 | if (bh || (err != ERR_BAD_DX_DIR)) | 1267 | if (err == -ENOENT) |
1268 | return NULL; | ||
1269 | if (err && err != ERR_BAD_DX_DIR) | ||
1270 | return ERR_PTR(err); | ||
1271 | if (bh) | ||
1268 | return bh; | 1272 | return bh; |
1269 | dxtrace(printk(KERN_DEBUG "ext4_find_entry: dx failed, " | 1273 | dxtrace(printk(KERN_DEBUG "ext4_find_entry: dx failed, " |
1270 | "falling back\n")); | 1274 | "falling back\n")); |
@@ -1295,6 +1299,11 @@ restart: | |||
1295 | } | 1299 | } |
1296 | num++; | 1300 | num++; |
1297 | bh = ext4_getblk(NULL, dir, b++, 0, &err); | 1301 | bh = ext4_getblk(NULL, dir, b++, 0, &err); |
1302 | if (unlikely(err)) { | ||
1303 | if (ra_max == 0) | ||
1304 | return ERR_PTR(err); | ||
1305 | break; | ||
1306 | } | ||
1298 | bh_use[ra_max] = bh; | 1307 | bh_use[ra_max] = bh; |
1299 | if (bh) | 1308 | if (bh) |
1300 | ll_rw_block(READ | REQ_META | REQ_PRIO, | 1309 | ll_rw_block(READ | REQ_META | REQ_PRIO, |
@@ -1417,6 +1426,8 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi | |||
1417 | return ERR_PTR(-ENAMETOOLONG); | 1426 | return ERR_PTR(-ENAMETOOLONG); |
1418 | 1427 | ||
1419 | bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL); | 1428 | bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL); |
1429 | if (IS_ERR(bh)) | ||
1430 | return (struct dentry *) bh; | ||
1420 | inode = NULL; | 1431 | inode = NULL; |
1421 | if (bh) { | 1432 | if (bh) { |
1422 | __u32 ino = le32_to_cpu(de->inode); | 1433 | __u32 ino = le32_to_cpu(de->inode); |
@@ -1450,6 +1461,8 @@ struct dentry *ext4_get_parent(struct dentry *child) | |||
1450 | struct buffer_head *bh; | 1461 | struct buffer_head *bh; |
1451 | 1462 | ||
1452 | bh = ext4_find_entry(child->d_inode, &dotdot, &de, NULL); | 1463 | bh = ext4_find_entry(child->d_inode, &dotdot, &de, NULL); |
1464 | if (IS_ERR(bh)) | ||
1465 | return (struct dentry *) bh; | ||
1453 | if (!bh) | 1466 | if (!bh) |
1454 | return ERR_PTR(-ENOENT); | 1467 | return ERR_PTR(-ENOENT); |
1455 | ino = le32_to_cpu(de->inode); | 1468 | ino = le32_to_cpu(de->inode); |
@@ -2727,6 +2740,8 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry) | |||
2727 | 2740 | ||
2728 | retval = -ENOENT; | 2741 | retval = -ENOENT; |
2729 | bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL); | 2742 | bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL); |
2743 | if (IS_ERR(bh)) | ||
2744 | return PTR_ERR(bh); | ||
2730 | if (!bh) | 2745 | if (!bh) |
2731 | goto end_rmdir; | 2746 | goto end_rmdir; |
2732 | 2747 | ||
@@ -2794,6 +2809,8 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry) | |||
2794 | 2809 | ||
2795 | retval = -ENOENT; | 2810 | retval = -ENOENT; |
2796 | bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL); | 2811 | bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL); |
2812 | if (IS_ERR(bh)) | ||
2813 | return PTR_ERR(bh); | ||
2797 | if (!bh) | 2814 | if (!bh) |
2798 | goto end_unlink; | 2815 | goto end_unlink; |
2799 | 2816 | ||
@@ -3121,6 +3138,8 @@ static int ext4_find_delete_entry(handle_t *handle, struct inode *dir, | |||
3121 | struct ext4_dir_entry_2 *de; | 3138 | struct ext4_dir_entry_2 *de; |
3122 | 3139 | ||
3123 | bh = ext4_find_entry(dir, d_name, &de, NULL); | 3140 | bh = ext4_find_entry(dir, d_name, &de, NULL); |
3141 | if (IS_ERR(bh)) | ||
3142 | return PTR_ERR(bh); | ||
3124 | if (bh) { | 3143 | if (bh) { |
3125 | retval = ext4_delete_entry(handle, dir, de, bh); | 3144 | retval = ext4_delete_entry(handle, dir, de, bh); |
3126 | brelse(bh); | 3145 | brelse(bh); |
@@ -3128,7 +3147,8 @@ static int ext4_find_delete_entry(handle_t *handle, struct inode *dir, | |||
3128 | return retval; | 3147 | return retval; |
3129 | } | 3148 | } |
3130 | 3149 | ||
3131 | static void ext4_rename_delete(handle_t *handle, struct ext4_renament *ent) | 3150 | static void ext4_rename_delete(handle_t *handle, struct ext4_renament *ent, |
3151 | int force_reread) | ||
3132 | { | 3152 | { |
3133 | int retval; | 3153 | int retval; |
3134 | /* | 3154 | /* |
@@ -3140,7 +3160,8 @@ static void ext4_rename_delete(handle_t *handle, struct ext4_renament *ent) | |||
3140 | if (le32_to_cpu(ent->de->inode) != ent->inode->i_ino || | 3160 | if (le32_to_cpu(ent->de->inode) != ent->inode->i_ino || |
3141 | ent->de->name_len != ent->dentry->d_name.len || | 3161 | ent->de->name_len != ent->dentry->d_name.len || |
3142 | strncmp(ent->de->name, ent->dentry->d_name.name, | 3162 | strncmp(ent->de->name, ent->dentry->d_name.name, |
3143 | ent->de->name_len)) { | 3163 | ent->de->name_len) || |
3164 | force_reread) { | ||
3144 | retval = ext4_find_delete_entry(handle, ent->dir, | 3165 | retval = ext4_find_delete_entry(handle, ent->dir, |
3145 | &ent->dentry->d_name); | 3166 | &ent->dentry->d_name); |
3146 | } else { | 3167 | } else { |
@@ -3191,6 +3212,7 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
3191 | .dentry = new_dentry, | 3212 | .dentry = new_dentry, |
3192 | .inode = new_dentry->d_inode, | 3213 | .inode = new_dentry->d_inode, |
3193 | }; | 3214 | }; |
3215 | int force_reread; | ||
3194 | int retval; | 3216 | int retval; |
3195 | 3217 | ||
3196 | dquot_initialize(old.dir); | 3218 | dquot_initialize(old.dir); |
@@ -3202,6 +3224,8 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
3202 | dquot_initialize(new.inode); | 3224 | dquot_initialize(new.inode); |
3203 | 3225 | ||
3204 | old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de, NULL); | 3226 | old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de, NULL); |
3227 | if (IS_ERR(old.bh)) | ||
3228 | return PTR_ERR(old.bh); | ||
3205 | /* | 3229 | /* |
3206 | * Check for inode number is _not_ due to possible IO errors. | 3230 | * Check for inode number is _not_ due to possible IO errors. |
3207 | * We might rmdir the source, keep it as pwd of some process | 3231 | * We might rmdir the source, keep it as pwd of some process |
@@ -3214,6 +3238,11 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
3214 | 3238 | ||
3215 | new.bh = ext4_find_entry(new.dir, &new.dentry->d_name, | 3239 | new.bh = ext4_find_entry(new.dir, &new.dentry->d_name, |
3216 | &new.de, &new.inlined); | 3240 | &new.de, &new.inlined); |
3241 | if (IS_ERR(new.bh)) { | ||
3242 | retval = PTR_ERR(new.bh); | ||
3243 | new.bh = NULL; | ||
3244 | goto end_rename; | ||
3245 | } | ||
3217 | if (new.bh) { | 3246 | if (new.bh) { |
3218 | if (!new.inode) { | 3247 | if (!new.inode) { |
3219 | brelse(new.bh); | 3248 | brelse(new.bh); |
@@ -3246,6 +3275,15 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
3246 | if (retval) | 3275 | if (retval) |
3247 | goto end_rename; | 3276 | goto end_rename; |
3248 | } | 3277 | } |
3278 | /* | ||
3279 | * If we're renaming a file within an inline_data dir and adding or | ||
3280 | * setting the new dirent causes a conversion from inline_data to | ||
3281 | * extents/blockmap, we need to force the dirent delete code to | ||
3282 | * re-read the directory, or else we end up trying to delete a dirent | ||
3283 | * from what is now the extent tree root (or a block map). | ||
3284 | */ | ||
3285 | force_reread = (new.dir->i_ino == old.dir->i_ino && | ||
3286 | ext4_test_inode_flag(new.dir, EXT4_INODE_INLINE_DATA)); | ||
3249 | if (!new.bh) { | 3287 | if (!new.bh) { |
3250 | retval = ext4_add_entry(handle, new.dentry, old.inode); | 3288 | retval = ext4_add_entry(handle, new.dentry, old.inode); |
3251 | if (retval) | 3289 | if (retval) |
@@ -3256,6 +3294,9 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
3256 | if (retval) | 3294 | if (retval) |
3257 | goto end_rename; | 3295 | goto end_rename; |
3258 | } | 3296 | } |
3297 | if (force_reread) | ||
3298 | force_reread = !ext4_test_inode_flag(new.dir, | ||
3299 | EXT4_INODE_INLINE_DATA); | ||
3259 | 3300 | ||
3260 | /* | 3301 | /* |
3261 | * Like most other Unix systems, set the ctime for inodes on a | 3302 | * Like most other Unix systems, set the ctime for inodes on a |
@@ -3267,7 +3308,7 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
3267 | /* | 3308 | /* |
3268 | * ok, that's it | 3309 | * ok, that's it |
3269 | */ | 3310 | */ |
3270 | ext4_rename_delete(handle, &old); | 3311 | ext4_rename_delete(handle, &old, force_reread); |
3271 | 3312 | ||
3272 | if (new.inode) { | 3313 | if (new.inode) { |
3273 | ext4_dec_count(handle, new.inode); | 3314 | ext4_dec_count(handle, new.inode); |
@@ -3330,6 +3371,8 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
3330 | 3371 | ||
3331 | old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, | 3372 | old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, |
3332 | &old.de, &old.inlined); | 3373 | &old.de, &old.inlined); |
3374 | if (IS_ERR(old.bh)) | ||
3375 | return PTR_ERR(old.bh); | ||
3333 | /* | 3376 | /* |
3334 | * Check for inode number is _not_ due to possible IO errors. | 3377 | * Check for inode number is _not_ due to possible IO errors. |
3335 | * We might rmdir the source, keep it as pwd of some process | 3378 | * We might rmdir the source, keep it as pwd of some process |
@@ -3342,6 +3385,11 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
3342 | 3385 | ||
3343 | new.bh = ext4_find_entry(new.dir, &new.dentry->d_name, | 3386 | new.bh = ext4_find_entry(new.dir, &new.dentry->d_name, |
3344 | &new.de, &new.inlined); | 3387 | &new.de, &new.inlined); |
3388 | if (IS_ERR(new.bh)) { | ||
3389 | retval = PTR_ERR(new.bh); | ||
3390 | new.bh = NULL; | ||
3391 | goto end_rename; | ||
3392 | } | ||
3345 | 3393 | ||
3346 | /* RENAME_EXCHANGE case: old *and* new must both exist */ | 3394 | /* RENAME_EXCHANGE case: old *and* new must both exist */ |
3347 | if (!new.bh || le32_to_cpu(new.de->inode) != new.inode->i_ino) | 3395 | if (!new.bh || le32_to_cpu(new.de->inode) != new.inode->i_ino) |
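Editor's note: read together, the force_reread pieces form a small handshake. An annotated recap (code lines as in the hunks above, comments added here):

/* Step 1 — before adding the target dirent, note whether both names
 * live in the same inline_data directory: */
force_reread = (new.dir->i_ino == old.dir->i_ino &&
                ext4_test_inode_flag(new.dir, EXT4_INODE_INLINE_DATA));

/* Step 2 — after ext4_add_entry(): if the flag is gone, the add
 * converted the directory to extents/blockmap and relocated every
 * dirent, so the cached old.de pointer is stale: */
if (force_reread)
        force_reread = !ext4_test_inode_flag(new.dir,
                                             EXT4_INODE_INLINE_DATA);

/* Step 3 — ext4_rename_delete(handle, &old, force_reread) then
 * re-looks the entry up by name instead of trusting old.de. */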
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index bb0e80f03e2e..1e43b905ff98 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c | |||
@@ -575,6 +575,7 @@ handle_bb: | |||
575 | bh = bclean(handle, sb, block); | 575 | bh = bclean(handle, sb, block); |
576 | if (IS_ERR(bh)) { | 576 | if (IS_ERR(bh)) { |
577 | err = PTR_ERR(bh); | 577 | err = PTR_ERR(bh); |
578 | bh = NULL; | ||
578 | goto out; | 579 | goto out; |
579 | } | 580 | } |
580 | overhead = ext4_group_overhead_blocks(sb, group); | 581 | overhead = ext4_group_overhead_blocks(sb, group); |
@@ -603,6 +604,7 @@ handle_ib: | |||
603 | bh = bclean(handle, sb, block); | 604 | bh = bclean(handle, sb, block); |
604 | if (IS_ERR(bh)) { | 605 | if (IS_ERR(bh)) { |
605 | err = PTR_ERR(bh); | 606 | err = PTR_ERR(bh); |
607 | bh = NULL; | ||
606 | goto out; | 608 | goto out; |
607 | } | 609 | } |
608 | 610 | ||
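Editor's note: both hunks adopt the same error-path idiom — once PTR_ERR() has decoded the failure, the pointer is poisoned to NULL so the shared cleanup label can call brelse() unconditionally. Sketch of the resulting shape:

bh = bclean(handle, sb, block);
if (IS_ERR(bh)) {
        err = PTR_ERR(bh);
        bh = NULL;      /* brelse(NULL) is a no-op at the out: label;
                         * brelse() on an ERR_PTR value would oops */
        goto out;
}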
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 32b43ad154b9..05c159218bc2 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
@@ -3181,9 +3181,9 @@ static int set_journal_csum_feature_set(struct super_block *sb) | |||
3181 | 3181 | ||
3182 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, | 3182 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, |
3183 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) { | 3183 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) { |
3184 | /* journal checksum v2 */ | 3184 | /* journal checksum v3 */ |
3185 | compat = 0; | 3185 | compat = 0; |
3186 | incompat = JBD2_FEATURE_INCOMPAT_CSUM_V2; | 3186 | incompat = JBD2_FEATURE_INCOMPAT_CSUM_V3; |
3187 | } else { | 3187 | } else { |
3188 | /* journal checksum v1 */ | 3188 | /* journal checksum v1 */ |
3189 | compat = JBD2_FEATURE_COMPAT_CHECKSUM; | 3189 | compat = JBD2_FEATURE_COMPAT_CHECKSUM; |
@@ -3205,6 +3205,7 @@ static int set_journal_csum_feature_set(struct super_block *sb) | |||
3205 | jbd2_journal_clear_features(sbi->s_journal, | 3205 | jbd2_journal_clear_features(sbi->s_journal, |
3206 | JBD2_FEATURE_COMPAT_CHECKSUM, 0, | 3206 | JBD2_FEATURE_COMPAT_CHECKSUM, 0, |
3207 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT | | 3207 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT | |
3208 | JBD2_FEATURE_INCOMPAT_CSUM_V3 | | ||
3208 | JBD2_FEATURE_INCOMPAT_CSUM_V2); | 3209 | JBD2_FEATURE_INCOMPAT_CSUM_V2); |
3209 | } | 3210 | } |
3210 | 3211 | ||
@@ -3891,7 +3892,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
3891 | /* Register extent status tree shrinker */ | 3892 | /* Register extent status tree shrinker */ |
3892 | ext4_es_register_shrinker(sbi); | 3893 | ext4_es_register_shrinker(sbi); |
3893 | 3894 | ||
3894 | if ((err = percpu_counter_init(&sbi->s_extent_cache_cnt, 0)) != 0) { | 3895 | err = percpu_counter_init(&sbi->s_extent_cache_cnt, 0, GFP_KERNEL); |
3896 | if (err) { | ||
3895 | ext4_msg(sb, KERN_ERR, "insufficient memory"); | 3897 | ext4_msg(sb, KERN_ERR, "insufficient memory"); |
3896 | goto failed_mount3; | 3898 | goto failed_mount3; |
3897 | } | 3899 | } |
@@ -4105,17 +4107,20 @@ no_journal: | |||
4105 | block = ext4_count_free_clusters(sb); | 4107 | block = ext4_count_free_clusters(sb); |
4106 | ext4_free_blocks_count_set(sbi->s_es, | 4108 | ext4_free_blocks_count_set(sbi->s_es, |
4107 | EXT4_C2B(sbi, block)); | 4109 | EXT4_C2B(sbi, block)); |
4108 | err = percpu_counter_init(&sbi->s_freeclusters_counter, block); | 4110 | err = percpu_counter_init(&sbi->s_freeclusters_counter, block, |
4111 | GFP_KERNEL); | ||
4109 | if (!err) { | 4112 | if (!err) { |
4110 | unsigned long freei = ext4_count_free_inodes(sb); | 4113 | unsigned long freei = ext4_count_free_inodes(sb); |
4111 | sbi->s_es->s_free_inodes_count = cpu_to_le32(freei); | 4114 | sbi->s_es->s_free_inodes_count = cpu_to_le32(freei); |
4112 | err = percpu_counter_init(&sbi->s_freeinodes_counter, freei); | 4115 | err = percpu_counter_init(&sbi->s_freeinodes_counter, freei, |
4116 | GFP_KERNEL); | ||
4113 | } | 4117 | } |
4114 | if (!err) | 4118 | if (!err) |
4115 | err = percpu_counter_init(&sbi->s_dirs_counter, | 4119 | err = percpu_counter_init(&sbi->s_dirs_counter, |
4116 | ext4_count_dirs(sb)); | 4120 | ext4_count_dirs(sb), GFP_KERNEL); |
4117 | if (!err) | 4121 | if (!err) |
4118 | err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0); | 4122 | err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0, |
4123 | GFP_KERNEL); | ||
4119 | if (err) { | 4124 | if (err) { |
4120 | ext4_msg(sb, KERN_ERR, "insufficient memory"); | 4125 | ext4_msg(sb, KERN_ERR, "insufficient memory"); |
4121 | goto failed_mount6; | 4126 | goto failed_mount6; |
diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig index 214fe1054fce..736a348509f7 100644 --- a/fs/f2fs/Kconfig +++ b/fs/f2fs/Kconfig | |||
@@ -23,7 +23,7 @@ config F2FS_STAT_FS | |||
23 | mounted as f2fs. Each file shows the whole f2fs information. | 23 | mounted as f2fs. Each file shows the whole f2fs information. |
24 | 24 | ||
25 | /sys/kernel/debug/f2fs/status includes: | 25 | /sys/kernel/debug/f2fs/status includes: |
26 | - major file system information managed by f2fs currently | 26 | - major filesystem information managed by f2fs currently |
27 | - average SIT information about whole segments | 27 | - average SIT information about whole segments |
28 | - current memory footprint consumed by f2fs. | 28 | - current memory footprint consumed by f2fs. |
29 | 29 | ||
@@ -68,6 +68,6 @@ config F2FS_CHECK_FS | |||
68 | bool "F2FS consistency checking feature" | 68 | bool "F2FS consistency checking feature" |
69 | depends on F2FS_FS | 69 | depends on F2FS_FS |
70 | help | 70 | help |
71 | Enables BUG_ONs which check the file system consistency in runtime. | 71 | Enables BUG_ONs which check the filesystem consistency at runtime. |
72 | 72 | ||
73 | If you want to improve the performance, say N. | 73 | If you want to improve the performance, say N. |
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 6aeed5bada52..dd10a031c052 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c | |||
@@ -72,7 +72,22 @@ out: | |||
72 | return page; | 72 | return page; |
73 | } | 73 | } |
74 | 74 | ||
75 | static inline int get_max_meta_blks(struct f2fs_sb_info *sbi, int type) | 75 | struct page *get_meta_page_ra(struct f2fs_sb_info *sbi, pgoff_t index) |
76 | { | ||
77 | bool readahead = false; | ||
78 | struct page *page; | ||
79 | |||
80 | page = find_get_page(META_MAPPING(sbi), index); | ||
81 | if (!page || (page && !PageUptodate(page))) | ||
82 | readahead = true; | ||
83 | f2fs_put_page(page, 0); | ||
84 | |||
85 | if (readahead) | ||
86 | ra_meta_pages(sbi, index, MAX_BIO_BLOCKS(sbi), META_POR); | ||
87 | return get_meta_page(sbi, index); | ||
88 | } | ||
89 | |||
90 | static inline block_t get_max_meta_blks(struct f2fs_sb_info *sbi, int type) | ||
76 | { | 91 | { |
77 | switch (type) { | 92 | switch (type) { |
78 | case META_NAT: | 93 | case META_NAT: |
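get_meta_page_ra() above is a readahead-aware wrapper for get_meta_page(): it probes the page cache first and only issues a MAX_BIO_BLOCKS-sized readahead when the block is missing or stale. The committed test `!page || (page && !PageUptodate(page))` reduces under short-circuit evaluation to the simpler form below (sketch only):

	page = find_get_page(META_MAPPING(sbi), index);
	if (!page || !PageUptodate(page))
		readahead = true;	/* absent or stale: prefetch */
	f2fs_put_page(page, 0);		/* drops the find_get_page ref;
					 * tolerates a NULL page, as the
					 * committed code relies on */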
@@ -82,6 +97,8 @@ static inline int get_max_meta_blks(struct f2fs_sb_info *sbi, int type) | |||
82 | case META_SSA: | 97 | case META_SSA: |
83 | case META_CP: | 98 | case META_CP: |
84 | return 0; | 99 | return 0; |
100 | case META_POR: | ||
101 | return MAX_BLKADDR(sbi); | ||
85 | default: | 102 | default: |
86 | BUG(); | 103 | BUG(); |
87 | } | 104 | } |
@@ -90,12 +107,12 @@ static inline int get_max_meta_blks(struct f2fs_sb_info *sbi, int type) | |||
90 | /* | 107 | /* |
91 | * Readahead CP/NAT/SIT/SSA pages | 108 | * Readahead CP/NAT/SIT/SSA pages |
92 | */ | 109 | */ |
93 | int ra_meta_pages(struct f2fs_sb_info *sbi, int start, int nrpages, int type) | 110 | int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type) |
94 | { | 111 | { |
95 | block_t prev_blk_addr = 0; | 112 | block_t prev_blk_addr = 0; |
96 | struct page *page; | 113 | struct page *page; |
97 | int blkno = start; | 114 | block_t blkno = start; |
98 | int max_blks = get_max_meta_blks(sbi, type); | 115 | block_t max_blks = get_max_meta_blks(sbi, type); |
99 | 116 | ||
100 | struct f2fs_io_info fio = { | 117 | struct f2fs_io_info fio = { |
101 | .type = META, | 118 | .type = META, |
@@ -125,7 +142,11 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, int start, int nrpages, int type) | |||
125 | break; | 142 | break; |
126 | case META_SSA: | 143 | case META_SSA: |
127 | case META_CP: | 144 | case META_CP: |
128 | /* get ssa/cp block addr */ | 145 | case META_POR: |
146 | if (unlikely(blkno >= max_blks)) | ||
147 | goto out; | ||
148 | if (unlikely(blkno < SEG0_BLKADDR(sbi))) | ||
149 | goto out; | ||
129 | blk_addr = blkno; | 150 | blk_addr = blkno; |
130 | break; | 151 | break; |
131 | default: | 152 | default: |
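For META_POR the block numbers being prefetched come from recovery data that may be damaged, so ra_meta_pages() now rejects anything outside the main area before issuing I/O. The two checks above amount to a window test (sketch; the helper name is hypothetical, the macros are the ones used in the hunk):

	static inline bool por_blkaddr_valid(struct f2fs_sb_info *sbi,
						block_t blkno)
	{
		/* usable raw addresses lie in [SEG0_BLKADDR, MAX_BLKADDR) */
		return blkno >= SEG0_BLKADDR(sbi) && blkno < MAX_BLKADDR(sbi);
	}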
@@ -151,8 +172,7 @@ out: | |||
151 | static int f2fs_write_meta_page(struct page *page, | 172 | static int f2fs_write_meta_page(struct page *page, |
152 | struct writeback_control *wbc) | 173 | struct writeback_control *wbc) |
153 | { | 174 | { |
154 | struct inode *inode = page->mapping->host; | 175 | struct f2fs_sb_info *sbi = F2FS_P_SB(page); |
155 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | ||
156 | 176 | ||
157 | trace_f2fs_writepage(page, META); | 177 | trace_f2fs_writepage(page, META); |
158 | 178 | ||
@@ -160,14 +180,11 @@ static int f2fs_write_meta_page(struct page *page, | |||
160 | goto redirty_out; | 180 | goto redirty_out; |
161 | if (wbc->for_reclaim) | 181 | if (wbc->for_reclaim) |
162 | goto redirty_out; | 182 | goto redirty_out; |
163 | 183 | if (unlikely(f2fs_cp_error(sbi))) | |
164 | /* Should not write any meta pages, if any IO error was occurred */ | 184 | goto redirty_out; |
165 | if (unlikely(is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ERROR_FLAG))) | ||
166 | goto no_write; | ||
167 | 185 | ||
168 | f2fs_wait_on_page_writeback(page, META); | 186 | f2fs_wait_on_page_writeback(page, META); |
169 | write_meta_page(sbi, page); | 187 | write_meta_page(sbi, page); |
170 | no_write: | ||
171 | dec_page_count(sbi, F2FS_DIRTY_META); | 188 | dec_page_count(sbi, F2FS_DIRTY_META); |
172 | unlock_page(page); | 189 | unlock_page(page); |
173 | return 0; | 190 | return 0; |
@@ -180,7 +197,7 @@ redirty_out: | |||
180 | static int f2fs_write_meta_pages(struct address_space *mapping, | 197 | static int f2fs_write_meta_pages(struct address_space *mapping, |
181 | struct writeback_control *wbc) | 198 | struct writeback_control *wbc) |
182 | { | 199 | { |
183 | struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb); | 200 | struct f2fs_sb_info *sbi = F2FS_M_SB(mapping); |
184 | long diff, written; | 201 | long diff, written; |
185 | 202 | ||
186 | trace_f2fs_writepages(mapping->host, wbc, META); | 203 | trace_f2fs_writepages(mapping->host, wbc, META); |
@@ -262,15 +279,12 @@ continue_unlock: | |||
262 | 279 | ||
263 | static int f2fs_set_meta_page_dirty(struct page *page) | 280 | static int f2fs_set_meta_page_dirty(struct page *page) |
264 | { | 281 | { |
265 | struct address_space *mapping = page->mapping; | ||
266 | struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb); | ||
267 | |||
268 | trace_f2fs_set_page_dirty(page, META); | 282 | trace_f2fs_set_page_dirty(page, META); |
269 | 283 | ||
270 | SetPageUptodate(page); | 284 | SetPageUptodate(page); |
271 | if (!PageDirty(page)) { | 285 | if (!PageDirty(page)) { |
272 | __set_page_dirty_nobuffers(page); | 286 | __set_page_dirty_nobuffers(page); |
273 | inc_page_count(sbi, F2FS_DIRTY_META); | 287 | inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_META); |
274 | return 1; | 288 | return 1; |
275 | } | 289 | } |
276 | return 0; | 290 | return 0; |
@@ -348,7 +362,7 @@ bool exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode) | |||
348 | return e ? true : false; | 362 | return e ? true : false; |
349 | } | 363 | } |
350 | 364 | ||
351 | static void release_dirty_inode(struct f2fs_sb_info *sbi) | 365 | void release_dirty_inode(struct f2fs_sb_info *sbi) |
352 | { | 366 | { |
353 | struct ino_entry *e, *tmp; | 367 | struct ino_entry *e, *tmp; |
354 | int i; | 368 | int i; |
@@ -381,7 +395,7 @@ int acquire_orphan_inode(struct f2fs_sb_info *sbi) | |||
381 | void release_orphan_inode(struct f2fs_sb_info *sbi) | 395 | void release_orphan_inode(struct f2fs_sb_info *sbi) |
382 | { | 396 | { |
383 | spin_lock(&sbi->ino_lock[ORPHAN_INO]); | 397 | spin_lock(&sbi->ino_lock[ORPHAN_INO]); |
384 | f2fs_bug_on(sbi->n_orphans == 0); | 398 | f2fs_bug_on(sbi, sbi->n_orphans == 0); |
385 | sbi->n_orphans--; | 399 | sbi->n_orphans--; |
386 | spin_unlock(&sbi->ino_lock[ORPHAN_INO]); | 400 | spin_unlock(&sbi->ino_lock[ORPHAN_INO]); |
387 | } | 401 | } |
@@ -401,7 +415,7 @@ void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) | |||
401 | static void recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) | 415 | static void recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) |
402 | { | 416 | { |
403 | struct inode *inode = f2fs_iget(sbi->sb, ino); | 417 | struct inode *inode = f2fs_iget(sbi->sb, ino); |
404 | f2fs_bug_on(IS_ERR(inode)); | 418 | f2fs_bug_on(sbi, IS_ERR(inode)); |
405 | clear_nlink(inode); | 419 | clear_nlink(inode); |
406 | 420 | ||
407 | /* truncate all the data during iput */ | 421 | /* truncate all the data during iput */ |
@@ -446,8 +460,8 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk) | |||
446 | struct f2fs_orphan_block *orphan_blk = NULL; | 460 | struct f2fs_orphan_block *orphan_blk = NULL; |
447 | unsigned int nentries = 0; | 461 | unsigned int nentries = 0; |
448 | unsigned short index; | 462 | unsigned short index; |
449 | unsigned short orphan_blocks = (unsigned short)((sbi->n_orphans + | 463 | unsigned short orphan_blocks = |
450 | (F2FS_ORPHANS_PER_BLOCK - 1)) / F2FS_ORPHANS_PER_BLOCK); | 464 | (unsigned short)GET_ORPHAN_BLOCKS(sbi->n_orphans); |
451 | struct page *page = NULL; | 465 | struct page *page = NULL; |
452 | struct ino_entry *orphan = NULL; | 466 | struct ino_entry *orphan = NULL; |
453 | 467 | ||
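GET_ORPHAN_BLOCKS(), used here and again in do_checkpoint() below, replaces the open-coded round-up on the left-hand side. Its definition is not part of this diff, but from both call sites it is presumably the same ceiling division (sketch):

	/* blocks needed to hold n orphan entries, rounded up */
	#define GET_ORPHAN_BLOCKS(n)	(((n) + F2FS_ORPHANS_PER_BLOCK - 1) / \
						F2FS_ORPHANS_PER_BLOCK)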
@@ -462,7 +476,7 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk) | |||
462 | list_for_each_entry(orphan, head, list) { | 476 | list_for_each_entry(orphan, head, list) { |
463 | if (!page) { | 477 | if (!page) { |
464 | page = find_get_page(META_MAPPING(sbi), start_blk++); | 478 | page = find_get_page(META_MAPPING(sbi), start_blk++); |
465 | f2fs_bug_on(!page); | 479 | f2fs_bug_on(sbi, !page); |
466 | orphan_blk = | 480 | orphan_blk = |
467 | (struct f2fs_orphan_block *)page_address(page); | 481 | (struct f2fs_orphan_block *)page_address(page); |
468 | memset(orphan_blk, 0, sizeof(*orphan_blk)); | 482 | memset(orphan_blk, 0, sizeof(*orphan_blk)); |
@@ -622,7 +636,7 @@ fail_no_cp: | |||
622 | 636 | ||
623 | static int __add_dirty_inode(struct inode *inode, struct dir_inode_entry *new) | 637 | static int __add_dirty_inode(struct inode *inode, struct dir_inode_entry *new) |
624 | { | 638 | { |
625 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | 639 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); |
626 | 640 | ||
627 | if (is_inode_flag_set(F2FS_I(inode), FI_DIRTY_DIR)) | 641 | if (is_inode_flag_set(F2FS_I(inode), FI_DIRTY_DIR)) |
628 | return -EEXIST; | 642 | return -EEXIST; |
@@ -634,32 +648,38 @@ static int __add_dirty_inode(struct inode *inode, struct dir_inode_entry *new) | |||
634 | return 0; | 648 | return 0; |
635 | } | 649 | } |
636 | 650 | ||
637 | void set_dirty_dir_page(struct inode *inode, struct page *page) | 651 | void update_dirty_page(struct inode *inode, struct page *page) |
638 | { | 652 | { |
639 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | 653 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); |
640 | struct dir_inode_entry *new; | 654 | struct dir_inode_entry *new; |
641 | int ret = 0; | 655 | int ret = 0; |
642 | 656 | ||
643 | if (!S_ISDIR(inode->i_mode)) | 657 | if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode)) |
644 | return; | 658 | return; |
645 | 659 | ||
660 | if (!S_ISDIR(inode->i_mode)) { | ||
661 | inode_inc_dirty_pages(inode); | ||
662 | goto out; | ||
663 | } | ||
664 | |||
646 | new = f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS); | 665 | new = f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS); |
647 | new->inode = inode; | 666 | new->inode = inode; |
648 | INIT_LIST_HEAD(&new->list); | 667 | INIT_LIST_HEAD(&new->list); |
649 | 668 | ||
650 | spin_lock(&sbi->dir_inode_lock); | 669 | spin_lock(&sbi->dir_inode_lock); |
651 | ret = __add_dirty_inode(inode, new); | 670 | ret = __add_dirty_inode(inode, new); |
652 | inode_inc_dirty_dents(inode); | 671 | inode_inc_dirty_pages(inode); |
653 | SetPagePrivate(page); | ||
654 | spin_unlock(&sbi->dir_inode_lock); | 672 | spin_unlock(&sbi->dir_inode_lock); |
655 | 673 | ||
656 | if (ret) | 674 | if (ret) |
657 | kmem_cache_free(inode_entry_slab, new); | 675 | kmem_cache_free(inode_entry_slab, new); |
676 | out: | ||
677 | SetPagePrivate(page); | ||
658 | } | 678 | } |
659 | 679 | ||
660 | void add_dirty_dir_inode(struct inode *inode) | 680 | void add_dirty_dir_inode(struct inode *inode) |
661 | { | 681 | { |
662 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | 682 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); |
663 | struct dir_inode_entry *new = | 683 | struct dir_inode_entry *new = |
664 | f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS); | 684 | f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS); |
665 | int ret = 0; | 685 | int ret = 0; |
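The rename from set_dirty_dir_page() to update_dirty_page() above reflects a real behavior change: dirty-page accounting now covers regular files as well, which the atomic-write paths elsewhere in this series rely on. Condensed control flow (a sketch; the committed directory branch inlines the list insertion under dir_inode_lock rather than calling a helper):

	if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode))
		return;				/* other file types untracked */

	inode_inc_dirty_pages(inode);		/* dirs and regular files */
	if (S_ISDIR(inode->i_mode))
		link_into_dir_list(inode);	/* hypothetical name for the
						 * locked __add_dirty_inode step */
	SetPagePrivate(page);			/* both paths tag the page */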
@@ -677,14 +697,14 @@ void add_dirty_dir_inode(struct inode *inode) | |||
677 | 697 | ||
678 | void remove_dirty_dir_inode(struct inode *inode) | 698 | void remove_dirty_dir_inode(struct inode *inode) |
679 | { | 699 | { |
680 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | 700 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); |
681 | struct dir_inode_entry *entry; | 701 | struct dir_inode_entry *entry; |
682 | 702 | ||
683 | if (!S_ISDIR(inode->i_mode)) | 703 | if (!S_ISDIR(inode->i_mode)) |
684 | return; | 704 | return; |
685 | 705 | ||
686 | spin_lock(&sbi->dir_inode_lock); | 706 | spin_lock(&sbi->dir_inode_lock); |
687 | if (get_dirty_dents(inode) || | 707 | if (get_dirty_pages(inode) || |
688 | !is_inode_flag_set(F2FS_I(inode), FI_DIRTY_DIR)) { | 708 | !is_inode_flag_set(F2FS_I(inode), FI_DIRTY_DIR)) { |
689 | spin_unlock(&sbi->dir_inode_lock); | 709 | spin_unlock(&sbi->dir_inode_lock); |
690 | return; | 710 | return; |
@@ -737,7 +757,7 @@ retry: | |||
737 | /* | 757 | /* |
738 | * Freeze all the FS-operations for checkpoint. | 758 | * Freeze all the FS-operations for checkpoint. |
739 | */ | 759 | */ |
740 | static void block_operations(struct f2fs_sb_info *sbi) | 760 | static int block_operations(struct f2fs_sb_info *sbi) |
741 | { | 761 | { |
742 | struct writeback_control wbc = { | 762 | struct writeback_control wbc = { |
743 | .sync_mode = WB_SYNC_ALL, | 763 | .sync_mode = WB_SYNC_ALL, |
@@ -745,6 +765,7 @@ static void block_operations(struct f2fs_sb_info *sbi) | |||
745 | .for_reclaim = 0, | 765 | .for_reclaim = 0, |
746 | }; | 766 | }; |
747 | struct blk_plug plug; | 767 | struct blk_plug plug; |
768 | int err = 0; | ||
748 | 769 | ||
749 | blk_start_plug(&plug); | 770 | blk_start_plug(&plug); |
750 | 771 | ||
@@ -754,11 +775,15 @@ retry_flush_dents: | |||
754 | if (get_pages(sbi, F2FS_DIRTY_DENTS)) { | 775 | if (get_pages(sbi, F2FS_DIRTY_DENTS)) { |
755 | f2fs_unlock_all(sbi); | 776 | f2fs_unlock_all(sbi); |
756 | sync_dirty_dir_inodes(sbi); | 777 | sync_dirty_dir_inodes(sbi); |
778 | if (unlikely(f2fs_cp_error(sbi))) { | ||
779 | err = -EIO; | ||
780 | goto out; | ||
781 | } | ||
757 | goto retry_flush_dents; | 782 | goto retry_flush_dents; |
758 | } | 783 | } |
759 | 784 | ||
760 | /* | 785 | /* |
761 | * POR: we should ensure that there is no dirty node pages | 786 | * POR: we should ensure that there are no dirty node pages |
762 | * until finishing nat/sit flush. | 787 | * until finishing nat/sit flush. |
763 | */ | 788 | */ |
764 | retry_flush_nodes: | 789 | retry_flush_nodes: |
@@ -767,9 +792,16 @@ retry_flush_nodes: | |||
767 | if (get_pages(sbi, F2FS_DIRTY_NODES)) { | 792 | if (get_pages(sbi, F2FS_DIRTY_NODES)) { |
768 | up_write(&sbi->node_write); | 793 | up_write(&sbi->node_write); |
769 | sync_node_pages(sbi, 0, &wbc); | 794 | sync_node_pages(sbi, 0, &wbc); |
795 | if (unlikely(f2fs_cp_error(sbi))) { | ||
796 | f2fs_unlock_all(sbi); | ||
797 | err = -EIO; | ||
798 | goto out; | ||
799 | } | ||
770 | goto retry_flush_nodes; | 800 | goto retry_flush_nodes; |
771 | } | 801 | } |
802 | out: | ||
772 | blk_finish_plug(&plug); | 803 | blk_finish_plug(&plug); |
804 | return err; | ||
773 | } | 805 | } |
774 | 806 | ||
775 | static void unblock_operations(struct f2fs_sb_info *sbi) | 807 | static void unblock_operations(struct f2fs_sb_info *sbi) |
@@ -793,11 +825,12 @@ static void wait_on_all_pages_writeback(struct f2fs_sb_info *sbi) | |||
793 | finish_wait(&sbi->cp_wait, &wait); | 825 | finish_wait(&sbi->cp_wait, &wait); |
794 | } | 826 | } |
795 | 827 | ||
796 | static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) | 828 | static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) |
797 | { | 829 | { |
798 | struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); | 830 | struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); |
799 | struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE); | 831 | struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE); |
800 | nid_t last_nid = 0; | 832 | struct f2fs_nm_info *nm_i = NM_I(sbi); |
833 | nid_t last_nid = nm_i->next_scan_nid; | ||
801 | block_t start_blk; | 834 | block_t start_blk; |
802 | struct page *cp_page; | 835 | struct page *cp_page; |
803 | unsigned int data_sum_blocks, orphan_blocks; | 836 | unsigned int data_sum_blocks, orphan_blocks; |
@@ -813,8 +846,11 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) | |||
813 | discard_next_dnode(sbi, NEXT_FREE_BLKADDR(sbi, curseg)); | 846 | discard_next_dnode(sbi, NEXT_FREE_BLKADDR(sbi, curseg)); |
814 | 847 | ||
815 | /* Flush all the NAT/SIT pages */ | 848 | /* Flush all the NAT/SIT pages */ |
816 | while (get_pages(sbi, F2FS_DIRTY_META)) | 849 | while (get_pages(sbi, F2FS_DIRTY_META)) { |
817 | sync_meta_pages(sbi, META, LONG_MAX); | 850 | sync_meta_pages(sbi, META, LONG_MAX); |
851 | if (unlikely(f2fs_cp_error(sbi))) | ||
852 | return; | ||
853 | } | ||
818 | 854 | ||
819 | next_free_nid(sbi, &last_nid); | 855 | next_free_nid(sbi, &last_nid); |
820 | 856 | ||
@@ -825,7 +861,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) | |||
825 | ckpt->elapsed_time = cpu_to_le64(get_mtime(sbi)); | 861 | ckpt->elapsed_time = cpu_to_le64(get_mtime(sbi)); |
826 | ckpt->valid_block_count = cpu_to_le64(valid_user_blocks(sbi)); | 862 | ckpt->valid_block_count = cpu_to_le64(valid_user_blocks(sbi)); |
827 | ckpt->free_segment_count = cpu_to_le32(free_segments(sbi)); | 863 | ckpt->free_segment_count = cpu_to_le32(free_segments(sbi)); |
828 | for (i = 0; i < 3; i++) { | 864 | for (i = 0; i < NR_CURSEG_NODE_TYPE; i++) { |
829 | ckpt->cur_node_segno[i] = | 865 | ckpt->cur_node_segno[i] = |
830 | cpu_to_le32(curseg_segno(sbi, i + CURSEG_HOT_NODE)); | 866 | cpu_to_le32(curseg_segno(sbi, i + CURSEG_HOT_NODE)); |
831 | ckpt->cur_node_blkoff[i] = | 867 | ckpt->cur_node_blkoff[i] = |
@@ -833,7 +869,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) | |||
833 | ckpt->alloc_type[i + CURSEG_HOT_NODE] = | 869 | ckpt->alloc_type[i + CURSEG_HOT_NODE] = |
834 | curseg_alloc_type(sbi, i + CURSEG_HOT_NODE); | 870 | curseg_alloc_type(sbi, i + CURSEG_HOT_NODE); |
835 | } | 871 | } |
836 | for (i = 0; i < 3; i++) { | 872 | for (i = 0; i < NR_CURSEG_DATA_TYPE; i++) { |
837 | ckpt->cur_data_segno[i] = | 873 | ckpt->cur_data_segno[i] = |
838 | cpu_to_le32(curseg_segno(sbi, i + CURSEG_HOT_DATA)); | 874 | cpu_to_le32(curseg_segno(sbi, i + CURSEG_HOT_DATA)); |
839 | ckpt->cur_data_blkoff[i] = | 875 | ckpt->cur_data_blkoff[i] = |
@@ -848,24 +884,23 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) | |||
848 | 884 | ||
849 | /* 2 cp + n data seg summary + orphan inode blocks */ | 885 | /* 2 cp + n data seg summary + orphan inode blocks */ |
850 | data_sum_blocks = npages_for_summary_flush(sbi); | 886 | data_sum_blocks = npages_for_summary_flush(sbi); |
851 | if (data_sum_blocks < 3) | 887 | if (data_sum_blocks < NR_CURSEG_DATA_TYPE) |
852 | set_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG); | 888 | set_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG); |
853 | else | 889 | else |
854 | clear_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG); | 890 | clear_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG); |
855 | 891 | ||
856 | orphan_blocks = (sbi->n_orphans + F2FS_ORPHANS_PER_BLOCK - 1) | 892 | orphan_blocks = GET_ORPHAN_BLOCKS(sbi->n_orphans); |
857 | / F2FS_ORPHANS_PER_BLOCK; | ||
858 | ckpt->cp_pack_start_sum = cpu_to_le32(1 + cp_payload_blks + | 893 | ckpt->cp_pack_start_sum = cpu_to_le32(1 + cp_payload_blks + |
859 | orphan_blocks); | 894 | orphan_blocks); |
860 | 895 | ||
861 | if (is_umount) { | 896 | if (cpc->reason == CP_UMOUNT) { |
862 | set_ckpt_flags(ckpt, CP_UMOUNT_FLAG); | 897 | set_ckpt_flags(ckpt, CP_UMOUNT_FLAG); |
863 | ckpt->cp_pack_total_block_count = cpu_to_le32(2 + | 898 | ckpt->cp_pack_total_block_count = cpu_to_le32(F2FS_CP_PACKS + |
864 | cp_payload_blks + data_sum_blocks + | 899 | cp_payload_blks + data_sum_blocks + |
865 | orphan_blocks + NR_CURSEG_NODE_TYPE); | 900 | orphan_blocks + NR_CURSEG_NODE_TYPE); |
866 | } else { | 901 | } else { |
867 | clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG); | 902 | clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG); |
868 | ckpt->cp_pack_total_block_count = cpu_to_le32(2 + | 903 | ckpt->cp_pack_total_block_count = cpu_to_le32(F2FS_CP_PACKS + |
869 | cp_payload_blks + data_sum_blocks + | 904 | cp_payload_blks + data_sum_blocks + |
870 | orphan_blocks); | 905 | orphan_blocks); |
871 | } | 906 | } |
@@ -875,6 +910,9 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) | |||
875 | else | 910 | else |
876 | clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG); | 911 | clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG); |
877 | 912 | ||
913 | if (sbi->need_fsck) | ||
914 | set_ckpt_flags(ckpt, CP_FSCK_FLAG); | ||
915 | |||
878 | /* update SIT/NAT bitmap */ | 916 | /* update SIT/NAT bitmap */ |
879 | get_sit_bitmap(sbi, __bitmap_ptr(sbi, SIT_BITMAP)); | 917 | get_sit_bitmap(sbi, __bitmap_ptr(sbi, SIT_BITMAP)); |
880 | get_nat_bitmap(sbi, __bitmap_ptr(sbi, NAT_BITMAP)); | 918 | get_nat_bitmap(sbi, __bitmap_ptr(sbi, NAT_BITMAP)); |
@@ -909,7 +947,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) | |||
909 | 947 | ||
910 | write_data_summaries(sbi, start_blk); | 948 | write_data_summaries(sbi, start_blk); |
911 | start_blk += data_sum_blocks; | 949 | start_blk += data_sum_blocks; |
912 | if (is_umount) { | 950 | if (cpc->reason == CP_UMOUNT) { |
913 | write_node_summaries(sbi, start_blk); | 951 | write_node_summaries(sbi, start_blk); |
914 | start_blk += NR_CURSEG_NODE_TYPE; | 952 | start_blk += NR_CURSEG_NODE_TYPE; |
915 | } | 953 | } |
@@ -924,6 +962,9 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) | |||
924 | /* wait for previous submitted node/meta pages writeback */ | 962 | /* wait for previous submitted node/meta pages writeback */ |
925 | wait_on_all_pages_writeback(sbi); | 963 | wait_on_all_pages_writeback(sbi); |
926 | 964 | ||
965 | if (unlikely(f2fs_cp_error(sbi))) | ||
966 | return; | ||
967 | |||
927 | filemap_fdatawait_range(NODE_MAPPING(sbi), 0, LONG_MAX); | 968 | filemap_fdatawait_range(NODE_MAPPING(sbi), 0, LONG_MAX); |
928 | filemap_fdatawait_range(META_MAPPING(sbi), 0, LONG_MAX); | 969 | filemap_fdatawait_range(META_MAPPING(sbi), 0, LONG_MAX); |
929 | 970 | ||
@@ -934,27 +975,35 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) | |||
934 | /* Here, we only have one bio having CP pack */ | 975 | /* Here, we only have one bio having CP pack */ |
935 | sync_meta_pages(sbi, META_FLUSH, LONG_MAX); | 976 | sync_meta_pages(sbi, META_FLUSH, LONG_MAX); |
936 | 977 | ||
937 | if (!is_set_ckpt_flags(ckpt, CP_ERROR_FLAG)) { | 978 | release_dirty_inode(sbi); |
938 | clear_prefree_segments(sbi); | 979 | |
939 | release_dirty_inode(sbi); | 980 | if (unlikely(f2fs_cp_error(sbi))) |
940 | F2FS_RESET_SB_DIRT(sbi); | 981 | return; |
941 | } | 982 | |
983 | clear_prefree_segments(sbi); | ||
984 | F2FS_RESET_SB_DIRT(sbi); | ||
942 | } | 985 | } |
943 | 986 | ||
944 | /* | 987 | /* |
945 | * We guarantee that this checkpoint procedure should not fail. | 988 | * We guarantee that this checkpoint procedure will not fail. |
946 | */ | 989 | */ |
947 | void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) | 990 | void write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) |
948 | { | 991 | { |
949 | struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); | 992 | struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); |
950 | unsigned long long ckpt_ver; | 993 | unsigned long long ckpt_ver; |
951 | 994 | ||
952 | trace_f2fs_write_checkpoint(sbi->sb, is_umount, "start block_ops"); | 995 | trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "start block_ops"); |
953 | 996 | ||
954 | mutex_lock(&sbi->cp_mutex); | 997 | mutex_lock(&sbi->cp_mutex); |
955 | block_operations(sbi); | ||
956 | 998 | ||
957 | trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish block_ops"); | 999 | if (!sbi->s_dirty && cpc->reason != CP_DISCARD) |
1000 | goto out; | ||
1001 | if (unlikely(f2fs_cp_error(sbi))) | ||
1002 | goto out; | ||
1003 | if (block_operations(sbi)) | ||
1004 | goto out; | ||
1005 | |||
1006 | trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish block_ops"); | ||
958 | 1007 | ||
959 | f2fs_submit_merged_bio(sbi, DATA, WRITE); | 1008 | f2fs_submit_merged_bio(sbi, DATA, WRITE); |
960 | f2fs_submit_merged_bio(sbi, NODE, WRITE); | 1009 | f2fs_submit_merged_bio(sbi, NODE, WRITE); |
@@ -970,16 +1019,16 @@ void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) | |||
970 | 1019 | ||
971 | /* write cached NAT/SIT entries to NAT/SIT area */ | 1020 | /* write cached NAT/SIT entries to NAT/SIT area */ |
972 | flush_nat_entries(sbi); | 1021 | flush_nat_entries(sbi); |
973 | flush_sit_entries(sbi); | 1022 | flush_sit_entries(sbi, cpc); |
974 | 1023 | ||
975 | /* unlock all the fs_lock[] in do_checkpoint() */ | 1024 | /* unlock all the fs_lock[] in do_checkpoint() */ |
976 | do_checkpoint(sbi, is_umount); | 1025 | do_checkpoint(sbi, cpc); |
977 | 1026 | ||
978 | unblock_operations(sbi); | 1027 | unblock_operations(sbi); |
979 | mutex_unlock(&sbi->cp_mutex); | ||
980 | |||
981 | stat_inc_cp_count(sbi->stat_info); | 1028 | stat_inc_cp_count(sbi->stat_info); |
982 | trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish checkpoint"); | 1029 | out: |
1030 | mutex_unlock(&sbi->cp_mutex); | ||
1031 | trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish checkpoint"); | ||
983 | } | 1032 | } |
984 | 1033 | ||
985 | void init_ino_entry_info(struct f2fs_sb_info *sbi) | 1034 | void init_ino_entry_info(struct f2fs_sb_info *sbi) |
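The rewritten exit path above is worth spelling out: every return from write_checkpoint() now funnels through the out: label, so cp_mutex is released exactly once, and the checkpoint is skipped entirely in three cases. Restated with comments (same guards as the new-side code):

	if (!sbi->s_dirty && cpc->reason != CP_DISCARD)
		goto out;	/* nothing dirty; CP_DISCARD still runs so a
				 * trim request can flush its SIT updates */
	if (unlikely(f2fs_cp_error(sbi)))
		goto out;	/* fs already stopped; never write a new CP */
	if (block_operations(sbi))
		goto out;	/* could not quiesce dirty dents/nodes (-EIO) */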
@@ -999,8 +1048,8 @@ void init_ino_entry_info(struct f2fs_sb_info *sbi) | |||
999 | * for cp pack we can have max 1020*504 orphan entries | 1048 | * for cp pack we can have max 1020*504 orphan entries |
1000 | */ | 1049 | */ |
1001 | sbi->n_orphans = 0; | 1050 | sbi->n_orphans = 0; |
1002 | sbi->max_orphans = (sbi->blocks_per_seg - 2 - NR_CURSEG_TYPE) | 1051 | sbi->max_orphans = (sbi->blocks_per_seg - F2FS_CP_PACKS - |
1003 | * F2FS_ORPHANS_PER_BLOCK; | 1052 | NR_CURSEG_TYPE) * F2FS_ORPHANS_PER_BLOCK; |
1004 | } | 1053 | } |
1005 | 1054 | ||
1006 | int __init create_checkpoint_caches(void) | 1055 | int __init create_checkpoint_caches(void) |
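The max_orphans bound just above now derives from F2FS_CP_PACKS rather than a literal 2: within one checkpoint pack, the checkpoint blocks themselves and one summary block per current segment are reserved, and every remaining block stores F2FS_ORPHANS_PER_BLOCK orphan entries. As arithmetic (annotated restatement of the new side):

	sbi->max_orphans = (sbi->blocks_per_seg	  /* blocks in the cp pack */
			- F2FS_CP_PACKS		  /* the cp blocks proper   */
			- NR_CURSEG_TYPE)	  /* one summary per curseg */
			* F2FS_ORPHANS_PER_BLOCK; /* entries per block      */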
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 03313099c51c..8e58c4cc2cb9 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c | |||
@@ -53,7 +53,7 @@ static void f2fs_write_end_io(struct bio *bio, int err) | |||
53 | struct page *page = bvec->bv_page; | 53 | struct page *page = bvec->bv_page; |
54 | 54 | ||
55 | if (unlikely(err)) { | 55 | if (unlikely(err)) { |
56 | SetPageError(page); | 56 | set_page_dirty(page); |
57 | set_bit(AS_EIO, &page->mapping->flags); | 57 | set_bit(AS_EIO, &page->mapping->flags); |
58 | f2fs_stop_checkpoint(sbi); | 58 | f2fs_stop_checkpoint(sbi); |
59 | } | 59 | } |
@@ -85,7 +85,7 @@ static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr, | |||
85 | bio = bio_alloc(GFP_NOIO, npages); | 85 | bio = bio_alloc(GFP_NOIO, npages); |
86 | 86 | ||
87 | bio->bi_bdev = sbi->sb->s_bdev; | 87 | bio->bi_bdev = sbi->sb->s_bdev; |
88 | bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr); | 88 | bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(blk_addr); |
89 | bio->bi_end_io = is_read ? f2fs_read_end_io : f2fs_write_end_io; | 89 | bio->bi_end_io = is_read ? f2fs_read_end_io : f2fs_write_end_io; |
90 | bio->bi_private = sbi; | 90 | bio->bi_private = sbi; |
91 | 91 | ||
@@ -193,7 +193,7 @@ void f2fs_submit_page_mbio(struct f2fs_sb_info *sbi, struct page *page, | |||
193 | __submit_merged_bio(io); | 193 | __submit_merged_bio(io); |
194 | alloc_new: | 194 | alloc_new: |
195 | if (io->bio == NULL) { | 195 | if (io->bio == NULL) { |
196 | int bio_blocks = MAX_BIO_BLOCKS(max_hw_blocks(sbi)); | 196 | int bio_blocks = MAX_BIO_BLOCKS(sbi); |
197 | 197 | ||
198 | io->bio = __bio_alloc(sbi, blk_addr, bio_blocks, is_read); | 198 | io->bio = __bio_alloc(sbi, blk_addr, bio_blocks, is_read); |
199 | io->fio = *fio; | 199 | io->fio = *fio; |
@@ -236,7 +236,7 @@ static void __set_data_blkaddr(struct dnode_of_data *dn, block_t new_addr) | |||
236 | 236 | ||
237 | int reserve_new_block(struct dnode_of_data *dn) | 237 | int reserve_new_block(struct dnode_of_data *dn) |
238 | { | 238 | { |
239 | struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); | 239 | struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); |
240 | 240 | ||
241 | if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))) | 241 | if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))) |
242 | return -EPERM; | 242 | return -EPERM; |
@@ -258,7 +258,7 @@ int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index) | |||
258 | int err; | 258 | int err; |
259 | 259 | ||
260 | /* if inode_page exists, index should be zero */ | 260 | /* if inode_page exists, index should be zero */ |
261 | f2fs_bug_on(!need_put && index); | 261 | f2fs_bug_on(F2FS_I_SB(dn->inode), !need_put && index); |
262 | 262 | ||
263 | err = get_dnode_of_data(dn, index, ALLOC_NODE); | 263 | err = get_dnode_of_data(dn, index, ALLOC_NODE); |
264 | if (err) | 264 | if (err) |
@@ -321,7 +321,7 @@ void update_extent_cache(block_t blk_addr, struct dnode_of_data *dn) | |||
321 | block_t start_blkaddr, end_blkaddr; | 321 | block_t start_blkaddr, end_blkaddr; |
322 | int need_update = true; | 322 | int need_update = true; |
323 | 323 | ||
324 | f2fs_bug_on(blk_addr == NEW_ADDR); | 324 | f2fs_bug_on(F2FS_I_SB(dn->inode), blk_addr == NEW_ADDR); |
325 | fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) + | 325 | fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) + |
326 | dn->ofs_in_node; | 326 | dn->ofs_in_node; |
327 | 327 | ||
@@ -396,7 +396,6 @@ end_update: | |||
396 | 396 | ||
397 | struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync) | 397 | struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync) |
398 | { | 398 | { |
399 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | ||
400 | struct address_space *mapping = inode->i_mapping; | 399 | struct address_space *mapping = inode->i_mapping; |
401 | struct dnode_of_data dn; | 400 | struct dnode_of_data dn; |
402 | struct page *page; | 401 | struct page *page; |
@@ -429,7 +428,7 @@ struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync) | |||
429 | return page; | 428 | return page; |
430 | } | 429 | } |
431 | 430 | ||
432 | err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr, | 431 | err = f2fs_submit_page_bio(F2FS_I_SB(inode), page, dn.data_blkaddr, |
433 | sync ? READ_SYNC : READA); | 432 | sync ? READ_SYNC : READA); |
434 | if (err) | 433 | if (err) |
435 | return ERR_PTR(err); | 434 | return ERR_PTR(err); |
@@ -451,7 +450,6 @@ struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync) | |||
451 | */ | 450 | */ |
452 | struct page *get_lock_data_page(struct inode *inode, pgoff_t index) | 451 | struct page *get_lock_data_page(struct inode *inode, pgoff_t index) |
453 | { | 452 | { |
454 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | ||
455 | struct address_space *mapping = inode->i_mapping; | 453 | struct address_space *mapping = inode->i_mapping; |
456 | struct dnode_of_data dn; | 454 | struct dnode_of_data dn; |
457 | struct page *page; | 455 | struct page *page; |
@@ -490,7 +488,8 @@ repeat: | |||
490 | return page; | 488 | return page; |
491 | } | 489 | } |
492 | 490 | ||
493 | err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr, READ_SYNC); | 491 | err = f2fs_submit_page_bio(F2FS_I_SB(inode), page, |
492 | dn.data_blkaddr, READ_SYNC); | ||
494 | if (err) | 493 | if (err) |
495 | return ERR_PTR(err); | 494 | return ERR_PTR(err); |
496 | 495 | ||
@@ -517,7 +516,6 @@ repeat: | |||
517 | struct page *get_new_data_page(struct inode *inode, | 516 | struct page *get_new_data_page(struct inode *inode, |
518 | struct page *ipage, pgoff_t index, bool new_i_size) | 517 | struct page *ipage, pgoff_t index, bool new_i_size) |
519 | { | 518 | { |
520 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | ||
521 | struct address_space *mapping = inode->i_mapping; | 519 | struct address_space *mapping = inode->i_mapping; |
522 | struct page *page; | 520 | struct page *page; |
523 | struct dnode_of_data dn; | 521 | struct dnode_of_data dn; |
@@ -541,8 +539,8 @@ repeat: | |||
541 | zero_user_segment(page, 0, PAGE_CACHE_SIZE); | 539 | zero_user_segment(page, 0, PAGE_CACHE_SIZE); |
542 | SetPageUptodate(page); | 540 | SetPageUptodate(page); |
543 | } else { | 541 | } else { |
544 | err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr, | 542 | err = f2fs_submit_page_bio(F2FS_I_SB(inode), page, |
545 | READ_SYNC); | 543 | dn.data_blkaddr, READ_SYNC); |
546 | if (err) | 544 | if (err) |
547 | goto put_err; | 545 | goto put_err; |
548 | 546 | ||
@@ -573,10 +571,12 @@ put_err: | |||
573 | 571 | ||
574 | static int __allocate_data_block(struct dnode_of_data *dn) | 572 | static int __allocate_data_block(struct dnode_of_data *dn) |
575 | { | 573 | { |
576 | struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); | 574 | struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); |
575 | struct f2fs_inode_info *fi = F2FS_I(dn->inode); | ||
577 | struct f2fs_summary sum; | 576 | struct f2fs_summary sum; |
578 | block_t new_blkaddr; | 577 | block_t new_blkaddr; |
579 | struct node_info ni; | 578 | struct node_info ni; |
579 | pgoff_t fofs; | ||
580 | int type; | 580 | int type; |
581 | 581 | ||
582 | if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))) | 582 | if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))) |
@@ -599,6 +599,12 @@ static int __allocate_data_block(struct dnode_of_data *dn) | |||
599 | update_extent_cache(new_blkaddr, dn); | 599 | update_extent_cache(new_blkaddr, dn); |
600 | clear_inode_flag(F2FS_I(dn->inode), FI_NO_EXTENT); | 600 | clear_inode_flag(F2FS_I(dn->inode), FI_NO_EXTENT); |
601 | 601 | ||
602 | /* update i_size */ | ||
603 | fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) + | ||
604 | dn->ofs_in_node; | ||
605 | if (i_size_read(dn->inode) < ((fofs + 1) << PAGE_CACHE_SHIFT)) | ||
606 | i_size_write(dn->inode, ((fofs + 1) << PAGE_CACHE_SHIFT)); | ||
607 | |||
602 | dn->data_blkaddr = new_blkaddr; | 608 | dn->data_blkaddr = new_blkaddr; |
603 | return 0; | 609 | return 0; |
604 | } | 610 | } |
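__allocate_data_block() now advances i_size as blocks are preallocated, which is what lets direct writes past EOF land on preallocated space: fofs is the file offset of the new block in page units, so the file must span at least fofs + 1 pages. In bytes (sketch, with an explicit widening cast for clarity; the committed code shifts directly):

	/* end offset, in bytes, of the page-sized block at page index fofs */
	loff_t end = (loff_t)(fofs + 1) << PAGE_CACHE_SHIFT;

	if (i_size_read(dn->inode) < end)
		i_size_write(dn->inode, end);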
@@ -614,7 +620,6 @@ static int __allocate_data_block(struct dnode_of_data *dn) | |||
614 | static int __get_data_block(struct inode *inode, sector_t iblock, | 620 | static int __get_data_block(struct inode *inode, sector_t iblock, |
615 | struct buffer_head *bh_result, int create, bool fiemap) | 621 | struct buffer_head *bh_result, int create, bool fiemap) |
616 | { | 622 | { |
617 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | ||
618 | unsigned int blkbits = inode->i_sb->s_blocksize_bits; | 623 | unsigned int blkbits = inode->i_sb->s_blocksize_bits; |
619 | unsigned maxblocks = bh_result->b_size >> blkbits; | 624 | unsigned maxblocks = bh_result->b_size >> blkbits; |
620 | struct dnode_of_data dn; | 625 | struct dnode_of_data dn; |
@@ -630,8 +635,8 @@ static int __get_data_block(struct inode *inode, sector_t iblock, | |||
630 | goto out; | 635 | goto out; |
631 | 636 | ||
632 | if (create) { | 637 | if (create) { |
633 | f2fs_balance_fs(sbi); | 638 | f2fs_balance_fs(F2FS_I_SB(inode)); |
634 | f2fs_lock_op(sbi); | 639 | f2fs_lock_op(F2FS_I_SB(inode)); |
635 | } | 640 | } |
636 | 641 | ||
637 | /* When reading holes, we need its node page */ | 642 | /* When reading holes, we need its node page */ |
@@ -691,7 +696,7 @@ get_next: | |||
691 | allocated = true; | 696 | allocated = true; |
692 | blkaddr = dn.data_blkaddr; | 697 | blkaddr = dn.data_blkaddr; |
693 | } | 698 | } |
694 | /* Give more consecutive addresses for the read ahead */ | 699 | /* Give more consecutive addresses for the readahead */ |
695 | if (blkaddr == (bh_result->b_blocknr + ofs)) { | 700 | if (blkaddr == (bh_result->b_blocknr + ofs)) { |
696 | ofs++; | 701 | ofs++; |
697 | dn.ofs_in_node++; | 702 | dn.ofs_in_node++; |
@@ -707,7 +712,7 @@ put_out: | |||
707 | f2fs_put_dnode(&dn); | 712 | f2fs_put_dnode(&dn); |
708 | unlock_out: | 713 | unlock_out: |
709 | if (create) | 714 | if (create) |
710 | f2fs_unlock_op(sbi); | 715 | f2fs_unlock_op(F2FS_I_SB(inode)); |
711 | out: | 716 | out: |
712 | trace_f2fs_get_data_block(inode, iblock, bh_result, err); | 717 | trace_f2fs_get_data_block(inode, iblock, bh_result, err); |
713 | return err; | 718 | return err; |
@@ -739,7 +744,7 @@ static int f2fs_read_data_page(struct file *file, struct page *page) | |||
739 | 744 | ||
740 | trace_f2fs_readpage(page, DATA); | 745 | trace_f2fs_readpage(page, DATA); |
741 | 746 | ||
742 | /* If the file has inline data, try to read it directlly */ | 747 | /* If the file has inline data, try to read it directly */ |
743 | if (f2fs_has_inline_data(inode)) | 748 | if (f2fs_has_inline_data(inode)) |
744 | ret = f2fs_read_inline_data(inode, page); | 749 | ret = f2fs_read_inline_data(inode, page); |
745 | else | 750 | else |
@@ -804,7 +809,7 @@ static int f2fs_write_data_page(struct page *page, | |||
804 | struct writeback_control *wbc) | 809 | struct writeback_control *wbc) |
805 | { | 810 | { |
806 | struct inode *inode = page->mapping->host; | 811 | struct inode *inode = page->mapping->host; |
807 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | 812 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); |
808 | loff_t i_size = i_size_read(inode); | 813 | loff_t i_size = i_size_read(inode); |
809 | const pgoff_t end_index = ((unsigned long long) i_size) | 814 | const pgoff_t end_index = ((unsigned long long) i_size) |
810 | >> PAGE_CACHE_SHIFT; | 815 | >> PAGE_CACHE_SHIFT; |
@@ -836,10 +841,19 @@ write: | |||
836 | 841 | ||
837 | /* Dentry blocks are controlled by checkpoint */ | 842 | /* Dentry blocks are controlled by checkpoint */ |
838 | if (S_ISDIR(inode->i_mode)) { | 843 | if (S_ISDIR(inode->i_mode)) { |
844 | if (unlikely(f2fs_cp_error(sbi))) | ||
845 | goto redirty_out; | ||
839 | err = do_write_data_page(page, &fio); | 846 | err = do_write_data_page(page, &fio); |
840 | goto done; | 847 | goto done; |
841 | } | 848 | } |
842 | 849 | ||
850 | /* we should bypass data pages so that the kworker jobs can proceed */ | ||
851 | if (unlikely(f2fs_cp_error(sbi))) { | ||
852 | SetPageError(page); | ||
853 | unlock_page(page); | ||
854 | goto out; | ||
855 | } | ||
856 | |||
843 | if (!wbc->for_reclaim) | 857 | if (!wbc->for_reclaim) |
844 | need_balance_fs = true; | 858 | need_balance_fs = true; |
845 | else if (has_not_enough_free_secs(sbi, 0)) | 859 | else if (has_not_enough_free_secs(sbi, 0)) |
@@ -857,7 +871,7 @@ done: | |||
857 | 871 | ||
858 | clear_cold_data(page); | 872 | clear_cold_data(page); |
859 | out: | 873 | out: |
860 | inode_dec_dirty_dents(inode); | 874 | inode_dec_dirty_pages(inode); |
861 | unlock_page(page); | 875 | unlock_page(page); |
862 | if (need_balance_fs) | 876 | if (need_balance_fs) |
863 | f2fs_balance_fs(sbi); | 877 | f2fs_balance_fs(sbi); |
@@ -883,7 +897,7 @@ static int f2fs_write_data_pages(struct address_space *mapping, | |||
883 | struct writeback_control *wbc) | 897 | struct writeback_control *wbc) |
884 | { | 898 | { |
885 | struct inode *inode = mapping->host; | 899 | struct inode *inode = mapping->host; |
886 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | 900 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); |
887 | bool locked = false; | 901 | bool locked = false; |
888 | int ret; | 902 | int ret; |
889 | long diff; | 903 | long diff; |
@@ -895,7 +909,7 @@ static int f2fs_write_data_pages(struct address_space *mapping, | |||
895 | return 0; | 909 | return 0; |
896 | 910 | ||
897 | if (S_ISDIR(inode->i_mode) && wbc->sync_mode == WB_SYNC_NONE && | 911 | if (S_ISDIR(inode->i_mode) && wbc->sync_mode == WB_SYNC_NONE && |
898 | get_dirty_dents(inode) < nr_pages_to_skip(sbi, DATA) && | 912 | get_dirty_pages(inode) < nr_pages_to_skip(sbi, DATA) && |
899 | available_free_memory(sbi, DIRTY_DENTS)) | 913 | available_free_memory(sbi, DIRTY_DENTS)) |
900 | goto skip_write; | 914 | goto skip_write; |
901 | 915 | ||
@@ -917,7 +931,7 @@ static int f2fs_write_data_pages(struct address_space *mapping, | |||
917 | return ret; | 931 | return ret; |
918 | 932 | ||
919 | skip_write: | 933 | skip_write: |
920 | wbc->pages_skipped += get_dirty_dents(inode); | 934 | wbc->pages_skipped += get_dirty_pages(inode); |
921 | return 0; | 935 | return 0; |
922 | } | 936 | } |
923 | 937 | ||
@@ -927,7 +941,7 @@ static void f2fs_write_failed(struct address_space *mapping, loff_t to) | |||
927 | 941 | ||
928 | if (to > inode->i_size) { | 942 | if (to > inode->i_size) { |
929 | truncate_pagecache(inode, inode->i_size); | 943 | truncate_pagecache(inode, inode->i_size); |
930 | truncate_blocks(inode, inode->i_size); | 944 | truncate_blocks(inode, inode->i_size, true); |
931 | } | 945 | } |
932 | } | 946 | } |
933 | 947 | ||
@@ -936,7 +950,7 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, | |||
936 | struct page **pagep, void **fsdata) | 950 | struct page **pagep, void **fsdata) |
937 | { | 951 | { |
938 | struct inode *inode = mapping->host; | 952 | struct inode *inode = mapping->host; |
939 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | 953 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); |
940 | struct page *page; | 954 | struct page *page; |
941 | pgoff_t index = ((unsigned long long) pos) >> PAGE_CACHE_SHIFT; | 955 | pgoff_t index = ((unsigned long long) pos) >> PAGE_CACHE_SHIFT; |
942 | struct dnode_of_data dn; | 956 | struct dnode_of_data dn; |
@@ -946,7 +960,7 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, | |||
946 | 960 | ||
947 | f2fs_balance_fs(sbi); | 961 | f2fs_balance_fs(sbi); |
948 | repeat: | 962 | repeat: |
949 | err = f2fs_convert_inline_data(inode, pos + len); | 963 | err = f2fs_convert_inline_data(inode, pos + len, NULL); |
950 | if (err) | 964 | if (err) |
951 | goto fail; | 965 | goto fail; |
952 | 966 | ||
@@ -1038,7 +1052,10 @@ static int f2fs_write_end(struct file *file, | |||
1038 | 1052 | ||
1039 | trace_f2fs_write_end(inode, pos, len, copied); | 1053 | trace_f2fs_write_end(inode, pos, len, copied); |
1040 | 1054 | ||
1041 | set_page_dirty(page); | 1055 | if (f2fs_is_atomic_file(inode) || f2fs_is_volatile_file(inode)) |
1056 | register_inmem_page(inode, page); | ||
1057 | else | ||
1058 | set_page_dirty(page); | ||
1042 | 1059 | ||
1043 | if (pos + copied > i_size_read(inode)) { | 1060 | if (pos + copied > i_size_read(inode)) { |
1044 | i_size_write(inode, pos + copied); | 1061 | i_size_write(inode, pos + copied); |
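This write_end hook is what gives the new atomic/volatile ioctls their semantics: once a file has been started with F2FS_IOC_START_ATOMIC_WRITE, written pages are parked on an in-memory list instead of being dirtied, so nothing reaches disk until the commit ioctl replays them. Annotated (restating the new side):

	if (f2fs_is_atomic_file(inode) || f2fs_is_volatile_file(inode))
		register_inmem_page(inode, page);	/* held until commit */
	else
		set_page_dirty(page);			/* normal writeback */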
@@ -1083,9 +1100,6 @@ static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb, | |||
1083 | if (check_direct_IO(inode, rw, iter, offset)) | 1100 | if (check_direct_IO(inode, rw, iter, offset)) |
1084 | return 0; | 1101 | return 0; |
1085 | 1102 | ||
1086 | /* clear fsync mark to recover these blocks */ | ||
1087 | fsync_mark_clear(F2FS_SB(inode->i_sb), inode->i_ino); | ||
1088 | |||
1089 | trace_f2fs_direct_IO_enter(inode, offset, count, rw); | 1103 | trace_f2fs_direct_IO_enter(inode, offset, count, rw); |
1090 | 1104 | ||
1091 | err = blockdev_direct_IO(rw, iocb, inode, iter, offset, get_data_block); | 1105 | err = blockdev_direct_IO(rw, iocb, inode, iter, offset, get_data_block); |
@@ -1101,8 +1115,12 @@ static void f2fs_invalidate_data_page(struct page *page, unsigned int offset, | |||
1101 | unsigned int length) | 1115 | unsigned int length) |
1102 | { | 1116 | { |
1103 | struct inode *inode = page->mapping->host; | 1117 | struct inode *inode = page->mapping->host; |
1118 | |||
1119 | if (offset % PAGE_CACHE_SIZE || length != PAGE_CACHE_SIZE) | ||
1120 | return; | ||
1121 | |||
1104 | if (PageDirty(page)) | 1122 | if (PageDirty(page)) |
1105 | inode_dec_dirty_dents(inode); | 1123 | inode_dec_dirty_pages(inode); |
1106 | ClearPagePrivate(page); | 1124 | ClearPagePrivate(page); |
1107 | } | 1125 | } |
1108 | 1126 | ||
@@ -1124,7 +1142,7 @@ static int f2fs_set_data_page_dirty(struct page *page) | |||
1124 | 1142 | ||
1125 | if (!PageDirty(page)) { | 1143 | if (!PageDirty(page)) { |
1126 | __set_page_dirty_nobuffers(page); | 1144 | __set_page_dirty_nobuffers(page); |
1127 | set_dirty_dir_page(inode, page); | 1145 | update_dirty_page(inode, page); |
1128 | return 1; | 1146 | return 1; |
1129 | } | 1147 | } |
1130 | return 0; | 1148 | return 0; |
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index a441ba33be11..0a91ab813a9e 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c | |||
@@ -32,7 +32,7 @@ static void update_general_status(struct f2fs_sb_info *sbi) | |||
32 | struct f2fs_stat_info *si = F2FS_STAT(sbi); | 32 | struct f2fs_stat_info *si = F2FS_STAT(sbi); |
33 | int i; | 33 | int i; |
34 | 34 | ||
35 | /* valid check of the segment numbers */ | 35 | /* validation check of the segment numbers */ |
36 | si->hit_ext = sbi->read_hit_ext; | 36 | si->hit_ext = sbi->read_hit_ext; |
37 | si->total_ext = sbi->total_hit_ext; | 37 | si->total_ext = sbi->total_hit_ext; |
38 | si->ndirty_node = get_pages(sbi, F2FS_DIRTY_NODES); | 38 | si->ndirty_node = get_pages(sbi, F2FS_DIRTY_NODES); |
@@ -93,7 +93,7 @@ static void update_sit_info(struct f2fs_sb_info *sbi) | |||
93 | total_vblocks = 0; | 93 | total_vblocks = 0; |
94 | blks_per_sec = sbi->segs_per_sec * (1 << sbi->log_blocks_per_seg); | 94 | blks_per_sec = sbi->segs_per_sec * (1 << sbi->log_blocks_per_seg); |
95 | hblks_per_sec = blks_per_sec / 2; | 95 | hblks_per_sec = blks_per_sec / 2; |
96 | for (segno = 0; segno < TOTAL_SEGS(sbi); segno += sbi->segs_per_sec) { | 96 | for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) { |
97 | vblocks = get_valid_blocks(sbi, segno, sbi->segs_per_sec); | 97 | vblocks = get_valid_blocks(sbi, segno, sbi->segs_per_sec); |
98 | dist = abs(vblocks - hblks_per_sec); | 98 | dist = abs(vblocks - hblks_per_sec); |
99 | bimodal += dist * dist; | 99 | bimodal += dist * dist; |
@@ -103,7 +103,7 @@ static void update_sit_info(struct f2fs_sb_info *sbi) | |||
103 | ndirty++; | 103 | ndirty++; |
104 | } | 104 | } |
105 | } | 105 | } |
106 | dist = TOTAL_SECS(sbi) * hblks_per_sec * hblks_per_sec / 100; | 106 | dist = MAIN_SECS(sbi) * hblks_per_sec * hblks_per_sec / 100; |
107 | si->bimodal = bimodal / dist; | 107 | si->bimodal = bimodal / dist; |
108 | if (si->dirty_count) | 108 | if (si->dirty_count) |
109 | si->avg_vblocks = total_vblocks / ndirty; | 109 | si->avg_vblocks = total_vblocks / ndirty; |
@@ -131,17 +131,17 @@ static void update_mem_info(struct f2fs_sb_info *sbi) | |||
131 | 131 | ||
132 | /* build sit */ | 132 | /* build sit */ |
133 | si->base_mem += sizeof(struct sit_info); | 133 | si->base_mem += sizeof(struct sit_info); |
134 | si->base_mem += TOTAL_SEGS(sbi) * sizeof(struct seg_entry); | 134 | si->base_mem += MAIN_SEGS(sbi) * sizeof(struct seg_entry); |
135 | si->base_mem += f2fs_bitmap_size(TOTAL_SEGS(sbi)); | 135 | si->base_mem += f2fs_bitmap_size(MAIN_SEGS(sbi)); |
136 | si->base_mem += 2 * SIT_VBLOCK_MAP_SIZE * TOTAL_SEGS(sbi); | 136 | si->base_mem += 2 * SIT_VBLOCK_MAP_SIZE * MAIN_SEGS(sbi); |
137 | if (sbi->segs_per_sec > 1) | 137 | if (sbi->segs_per_sec > 1) |
138 | si->base_mem += TOTAL_SECS(sbi) * sizeof(struct sec_entry); | 138 | si->base_mem += MAIN_SECS(sbi) * sizeof(struct sec_entry); |
139 | si->base_mem += __bitmap_size(sbi, SIT_BITMAP); | 139 | si->base_mem += __bitmap_size(sbi, SIT_BITMAP); |
140 | 140 | ||
141 | /* build free segmap */ | 141 | /* build free segmap */ |
142 | si->base_mem += sizeof(struct free_segmap_info); | 142 | si->base_mem += sizeof(struct free_segmap_info); |
143 | si->base_mem += f2fs_bitmap_size(TOTAL_SEGS(sbi)); | 143 | si->base_mem += f2fs_bitmap_size(MAIN_SEGS(sbi)); |
144 | si->base_mem += f2fs_bitmap_size(TOTAL_SECS(sbi)); | 144 | si->base_mem += f2fs_bitmap_size(MAIN_SECS(sbi)); |
145 | 145 | ||
146 | /* build curseg */ | 146 | /* build curseg */ |
147 | si->base_mem += sizeof(struct curseg_info) * NR_CURSEG_TYPE; | 147 | si->base_mem += sizeof(struct curseg_info) * NR_CURSEG_TYPE; |
@@ -149,10 +149,10 @@ static void update_mem_info(struct f2fs_sb_info *sbi) | |||
149 | 149 | ||
150 | /* build dirty segmap */ | 150 | /* build dirty segmap */ |
151 | si->base_mem += sizeof(struct dirty_seglist_info); | 151 | si->base_mem += sizeof(struct dirty_seglist_info); |
152 | si->base_mem += NR_DIRTY_TYPE * f2fs_bitmap_size(TOTAL_SEGS(sbi)); | 152 | si->base_mem += NR_DIRTY_TYPE * f2fs_bitmap_size(MAIN_SEGS(sbi)); |
153 | si->base_mem += f2fs_bitmap_size(TOTAL_SECS(sbi)); | 153 | si->base_mem += f2fs_bitmap_size(MAIN_SECS(sbi)); |
154 | 154 | ||
155 | /* buld nm */ | 155 | /* build nm */ |
156 | si->base_mem += sizeof(struct f2fs_nm_info); | 156 | si->base_mem += sizeof(struct f2fs_nm_info); |
157 | si->base_mem += __bitmap_size(sbi, NAT_BITMAP); | 157 | si->base_mem += __bitmap_size(sbi, NAT_BITMAP); |
158 | 158 | ||
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index bcf893c3d903..b54f87149c09 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c | |||
@@ -124,9 +124,9 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page, | |||
124 | 124 | ||
125 | /* | 125 | /* |
126 | * For the most part, it should be a bug when name_len is zero. | 126 | * For the most part, it should be a bug when name_len is zero. |
127 | * We stop here for figuring out where the bugs are occurred. | 127 | * We stop here for figuring out where the bugs has occurred. |
128 | */ | 128 | */ |
129 | f2fs_bug_on(!de->name_len); | 129 | f2fs_bug_on(F2FS_P_SB(dentry_page), !de->name_len); |
130 | 130 | ||
131 | bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); | 131 | bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); |
132 | } | 132 | } |
@@ -151,7 +151,7 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir, | |||
151 | bool room = false; | 151 | bool room = false; |
152 | int max_slots = 0; | 152 | int max_slots = 0; |
153 | 153 | ||
154 | f2fs_bug_on(level > MAX_DIR_HASH_DEPTH); | 154 | f2fs_bug_on(F2FS_I_SB(dir), level > MAX_DIR_HASH_DEPTH); |
155 | 155 | ||
156 | nbucket = dir_buckets(level, F2FS_I(dir)->i_dir_level); | 156 | nbucket = dir_buckets(level, F2FS_I(dir)->i_dir_level); |
157 | nblock = bucket_blocks(level); | 157 | nblock = bucket_blocks(level); |
@@ -284,10 +284,9 @@ static void init_dent_inode(const struct qstr *name, struct page *ipage) | |||
284 | 284 | ||
285 | int update_dent_inode(struct inode *inode, const struct qstr *name) | 285 | int update_dent_inode(struct inode *inode, const struct qstr *name) |
286 | { | 286 | { |
287 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | ||
288 | struct page *page; | 287 | struct page *page; |
289 | 288 | ||
290 | page = get_node_page(sbi, inode->i_ino); | 289 | page = get_node_page(F2FS_I_SB(inode), inode->i_ino); |
291 | if (IS_ERR(page)) | 290 | if (IS_ERR(page)) |
292 | return PTR_ERR(page); | 291 | return PTR_ERR(page); |
293 | 292 | ||
@@ -337,7 +336,6 @@ static int make_empty_dir(struct inode *inode, | |||
337 | static struct page *init_inode_metadata(struct inode *inode, | 336 | static struct page *init_inode_metadata(struct inode *inode, |
338 | struct inode *dir, const struct qstr *name) | 337 | struct inode *dir, const struct qstr *name) |
339 | { | 338 | { |
340 | struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); | ||
341 | struct page *page; | 339 | struct page *page; |
342 | int err; | 340 | int err; |
343 | 341 | ||
@@ -360,7 +358,7 @@ static struct page *init_inode_metadata(struct inode *inode, | |||
360 | if (err) | 358 | if (err) |
361 | goto put_error; | 359 | goto put_error; |
362 | } else { | 360 | } else { |
363 | page = get_node_page(F2FS_SB(dir->i_sb), inode->i_ino); | 361 | page = get_node_page(F2FS_I_SB(dir), inode->i_ino); |
364 | if (IS_ERR(page)) | 362 | if (IS_ERR(page)) |
365 | return page; | 363 | return page; |
366 | 364 | ||
@@ -381,7 +379,7 @@ static struct page *init_inode_metadata(struct inode *inode, | |||
381 | * we should remove this inode from orphan list. | 379 | * we should remove this inode from orphan list. |
382 | */ | 380 | */ |
383 | if (inode->i_nlink == 0) | 381 | if (inode->i_nlink == 0) |
384 | remove_orphan_inode(sbi, inode->i_ino); | 382 | remove_orphan_inode(F2FS_I_SB(dir), inode->i_ino); |
385 | inc_nlink(inode); | 383 | inc_nlink(inode); |
386 | } | 384 | } |
387 | return page; | 385 | return page; |
@@ -391,7 +389,7 @@ put_error: | |||
391 | error: | 389 | error: |
392 | /* once the failed inode becomes a bad inode, i_mode is S_IFREG */ | 390 | /* once the failed inode becomes a bad inode, i_mode is S_IFREG */ |
393 | truncate_inode_pages(&inode->i_data, 0); | 391 | truncate_inode_pages(&inode->i_data, 0); |
394 | truncate_blocks(inode, 0); | 392 | truncate_blocks(inode, 0, false); |
395 | remove_dirty_dir_inode(inode); | 393 | remove_dirty_dir_inode(inode); |
396 | remove_inode_page(inode); | 394 | remove_inode_page(inode); |
397 | return ERR_PTR(err); | 395 | return ERR_PTR(err); |
@@ -563,7 +561,7 @@ fail: | |||
563 | } | 561 | } |
564 | 562 | ||
565 | /* | 563 | /* |
566 | * It only removes the dentry from the dentry page,corresponding name | 564 | * It only removes the dentry from the dentry page, corresponding name |
567 | * entry in name page does not need to be touched during deletion. | 565 | * entry in name page does not need to be touched during deletion. |
568 | */ | 566 | */ |
569 | void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, | 567 | void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, |
@@ -571,8 +569,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, | |||
571 | { | 569 | { |
572 | struct f2fs_dentry_block *dentry_blk; | 570 | struct f2fs_dentry_block *dentry_blk; |
573 | unsigned int bit_pos; | 571 | unsigned int bit_pos; |
574 | struct address_space *mapping = page->mapping; | 572 | struct inode *dir = page->mapping->host; |
575 | struct inode *dir = mapping->host; | ||
576 | int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len)); | 573 | int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len)); |
577 | int i; | 574 | int i; |
578 | 575 | ||
@@ -594,7 +591,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, | |||
594 | dir->i_ctime = dir->i_mtime = CURRENT_TIME; | 591 | dir->i_ctime = dir->i_mtime = CURRENT_TIME; |
595 | 592 | ||
596 | if (inode) { | 593 | if (inode) { |
597 | struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); | 594 | struct f2fs_sb_info *sbi = F2FS_I_SB(dir); |
598 | 595 | ||
599 | down_write(&F2FS_I(inode)->i_sem); | 596 | down_write(&F2FS_I(inode)->i_sem); |
600 | 597 | ||
@@ -621,7 +618,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, | |||
621 | truncate_hole(dir, page->index, page->index + 1); | 618 | truncate_hole(dir, page->index, page->index + 1); |
622 | clear_page_dirty_for_io(page); | 619 | clear_page_dirty_for_io(page); |
623 | ClearPageUptodate(page); | 620 | ClearPageUptodate(page); |
624 | inode_dec_dirty_dents(dir); | 621 | inode_dec_dirty_pages(dir); |
625 | } | 622 | } |
626 | f2fs_put_page(page, 1); | 623 | f2fs_put_page(page, 1); |
627 | } | 624 | } |
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 4dab5338a97a..8171e80b2ee9 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h | |||
@@ -21,10 +21,16 @@ | |||
21 | #include <linux/sched.h> | 21 | #include <linux/sched.h> |
22 | 22 | ||
23 | #ifdef CONFIG_F2FS_CHECK_FS | 23 | #ifdef CONFIG_F2FS_CHECK_FS |
24 | #define f2fs_bug_on(condition) BUG_ON(condition) | 24 | #define f2fs_bug_on(sbi, condition) BUG_ON(condition) |
25 | #define f2fs_down_write(x, y) down_write_nest_lock(x, y) | 25 | #define f2fs_down_write(x, y) down_write_nest_lock(x, y) |
26 | #else | 26 | #else |
27 | #define f2fs_bug_on(condition) | 27 | #define f2fs_bug_on(sbi, condition) \ |
28 | do { \ | ||
29 | if (unlikely(condition)) { \ | ||
30 | WARN_ON(1); \ | ||
31 | sbi->need_fsck = true; \ | ||
32 | } \ | ||
33 | } while (0) | ||
28 | #define f2fs_down_write(x, y) down_write(x) | 34 | #define f2fs_down_write(x, y) down_write(x) |
29 | #endif | 35 | #endif |
30 | 36 | ||
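The f2fs_bug_on() rework above changes failure policy by build type: CONFIG_F2FS_CHECK_FS builds still halt via BUG_ON(), while production builds degrade to a WARN plus sbi->need_fsck = true, which do_checkpoint() persists as CP_FSCK_FLAG (see the checkpoint.c hunk earlier) so fsck.f2fs runs on the next mount. Call sites change only by gaining the sbi argument:

	/* release build: warns, flags the fs for fsck, and continues */
	f2fs_bug_on(sbi, sbi->n_orphans == 0);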
@@ -90,6 +96,20 @@ enum { | |||
90 | SIT_BITMAP | 96 | SIT_BITMAP |
91 | }; | 97 | }; |
92 | 98 | ||
99 | enum { | ||
100 | CP_UMOUNT, | ||
101 | CP_SYNC, | ||
102 | CP_DISCARD, | ||
103 | }; | ||
104 | |||
105 | struct cp_control { | ||
106 | int reason; | ||
107 | __u64 trim_start; | ||
108 | __u64 trim_end; | ||
109 | __u64 trim_minlen; | ||
110 | __u64 trimmed; | ||
111 | }; | ||
112 | |||
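struct cp_control threads a checkpoint reason, and for CP_DISCARD an fstrim window, through write_checkpoint() and flush_sit_entries() (both re-prototyped below). A hedged sketch of the plain sync case, matching the new write_checkpoint() signature:

/* Sketch: issuing an ordinary checkpoint with the new control block.
 * f2fs_trim_fs() would instead set CP_DISCARD plus the trim_start/
 * trim_end/trim_minlen window and read the result back from ->trimmed.
 */
static void example_sync_checkpoint(struct f2fs_sb_info *sbi)
{
	struct cp_control cpc = {
		.reason = CP_SYNC,	/* no discard window needed */
	};

	write_checkpoint(sbi, &cpc);
}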
93 | /* | 113 | /* |
94 | * For CP/NAT/SIT/SSA readahead | 114 | * For CP/NAT/SIT/SSA readahead |
95 | */ | 115 | */ |
@@ -97,7 +117,8 @@ enum { | |||
97 | META_CP, | 117 | META_CP, |
98 | META_NAT, | 118 | META_NAT, |
99 | META_SIT, | 119 | META_SIT, |
100 | META_SSA | 120 | META_SSA, |
121 | META_POR, | ||
101 | }; | 122 | }; |
102 | 123 | ||
103 | /* for the list of ino */ | 124 | /* for the list of ino */ |
@@ -130,7 +151,9 @@ struct discard_entry { | |||
130 | struct fsync_inode_entry { | 151 | struct fsync_inode_entry { |
131 | struct list_head list; /* list head */ | 152 | struct list_head list; /* list head */ |
132 | struct inode *inode; /* vfs inode pointer */ | 153 | struct inode *inode; /* vfs inode pointer */ |
133 | block_t blkaddr; /* block address locating the last inode */ | 154 | block_t blkaddr; /* block address locating the last fsync */ |
155 | block_t last_dentry; /* block address locating the last dentry */ | ||
156 | block_t last_inode; /* block address locating the last inode */ | ||
134 | }; | 157 | }; |
135 | 158 | ||
136 | #define nats_in_cursum(sum) (le16_to_cpu(sum->n_nats)) | 159 | #define nats_in_cursum(sum) (le16_to_cpu(sum->n_nats)) |
@@ -141,6 +164,9 @@ struct fsync_inode_entry { | |||
141 | #define sit_in_journal(sum, i) (sum->sit_j.entries[i].se) | 164 | #define sit_in_journal(sum, i) (sum->sit_j.entries[i].se) |
142 | #define segno_in_journal(sum, i) (sum->sit_j.entries[i].segno) | 165 | #define segno_in_journal(sum, i) (sum->sit_j.entries[i].segno) |
143 | 166 | ||
167 | #define MAX_NAT_JENTRIES(sum) (NAT_JOURNAL_ENTRIES - nats_in_cursum(sum)) | ||
168 | #define MAX_SIT_JENTRIES(sum) (SIT_JOURNAL_ENTRIES - sits_in_cursum(sum)) | ||
169 | |||
144 | static inline int update_nats_in_cursum(struct f2fs_summary_block *rs, int i) | 170 | static inline int update_nats_in_cursum(struct f2fs_summary_block *rs, int i) |
145 | { | 171 | { |
146 | int before = nats_in_cursum(rs); | 172 | int before = nats_in_cursum(rs); |
@@ -155,11 +181,24 @@ static inline int update_sits_in_cursum(struct f2fs_summary_block *rs, int i) | |||
155 | return before; | 181 | return before; |
156 | } | 182 | } |
157 | 183 | ||
184 | static inline bool __has_cursum_space(struct f2fs_summary_block *sum, int size, | ||
185 | int type) | ||
186 | { | ||
187 | if (type == NAT_JOURNAL) | ||
188 | return size <= MAX_NAT_JENTRIES(sum); | ||
189 | return size <= MAX_SIT_JENTRIES(sum); | ||
190 | } | ||
191 | |||
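__has_cursum_space() centralizes the "do n more journal entries still fit" test that callers previously open-coded against nats_in_cursum()/sits_in_cursum(). Intended call shape (the NAT_JOURNAL selector is the one f2fs uses elsewhere; the batch size is illustrative):

/* Sketch: journal a batch of dirty NAT entries only if they all fit,
 * otherwise the caller falls back to rewriting NAT pages directly.
 */
static bool example_can_journal_nats(struct f2fs_summary_block *sum,
					int dirty_nats)
{
	return __has_cursum_space(sum, dirty_nats, NAT_JOURNAL);
}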
158 | /* | 192 | /* |
159 | * ioctl commands | 193 | * ioctl commands |
160 | */ | 194 | */ |
161 | #define F2FS_IOC_GETFLAGS FS_IOC_GETFLAGS | 195 | #define F2FS_IOC_GETFLAGS FS_IOC_GETFLAGS |
162 | #define F2FS_IOC_SETFLAGS FS_IOC_SETFLAGS | 196 | #define F2FS_IOC_SETFLAGS FS_IOC_SETFLAGS |
197 | |||
198 | #define F2FS_IOCTL_MAGIC 0xf5 | ||
199 | #define F2FS_IOC_START_ATOMIC_WRITE _IO(F2FS_IOCTL_MAGIC, 1) | ||
200 | #define F2FS_IOC_COMMIT_ATOMIC_WRITE _IO(F2FS_IOCTL_MAGIC, 2) | ||
201 | #define F2FS_IOC_START_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 3) | ||
163 | 202 | ||
164 | #if defined(__KERNEL__) && defined(CONFIG_COMPAT) | 203 | #if defined(__KERNEL__) && defined(CONFIG_COMPAT) |
165 | /* | 204 | /* |
@@ -222,13 +261,16 @@ struct f2fs_inode_info { | |||
222 | /* Use below internally in f2fs */ | 261 | /* Use below internally in f2fs */ |
223 | unsigned long flags; /* use to pass per-file flags */ | 262 | unsigned long flags; /* use to pass per-file flags */ |
224 | struct rw_semaphore i_sem; /* protect fi info */ | 263 | struct rw_semaphore i_sem; /* protect fi info */ |
225 | atomic_t dirty_dents; /* # of dirty dentry pages */ | 264 | atomic_t dirty_pages; /* # of dirty pages */ |
226 | f2fs_hash_t chash; /* hash value of given file name */ | 265 | f2fs_hash_t chash; /* hash value of given file name */ |
227 | unsigned int clevel; /* maximum level of given file name */ | 266 | unsigned int clevel; /* maximum level of given file name */ |
228 | nid_t i_xattr_nid; /* node id that contains xattrs */ | 267 | nid_t i_xattr_nid; /* node id that contains xattrs */ |
229 | unsigned long long xattr_ver; /* cp version of xattr modification */ | 268 | unsigned long long xattr_ver; /* cp version of xattr modification */ |
230 | struct extent_info ext; /* in-memory extent cache entry */ | 269 | struct extent_info ext; /* in-memory extent cache entry */ |
231 | struct dir_inode_entry *dirty_dir; /* the pointer of dirty dir */ | 270 | struct dir_inode_entry *dirty_dir; /* the pointer of dirty dir */ |
271 | |||
272 | struct list_head inmem_pages; /* inmemory pages managed by f2fs */ | ||
273 | struct mutex inmem_lock; /* lock for inmemory pages */ | ||
232 | }; | 274 | }; |
233 | 275 | ||
234 | static inline void get_extent_info(struct extent_info *ext, | 276 | static inline void get_extent_info(struct extent_info *ext, |
@@ -260,11 +302,10 @@ struct f2fs_nm_info { | |||
260 | 302 | ||
261 | /* NAT cache management */ | 303 | /* NAT cache management */ |
262 | struct radix_tree_root nat_root;/* root of the nat entry cache */ | 304 | struct radix_tree_root nat_root;/* root of the nat entry cache */ |
305 | struct radix_tree_root nat_set_root;/* root of the nat set cache */ | ||
263 | rwlock_t nat_tree_lock; /* protect nat entry tree */ | 306 | rwlock_t nat_tree_lock; /* protect nat entry tree */ |
264 | unsigned int nat_cnt; /* the # of cached nat entries */ | ||
265 | struct list_head nat_entries; /* cached nat entry list (clean) */ | 307 | struct list_head nat_entries; /* cached nat entry list (clean) */ |
266 | struct list_head dirty_nat_entries; /* cached nat entry list (dirty) */ | 308 | unsigned int nat_cnt; /* the # of cached nat entries */ |
267 | struct list_head nat_entry_set; /* nat entry set list */ | ||
268 | unsigned int dirty_nat_cnt; /* total num of nat entries in set */ | 309 | unsigned int dirty_nat_cnt; /* total num of nat entries in set */ |
269 | 310 | ||
270 | /* free node ids management */ | 311 | /* free node ids management */ |
@@ -332,18 +373,16 @@ enum { | |||
332 | }; | 373 | }; |
333 | 374 | ||
334 | struct flush_cmd { | 375 | struct flush_cmd { |
335 | struct flush_cmd *next; | ||
336 | struct completion wait; | 376 | struct completion wait; |
377 | struct llist_node llnode; | ||
337 | int ret; | 378 | int ret; |
338 | }; | 379 | }; |
339 | 380 | ||
340 | struct flush_cmd_control { | 381 | struct flush_cmd_control { |
341 | struct task_struct *f2fs_issue_flush; /* flush thread */ | 382 | struct task_struct *f2fs_issue_flush; /* flush thread */ |
342 | wait_queue_head_t flush_wait_queue; /* waiting queue for wake-up */ | 383 | wait_queue_head_t flush_wait_queue; /* waiting queue for wake-up */ |
343 | struct flush_cmd *issue_list; /* list for command issue */ | 384 | struct llist_head issue_list; /* list for command issue */ |
344 | struct flush_cmd *dispatch_list; /* list for command dispatch */ | 385 | struct llist_node *dispatch_list; /* list for command dispatch */ |
345 | spinlock_t issue_lock; /* for issue list lock */ | ||
346 | struct flush_cmd *issue_tail; /* list tail of issue list */ | ||
347 | }; | 386 | }; |
348 | 387 | ||
349 | struct f2fs_sm_info { | 388 | struct f2fs_sm_info { |
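The flush queue drops its spinlock and hand-rolled tail pointer for a lock-free llist: issuers push with llist_add() and the flush thread drains everything at once with llist_del_all(). A hedged sketch of both halves, simplified from the f2fs_issue_flush()/flush-thread pairing this structure serves (error handling and the actual flush bio are elided):

/* Sketch: producer side - queue one command, wait for its result. */
static int example_issue_flush(struct flush_cmd_control *fcc)
{
	struct flush_cmd cmd;

	init_completion(&cmd.wait);
	llist_add(&cmd.llnode, &fcc->issue_list);	/* lock-free push */
	wake_up(&fcc->flush_wait_queue);
	wait_for_completion(&cmd.wait);
	return cmd.ret;
}

/* Sketch: consumer side - grab the whole pending batch in one shot. */
static void example_dispatch_flushes(struct flush_cmd_control *fcc)
{
	struct flush_cmd *cmd, *next;

	/* llist_del_all() hands back LIFO order; restore submission order */
	fcc->dispatch_list = llist_del_all(&fcc->issue_list);
	fcc->dispatch_list = llist_reverse_order(fcc->dispatch_list);

	llist_for_each_entry_safe(cmd, next, fcc->dispatch_list, llnode) {
		cmd->ret = 0;		/* the flush bio would be issued here */
		complete(&cmd->wait);
	}
	fcc->dispatch_list = NULL;
}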
@@ -369,8 +408,11 @@ struct f2fs_sm_info { | |||
369 | int nr_discards; /* # of discards in the list */ | 408 | int nr_discards; /* # of discards in the list */ |
370 | int max_discards; /* max. discards to be issued */ | 409 | int max_discards; /* max. discards to be issued */ |
371 | 410 | ||
411 | struct list_head sit_entry_set; /* sit entry set list */ | ||
412 | |||
372 | unsigned int ipu_policy; /* in-place-update policy */ | 413 | unsigned int ipu_policy; /* in-place-update policy */ |
373 | unsigned int min_ipu_util; /* in-place-update threshold */ | 414 | unsigned int min_ipu_util; /* in-place-update threshold */ |
415 | unsigned int min_fsync_blocks; /* threshold for fsync */ | ||
374 | 416 | ||
375 | /* for flush command control */ | 417 | /* for flush command control */ |
376 | struct flush_cmd_control *cmd_control_info; | 418 | struct flush_cmd_control *cmd_control_info; |
@@ -395,7 +437,7 @@ enum count_type { | |||
395 | }; | 437 | }; |
396 | 438 | ||
397 | /* | 439 | /* |
398 | * The below are the page types of bios used in submti_bio(). | 440 | * The below are the page types of bios used in submit_bio(). |
399 | * The available types are: | 441 | * The available types are: |
400 | * DATA User data pages. It operates as async mode. | 442 | * DATA User data pages. It operates as async mode. |
401 | * NODE Node pages. It operates as async mode. | 443 | * NODE Node pages. It operates as async mode. |
@@ -434,6 +476,7 @@ struct f2fs_sb_info { | |||
434 | struct buffer_head *raw_super_buf; /* buffer head of raw sb */ | 476 | struct buffer_head *raw_super_buf; /* buffer head of raw sb */ |
435 | struct f2fs_super_block *raw_super; /* raw super block pointer */ | 477 | struct f2fs_super_block *raw_super; /* raw super block pointer */ |
436 | int s_dirty; /* dirty flag for checkpoint */ | 478 | int s_dirty; /* dirty flag for checkpoint */ |
479 | bool need_fsck; /* need fsck.f2fs to fix */ | ||
437 | 480 | ||
438 | /* for node-related operations */ | 481 | /* for node-related operations */ |
439 | struct f2fs_nm_info *nm_info; /* node manager */ | 482 | struct f2fs_nm_info *nm_info; /* node manager */ |
@@ -470,7 +513,7 @@ struct f2fs_sb_info { | |||
470 | struct list_head dir_inode_list; /* dir inode list */ | 513 | struct list_head dir_inode_list; /* dir inode list */ |
471 | spinlock_t dir_inode_lock; /* for dir inode list lock */ | 514 | spinlock_t dir_inode_lock; /* for dir inode list lock */ |
472 | 515 | ||
473 | /* basic file system units */ | 516 | /* basic filesystem units */ |
474 | unsigned int log_sectors_per_block; /* log2 sectors per block */ | 517 | unsigned int log_sectors_per_block; /* log2 sectors per block */ |
475 | unsigned int log_blocksize; /* log2 block size */ | 518 | unsigned int log_blocksize; /* log2 block size */ |
476 | unsigned int blocksize; /* block size */ | 519 | unsigned int blocksize; /* block size */ |
@@ -539,6 +582,21 @@ static inline struct f2fs_sb_info *F2FS_SB(struct super_block *sb) | |||
539 | return sb->s_fs_info; | 582 | return sb->s_fs_info; |
540 | } | 583 | } |
541 | 584 | ||
585 | static inline struct f2fs_sb_info *F2FS_I_SB(struct inode *inode) | ||
586 | { | ||
587 | return F2FS_SB(inode->i_sb); | ||
588 | } | ||
589 | |||
590 | static inline struct f2fs_sb_info *F2FS_M_SB(struct address_space *mapping) | ||
591 | { | ||
592 | return F2FS_I_SB(mapping->host); | ||
593 | } | ||
594 | |||
595 | static inline struct f2fs_sb_info *F2FS_P_SB(struct page *page) | ||
596 | { | ||
597 | return F2FS_M_SB(page->mapping); | ||
598 | } | ||
599 | |||
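These helpers collapse the F2FS_SB(inode->i_sb) pattern that the rest of this patch replaces wholesale; a page, a mapping, and an inode now each reach the sb_info in one call. Purely illustrative:

/* Sketch: the same f2fs_sb_info reached from three starting points. */
static void example_sbi_routes(struct page *page)
{
	struct address_space *mapping = page->mapping;
	struct inode *inode = mapping->host;

	struct f2fs_sb_info *a = F2FS_P_SB(page);	/* page -> mapping -> host -> sb */
	struct f2fs_sb_info *b = F2FS_M_SB(mapping);	/* mapping -> host -> sb */
	struct f2fs_sb_info *c = F2FS_I_SB(inode);	/* inode -> i_sb */

	WARN_ON(a != b || b != c);	/* all three are identical */
}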
542 | static inline struct f2fs_super_block *F2FS_RAW_SUPER(struct f2fs_sb_info *sbi) | 600 | static inline struct f2fs_super_block *F2FS_RAW_SUPER(struct f2fs_sb_info *sbi) |
543 | { | 601 | { |
544 | return (struct f2fs_super_block *)(sbi->raw_super); | 602 | return (struct f2fs_super_block *)(sbi->raw_super); |
@@ -703,8 +761,8 @@ static inline void dec_valid_block_count(struct f2fs_sb_info *sbi, | |||
703 | blkcnt_t count) | 761 | blkcnt_t count) |
704 | { | 762 | { |
705 | spin_lock(&sbi->stat_lock); | 763 | spin_lock(&sbi->stat_lock); |
706 | f2fs_bug_on(sbi->total_valid_block_count < (block_t) count); | 764 | f2fs_bug_on(sbi, sbi->total_valid_block_count < (block_t) count); |
707 | f2fs_bug_on(inode->i_blocks < count); | 765 | f2fs_bug_on(sbi, inode->i_blocks < count); |
708 | inode->i_blocks -= count; | 766 | inode->i_blocks -= count; |
709 | sbi->total_valid_block_count -= (block_t)count; | 767 | sbi->total_valid_block_count -= (block_t)count; |
710 | spin_unlock(&sbi->stat_lock); | 768 | spin_unlock(&sbi->stat_lock); |
@@ -716,10 +774,11 @@ static inline void inc_page_count(struct f2fs_sb_info *sbi, int count_type) | |||
716 | F2FS_SET_SB_DIRT(sbi); | 774 | F2FS_SET_SB_DIRT(sbi); |
717 | } | 775 | } |
718 | 776 | ||
719 | static inline void inode_inc_dirty_dents(struct inode *inode) | 777 | static inline void inode_inc_dirty_pages(struct inode *inode) |
720 | { | 778 | { |
721 | inc_page_count(F2FS_SB(inode->i_sb), F2FS_DIRTY_DENTS); | 779 | atomic_inc(&F2FS_I(inode)->dirty_pages); |
722 | atomic_inc(&F2FS_I(inode)->dirty_dents); | 780 | if (S_ISDIR(inode->i_mode)) |
781 | inc_page_count(F2FS_I_SB(inode), F2FS_DIRTY_DENTS); | ||
723 | } | 782 | } |
724 | 783 | ||
725 | static inline void dec_page_count(struct f2fs_sb_info *sbi, int count_type) | 784 | static inline void dec_page_count(struct f2fs_sb_info *sbi, int count_type) |
@@ -727,13 +786,15 @@ static inline void dec_page_count(struct f2fs_sb_info *sbi, int count_type) | |||
727 | atomic_dec(&sbi->nr_pages[count_type]); | 786 | atomic_dec(&sbi->nr_pages[count_type]); |
728 | } | 787 | } |
729 | 788 | ||
730 | static inline void inode_dec_dirty_dents(struct inode *inode) | 789 | static inline void inode_dec_dirty_pages(struct inode *inode) |
731 | { | 790 | { |
732 | if (!S_ISDIR(inode->i_mode)) | 791 | if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode)) |
733 | return; | 792 | return; |
734 | 793 | ||
735 | dec_page_count(F2FS_SB(inode->i_sb), F2FS_DIRTY_DENTS); | 794 | atomic_dec(&F2FS_I(inode)->dirty_pages); |
736 | atomic_dec(&F2FS_I(inode)->dirty_dents); | 795 | |
796 | if (S_ISDIR(inode->i_mode)) | ||
797 | dec_page_count(F2FS_I_SB(inode), F2FS_DIRTY_DENTS); | ||
737 | } | 798 | } |
738 | 799 | ||
739 | static inline int get_pages(struct f2fs_sb_info *sbi, int count_type) | 800 | static inline int get_pages(struct f2fs_sb_info *sbi, int count_type) |
@@ -741,9 +802,9 @@ static inline int get_pages(struct f2fs_sb_info *sbi, int count_type) | |||
741 | return atomic_read(&sbi->nr_pages[count_type]); | 802 | return atomic_read(&sbi->nr_pages[count_type]); |
742 | } | 803 | } |
743 | 804 | ||
744 | static inline int get_dirty_dents(struct inode *inode) | 805 | static inline int get_dirty_pages(struct inode *inode) |
745 | { | 806 | { |
746 | return atomic_read(&F2FS_I(inode)->dirty_dents); | 807 | return atomic_read(&F2FS_I(inode)->dirty_pages); |
747 | } | 808 | } |
748 | 809 | ||
749 | static inline int get_blocktype_secs(struct f2fs_sb_info *sbi, int block_type) | 810 | static inline int get_blocktype_secs(struct f2fs_sb_info *sbi, int block_type) |
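With dirty_dents generalized to dirty_pages, regular files are now counted per inode as well, while only directories keep feeding the global F2FS_DIRTY_DENTS counter. A sketch of the balanced inc/dec pairing these helpers expect (hypothetical wrappers, not part of the patch):

/* Sketch: every dirty transition is mirrored by exactly one inc/dec,
 * keeping get_dirty_pages(inode) an accurate per-inode count.
 */
static void example_mark_page_dirty(struct inode *inode, struct page *page)
{
	if (!PageDirty(page)) {
		__set_page_dirty_nobuffers(page);
		inode_inc_dirty_pages(inode);
	}
}

static void example_write_page(struct inode *inode, struct page *page)
{
	if (clear_page_dirty_for_io(page))
		inode_dec_dirty_pages(inode);	/* mirrors the inc above */
}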
@@ -799,7 +860,7 @@ static inline block_t __start_cp_addr(struct f2fs_sb_info *sbi) | |||
799 | 860 | ||
800 | /* | 861 | /* |
801 | * odd numbered checkpoint should be at cp segment 0 | 862 | * odd numbered checkpoint should be at cp segment 0 |
802 | * and even segent must be at cp segment 1 | 863 | * and even segment must be at cp segment 1 |
803 | */ | 864 | */ |
804 | if (!(ckpt_version & 1)) | 865 | if (!(ckpt_version & 1)) |
805 | start_addr += sbi->blocks_per_seg; | 866 | start_addr += sbi->blocks_per_seg; |
@@ -848,9 +909,9 @@ static inline void dec_valid_node_count(struct f2fs_sb_info *sbi, | |||
848 | { | 909 | { |
849 | spin_lock(&sbi->stat_lock); | 910 | spin_lock(&sbi->stat_lock); |
850 | 911 | ||
851 | f2fs_bug_on(!sbi->total_valid_block_count); | 912 | f2fs_bug_on(sbi, !sbi->total_valid_block_count); |
852 | f2fs_bug_on(!sbi->total_valid_node_count); | 913 | f2fs_bug_on(sbi, !sbi->total_valid_node_count); |
853 | f2fs_bug_on(!inode->i_blocks); | 914 | f2fs_bug_on(sbi, !inode->i_blocks); |
854 | 915 | ||
855 | inode->i_blocks--; | 916 | inode->i_blocks--; |
856 | sbi->total_valid_node_count--; | 917 | sbi->total_valid_node_count--; |
@@ -867,7 +928,7 @@ static inline unsigned int valid_node_count(struct f2fs_sb_info *sbi) | |||
867 | static inline void inc_valid_inode_count(struct f2fs_sb_info *sbi) | 928 | static inline void inc_valid_inode_count(struct f2fs_sb_info *sbi) |
868 | { | 929 | { |
869 | spin_lock(&sbi->stat_lock); | 930 | spin_lock(&sbi->stat_lock); |
870 | f2fs_bug_on(sbi->total_valid_inode_count == sbi->total_node_count); | 931 | f2fs_bug_on(sbi, sbi->total_valid_inode_count == sbi->total_node_count); |
871 | sbi->total_valid_inode_count++; | 932 | sbi->total_valid_inode_count++; |
872 | spin_unlock(&sbi->stat_lock); | 933 | spin_unlock(&sbi->stat_lock); |
873 | } | 934 | } |
@@ -875,7 +936,7 @@ static inline void inc_valid_inode_count(struct f2fs_sb_info *sbi) | |||
875 | static inline void dec_valid_inode_count(struct f2fs_sb_info *sbi) | 936 | static inline void dec_valid_inode_count(struct f2fs_sb_info *sbi) |
876 | { | 937 | { |
877 | spin_lock(&sbi->stat_lock); | 938 | spin_lock(&sbi->stat_lock); |
878 | f2fs_bug_on(!sbi->total_valid_inode_count); | 939 | f2fs_bug_on(sbi, !sbi->total_valid_inode_count); |
879 | sbi->total_valid_inode_count--; | 940 | sbi->total_valid_inode_count--; |
880 | spin_unlock(&sbi->stat_lock); | 941 | spin_unlock(&sbi->stat_lock); |
881 | } | 942 | } |
@@ -891,7 +952,7 @@ static inline void f2fs_put_page(struct page *page, int unlock) | |||
891 | return; | 952 | return; |
892 | 953 | ||
893 | if (unlock) { | 954 | if (unlock) { |
894 | f2fs_bug_on(!PageLocked(page)); | 955 | f2fs_bug_on(F2FS_P_SB(page), !PageLocked(page)); |
895 | unlock_page(page); | 956 | unlock_page(page); |
896 | } | 957 | } |
897 | page_cache_release(page); | 958 | page_cache_release(page); |
@@ -998,7 +1059,9 @@ enum { | |||
998 | FI_INLINE_DATA, /* used for inline data*/ | 1059 | FI_INLINE_DATA, /* used for inline data*/ |
999 | FI_APPEND_WRITE, /* inode has appended data */ | 1060 | FI_APPEND_WRITE, /* inode has appended data */ |
1000 | FI_UPDATE_WRITE, /* inode has in-place-update data */ | 1061 | FI_UPDATE_WRITE, /* inode has in-place-update data */ |
1001 | FI_NEED_IPU, /* used fo ipu for fdatasync */ | 1062 | FI_NEED_IPU, /* used for ipu per file */ |
1063 | FI_ATOMIC_FILE, /* indicate atomic file */ | ||
1064 | FI_VOLATILE_FILE, /* indicate volatile file */ | ||
1002 | }; | 1065 | }; |
1003 | 1066 | ||
1004 | static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag) | 1067 | static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag) |
@@ -1085,6 +1148,16 @@ static inline int f2fs_has_inline_data(struct inode *inode) | |||
1085 | return is_inode_flag_set(F2FS_I(inode), FI_INLINE_DATA); | 1148 | return is_inode_flag_set(F2FS_I(inode), FI_INLINE_DATA); |
1086 | } | 1149 | } |
1087 | 1150 | ||
1151 | static inline bool f2fs_is_atomic_file(struct inode *inode) | ||
1152 | { | ||
1153 | return is_inode_flag_set(F2FS_I(inode), FI_ATOMIC_FILE); | ||
1154 | } | ||
1155 | |||
1156 | static inline bool f2fs_is_volatile_file(struct inode *inode) | ||
1157 | { | ||
1158 | return is_inode_flag_set(F2FS_I(inode), FI_VOLATILE_FILE); | ||
1159 | } | ||
1160 | |||
1088 | static inline void *inline_data_addr(struct page *page) | 1161 | static inline void *inline_data_addr(struct page *page) |
1089 | { | 1162 | { |
1090 | struct f2fs_inode *ri = F2FS_INODE(page); | 1163 | struct f2fs_inode *ri = F2FS_INODE(page); |
@@ -1096,6 +1169,11 @@ static inline int f2fs_readonly(struct super_block *sb) | |||
1096 | return sb->s_flags & MS_RDONLY; | 1169 | return sb->s_flags & MS_RDONLY; |
1097 | } | 1170 | } |
1098 | 1171 | ||
1172 | static inline bool f2fs_cp_error(struct f2fs_sb_info *sbi) | ||
1173 | { | ||
1174 | return is_set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG); | ||
1175 | } | ||
1176 | |||
1099 | static inline void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi) | 1177 | static inline void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi) |
1100 | { | 1178 | { |
1101 | set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG); | 1179 | set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG); |
@@ -1117,7 +1195,7 @@ static inline void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi) | |||
1117 | */ | 1195 | */ |
1118 | int f2fs_sync_file(struct file *, loff_t, loff_t, int); | 1196 | int f2fs_sync_file(struct file *, loff_t, loff_t, int); |
1119 | void truncate_data_blocks(struct dnode_of_data *); | 1197 | void truncate_data_blocks(struct dnode_of_data *); |
1120 | int truncate_blocks(struct inode *, u64); | 1198 | int truncate_blocks(struct inode *, u64, bool); |
1121 | void f2fs_truncate(struct inode *); | 1199 | void f2fs_truncate(struct inode *); |
1122 | int f2fs_getattr(struct vfsmount *, struct dentry *, struct kstat *); | 1200 | int f2fs_getattr(struct vfsmount *, struct dentry *, struct kstat *); |
1123 | int f2fs_setattr(struct dentry *, struct iattr *); | 1201 | int f2fs_setattr(struct dentry *, struct iattr *); |
@@ -1136,6 +1214,7 @@ void update_inode(struct inode *, struct page *); | |||
1136 | void update_inode_page(struct inode *); | 1214 | void update_inode_page(struct inode *); |
1137 | int f2fs_write_inode(struct inode *, struct writeback_control *); | 1215 | int f2fs_write_inode(struct inode *, struct writeback_control *); |
1138 | void f2fs_evict_inode(struct inode *); | 1216 | void f2fs_evict_inode(struct inode *); |
1217 | void handle_failed_inode(struct inode *); | ||
1139 | 1218 | ||
1140 | /* | 1219 | /* |
1141 | * namei.c | 1220 | * namei.c |
@@ -1183,9 +1262,9 @@ struct dnode_of_data; | |||
1183 | struct node_info; | 1262 | struct node_info; |
1184 | 1263 | ||
1185 | bool available_free_memory(struct f2fs_sb_info *, int); | 1264 | bool available_free_memory(struct f2fs_sb_info *, int); |
1186 | int is_checkpointed_node(struct f2fs_sb_info *, nid_t); | 1265 | bool is_checkpointed_node(struct f2fs_sb_info *, nid_t); |
1187 | bool fsync_mark_done(struct f2fs_sb_info *, nid_t); | 1266 | bool has_fsynced_inode(struct f2fs_sb_info *, nid_t); |
1188 | void fsync_mark_clear(struct f2fs_sb_info *, nid_t); | 1267 | bool need_inode_block_update(struct f2fs_sb_info *, nid_t); |
1189 | void get_node_info(struct f2fs_sb_info *, nid_t, struct node_info *); | 1268 | void get_node_info(struct f2fs_sb_info *, nid_t, struct node_info *); |
1190 | int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int); | 1269 | int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int); |
1191 | int truncate_inode_blocks(struct inode *, pgoff_t); | 1270 | int truncate_inode_blocks(struct inode *, pgoff_t); |
@@ -1202,10 +1281,8 @@ int sync_node_pages(struct f2fs_sb_info *, nid_t, struct writeback_control *); | |||
1202 | bool alloc_nid(struct f2fs_sb_info *, nid_t *); | 1281 | bool alloc_nid(struct f2fs_sb_info *, nid_t *); |
1203 | void alloc_nid_done(struct f2fs_sb_info *, nid_t); | 1282 | void alloc_nid_done(struct f2fs_sb_info *, nid_t); |
1204 | void alloc_nid_failed(struct f2fs_sb_info *, nid_t); | 1283 | void alloc_nid_failed(struct f2fs_sb_info *, nid_t); |
1205 | void recover_node_page(struct f2fs_sb_info *, struct page *, | ||
1206 | struct f2fs_summary *, struct node_info *, block_t); | ||
1207 | void recover_inline_xattr(struct inode *, struct page *); | 1284 | void recover_inline_xattr(struct inode *, struct page *); |
1208 | bool recover_xattr_data(struct inode *, struct page *, block_t); | 1285 | void recover_xattr_data(struct inode *, struct page *, block_t); |
1209 | int recover_inode_page(struct f2fs_sb_info *, struct page *); | 1286 | int recover_inode_page(struct f2fs_sb_info *, struct page *); |
1210 | int restore_node_summary(struct f2fs_sb_info *, unsigned int, | 1287 | int restore_node_summary(struct f2fs_sb_info *, unsigned int, |
1211 | struct f2fs_summary_block *); | 1288 | struct f2fs_summary_block *); |
@@ -1218,6 +1295,8 @@ void destroy_node_manager_caches(void); | |||
1218 | /* | 1295 | /* |
1219 | * segment.c | 1296 | * segment.c |
1220 | */ | 1297 | */ |
1298 | void register_inmem_page(struct inode *, struct page *); | ||
1299 | void commit_inmem_pages(struct inode *, bool); | ||
1221 | void f2fs_balance_fs(struct f2fs_sb_info *); | 1300 | void f2fs_balance_fs(struct f2fs_sb_info *); |
1222 | void f2fs_balance_fs_bg(struct f2fs_sb_info *); | 1301 | void f2fs_balance_fs_bg(struct f2fs_sb_info *); |
1223 | int f2fs_issue_flush(struct f2fs_sb_info *); | 1302 | int f2fs_issue_flush(struct f2fs_sb_info *); |
@@ -1226,9 +1305,11 @@ void destroy_flush_cmd_control(struct f2fs_sb_info *); | |||
1226 | void invalidate_blocks(struct f2fs_sb_info *, block_t); | 1305 | void invalidate_blocks(struct f2fs_sb_info *, block_t); |
1227 | void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t); | 1306 | void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t); |
1228 | void clear_prefree_segments(struct f2fs_sb_info *); | 1307 | void clear_prefree_segments(struct f2fs_sb_info *); |
1308 | void release_discard_addrs(struct f2fs_sb_info *); | ||
1229 | void discard_next_dnode(struct f2fs_sb_info *, block_t); | 1309 | void discard_next_dnode(struct f2fs_sb_info *, block_t); |
1230 | int npages_for_summary_flush(struct f2fs_sb_info *); | 1310 | int npages_for_summary_flush(struct f2fs_sb_info *); |
1231 | void allocate_new_segments(struct f2fs_sb_info *); | 1311 | void allocate_new_segments(struct f2fs_sb_info *); |
1312 | int f2fs_trim_fs(struct f2fs_sb_info *, struct fstrim_range *); | ||
1232 | struct page *get_sum_page(struct f2fs_sb_info *, unsigned int); | 1313 | struct page *get_sum_page(struct f2fs_sb_info *, unsigned int); |
1233 | void write_meta_page(struct f2fs_sb_info *, struct page *); | 1314 | void write_meta_page(struct f2fs_sb_info *, struct page *); |
1234 | void write_node_page(struct f2fs_sb_info *, struct page *, | 1315 | void write_node_page(struct f2fs_sb_info *, struct page *, |
@@ -1238,8 +1319,6 @@ void write_data_page(struct page *, struct dnode_of_data *, block_t *, | |||
1238 | void rewrite_data_page(struct page *, block_t, struct f2fs_io_info *); | 1319 | void rewrite_data_page(struct page *, block_t, struct f2fs_io_info *); |
1239 | void recover_data_page(struct f2fs_sb_info *, struct page *, | 1320 | void recover_data_page(struct f2fs_sb_info *, struct page *, |
1240 | struct f2fs_summary *, block_t, block_t); | 1321 | struct f2fs_summary *, block_t, block_t); |
1241 | void rewrite_node_page(struct f2fs_sb_info *, struct page *, | ||
1242 | struct f2fs_summary *, block_t, block_t); | ||
1243 | void allocate_data_block(struct f2fs_sb_info *, struct page *, | 1322 | void allocate_data_block(struct f2fs_sb_info *, struct page *, |
1244 | block_t, block_t *, struct f2fs_summary *, int); | 1323 | block_t, block_t *, struct f2fs_summary *, int); |
1245 | void f2fs_wait_on_page_writeback(struct page *, enum page_type); | 1324 | void f2fs_wait_on_page_writeback(struct page *, enum page_type); |
@@ -1247,7 +1326,7 @@ void write_data_summaries(struct f2fs_sb_info *, block_t); | |||
1247 | void write_node_summaries(struct f2fs_sb_info *, block_t); | 1326 | void write_node_summaries(struct f2fs_sb_info *, block_t); |
1248 | int lookup_journal_in_cursum(struct f2fs_summary_block *, | 1327 | int lookup_journal_in_cursum(struct f2fs_summary_block *, |
1249 | int, unsigned int, int); | 1328 | int, unsigned int, int); |
1250 | void flush_sit_entries(struct f2fs_sb_info *); | 1329 | void flush_sit_entries(struct f2fs_sb_info *, struct cp_control *); |
1251 | int build_segment_manager(struct f2fs_sb_info *); | 1330 | int build_segment_manager(struct f2fs_sb_info *); |
1252 | void destroy_segment_manager(struct f2fs_sb_info *); | 1331 | void destroy_segment_manager(struct f2fs_sb_info *); |
1253 | int __init create_segment_manager_caches(void); | 1332 | int __init create_segment_manager_caches(void); |
@@ -1258,10 +1337,12 @@ void destroy_segment_manager_caches(void); | |||
1258 | */ | 1337 | */ |
1259 | struct page *grab_meta_page(struct f2fs_sb_info *, pgoff_t); | 1338 | struct page *grab_meta_page(struct f2fs_sb_info *, pgoff_t); |
1260 | struct page *get_meta_page(struct f2fs_sb_info *, pgoff_t); | 1339 | struct page *get_meta_page(struct f2fs_sb_info *, pgoff_t); |
1261 | int ra_meta_pages(struct f2fs_sb_info *, int, int, int); | 1340 | struct page *get_meta_page_ra(struct f2fs_sb_info *, pgoff_t); |
1341 | int ra_meta_pages(struct f2fs_sb_info *, block_t, int, int); | ||
1262 | long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long); | 1342 | long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long); |
1263 | void add_dirty_inode(struct f2fs_sb_info *, nid_t, int type); | 1343 | void add_dirty_inode(struct f2fs_sb_info *, nid_t, int type); |
1264 | void remove_dirty_inode(struct f2fs_sb_info *, nid_t, int type); | 1344 | void remove_dirty_inode(struct f2fs_sb_info *, nid_t, int type); |
1345 | void release_dirty_inode(struct f2fs_sb_info *); | ||
1265 | bool exist_written_data(struct f2fs_sb_info *, nid_t, int); | 1346 | bool exist_written_data(struct f2fs_sb_info *, nid_t, int); |
1266 | int acquire_orphan_inode(struct f2fs_sb_info *); | 1347 | int acquire_orphan_inode(struct f2fs_sb_info *); |
1267 | void release_orphan_inode(struct f2fs_sb_info *); | 1348 | void release_orphan_inode(struct f2fs_sb_info *); |
@@ -1269,11 +1350,11 @@ void add_orphan_inode(struct f2fs_sb_info *, nid_t); | |||
1269 | void remove_orphan_inode(struct f2fs_sb_info *, nid_t); | 1350 | void remove_orphan_inode(struct f2fs_sb_info *, nid_t); |
1270 | void recover_orphan_inodes(struct f2fs_sb_info *); | 1351 | void recover_orphan_inodes(struct f2fs_sb_info *); |
1271 | int get_valid_checkpoint(struct f2fs_sb_info *); | 1352 | int get_valid_checkpoint(struct f2fs_sb_info *); |
1272 | void set_dirty_dir_page(struct inode *, struct page *); | 1353 | void update_dirty_page(struct inode *, struct page *); |
1273 | void add_dirty_dir_inode(struct inode *); | 1354 | void add_dirty_dir_inode(struct inode *); |
1274 | void remove_dirty_dir_inode(struct inode *); | 1355 | void remove_dirty_dir_inode(struct inode *); |
1275 | void sync_dirty_dir_inodes(struct f2fs_sb_info *); | 1356 | void sync_dirty_dir_inodes(struct f2fs_sb_info *); |
1276 | void write_checkpoint(struct f2fs_sb_info *, bool); | 1357 | void write_checkpoint(struct f2fs_sb_info *, struct cp_control *); |
1277 | void init_ino_entry_info(struct f2fs_sb_info *); | 1358 | void init_ino_entry_info(struct f2fs_sb_info *); |
1278 | int __init create_checkpoint_caches(void); | 1359 | int __init create_checkpoint_caches(void); |
1279 | void destroy_checkpoint_caches(void); | 1360 | void destroy_checkpoint_caches(void); |
@@ -1357,12 +1438,12 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) | |||
1357 | #define stat_inc_inline_inode(inode) \ | 1438 | #define stat_inc_inline_inode(inode) \ |
1358 | do { \ | 1439 | do { \ |
1359 | if (f2fs_has_inline_data(inode)) \ | 1440 | if (f2fs_has_inline_data(inode)) \ |
1360 | ((F2FS_SB(inode->i_sb))->inline_inode++); \ | 1441 | ((F2FS_I_SB(inode))->inline_inode++); \ |
1361 | } while (0) | 1442 | } while (0) |
1362 | #define stat_dec_inline_inode(inode) \ | 1443 | #define stat_dec_inline_inode(inode) \ |
1363 | do { \ | 1444 | do { \ |
1364 | if (f2fs_has_inline_data(inode)) \ | 1445 | if (f2fs_has_inline_data(inode)) \ |
1365 | ((F2FS_SB(inode->i_sb))->inline_inode--); \ | 1446 | ((F2FS_I_SB(inode))->inline_inode--); \ |
1366 | } while (0) | 1447 | } while (0) |
1367 | 1448 | ||
1368 | #define stat_inc_seg_type(sbi, curseg) \ | 1449 | #define stat_inc_seg_type(sbi, curseg) \ |
@@ -1439,8 +1520,8 @@ extern const struct inode_operations f2fs_special_inode_operations; | |||
1439 | */ | 1520 | */ |
1440 | bool f2fs_may_inline(struct inode *); | 1521 | bool f2fs_may_inline(struct inode *); |
1441 | int f2fs_read_inline_data(struct inode *, struct page *); | 1522 | int f2fs_read_inline_data(struct inode *, struct page *); |
1442 | int f2fs_convert_inline_data(struct inode *, pgoff_t); | 1523 | int f2fs_convert_inline_data(struct inode *, pgoff_t, struct page *); |
1443 | int f2fs_write_inline_data(struct inode *, struct page *, unsigned int); | 1524 | int f2fs_write_inline_data(struct inode *, struct page *, unsigned int); |
1444 | void truncate_inline_data(struct inode *, u64); | 1525 | void truncate_inline_data(struct inode *, u64); |
1445 | int recover_inline_data(struct inode *, struct page *); | 1526 | bool recover_inline_data(struct inode *, struct page *); |
1446 | #endif | 1527 | #endif |
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 208f1a9bd569..8e68bb64f835 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c | |||
@@ -33,7 +33,7 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, | |||
33 | { | 33 | { |
34 | struct page *page = vmf->page; | 34 | struct page *page = vmf->page; |
35 | struct inode *inode = file_inode(vma->vm_file); | 35 | struct inode *inode = file_inode(vma->vm_file); |
36 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | 36 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); |
37 | struct dnode_of_data dn; | 37 | struct dnode_of_data dn; |
38 | int err; | 38 | int err; |
39 | 39 | ||
@@ -41,6 +41,11 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, | |||
41 | 41 | ||
42 | sb_start_pagefault(inode->i_sb); | 42 | sb_start_pagefault(inode->i_sb); |
43 | 43 | ||
44 | /* force conversion to normal data indices */ | ||
45 | err = f2fs_convert_inline_data(inode, MAX_INLINE_DATA + 1, page); | ||
46 | if (err) | ||
47 | goto out; | ||
48 | |||
44 | /* block allocation */ | 49 | /* block allocation */ |
45 | f2fs_lock_op(sbi); | 50 | f2fs_lock_op(sbi); |
46 | set_new_dnode(&dn, inode, NULL, NULL, 0); | 51 | set_new_dnode(&dn, inode, NULL, NULL, 0); |
@@ -110,11 +115,31 @@ static int get_parent_ino(struct inode *inode, nid_t *pino) | |||
110 | return 1; | 115 | return 1; |
111 | } | 116 | } |
112 | 117 | ||
118 | static inline bool need_do_checkpoint(struct inode *inode) | ||
119 | { | ||
120 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); | ||
121 | bool need_cp = false; | ||
122 | |||
123 | if (!S_ISREG(inode->i_mode) || inode->i_nlink != 1) | ||
124 | need_cp = true; | ||
125 | else if (file_wrong_pino(inode)) | ||
126 | need_cp = true; | ||
127 | else if (!space_for_roll_forward(sbi)) | ||
128 | need_cp = true; | ||
129 | else if (!is_checkpointed_node(sbi, F2FS_I(inode)->i_pino)) | ||
130 | need_cp = true; | ||
131 | else if (F2FS_I(inode)->xattr_ver == cur_cp_version(F2FS_CKPT(sbi))) | ||
132 | need_cp = true; | ||
133 | |||
134 | return need_cp; | ||
135 | } | ||
136 | |||
113 | int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | 137 | int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) |
114 | { | 138 | { |
115 | struct inode *inode = file->f_mapping->host; | 139 | struct inode *inode = file->f_mapping->host; |
116 | struct f2fs_inode_info *fi = F2FS_I(inode); | 140 | struct f2fs_inode_info *fi = F2FS_I(inode); |
117 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | 141 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); |
142 | nid_t ino = inode->i_ino; | ||
118 | int ret = 0; | 143 | int ret = 0; |
119 | bool need_cp = false; | 144 | bool need_cp = false; |
120 | struct writeback_control wbc = { | 145 | struct writeback_control wbc = { |
@@ -129,12 +154,11 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
129 | trace_f2fs_sync_file_enter(inode); | 154 | trace_f2fs_sync_file_enter(inode); |
130 | 155 | ||
131 | /* if fdatasync is triggered, let's do in-place-update */ | 156 | /* if fdatasync is triggered, let's do in-place-update */ |
132 | if (datasync) | 157 | if (get_dirty_pages(inode) <= SM_I(sbi)->min_fsync_blocks) |
133 | set_inode_flag(fi, FI_NEED_IPU); | 158 | set_inode_flag(fi, FI_NEED_IPU); |
134 | |||
135 | ret = filemap_write_and_wait_range(inode->i_mapping, start, end); | 159 | ret = filemap_write_and_wait_range(inode->i_mapping, start, end); |
136 | if (datasync) | 160 | clear_inode_flag(fi, FI_NEED_IPU); |
137 | clear_inode_flag(fi, FI_NEED_IPU); | 161 | |
138 | if (ret) { | 162 | if (ret) { |
139 | trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret); | 163 | trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret); |
140 | return ret; | 164 | return ret; |
@@ -144,33 +168,31 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
144 | * if there is no written data, don't waste time to write recovery info. | 168 | * if there is no written data, don't waste time to write recovery info. |
145 | */ | 169 | */ |
146 | if (!is_inode_flag_set(fi, FI_APPEND_WRITE) && | 170 | if (!is_inode_flag_set(fi, FI_APPEND_WRITE) && |
147 | !exist_written_data(sbi, inode->i_ino, APPEND_INO)) { | 171 | !exist_written_data(sbi, ino, APPEND_INO)) { |
172 | struct page *i = find_get_page(NODE_MAPPING(sbi), ino); | ||
173 | |||
174 | /* But we must not skip the write path if the inode itself was updated */ | ||
175 | if ((i && PageDirty(i)) || need_inode_block_update(sbi, ino)) { | ||
176 | f2fs_put_page(i, 0); | ||
177 | goto go_write; | ||
178 | } | ||
179 | f2fs_put_page(i, 0); | ||
180 | |||
148 | if (is_inode_flag_set(fi, FI_UPDATE_WRITE) || | 181 | if (is_inode_flag_set(fi, FI_UPDATE_WRITE) || |
149 | exist_written_data(sbi, inode->i_ino, UPDATE_INO)) | 182 | exist_written_data(sbi, ino, UPDATE_INO)) |
150 | goto flush_out; | 183 | goto flush_out; |
151 | goto out; | 184 | goto out; |
152 | } | 185 | } |
153 | 186 | go_write: | |
154 | /* guarantee free sections for fsync */ | 187 | /* guarantee free sections for fsync */ |
155 | f2fs_balance_fs(sbi); | 188 | f2fs_balance_fs(sbi); |
156 | 189 | ||
157 | down_read(&fi->i_sem); | ||
158 | |||
159 | /* | 190 | /* |
160 | * Both of fdatasync() and fsync() are able to be recovered from | 191 | * Both of fdatasync() and fsync() are able to be recovered from |
161 | * sudden-power-off. | 192 | * sudden-power-off. |
162 | */ | 193 | */ |
163 | if (!S_ISREG(inode->i_mode) || inode->i_nlink != 1) | 194 | down_read(&fi->i_sem); |
164 | need_cp = true; | 195 | need_cp = need_do_checkpoint(inode); |
165 | else if (file_wrong_pino(inode)) | ||
166 | need_cp = true; | ||
167 | else if (!space_for_roll_forward(sbi)) | ||
168 | need_cp = true; | ||
169 | else if (!is_checkpointed_node(sbi, F2FS_I(inode)->i_pino)) | ||
170 | need_cp = true; | ||
171 | else if (F2FS_I(inode)->xattr_ver == cur_cp_version(F2FS_CKPT(sbi))) | ||
172 | need_cp = true; | ||
173 | |||
174 | up_read(&fi->i_sem); | 196 | up_read(&fi->i_sem); |
175 | 197 | ||
176 | if (need_cp) { | 198 | if (need_cp) { |
@@ -194,26 +216,28 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
194 | up_write(&fi->i_sem); | 216 | up_write(&fi->i_sem); |
195 | } | 217 | } |
196 | } else { | 218 | } else { |
197 | /* if there is no written node page, write its inode page */ | 219 | sync_nodes: |
198 | while (!sync_node_pages(sbi, inode->i_ino, &wbc)) { | 220 | sync_node_pages(sbi, ino, &wbc); |
199 | if (fsync_mark_done(sbi, inode->i_ino)) | 221 | |
200 | goto out; | 222 | if (need_inode_block_update(sbi, ino)) { |
201 | mark_inode_dirty_sync(inode); | 223 | mark_inode_dirty_sync(inode); |
202 | ret = f2fs_write_inode(inode, NULL); | 224 | ret = f2fs_write_inode(inode, NULL); |
203 | if (ret) | 225 | if (ret) |
204 | goto out; | 226 | goto out; |
227 | goto sync_nodes; | ||
205 | } | 228 | } |
206 | ret = wait_on_node_pages_writeback(sbi, inode->i_ino); | 229 | |
230 | ret = wait_on_node_pages_writeback(sbi, ino); | ||
207 | if (ret) | 231 | if (ret) |
208 | goto out; | 232 | goto out; |
209 | 233 | ||
210 | /* once recovery info is written, don't need to track this */ | 234 | /* once recovery info is written, don't need to track this */ |
211 | remove_dirty_inode(sbi, inode->i_ino, APPEND_INO); | 235 | remove_dirty_inode(sbi, ino, APPEND_INO); |
212 | clear_inode_flag(fi, FI_APPEND_WRITE); | 236 | clear_inode_flag(fi, FI_APPEND_WRITE); |
213 | flush_out: | 237 | flush_out: |
214 | remove_dirty_inode(sbi, inode->i_ino, UPDATE_INO); | 238 | remove_dirty_inode(sbi, ino, UPDATE_INO); |
215 | clear_inode_flag(fi, FI_UPDATE_WRITE); | 239 | clear_inode_flag(fi, FI_UPDATE_WRITE); |
216 | ret = f2fs_issue_flush(F2FS_SB(inode->i_sb)); | 240 | ret = f2fs_issue_flush(F2FS_I_SB(inode)); |
217 | } | 241 | } |
218 | out: | 242 | out: |
219 | trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret); | 243 | trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret); |
@@ -288,7 +312,7 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence) | |||
288 | if (err && err != -ENOENT) { | 312 | if (err && err != -ENOENT) { |
289 | goto fail; | 313 | goto fail; |
290 | } else if (err == -ENOENT) { | 314 | } else if (err == -ENOENT) { |
291 | /* direct node is not exist */ | 315 | /* direct node does not exist */ |
292 | if (whence == SEEK_DATA) { | 316 | if (whence == SEEK_DATA) { |
293 | pgofs = PGOFS_OF_NEXT_DNODE(pgofs, | 317 | pgofs = PGOFS_OF_NEXT_DNODE(pgofs, |
294 | F2FS_I(inode)); | 318 | F2FS_I(inode)); |
@@ -340,6 +364,8 @@ static loff_t f2fs_llseek(struct file *file, loff_t offset, int whence) | |||
340 | maxbytes, i_size_read(inode)); | 364 | maxbytes, i_size_read(inode)); |
341 | case SEEK_DATA: | 365 | case SEEK_DATA: |
342 | case SEEK_HOLE: | 366 | case SEEK_HOLE: |
367 | if (offset < 0) | ||
368 | return -ENXIO; | ||
343 | return f2fs_seek_block(file, offset, whence); | 369 | return f2fs_seek_block(file, offset, whence); |
344 | } | 370 | } |
345 | 371 | ||
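With the new guard, a negative offset to SEEK_DATA/SEEK_HOLE fails with ENXIO before ever reaching f2fs_seek_block(). Userspace view (illustrative):

/* Sketch: the llseek bounds check as seen from userspace. */
#define _GNU_SOURCE	/* for SEEK_DATA */
#include <unistd.h>
#include <errno.h>
#include <stdio.h>

static void example_seek_data(int fd)
{
	if (lseek(fd, -1, SEEK_DATA) == (off_t)-1 && errno == ENXIO)
		printf("negative offsets are rejected up front\n");
}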
@@ -356,7 +382,7 @@ static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma) | |||
356 | int truncate_data_blocks_range(struct dnode_of_data *dn, int count) | 382 | int truncate_data_blocks_range(struct dnode_of_data *dn, int count) |
357 | { | 383 | { |
358 | int nr_free = 0, ofs = dn->ofs_in_node; | 384 | int nr_free = 0, ofs = dn->ofs_in_node; |
359 | struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); | 385 | struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); |
360 | struct f2fs_node *raw_node; | 386 | struct f2fs_node *raw_node; |
361 | __le32 *addr; | 387 | __le32 *addr; |
362 | 388 | ||
@@ -417,9 +443,9 @@ out: | |||
417 | f2fs_put_page(page, 1); | 443 | f2fs_put_page(page, 1); |
418 | } | 444 | } |
419 | 445 | ||
420 | int truncate_blocks(struct inode *inode, u64 from) | 446 | int truncate_blocks(struct inode *inode, u64 from, bool lock) |
421 | { | 447 | { |
422 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | 448 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); |
423 | unsigned int blocksize = inode->i_sb->s_blocksize; | 449 | unsigned int blocksize = inode->i_sb->s_blocksize; |
424 | struct dnode_of_data dn; | 450 | struct dnode_of_data dn; |
425 | pgoff_t free_from; | 451 | pgoff_t free_from; |
@@ -433,14 +459,16 @@ int truncate_blocks(struct inode *inode, u64 from) | |||
433 | free_from = (pgoff_t) | 459 | free_from = (pgoff_t) |
434 | ((from + blocksize - 1) >> (sbi->log_blocksize)); | 460 | ((from + blocksize - 1) >> (sbi->log_blocksize)); |
435 | 461 | ||
436 | f2fs_lock_op(sbi); | 462 | if (lock) |
463 | f2fs_lock_op(sbi); | ||
437 | 464 | ||
438 | set_new_dnode(&dn, inode, NULL, NULL, 0); | 465 | set_new_dnode(&dn, inode, NULL, NULL, 0); |
439 | err = get_dnode_of_data(&dn, free_from, LOOKUP_NODE); | 466 | err = get_dnode_of_data(&dn, free_from, LOOKUP_NODE); |
440 | if (err) { | 467 | if (err) { |
441 | if (err == -ENOENT) | 468 | if (err == -ENOENT) |
442 | goto free_next; | 469 | goto free_next; |
443 | f2fs_unlock_op(sbi); | 470 | if (lock) |
471 | f2fs_unlock_op(sbi); | ||
444 | trace_f2fs_truncate_blocks_exit(inode, err); | 472 | trace_f2fs_truncate_blocks_exit(inode, err); |
445 | return err; | 473 | return err; |
446 | } | 474 | } |
@@ -448,7 +476,7 @@ int truncate_blocks(struct inode *inode, u64 from) | |||
448 | count = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode)); | 476 | count = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode)); |
449 | 477 | ||
450 | count -= dn.ofs_in_node; | 478 | count -= dn.ofs_in_node; |
451 | f2fs_bug_on(count < 0); | 479 | f2fs_bug_on(sbi, count < 0); |
452 | 480 | ||
453 | if (dn.ofs_in_node || IS_INODE(dn.node_page)) { | 481 | if (dn.ofs_in_node || IS_INODE(dn.node_page)) { |
454 | truncate_data_blocks_range(&dn, count); | 482 | truncate_data_blocks_range(&dn, count); |
@@ -458,7 +486,8 @@ int truncate_blocks(struct inode *inode, u64 from) | |||
458 | f2fs_put_dnode(&dn); | 486 | f2fs_put_dnode(&dn); |
459 | free_next: | 487 | free_next: |
460 | err = truncate_inode_blocks(inode, free_from); | 488 | err = truncate_inode_blocks(inode, free_from); |
461 | f2fs_unlock_op(sbi); | 489 | if (lock) |
490 | f2fs_unlock_op(sbi); | ||
462 | done: | 491 | done: |
463 | /* lastly zero out the first data page */ | 492 | /* lastly zero out the first data page */ |
464 | truncate_partial_data_page(inode, from); | 493 | truncate_partial_data_page(inode, from); |
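The new bool decides whether truncate_blocks() takes f2fs_lock_op() itself; a caller that already holds the op lock passes false to avoid recursing into it. A hedged sketch of the two caller shapes implied by the changed prototype (the locked caller is an assumption, e.g. a recovery path):

/* Sketch: the common path lets truncate_blocks() lock internally ... */
static void example_truncate(struct inode *inode)
{
	truncate_blocks(inode, i_size_read(inode), true);
}

/* ... while a caller already inside f2fs_lock_op() must pass false. */
static void example_truncate_locked(struct f2fs_sb_info *sbi,
					struct inode *inode)
{
	f2fs_lock_op(sbi);
	truncate_blocks(inode, 0, false);	/* no re-entry into the op lock */
	f2fs_unlock_op(sbi);
}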
@@ -475,7 +504,7 @@ void f2fs_truncate(struct inode *inode) | |||
475 | 504 | ||
476 | trace_f2fs_truncate(inode); | 505 | trace_f2fs_truncate(inode); |
477 | 506 | ||
478 | if (!truncate_blocks(inode, i_size_read(inode))) { | 507 | if (!truncate_blocks(inode, i_size_read(inode), true)) { |
479 | inode->i_mtime = inode->i_ctime = CURRENT_TIME; | 508 | inode->i_mtime = inode->i_ctime = CURRENT_TIME; |
480 | mark_inode_dirty(inode); | 509 | mark_inode_dirty(inode); |
481 | } | 510 | } |
@@ -531,15 +560,22 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) | |||
531 | if (err) | 560 | if (err) |
532 | return err; | 561 | return err; |
533 | 562 | ||
534 | if ((attr->ia_valid & ATTR_SIZE) && | 563 | if (attr->ia_valid & ATTR_SIZE) { |
535 | attr->ia_size != i_size_read(inode)) { | 564 | err = f2fs_convert_inline_data(inode, attr->ia_size, NULL); |
536 | err = f2fs_convert_inline_data(inode, attr->ia_size); | ||
537 | if (err) | 565 | if (err) |
538 | return err; | 566 | return err; |
539 | 567 | ||
540 | truncate_setsize(inode, attr->ia_size); | 568 | if (attr->ia_size != i_size_read(inode)) { |
541 | f2fs_truncate(inode); | 569 | truncate_setsize(inode, attr->ia_size); |
542 | f2fs_balance_fs(F2FS_SB(inode->i_sb)); | 570 | f2fs_truncate(inode); |
571 | f2fs_balance_fs(F2FS_I_SB(inode)); | ||
572 | } else { | ||
573 | /* | ||
574 | * giving a chance to truncate blocks past EOF which | ||
575 | * are fallocated with FALLOC_FL_KEEP_SIZE. | ||
576 | */ | ||
577 | f2fs_truncate(inode); | ||
578 | } | ||
543 | } | 579 | } |
544 | 580 | ||
545 | __setattr_copy(inode, attr); | 581 | __setattr_copy(inode, attr); |
@@ -573,7 +609,7 @@ const struct inode_operations f2fs_file_inode_operations = { | |||
573 | static void fill_zero(struct inode *inode, pgoff_t index, | 609 | static void fill_zero(struct inode *inode, pgoff_t index, |
574 | loff_t start, loff_t len) | 610 | loff_t start, loff_t len) |
575 | { | 611 | { |
576 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | 612 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); |
577 | struct page *page; | 613 | struct page *page; |
578 | 614 | ||
579 | if (!len) | 615 | if (!len) |
@@ -622,7 +658,14 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len) | |||
622 | loff_t off_start, off_end; | 658 | loff_t off_start, off_end; |
623 | int ret = 0; | 659 | int ret = 0; |
624 | 660 | ||
625 | ret = f2fs_convert_inline_data(inode, MAX_INLINE_DATA + 1); | 661 | if (!S_ISREG(inode->i_mode)) |
662 | return -EOPNOTSUPP; | ||
663 | |||
664 | /* skip punching hole beyond i_size */ | ||
665 | if (offset >= inode->i_size) | ||
666 | return ret; | ||
667 | |||
668 | ret = f2fs_convert_inline_data(inode, MAX_INLINE_DATA + 1, NULL); | ||
626 | if (ret) | 669 | if (ret) |
627 | return ret; | 670 | return ret; |
628 | 671 | ||
@@ -645,7 +688,7 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len) | |||
645 | if (pg_start < pg_end) { | 688 | if (pg_start < pg_end) { |
646 | struct address_space *mapping = inode->i_mapping; | 689 | struct address_space *mapping = inode->i_mapping; |
647 | loff_t blk_start, blk_end; | 690 | loff_t blk_start, blk_end; |
648 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | 691 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); |
649 | 692 | ||
650 | f2fs_balance_fs(sbi); | 693 | f2fs_balance_fs(sbi); |
651 | 694 | ||
@@ -666,7 +709,7 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len) | |||
666 | static int expand_inode_data(struct inode *inode, loff_t offset, | 709 | static int expand_inode_data(struct inode *inode, loff_t offset, |
667 | loff_t len, int mode) | 710 | loff_t len, int mode) |
668 | { | 711 | { |
669 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | 712 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); |
670 | pgoff_t index, pg_start, pg_end; | 713 | pgoff_t index, pg_start, pg_end; |
671 | loff_t new_size = i_size_read(inode); | 714 | loff_t new_size = i_size_read(inode); |
672 | loff_t off_start, off_end; | 715 | loff_t off_start, off_end; |
@@ -678,7 +721,7 @@ static int expand_inode_data(struct inode *inode, loff_t offset, | |||
678 | if (ret) | 721 | if (ret) |
679 | return ret; | 722 | return ret; |
680 | 723 | ||
681 | ret = f2fs_convert_inline_data(inode, offset + len); | 724 | ret = f2fs_convert_inline_data(inode, offset + len, NULL); |
682 | if (ret) | 725 | if (ret) |
683 | return ret; | 726 | return ret; |
684 | 727 | ||
@@ -762,61 +805,157 @@ static inline __u32 f2fs_mask_flags(umode_t mode, __u32 flags) | |||
762 | return flags & F2FS_OTHER_FLMASK; | 805 | return flags & F2FS_OTHER_FLMASK; |
763 | } | 806 | } |
764 | 807 | ||
765 | long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | 808 | static int f2fs_ioc_getflags(struct file *filp, unsigned long arg) |
809 | { | ||
810 | struct inode *inode = file_inode(filp); | ||
811 | struct f2fs_inode_info *fi = F2FS_I(inode); | ||
812 | unsigned int flags = fi->i_flags & FS_FL_USER_VISIBLE; | ||
813 | return put_user(flags, (int __user *)arg); | ||
814 | } | ||
815 | |||
816 | static int f2fs_ioc_setflags(struct file *filp, unsigned long arg) | ||
766 | { | 817 | { |
767 | struct inode *inode = file_inode(filp); | 818 | struct inode *inode = file_inode(filp); |
768 | struct f2fs_inode_info *fi = F2FS_I(inode); | 819 | struct f2fs_inode_info *fi = F2FS_I(inode); |
769 | unsigned int flags; | 820 | unsigned int flags = fi->i_flags & FS_FL_USER_VISIBLE; |
821 | unsigned int oldflags; | ||
770 | int ret; | 822 | int ret; |
771 | 823 | ||
772 | switch (cmd) { | 824 | ret = mnt_want_write_file(filp); |
773 | case F2FS_IOC_GETFLAGS: | 825 | if (ret) |
774 | flags = fi->i_flags & FS_FL_USER_VISIBLE; | 826 | return ret; |
775 | return put_user(flags, (int __user *) arg); | ||
776 | case F2FS_IOC_SETFLAGS: | ||
777 | { | ||
778 | unsigned int oldflags; | ||
779 | 827 | ||
780 | ret = mnt_want_write_file(filp); | 828 | if (!inode_owner_or_capable(inode)) { |
781 | if (ret) | 829 | ret = -EACCES; |
782 | return ret; | 830 | goto out; |
831 | } | ||
783 | 832 | ||
784 | if (!inode_owner_or_capable(inode)) { | 833 | if (get_user(flags, (int __user *)arg)) { |
785 | ret = -EACCES; | 834 | ret = -EFAULT; |
786 | goto out; | 835 | goto out; |
787 | } | 836 | } |
837 | |||
838 | flags = f2fs_mask_flags(inode->i_mode, flags); | ||
839 | |||
840 | mutex_lock(&inode->i_mutex); | ||
841 | |||
842 | oldflags = fi->i_flags; | ||
788 | 843 | ||
789 | if (get_user(flags, (int __user *) arg)) { | 844 | if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) { |
790 | ret = -EFAULT; | 845 | if (!capable(CAP_LINUX_IMMUTABLE)) { |
846 | mutex_unlock(&inode->i_mutex); | ||
847 | ret = -EPERM; | ||
791 | goto out; | 848 | goto out; |
792 | } | 849 | } |
850 | } | ||
793 | 851 | ||
794 | flags = f2fs_mask_flags(inode->i_mode, flags); | 852 | flags = flags & FS_FL_USER_MODIFIABLE; |
853 | flags |= oldflags & ~FS_FL_USER_MODIFIABLE; | ||
854 | fi->i_flags = flags; | ||
855 | mutex_unlock(&inode->i_mutex); | ||
795 | 856 | ||
796 | mutex_lock(&inode->i_mutex); | 857 | f2fs_set_inode_flags(inode); |
858 | inode->i_ctime = CURRENT_TIME; | ||
859 | mark_inode_dirty(inode); | ||
860 | out: | ||
861 | mnt_drop_write_file(filp); | ||
862 | return ret; | ||
863 | } | ||
797 | 864 | ||
798 | oldflags = fi->i_flags; | 865 | static int f2fs_ioc_start_atomic_write(struct file *filp) |
866 | { | ||
867 | struct inode *inode = file_inode(filp); | ||
868 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); | ||
799 | 869 | ||
800 | if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) { | 870 | if (!inode_owner_or_capable(inode)) |
801 | if (!capable(CAP_LINUX_IMMUTABLE)) { | 871 | return -EACCES; |
802 | mutex_unlock(&inode->i_mutex); | 872 | |
803 | ret = -EPERM; | 873 | f2fs_balance_fs(sbi); |
804 | goto out; | ||
805 | } | ||
806 | } | ||
807 | 874 | ||
808 | flags = flags & FS_FL_USER_MODIFIABLE; | 875 | set_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE); |
809 | flags |= oldflags & ~FS_FL_USER_MODIFIABLE; | ||
810 | fi->i_flags = flags; | ||
811 | mutex_unlock(&inode->i_mutex); | ||
812 | 876 | ||
813 | f2fs_set_inode_flags(inode); | 877 | return f2fs_convert_inline_data(inode, MAX_INLINE_DATA + 1, NULL); |
814 | inode->i_ctime = CURRENT_TIME; | 878 | } |
815 | mark_inode_dirty(inode); | 879 | |
816 | out: | 880 | static int f2fs_ioc_commit_atomic_write(struct file *filp) |
817 | mnt_drop_write_file(filp); | 881 | { |
882 | struct inode *inode = file_inode(filp); | ||
883 | int ret; | ||
884 | |||
885 | if (!inode_owner_or_capable(inode)) | ||
886 | return -EACCES; | ||
887 | |||
888 | if (f2fs_is_volatile_file(inode)) | ||
889 | return 0; | ||
890 | |||
891 | ret = mnt_want_write_file(filp); | ||
892 | if (ret) | ||
818 | return ret; | 893 | return ret; |
819 | } | 894 | |
895 | if (f2fs_is_atomic_file(inode)) | ||
896 | commit_inmem_pages(inode, false); | ||
897 | |||
898 | ret = f2fs_sync_file(filp, 0, LONG_MAX, 0); | ||
899 | mnt_drop_write_file(filp); | ||
900 | return ret; | ||
901 | } | ||
902 | |||
903 | static int f2fs_ioc_start_volatile_write(struct file *filp) | ||
904 | { | ||
905 | struct inode *inode = file_inode(filp); | ||
906 | |||
907 | if (!inode_owner_or_capable(inode)) | ||
908 | return -EACCES; | ||
909 | |||
910 | set_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE); | ||
911 | return 0; | ||
912 | } | ||
913 | |||
914 | static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg) | ||
915 | { | ||
916 | struct inode *inode = file_inode(filp); | ||
917 | struct super_block *sb = inode->i_sb; | ||
918 | struct request_queue *q = bdev_get_queue(sb->s_bdev); | ||
919 | struct fstrim_range range; | ||
920 | int ret; | ||
921 | |||
922 | if (!capable(CAP_SYS_ADMIN)) | ||
923 | return -EPERM; | ||
924 | |||
925 | if (!blk_queue_discard(q)) | ||
926 | return -EOPNOTSUPP; | ||
927 | |||
928 | if (copy_from_user(&range, (struct fstrim_range __user *)arg, | ||
929 | sizeof(range))) | ||
930 | return -EFAULT; | ||
931 | |||
932 | range.minlen = max((unsigned int)range.minlen, | ||
933 | q->limits.discard_granularity); | ||
934 | ret = f2fs_trim_fs(F2FS_SB(sb), &range); | ||
935 | if (ret < 0) | ||
936 | return ret; | ||
937 | |||
938 | if (copy_to_user((struct fstrim_range __user *)arg, &range, | ||
939 | sizeof(range))) | ||
940 | return -EFAULT; | ||
941 | return 0; | ||
942 | } | ||
943 | |||
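f2fs now services the generic FITRIM ioctl: minlen is clamped up to the device's discard granularity, f2fs_trim_fs() walks the checkpointed segments, and the trimmed byte count is copied back to the caller. Userspace sketch:

/* Sketch: trimming an entire f2fs mount from userspace. */
#include <sys/ioctl.h>
#include <linux/fs.h>	/* FITRIM, struct fstrim_range */
#include <stdio.h>

static int example_fitrim(int fd)
{
	struct fstrim_range range = {
		.start = 0,
		.len = (__u64)-1,	/* whole filesystem */
		.minlen = 0,		/* kernel raises this to the granularity */
	};

	if (ioctl(fd, FITRIM, &range) < 0)
		return -1;
	printf("trimmed %llu bytes\n", (unsigned long long)range.len);
	return 0;
}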
944 | long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | ||
945 | { | ||
946 | switch (cmd) { | ||
947 | case F2FS_IOC_GETFLAGS: | ||
948 | return f2fs_ioc_getflags(filp, arg); | ||
949 | case F2FS_IOC_SETFLAGS: | ||
950 | return f2fs_ioc_setflags(filp, arg); | ||
951 | case F2FS_IOC_START_ATOMIC_WRITE: | ||
952 | return f2fs_ioc_start_atomic_write(filp); | ||
953 | case F2FS_IOC_COMMIT_ATOMIC_WRITE: | ||
954 | return f2fs_ioc_commit_atomic_write(filp); | ||
955 | case F2FS_IOC_START_VOLATILE_WRITE: | ||
956 | return f2fs_ioc_start_volatile_write(filp); | ||
957 | case FITRIM: | ||
958 | return f2fs_ioc_fitrim(filp, arg); | ||
820 | default: | 959 | default: |
821 | return -ENOTTY; | 960 | return -ENOTTY; |
822 | } | 961 | } |
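Taken together, the three private ioctls back database-style commits: writes staged after START_ATOMIC_WRITE sit on the in-memory inmem_pages list and only reach disk when COMMIT_ATOMIC_WRITE flushes and fsyncs them, while volatile files never need the commit. A userspace sketch using the command numbers defined in f2fs.h above:

/* Sketch: an all-or-nothing file update via the new f2fs ioctls. */
#include <sys/ioctl.h>
#include <unistd.h>

#define F2FS_IOCTL_MAGIC		0xf5
#define F2FS_IOC_START_ATOMIC_WRITE	_IO(F2FS_IOCTL_MAGIC, 1)
#define F2FS_IOC_COMMIT_ATOMIC_WRITE	_IO(F2FS_IOCTL_MAGIC, 2)

static int example_atomic_update(int fd, const void *buf, size_t len)
{
	if (ioctl(fd, F2FS_IOC_START_ATOMIC_WRITE) < 0)
		return -1;

	/* staged in memory, invisible on disk ... */
	if (write(fd, buf, len) != (ssize_t)len)
		return -1;

	/* ... until the commit writes the pages and fsyncs the inode */
	return ioctl(fd, F2FS_IOC_COMMIT_ATOMIC_WRITE);
}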
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index d7947d90ccc3..2a8f4acdb86b 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c | |||
@@ -58,7 +58,7 @@ static int gc_thread_func(void *data) | |||
58 | * 3. IO subsystem is idle by checking the # of requests in | 58 | * 3. IO subsystem is idle by checking the # of requests in |
59 | * bdev's request list. | 59 | * bdev's request list. |
60 | * | 60 | * |
61 | * Note) We have to avoid triggering GCs too much frequently. | 61 | * Note) We have to avoid triggering GCs too frequently. |
62 | * Because it is possible that some segments can be | 62 | * Because it is possible that some segments can be |
63 | * invalidated soon after by user update or deletion. | 63 | * invalidated soon after by user update or deletion. |
64 | * So, I'd like to wait some time to collect dirty segments. | 64 | * So, I'd like to wait some time to collect dirty segments. |
@@ -193,7 +193,7 @@ static unsigned int check_bg_victims(struct f2fs_sb_info *sbi) | |||
193 | * selected by background GC before. | 193 | * selected by background GC before. |
194 | * Those segments guarantee they have small valid blocks. | 194 | * Those segments guarantee they have small valid blocks. |
195 | */ | 195 | */ |
196 | for_each_set_bit(secno, dirty_i->victim_secmap, TOTAL_SECS(sbi)) { | 196 | for_each_set_bit(secno, dirty_i->victim_secmap, MAIN_SECS(sbi)) { |
197 | if (sec_usage_check(sbi, secno)) | 197 | if (sec_usage_check(sbi, secno)) |
198 | continue; | 198 | continue; |
199 | clear_bit(secno, dirty_i->victim_secmap); | 199 | clear_bit(secno, dirty_i->victim_secmap); |
@@ -222,7 +222,7 @@ static unsigned int get_cb_cost(struct f2fs_sb_info *sbi, unsigned int segno) | |||
222 | 222 | ||
223 | u = (vblocks * 100) >> sbi->log_blocks_per_seg; | 223 | u = (vblocks * 100) >> sbi->log_blocks_per_seg; |
224 | 224 | ||
225 | /* Handle if the system time is changed by user */ | 225 | /* Handle if the system time has changed by the user */ |
226 | if (mtime < sit_i->min_mtime) | 226 | if (mtime < sit_i->min_mtime) |
227 | sit_i->min_mtime = mtime; | 227 | sit_i->min_mtime = mtime; |
228 | if (mtime > sit_i->max_mtime) | 228 | if (mtime > sit_i->max_mtime) |
@@ -263,14 +263,14 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi, | |||
263 | unsigned int secno, max_cost; | 263 | unsigned int secno, max_cost; |
264 | int nsearched = 0; | 264 | int nsearched = 0; |
265 | 265 | ||
266 | mutex_lock(&dirty_i->seglist_lock); | ||
267 | |||
266 | p.alloc_mode = alloc_mode; | 268 | p.alloc_mode = alloc_mode; |
267 | select_policy(sbi, gc_type, type, &p); | 269 | select_policy(sbi, gc_type, type, &p); |
268 | 270 | ||
269 | p.min_segno = NULL_SEGNO; | 271 | p.min_segno = NULL_SEGNO; |
270 | p.min_cost = max_cost = get_max_cost(sbi, &p); | 272 | p.min_cost = max_cost = get_max_cost(sbi, &p); |
271 | 273 | ||
272 | mutex_lock(&dirty_i->seglist_lock); | ||
273 | |||
274 | if (p.alloc_mode == LFS && gc_type == FG_GC) { | 274 | if (p.alloc_mode == LFS && gc_type == FG_GC) { |
275 | p.min_segno = check_bg_victims(sbi); | 275 | p.min_segno = check_bg_victims(sbi); |
276 | if (p.min_segno != NULL_SEGNO) | 276 | if (p.min_segno != NULL_SEGNO) |
@@ -281,9 +281,8 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi, | |||
281 | unsigned long cost; | 281 | unsigned long cost; |
282 | unsigned int segno; | 282 | unsigned int segno; |
283 | 283 | ||
284 | segno = find_next_bit(p.dirty_segmap, | 284 | segno = find_next_bit(p.dirty_segmap, MAIN_SEGS(sbi), p.offset); |
285 | TOTAL_SEGS(sbi), p.offset); | 285 | if (segno >= MAIN_SEGS(sbi)) { |
286 | if (segno >= TOTAL_SEGS(sbi)) { | ||
287 | if (sbi->last_victim[p.gc_mode]) { | 286 | if (sbi->last_victim[p.gc_mode]) { |
288 | sbi->last_victim[p.gc_mode] = 0; | 287 | sbi->last_victim[p.gc_mode] = 0; |
289 | p.offset = 0; | 288 | p.offset = 0; |
@@ -423,6 +422,12 @@ next_step: | |||
423 | if (IS_ERR(node_page)) | 422 | if (IS_ERR(node_page)) |
424 | continue; | 423 | continue; |
425 | 424 | ||
425 | /* block may become invalid during get_node_page */ | ||
426 | if (check_valid_map(sbi, segno, off) == 0) { | ||
427 | f2fs_put_page(node_page, 1); | ||
428 | continue; | ||
429 | } | ||
430 | |||
426 | /* set page dirty and write it */ | 431 | /* set page dirty and write it */ |
427 | if (gc_type == FG_GC) { | 432 | if (gc_type == FG_GC) { |
428 | f2fs_wait_on_page_writeback(node_page, NODE); | 433 | f2fs_wait_on_page_writeback(node_page, NODE); |
@@ -531,7 +536,7 @@ static void move_data_page(struct inode *inode, struct page *page, int gc_type) | |||
531 | f2fs_wait_on_page_writeback(page, DATA); | 536 | f2fs_wait_on_page_writeback(page, DATA); |
532 | 537 | ||
533 | if (clear_page_dirty_for_io(page)) | 538 | if (clear_page_dirty_for_io(page)) |
534 | inode_dec_dirty_dents(inode); | 539 | inode_dec_dirty_pages(inode); |
535 | set_cold_data(page); | 540 | set_cold_data(page); |
536 | do_write_data_page(page, &fio); | 541 | do_write_data_page(page, &fio); |
537 | clear_cold_data(page); | 542 | clear_cold_data(page); |
@@ -593,7 +598,7 @@ next_step: | |||
593 | 598 | ||
594 | if (phase == 2) { | 599 | if (phase == 2) { |
595 | inode = f2fs_iget(sb, dni.ino); | 600 | inode = f2fs_iget(sb, dni.ino); |
596 | if (IS_ERR(inode)) | 601 | if (IS_ERR(inode) || is_bad_inode(inode)) |
597 | continue; | 602 | continue; |
598 | 603 | ||
599 | start_bidx = start_bidx_of_node(nofs, F2FS_I(inode)); | 604 | start_bidx = start_bidx_of_node(nofs, F2FS_I(inode)); |
@@ -688,17 +693,20 @@ int f2fs_gc(struct f2fs_sb_info *sbi) | |||
688 | int gc_type = BG_GC; | 693 | int gc_type = BG_GC; |
689 | int nfree = 0; | 694 | int nfree = 0; |
690 | int ret = -1; | 695 | int ret = -1; |
696 | struct cp_control cpc = { | ||
697 | .reason = CP_SYNC, | ||
698 | }; | ||
691 | 699 | ||
692 | INIT_LIST_HEAD(&ilist); | 700 | INIT_LIST_HEAD(&ilist); |
693 | gc_more: | 701 | gc_more: |
694 | if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE))) | 702 | if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE))) |
695 | goto stop; | 703 | goto stop; |
696 | if (unlikely(is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ERROR_FLAG))) | 704 | if (unlikely(f2fs_cp_error(sbi))) |
697 | goto stop; | 705 | goto stop; |
698 | 706 | ||
699 | if (gc_type == BG_GC && has_not_enough_free_secs(sbi, nfree)) { | 707 | if (gc_type == BG_GC && has_not_enough_free_secs(sbi, nfree)) { |
700 | gc_type = FG_GC; | 708 | gc_type = FG_GC; |
701 | write_checkpoint(sbi, false); | 709 | write_checkpoint(sbi, &cpc); |
702 | } | 710 | } |
703 | 711 | ||
704 | if (!__get_victim(sbi, &segno, gc_type, NO_CHECK_TYPE)) | 712 | if (!__get_victim(sbi, &segno, gc_type, NO_CHECK_TYPE)) |
@@ -723,7 +731,7 @@ gc_more: | |||
723 | goto gc_more; | 731 | goto gc_more; |
724 | 732 | ||
725 | if (gc_type == FG_GC) | 733 | if (gc_type == FG_GC) |
726 | write_checkpoint(sbi, false); | 734 | write_checkpoint(sbi, &cpc); |
727 | stop: | 735 | stop: |
728 | mutex_unlock(&sbi->gc_mutex); | 736 | mutex_unlock(&sbi->gc_mutex); |
729 | 737 | ||
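The victim-selection hunks above feed get_cb_cost(), where a utilization u and a min/max-normalized age decide which section to clean; the min/max clamping in the "system time has changed" hunk is what keeps that age well-defined when the clock moves backwards. A hedged sketch of the classic cost-benefit score this structure implements (standalone C; parameter names are illustrative, not the kernel's):

#include <limits.h>

/* Lower return value == better victim; mirrors the shape of
 * get_cb_cost() but is only an illustration, not the kernel code. */
static unsigned int cb_cost(unsigned int valid_blocks,
			    unsigned int blocks_per_sec,
			    unsigned long long mtime,
			    unsigned long long min_mtime,
			    unsigned long long max_mtime)
{
	/* utilization of the section, in percent */
	unsigned int u = valid_blocks * 100 / blocks_per_sec;
	/* age in percent: long-idle sections age up; the +1 guards
	 * this sketch against max_mtime == min_mtime */
	unsigned int age = 100 - (unsigned int)(100 * (mtime - min_mtime) /
					(max_mtime - min_mtime + 1));

	/* benefit: reclaimable space (100 - u) weighted by age;
	 * cost: copying the still-valid blocks (100 + u) */
	return UINT_MAX - ((100 * (100 - u) * age) / (100 + u));
}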
diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h index 5d5eb6047bf4..16f0b2b22999 100644 --- a/fs/f2fs/gc.h +++ b/fs/f2fs/gc.h | |||
@@ -91,7 +91,7 @@ static inline bool has_enough_invalid_blocks(struct f2fs_sb_info *sbi) | |||
91 | block_t invalid_user_blocks = sbi->user_block_count - | 91 | block_t invalid_user_blocks = sbi->user_block_count - |
92 | written_block_count(sbi); | 92 | written_block_count(sbi); |
93 | /* | 93 | /* |
94 | * Background GC is triggered with the following condition. | 94 | * Background GC is triggered with the following conditions. |
95 | * 1. There are a number of invalid blocks. | 95 | * 1. There are a number of invalid blocks. |
96 | * 2. There is not enough free space. | 96 | * 2. There is not enough free space. |
97 | */ | 97 | */ |
diff --git a/fs/f2fs/hash.c b/fs/f2fs/hash.c index 948d17bf7281..a844fcfb9a8d 100644 --- a/fs/f2fs/hash.c +++ b/fs/f2fs/hash.c | |||
@@ -42,7 +42,8 @@ static void TEA_transform(unsigned int buf[4], unsigned int const in[]) | |||
42 | buf[1] += b1; | 42 | buf[1] += b1; |
43 | } | 43 | } |
44 | 44 | ||
45 | static void str2hashbuf(const char *msg, size_t len, unsigned int *buf, int num) | 45 | static void str2hashbuf(const unsigned char *msg, size_t len, |
46 | unsigned int *buf, int num) | ||
46 | { | 47 | { |
47 | unsigned pad, val; | 48 | unsigned pad, val; |
48 | int i; | 49 | int i; |
@@ -73,9 +74,9 @@ f2fs_hash_t f2fs_dentry_hash(const struct qstr *name_info) | |||
73 | { | 74 | { |
74 | __u32 hash; | 75 | __u32 hash; |
75 | f2fs_hash_t f2fs_hash; | 76 | f2fs_hash_t f2fs_hash; |
76 | const char *p; | 77 | const unsigned char *p; |
77 | __u32 in[8], buf[4]; | 78 | __u32 in[8], buf[4]; |
78 | const char *name = name_info->name; | 79 | const unsigned char *name = name_info->name; |
79 | size_t len = name_info->len; | 80 | size_t len = name_info->len; |
80 | 81 | ||
81 | if ((len <= 2) && (name[0] == '.') && | 82 | if ((len <= 2) && (name[0] == '.') && |
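The signedness change in str2hashbuf() and f2fs_dentry_hash() is not cosmetic: plain char may be signed, so name bytes >= 0x80 (any multi-byte UTF-8 name) would sign-extend when widened, and the same name could hash differently depending on the ABI. A standalone illustration:

#include <stdio.h>

int main(void)
{
	char c = '\xe4';		/* e.g. a UTF-8 lead byte */
	unsigned char u = '\xe4';

	/* Where char is signed, c widens to a negative int, so any
	 * arithmetic mixing it into a hash sees a different value. */
	printf("%d vs %u\n", (int)c, (unsigned int)u);	/* -28 vs 228 */
	return 0;
}

On targets where plain char is unsigned (e.g. ARM) both values print as 228, which is exactly the inconsistency the unsigned char type removes.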
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 5beeccef9ae1..88036fd75797 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c | |||
@@ -15,11 +15,13 @@ | |||
15 | 15 | ||
16 | bool f2fs_may_inline(struct inode *inode) | 16 | bool f2fs_may_inline(struct inode *inode) |
17 | { | 17 | { |
18 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | ||
19 | block_t nr_blocks; | 18 | block_t nr_blocks; |
20 | loff_t i_size; | 19 | loff_t i_size; |
21 | 20 | ||
22 | if (!test_opt(sbi, INLINE_DATA)) | 21 | if (!test_opt(F2FS_I_SB(inode), INLINE_DATA)) |
22 | return false; | ||
23 | |||
24 | if (f2fs_is_atomic_file(inode)) | ||
23 | return false; | 25 | return false; |
24 | 26 | ||
25 | nr_blocks = F2FS_I(inode)->i_xattr_nid ? 3 : 2; | 27 | nr_blocks = F2FS_I(inode)->i_xattr_nid ? 3 : 2; |
@@ -35,7 +37,6 @@ bool f2fs_may_inline(struct inode *inode) | |||
35 | 37 | ||
36 | int f2fs_read_inline_data(struct inode *inode, struct page *page) | 38 | int f2fs_read_inline_data(struct inode *inode, struct page *page) |
37 | { | 39 | { |
38 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | ||
39 | struct page *ipage; | 40 | struct page *ipage; |
40 | void *src_addr, *dst_addr; | 41 | void *src_addr, *dst_addr; |
41 | 42 | ||
@@ -44,7 +45,7 @@ int f2fs_read_inline_data(struct inode *inode, struct page *page) | |||
44 | goto out; | 45 | goto out; |
45 | } | 46 | } |
46 | 47 | ||
47 | ipage = get_node_page(sbi, inode->i_ino); | 48 | ipage = get_node_page(F2FS_I_SB(inode), inode->i_ino); |
48 | if (IS_ERR(ipage)) { | 49 | if (IS_ERR(ipage)) { |
49 | unlock_page(page); | 50 | unlock_page(page); |
50 | return PTR_ERR(ipage); | 51 | return PTR_ERR(ipage); |
@@ -68,12 +69,12 @@ out: | |||
68 | 69 | ||
69 | static int __f2fs_convert_inline_data(struct inode *inode, struct page *page) | 70 | static int __f2fs_convert_inline_data(struct inode *inode, struct page *page) |
70 | { | 71 | { |
71 | int err; | 72 | int err = 0; |
72 | struct page *ipage; | 73 | struct page *ipage; |
73 | struct dnode_of_data dn; | 74 | struct dnode_of_data dn; |
74 | void *src_addr, *dst_addr; | 75 | void *src_addr, *dst_addr; |
75 | block_t new_blk_addr; | 76 | block_t new_blk_addr; |
76 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | 77 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); |
77 | struct f2fs_io_info fio = { | 78 | struct f2fs_io_info fio = { |
78 | .type = DATA, | 79 | .type = DATA, |
79 | .rw = WRITE_SYNC | REQ_PRIO, | 80 | .rw = WRITE_SYNC | REQ_PRIO, |
@@ -86,6 +87,10 @@ static int __f2fs_convert_inline_data(struct inode *inode, struct page *page) | |||
86 | goto out; | 87 | goto out; |
87 | } | 88 | } |
88 | 89 | ||
90 | /* someone else converted inline_data already */ | ||
91 | if (!f2fs_has_inline_data(inode)) | ||
92 | goto out; | ||
93 | |||
89 | /* | 94 | /* |
90 | * i_addr[0] is not used for inline data, | 95 | * i_addr[0] is not used for inline data, |
91 | * so reserving new block will not destroy inline data | 96 | * so reserving new block will not destroy inline data |
@@ -124,9 +129,10 @@ out: | |||
124 | return err; | 129 | return err; |
125 | } | 130 | } |
126 | 131 | ||
127 | int f2fs_convert_inline_data(struct inode *inode, pgoff_t to_size) | 132 | int f2fs_convert_inline_data(struct inode *inode, pgoff_t to_size, |
133 | struct page *page) | ||
128 | { | 134 | { |
129 | struct page *page; | 135 | struct page *new_page = page; |
130 | int err; | 136 | int err; |
131 | 137 | ||
132 | if (!f2fs_has_inline_data(inode)) | 138 | if (!f2fs_has_inline_data(inode)) |
@@ -134,17 +140,20 @@ int f2fs_convert_inline_data(struct inode *inode, pgoff_t to_size) | |||
134 | else if (to_size <= MAX_INLINE_DATA) | 140 | else if (to_size <= MAX_INLINE_DATA) |
135 | return 0; | 141 | return 0; |
136 | 142 | ||
137 | page = grab_cache_page(inode->i_mapping, 0); | 143 | if (!page || page->index != 0) { |
138 | if (!page) | 144 | new_page = grab_cache_page(inode->i_mapping, 0); |
139 | return -ENOMEM; | 145 | if (!new_page) |
146 | return -ENOMEM; | ||
147 | } | ||
140 | 148 | ||
141 | err = __f2fs_convert_inline_data(inode, page); | 149 | err = __f2fs_convert_inline_data(inode, new_page); |
142 | f2fs_put_page(page, 1); | 150 | if (!page || page->index != 0) |
151 | f2fs_put_page(new_page, 1); | ||
143 | return err; | 152 | return err; |
144 | } | 153 | } |
145 | 154 | ||
146 | int f2fs_write_inline_data(struct inode *inode, | 155 | int f2fs_write_inline_data(struct inode *inode, |
147 | struct page *page, unsigned size) | 156 | struct page *page, unsigned size) |
148 | { | 157 | { |
149 | void *src_addr, *dst_addr; | 158 | void *src_addr, *dst_addr; |
150 | struct page *ipage; | 159 | struct page *ipage; |
@@ -181,13 +190,12 @@ int f2fs_write_inline_data(struct inode *inode, | |||
181 | 190 | ||
182 | void truncate_inline_data(struct inode *inode, u64 from) | 191 | void truncate_inline_data(struct inode *inode, u64 from) |
183 | { | 192 | { |
184 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | ||
185 | struct page *ipage; | 193 | struct page *ipage; |
186 | 194 | ||
187 | if (from >= MAX_INLINE_DATA) | 195 | if (from >= MAX_INLINE_DATA) |
188 | return; | 196 | return; |
189 | 197 | ||
190 | ipage = get_node_page(sbi, inode->i_ino); | 198 | ipage = get_node_page(F2FS_I_SB(inode), inode->i_ino); |
191 | if (IS_ERR(ipage)) | 199 | if (IS_ERR(ipage)) |
192 | return; | 200 | return; |
193 | 201 | ||
@@ -199,9 +207,9 @@ void truncate_inline_data(struct inode *inode, u64 from) | |||
199 | f2fs_put_page(ipage, 1); | 207 | f2fs_put_page(ipage, 1); |
200 | } | 208 | } |
201 | 209 | ||
202 | int recover_inline_data(struct inode *inode, struct page *npage) | 210 | bool recover_inline_data(struct inode *inode, struct page *npage) |
203 | { | 211 | { |
204 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | 212 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); |
205 | struct f2fs_inode *ri = NULL; | 213 | struct f2fs_inode *ri = NULL; |
206 | void *src_addr, *dst_addr; | 214 | void *src_addr, *dst_addr; |
207 | struct page *ipage; | 215 | struct page *ipage; |
@@ -218,10 +226,10 @@ int recover_inline_data(struct inode *inode, struct page *npage) | |||
218 | ri = F2FS_INODE(npage); | 226 | ri = F2FS_INODE(npage); |
219 | 227 | ||
220 | if (f2fs_has_inline_data(inode) && | 228 | if (f2fs_has_inline_data(inode) && |
221 | ri && ri->i_inline & F2FS_INLINE_DATA) { | 229 | ri && (ri->i_inline & F2FS_INLINE_DATA)) { |
222 | process_inline: | 230 | process_inline: |
223 | ipage = get_node_page(sbi, inode->i_ino); | 231 | ipage = get_node_page(sbi, inode->i_ino); |
224 | f2fs_bug_on(IS_ERR(ipage)); | 232 | f2fs_bug_on(sbi, IS_ERR(ipage)); |
225 | 233 | ||
226 | f2fs_wait_on_page_writeback(ipage, NODE); | 234 | f2fs_wait_on_page_writeback(ipage, NODE); |
227 | 235 | ||
@@ -230,22 +238,22 @@ process_inline: | |||
230 | memcpy(dst_addr, src_addr, MAX_INLINE_DATA); | 238 | memcpy(dst_addr, src_addr, MAX_INLINE_DATA); |
231 | update_inode(inode, ipage); | 239 | update_inode(inode, ipage); |
232 | f2fs_put_page(ipage, 1); | 240 | f2fs_put_page(ipage, 1); |
233 | return -1; | 241 | return true; |
234 | } | 242 | } |
235 | 243 | ||
236 | if (f2fs_has_inline_data(inode)) { | 244 | if (f2fs_has_inline_data(inode)) { |
237 | ipage = get_node_page(sbi, inode->i_ino); | 245 | ipage = get_node_page(sbi, inode->i_ino); |
238 | f2fs_bug_on(IS_ERR(ipage)); | 246 | f2fs_bug_on(sbi, IS_ERR(ipage)); |
239 | f2fs_wait_on_page_writeback(ipage, NODE); | 247 | f2fs_wait_on_page_writeback(ipage, NODE); |
240 | zero_user_segment(ipage, INLINE_DATA_OFFSET, | 248 | zero_user_segment(ipage, INLINE_DATA_OFFSET, |
241 | INLINE_DATA_OFFSET + MAX_INLINE_DATA); | 249 | INLINE_DATA_OFFSET + MAX_INLINE_DATA); |
242 | clear_inode_flag(F2FS_I(inode), FI_INLINE_DATA); | 250 | clear_inode_flag(F2FS_I(inode), FI_INLINE_DATA); |
243 | update_inode(inode, ipage); | 251 | update_inode(inode, ipage); |
244 | f2fs_put_page(ipage, 1); | 252 | f2fs_put_page(ipage, 1); |
245 | } else if (ri && ri->i_inline & F2FS_INLINE_DATA) { | 253 | } else if (ri && (ri->i_inline & F2FS_INLINE_DATA)) { |
246 | truncate_blocks(inode, 0); | 254 | truncate_blocks(inode, 0, false); |
247 | set_inode_flag(F2FS_I(inode), FI_INLINE_DATA); | 255 | set_inode_flag(F2FS_I(inode), FI_INLINE_DATA); |
248 | goto process_inline; | 256 | goto process_inline; |
249 | } | 257 | } |
250 | return 0; | 258 | return false; |
251 | } | 259 | } |
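recover_inline_data() now returns bool instead of overloading an int (-1 previously meant "recovered"). A hedged sketch of how the recovery path consumes the new return value; the real call site lives in fs/f2fs/recovery.c, and recover_blocks() here is a hypothetical stand-in for the block-level work:

/* Sketch only: kernel context assumed, helper name hypothetical. */
static int recover_data_sketch(struct inode *inode, struct page *npage)
{
	/* true: the inline payload was copied back, nothing
	 * block-level remains for this node page */
	if (recover_inline_data(inode, npage))
		return 0;

	/* false: fall through to ordinary per-block recovery */
	return recover_blocks(inode, npage);	/* hypothetical */
}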
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 2c39999f3868..0deead4505e7 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c | |||
@@ -69,7 +69,7 @@ static void __set_inode_rdev(struct inode *inode, struct f2fs_inode *ri) | |||
69 | 69 | ||
70 | static int do_read_inode(struct inode *inode) | 70 | static int do_read_inode(struct inode *inode) |
71 | { | 71 | { |
72 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | 72 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); |
73 | struct f2fs_inode_info *fi = F2FS_I(inode); | 73 | struct f2fs_inode_info *fi = F2FS_I(inode); |
74 | struct page *node_page; | 74 | struct page *node_page; |
75 | struct f2fs_inode *ri; | 75 | struct f2fs_inode *ri; |
@@ -218,7 +218,7 @@ void update_inode(struct inode *inode, struct page *node_page) | |||
218 | 218 | ||
219 | void update_inode_page(struct inode *inode) | 219 | void update_inode_page(struct inode *inode) |
220 | { | 220 | { |
221 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | 221 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); |
222 | struct page *node_page; | 222 | struct page *node_page; |
223 | retry: | 223 | retry: |
224 | node_page = get_node_page(sbi, inode->i_ino); | 224 | node_page = get_node_page(sbi, inode->i_ino); |
@@ -238,7 +238,7 @@ retry: | |||
238 | 238 | ||
239 | int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc) | 239 | int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc) |
240 | { | 240 | { |
241 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | 241 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); |
242 | 242 | ||
243 | if (inode->i_ino == F2FS_NODE_INO(sbi) || | 243 | if (inode->i_ino == F2FS_NODE_INO(sbi) || |
244 | inode->i_ino == F2FS_META_INO(sbi)) | 244 | inode->i_ino == F2FS_META_INO(sbi)) |
@@ -266,9 +266,13 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc) | |||
266 | */ | 266 | */ |
267 | void f2fs_evict_inode(struct inode *inode) | 267 | void f2fs_evict_inode(struct inode *inode) |
268 | { | 268 | { |
269 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | 269 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); |
270 | nid_t xnid = F2FS_I(inode)->i_xattr_nid; | 270 | nid_t xnid = F2FS_I(inode)->i_xattr_nid; |
271 | 271 | ||
272 | /* some remaining atomic pages should be discarded */ | ||
273 | if (f2fs_is_atomic_file(inode) || f2fs_is_volatile_file(inode)) | ||
274 | commit_inmem_pages(inode, true); | ||
275 | |||
272 | trace_f2fs_evict_inode(inode); | 276 | trace_f2fs_evict_inode(inode); |
273 | truncate_inode_pages_final(&inode->i_data); | 277 | truncate_inode_pages_final(&inode->i_data); |
274 | 278 | ||
@@ -276,7 +280,7 @@ void f2fs_evict_inode(struct inode *inode) | |||
276 | inode->i_ino == F2FS_META_INO(sbi)) | 280 | inode->i_ino == F2FS_META_INO(sbi)) |
277 | goto out_clear; | 281 | goto out_clear; |
278 | 282 | ||
279 | f2fs_bug_on(get_dirty_dents(inode)); | 283 | f2fs_bug_on(sbi, get_dirty_pages(inode)); |
280 | remove_dirty_dir_inode(inode); | 284 | remove_dirty_dir_inode(inode); |
281 | 285 | ||
282 | if (inode->i_nlink || is_bad_inode(inode)) | 286 | if (inode->i_nlink || is_bad_inode(inode)) |
@@ -306,3 +310,26 @@ no_delete: | |||
306 | out_clear: | 310 | out_clear: |
307 | clear_inode(inode); | 311 | clear_inode(inode); |
308 | } | 312 | } |
313 | |||
314 | /* caller should call f2fs_lock_op() */ | ||
315 | void handle_failed_inode(struct inode *inode) | ||
316 | { | ||
317 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); | ||
318 | |||
319 | clear_nlink(inode); | ||
320 | make_bad_inode(inode); | ||
321 | unlock_new_inode(inode); | ||
322 | |||
323 | i_size_write(inode, 0); | ||
324 | if (F2FS_HAS_BLOCKS(inode)) | ||
325 | f2fs_truncate(inode); | ||
326 | |||
327 | remove_inode_page(inode); | ||
328 | stat_dec_inline_inode(inode); | ||
329 | |||
330 | alloc_nid_failed(sbi, inode->i_ino); | ||
331 | f2fs_unlock_op(sbi); | ||
332 | |||
333 | /* iput will drop the inode object */ | ||
334 | iput(inode); | ||
335 | } | ||
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 27b03776ffd2..0d2526e5aa11 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c | |||
@@ -23,7 +23,7 @@ | |||
23 | 23 | ||
24 | static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) | 24 | static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) |
25 | { | 25 | { |
26 | struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); | 26 | struct f2fs_sb_info *sbi = F2FS_I_SB(dir); |
27 | nid_t ino; | 27 | nid_t ino; |
28 | struct inode *inode; | 28 | struct inode *inode; |
29 | bool nid_free = false; | 29 | bool nid_free = false; |
@@ -102,7 +102,7 @@ static inline void set_cold_files(struct f2fs_sb_info *sbi, struct inode *inode, | |||
102 | static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, | 102 | static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, |
103 | bool excl) | 103 | bool excl) |
104 | { | 104 | { |
105 | struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); | 105 | struct f2fs_sb_info *sbi = F2FS_I_SB(dir); |
106 | struct inode *inode; | 106 | struct inode *inode; |
107 | nid_t ino = 0; | 107 | nid_t ino = 0; |
108 | int err; | 108 | int err; |
@@ -123,9 +123,9 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, | |||
123 | 123 | ||
124 | f2fs_lock_op(sbi); | 124 | f2fs_lock_op(sbi); |
125 | err = f2fs_add_link(dentry, inode); | 125 | err = f2fs_add_link(dentry, inode); |
126 | f2fs_unlock_op(sbi); | ||
127 | if (err) | 126 | if (err) |
128 | goto out; | 127 | goto out; |
128 | f2fs_unlock_op(sbi); | ||
129 | 129 | ||
130 | alloc_nid_done(sbi, ino); | 130 | alloc_nid_done(sbi, ino); |
131 | 131 | ||
@@ -133,11 +133,7 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, | |||
133 | unlock_new_inode(inode); | 133 | unlock_new_inode(inode); |
134 | return 0; | 134 | return 0; |
135 | out: | 135 | out: |
136 | clear_nlink(inode); | 136 | handle_failed_inode(inode); |
137 | unlock_new_inode(inode); | ||
138 | make_bad_inode(inode); | ||
139 | iput(inode); | ||
140 | alloc_nid_failed(sbi, ino); | ||
141 | return err; | 137 | return err; |
142 | } | 138 | } |
143 | 139 | ||
@@ -145,7 +141,7 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir, | |||
145 | struct dentry *dentry) | 141 | struct dentry *dentry) |
146 | { | 142 | { |
147 | struct inode *inode = old_dentry->d_inode; | 143 | struct inode *inode = old_dentry->d_inode; |
148 | struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); | 144 | struct f2fs_sb_info *sbi = F2FS_I_SB(dir); |
149 | int err; | 145 | int err; |
150 | 146 | ||
151 | f2fs_balance_fs(sbi); | 147 | f2fs_balance_fs(sbi); |
@@ -156,15 +152,16 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir, | |||
156 | set_inode_flag(F2FS_I(inode), FI_INC_LINK); | 152 | set_inode_flag(F2FS_I(inode), FI_INC_LINK); |
157 | f2fs_lock_op(sbi); | 153 | f2fs_lock_op(sbi); |
158 | err = f2fs_add_link(dentry, inode); | 154 | err = f2fs_add_link(dentry, inode); |
159 | f2fs_unlock_op(sbi); | ||
160 | if (err) | 155 | if (err) |
161 | goto out; | 156 | goto out; |
157 | f2fs_unlock_op(sbi); | ||
162 | 158 | ||
163 | d_instantiate(dentry, inode); | 159 | d_instantiate(dentry, inode); |
164 | return 0; | 160 | return 0; |
165 | out: | 161 | out: |
166 | clear_inode_flag(F2FS_I(inode), FI_INC_LINK); | 162 | clear_inode_flag(F2FS_I(inode), FI_INC_LINK); |
167 | iput(inode); | 163 | iput(inode); |
164 | f2fs_unlock_op(sbi); | ||
168 | return err; | 165 | return err; |
169 | } | 166 | } |
170 | 167 | ||
@@ -205,7 +202,7 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, | |||
205 | 202 | ||
206 | static int f2fs_unlink(struct inode *dir, struct dentry *dentry) | 203 | static int f2fs_unlink(struct inode *dir, struct dentry *dentry) |
207 | { | 204 | { |
208 | struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); | 205 | struct f2fs_sb_info *sbi = F2FS_I_SB(dir); |
209 | struct inode *inode = dentry->d_inode; | 206 | struct inode *inode = dentry->d_inode; |
210 | struct f2fs_dir_entry *de; | 207 | struct f2fs_dir_entry *de; |
211 | struct page *page; | 208 | struct page *page; |
@@ -229,7 +226,7 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry) | |||
229 | f2fs_delete_entry(de, page, inode); | 226 | f2fs_delete_entry(de, page, inode); |
230 | f2fs_unlock_op(sbi); | 227 | f2fs_unlock_op(sbi); |
231 | 228 | ||
232 | /* In order to evict this inode, we set it dirty */ | 229 | /* In order to evict this inode, we set it dirty */ |
233 | mark_inode_dirty(inode); | 230 | mark_inode_dirty(inode); |
234 | fail: | 231 | fail: |
235 | trace_f2fs_unlink_exit(inode, err); | 232 | trace_f2fs_unlink_exit(inode, err); |
@@ -239,7 +236,7 @@ fail: | |||
239 | static int f2fs_symlink(struct inode *dir, struct dentry *dentry, | 236 | static int f2fs_symlink(struct inode *dir, struct dentry *dentry, |
240 | const char *symname) | 237 | const char *symname) |
241 | { | 238 | { |
242 | struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); | 239 | struct f2fs_sb_info *sbi = F2FS_I_SB(dir); |
243 | struct inode *inode; | 240 | struct inode *inode; |
244 | size_t symlen = strlen(symname) + 1; | 241 | size_t symlen = strlen(symname) + 1; |
245 | int err; | 242 | int err; |
@@ -255,9 +252,9 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry, | |||
255 | 252 | ||
256 | f2fs_lock_op(sbi); | 253 | f2fs_lock_op(sbi); |
257 | err = f2fs_add_link(dentry, inode); | 254 | err = f2fs_add_link(dentry, inode); |
258 | f2fs_unlock_op(sbi); | ||
259 | if (err) | 255 | if (err) |
260 | goto out; | 256 | goto out; |
257 | f2fs_unlock_op(sbi); | ||
261 | 258 | ||
262 | err = page_symlink(inode, symname, symlen); | 259 | err = page_symlink(inode, symname, symlen); |
263 | alloc_nid_done(sbi, inode->i_ino); | 260 | alloc_nid_done(sbi, inode->i_ino); |
@@ -266,17 +263,13 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry, | |||
266 | unlock_new_inode(inode); | 263 | unlock_new_inode(inode); |
267 | return err; | 264 | return err; |
268 | out: | 265 | out: |
269 | clear_nlink(inode); | 266 | handle_failed_inode(inode); |
270 | unlock_new_inode(inode); | ||
271 | make_bad_inode(inode); | ||
272 | iput(inode); | ||
273 | alloc_nid_failed(sbi, inode->i_ino); | ||
274 | return err; | 267 | return err; |
275 | } | 268 | } |
276 | 269 | ||
277 | static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) | 270 | static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) |
278 | { | 271 | { |
279 | struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); | 272 | struct f2fs_sb_info *sbi = F2FS_I_SB(dir); |
280 | struct inode *inode; | 273 | struct inode *inode; |
281 | int err; | 274 | int err; |
282 | 275 | ||
@@ -294,9 +287,9 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) | |||
294 | set_inode_flag(F2FS_I(inode), FI_INC_LINK); | 287 | set_inode_flag(F2FS_I(inode), FI_INC_LINK); |
295 | f2fs_lock_op(sbi); | 288 | f2fs_lock_op(sbi); |
296 | err = f2fs_add_link(dentry, inode); | 289 | err = f2fs_add_link(dentry, inode); |
297 | f2fs_unlock_op(sbi); | ||
298 | if (err) | 290 | if (err) |
299 | goto out_fail; | 291 | goto out_fail; |
292 | f2fs_unlock_op(sbi); | ||
300 | 293 | ||
301 | alloc_nid_done(sbi, inode->i_ino); | 294 | alloc_nid_done(sbi, inode->i_ino); |
302 | 295 | ||
@@ -307,11 +300,7 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) | |||
307 | 300 | ||
308 | out_fail: | 301 | out_fail: |
309 | clear_inode_flag(F2FS_I(inode), FI_INC_LINK); | 302 | clear_inode_flag(F2FS_I(inode), FI_INC_LINK); |
310 | clear_nlink(inode); | 303 | handle_failed_inode(inode); |
311 | unlock_new_inode(inode); | ||
312 | make_bad_inode(inode); | ||
313 | iput(inode); | ||
314 | alloc_nid_failed(sbi, inode->i_ino); | ||
315 | return err; | 304 | return err; |
316 | } | 305 | } |
317 | 306 | ||
@@ -326,7 +315,7 @@ static int f2fs_rmdir(struct inode *dir, struct dentry *dentry) | |||
326 | static int f2fs_mknod(struct inode *dir, struct dentry *dentry, | 315 | static int f2fs_mknod(struct inode *dir, struct dentry *dentry, |
327 | umode_t mode, dev_t rdev) | 316 | umode_t mode, dev_t rdev) |
328 | { | 317 | { |
329 | struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); | 318 | struct f2fs_sb_info *sbi = F2FS_I_SB(dir); |
330 | struct inode *inode; | 319 | struct inode *inode; |
331 | int err = 0; | 320 | int err = 0; |
332 | 321 | ||
@@ -344,27 +333,23 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry, | |||
344 | 333 | ||
345 | f2fs_lock_op(sbi); | 334 | f2fs_lock_op(sbi); |
346 | err = f2fs_add_link(dentry, inode); | 335 | err = f2fs_add_link(dentry, inode); |
347 | f2fs_unlock_op(sbi); | ||
348 | if (err) | 336 | if (err) |
349 | goto out; | 337 | goto out; |
338 | f2fs_unlock_op(sbi); | ||
350 | 339 | ||
351 | alloc_nid_done(sbi, inode->i_ino); | 340 | alloc_nid_done(sbi, inode->i_ino); |
352 | d_instantiate(dentry, inode); | 341 | d_instantiate(dentry, inode); |
353 | unlock_new_inode(inode); | 342 | unlock_new_inode(inode); |
354 | return 0; | 343 | return 0; |
355 | out: | 344 | out: |
356 | clear_nlink(inode); | 345 | handle_failed_inode(inode); |
357 | unlock_new_inode(inode); | ||
358 | make_bad_inode(inode); | ||
359 | iput(inode); | ||
360 | alloc_nid_failed(sbi, inode->i_ino); | ||
361 | return err; | 346 | return err; |
362 | } | 347 | } |
363 | 348 | ||
364 | static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, | 349 | static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, |
365 | struct inode *new_dir, struct dentry *new_dentry) | 350 | struct inode *new_dir, struct dentry *new_dentry) |
366 | { | 351 | { |
367 | struct f2fs_sb_info *sbi = F2FS_SB(old_dir->i_sb); | 352 | struct f2fs_sb_info *sbi = F2FS_I_SB(old_dir); |
368 | struct inode *old_inode = old_dentry->d_inode; | 353 | struct inode *old_inode = old_dentry->d_inode; |
369 | struct inode *new_inode = new_dentry->d_inode; | 354 | struct inode *new_inode = new_dentry->d_inode; |
370 | struct page *old_dir_page; | 355 | struct page *old_dir_page; |
@@ -488,8 +473,7 @@ out: | |||
488 | static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, | 473 | static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, |
489 | struct inode *new_dir, struct dentry *new_dentry) | 474 | struct inode *new_dir, struct dentry *new_dentry) |
490 | { | 475 | { |
491 | struct super_block *sb = old_dir->i_sb; | 476 | struct f2fs_sb_info *sbi = F2FS_I_SB(old_dir); |
492 | struct f2fs_sb_info *sbi = F2FS_SB(sb); | ||
493 | struct inode *old_inode = old_dentry->d_inode; | 477 | struct inode *old_inode = old_dentry->d_inode; |
494 | struct inode *new_inode = new_dentry->d_inode; | 478 | struct inode *new_inode = new_dentry->d_inode; |
495 | struct page *old_dir_page, *new_dir_page; | 479 | struct page *old_dir_page, *new_dir_page; |
@@ -650,7 +634,7 @@ static int f2fs_rename2(struct inode *old_dir, struct dentry *old_dentry, | |||
650 | 634 | ||
651 | static int f2fs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) | 635 | static int f2fs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) |
652 | { | 636 | { |
653 | struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); | 637 | struct f2fs_sb_info *sbi = F2FS_I_SB(dir); |
654 | struct inode *inode; | 638 | struct inode *inode; |
655 | int err; | 639 | int err; |
656 | 640 | ||
@@ -686,12 +670,7 @@ static int f2fs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) | |||
686 | release_out: | 670 | release_out: |
687 | release_orphan_inode(sbi); | 671 | release_orphan_inode(sbi); |
688 | out: | 672 | out: |
689 | f2fs_unlock_op(sbi); | 673 | handle_failed_inode(inode); |
690 | clear_nlink(inode); | ||
691 | unlock_new_inode(inode); | ||
692 | make_bad_inode(inode); | ||
693 | iput(inode); | ||
694 | alloc_nid_failed(sbi, inode->i_ino); | ||
695 | return err; | 674 | return err; |
696 | } | 675 | } |
697 | 676 | ||
@@ -704,7 +683,6 @@ const struct inode_operations f2fs_dir_inode_operations = { | |||
704 | .mkdir = f2fs_mkdir, | 683 | .mkdir = f2fs_mkdir, |
705 | .rmdir = f2fs_rmdir, | 684 | .rmdir = f2fs_rmdir, |
706 | .mknod = f2fs_mknod, | 685 | .mknod = f2fs_mknod, |
707 | .rename = f2fs_rename, | ||
708 | .rename2 = f2fs_rename2, | 686 | .rename2 = f2fs_rename2, |
709 | .tmpfile = f2fs_tmpfile, | 687 | .tmpfile = f2fs_tmpfile, |
710 | .getattr = f2fs_getattr, | 688 | .getattr = f2fs_getattr, |
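One pattern repeats across f2fs_create(), f2fs_symlink(), f2fs_mkdir(), f2fs_mknod() and f2fs_tmpfile() above: f2fs_unlock_op() moves below the error check, so the failure path reaches handle_failed_inode() while still inside the lock_op transaction that covered f2fs_add_link(), and the helper drops the lock itself after remove_inode_page(). (f2fs_link(), which reuses an existing inode, keeps its own iput()/unlock error path instead.) The resulting contract, condensed from the hunks above:

/* Condensed from the hunks above; the error path must stay inside
 * the f2fs_lock_op() transaction until handle_failed_inode() runs. */
f2fs_lock_op(sbi);
err = f2fs_add_link(dentry, inode);
if (err)
	goto out;			/* still holding lock_op */
f2fs_unlock_op(sbi);
/* ... success path ... */
return 0;
out:
	handle_failed_inode(inode);	/* calls f2fs_unlock_op() itself */
	return err;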
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index d3d90d284631..44b8afef43d9 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c | |||
@@ -54,7 +54,6 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type) | |||
54 | static void clear_node_page_dirty(struct page *page) | 54 | static void clear_node_page_dirty(struct page *page) |
55 | { | 55 | { |
56 | struct address_space *mapping = page->mapping; | 56 | struct address_space *mapping = page->mapping; |
57 | struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb); | ||
58 | unsigned int long flags; | 57 | unsigned int long flags; |
59 | 58 | ||
60 | if (PageDirty(page)) { | 59 | if (PageDirty(page)) { |
@@ -65,7 +64,7 @@ static void clear_node_page_dirty(struct page *page) | |||
65 | spin_unlock_irqrestore(&mapping->tree_lock, flags); | 64 | spin_unlock_irqrestore(&mapping->tree_lock, flags); |
66 | 65 | ||
67 | clear_page_dirty_for_io(page); | 66 | clear_page_dirty_for_io(page); |
68 | dec_page_count(sbi, F2FS_DIRTY_NODES); | 67 | dec_page_count(F2FS_M_SB(mapping), F2FS_DIRTY_NODES); |
69 | } | 68 | } |
70 | ClearPageUptodate(page); | 69 | ClearPageUptodate(page); |
71 | } | 70 | } |
@@ -92,7 +91,7 @@ static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid) | |||
92 | /* get current nat block page with lock */ | 91 | /* get current nat block page with lock */ |
93 | src_page = get_meta_page(sbi, src_off); | 92 | src_page = get_meta_page(sbi, src_off); |
94 | dst_page = grab_meta_page(sbi, dst_off); | 93 | dst_page = grab_meta_page(sbi, dst_off); |
95 | f2fs_bug_on(PageDirty(src_page)); | 94 | f2fs_bug_on(sbi, PageDirty(src_page)); |
96 | 95 | ||
97 | src_addr = page_address(src_page); | 96 | src_addr = page_address(src_page); |
98 | dst_addr = page_address(dst_page); | 97 | dst_addr = page_address(dst_page); |
@@ -124,44 +123,99 @@ static void __del_from_nat_cache(struct f2fs_nm_info *nm_i, struct nat_entry *e) | |||
124 | kmem_cache_free(nat_entry_slab, e); | 123 | kmem_cache_free(nat_entry_slab, e); |
125 | } | 124 | } |
126 | 125 | ||
127 | int is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid) | 126 | static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i, |
127 | struct nat_entry *ne) | ||
128 | { | ||
129 | nid_t set = NAT_BLOCK_OFFSET(ne->ni.nid); | ||
130 | struct nat_entry_set *head; | ||
131 | |||
132 | if (get_nat_flag(ne, IS_DIRTY)) | ||
133 | return; | ||
134 | retry: | ||
135 | head = radix_tree_lookup(&nm_i->nat_set_root, set); | ||
136 | if (!head) { | ||
137 | head = f2fs_kmem_cache_alloc(nat_entry_set_slab, GFP_ATOMIC); | ||
138 | |||
139 | INIT_LIST_HEAD(&head->entry_list); | ||
140 | INIT_LIST_HEAD(&head->set_list); | ||
141 | head->set = set; | ||
142 | head->entry_cnt = 0; | ||
143 | |||
144 | if (radix_tree_insert(&nm_i->nat_set_root, set, head)) { | ||
145 | cond_resched(); | ||
146 | goto retry; | ||
147 | } | ||
148 | } | ||
149 | list_move_tail(&ne->list, &head->entry_list); | ||
150 | nm_i->dirty_nat_cnt++; | ||
151 | head->entry_cnt++; | ||
152 | set_nat_flag(ne, IS_DIRTY, true); | ||
153 | } | ||
154 | |||
155 | static void __clear_nat_cache_dirty(struct f2fs_nm_info *nm_i, | ||
156 | struct nat_entry *ne) | ||
157 | { | ||
158 | nid_t set = ne->ni.nid / NAT_ENTRY_PER_BLOCK; | ||
159 | struct nat_entry_set *head; | ||
160 | |||
161 | head = radix_tree_lookup(&nm_i->nat_set_root, set); | ||
162 | if (head) { | ||
163 | list_move_tail(&ne->list, &nm_i->nat_entries); | ||
164 | set_nat_flag(ne, IS_DIRTY, false); | ||
165 | head->entry_cnt--; | ||
166 | nm_i->dirty_nat_cnt--; | ||
167 | } | ||
168 | } | ||
169 | |||
170 | static unsigned int __gang_lookup_nat_set(struct f2fs_nm_info *nm_i, | ||
171 | nid_t start, unsigned int nr, struct nat_entry_set **ep) | ||
172 | { | ||
173 | return radix_tree_gang_lookup(&nm_i->nat_set_root, (void **)ep, | ||
174 | start, nr); | ||
175 | } | ||
176 | |||
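__set_nat_cache_dirty() above uses a lookup-or-insert retry loop: allocate a set head with GFP_ATOMIC, and if radix_tree_insert() fails (a racing inserter, or -ENOMEM), back off with cond_resched() and redo the lookup. A generic sketch of the shape of that pattern; everything except the radix-tree API is illustrative, and for robustness this sketch drops its own allocation before retrying:

retry:
	head = radix_tree_lookup(root, key);
	if (!head) {
		head = kmem_cache_alloc(set_slab, GFP_ATOMIC);
		if (!head)
			goto retry;	/* or fail, depending on caller */
		init_head(head, key);	/* hypothetical initializer */
		if (radix_tree_insert(root, key, head)) {
			/* raced with another inserter or hit -ENOMEM:
			 * drop ours and look the key up again */
			kmem_cache_free(set_slab, head);
			cond_resched();
			goto retry;
		}
	}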
177 | bool is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid) | ||
128 | { | 178 | { |
129 | struct f2fs_nm_info *nm_i = NM_I(sbi); | 179 | struct f2fs_nm_info *nm_i = NM_I(sbi); |
130 | struct nat_entry *e; | 180 | struct nat_entry *e; |
131 | int is_cp = 1; | 181 | bool is_cp = true; |
132 | 182 | ||
133 | read_lock(&nm_i->nat_tree_lock); | 183 | read_lock(&nm_i->nat_tree_lock); |
134 | e = __lookup_nat_cache(nm_i, nid); | 184 | e = __lookup_nat_cache(nm_i, nid); |
135 | if (e && !e->checkpointed) | 185 | if (e && !get_nat_flag(e, IS_CHECKPOINTED)) |
136 | is_cp = 0; | 186 | is_cp = false; |
137 | read_unlock(&nm_i->nat_tree_lock); | 187 | read_unlock(&nm_i->nat_tree_lock); |
138 | return is_cp; | 188 | return is_cp; |
139 | } | 189 | } |
140 | 190 | ||
141 | bool fsync_mark_done(struct f2fs_sb_info *sbi, nid_t nid) | 191 | bool has_fsynced_inode(struct f2fs_sb_info *sbi, nid_t ino) |
142 | { | 192 | { |
143 | struct f2fs_nm_info *nm_i = NM_I(sbi); | 193 | struct f2fs_nm_info *nm_i = NM_I(sbi); |
144 | struct nat_entry *e; | 194 | struct nat_entry *e; |
145 | bool fsync_done = false; | 195 | bool fsynced = false; |
146 | 196 | ||
147 | read_lock(&nm_i->nat_tree_lock); | 197 | read_lock(&nm_i->nat_tree_lock); |
148 | e = __lookup_nat_cache(nm_i, nid); | 198 | e = __lookup_nat_cache(nm_i, ino); |
149 | if (e) | 199 | if (e && get_nat_flag(e, HAS_FSYNCED_INODE)) |
150 | fsync_done = e->fsync_done; | 200 | fsynced = true; |
151 | read_unlock(&nm_i->nat_tree_lock); | 201 | read_unlock(&nm_i->nat_tree_lock); |
152 | return fsync_done; | 202 | return fsynced; |
153 | } | 203 | } |
154 | 204 | ||
155 | void fsync_mark_clear(struct f2fs_sb_info *sbi, nid_t nid) | 205 | bool need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino) |
156 | { | 206 | { |
157 | struct f2fs_nm_info *nm_i = NM_I(sbi); | 207 | struct f2fs_nm_info *nm_i = NM_I(sbi); |
158 | struct nat_entry *e; | 208 | struct nat_entry *e; |
209 | bool need_update = true; | ||
159 | 210 | ||
160 | write_lock(&nm_i->nat_tree_lock); | 211 | read_lock(&nm_i->nat_tree_lock); |
161 | e = __lookup_nat_cache(nm_i, nid); | 212 | e = __lookup_nat_cache(nm_i, ino); |
162 | if (e) | 213 | if (e && get_nat_flag(e, HAS_LAST_FSYNC) && |
163 | e->fsync_done = false; | 214 | (get_nat_flag(e, IS_CHECKPOINTED) || |
164 | write_unlock(&nm_i->nat_tree_lock); | 215 | get_nat_flag(e, HAS_FSYNCED_INODE))) |
216 | need_update = false; | ||
217 | read_unlock(&nm_i->nat_tree_lock); | ||
218 | return need_update; | ||
165 | } | 219 | } |
166 | 220 | ||
167 | static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid) | 221 | static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid) |
@@ -177,7 +231,7 @@ static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid) | |||
177 | } | 231 | } |
178 | memset(new, 0, sizeof(struct nat_entry)); | 232 | memset(new, 0, sizeof(struct nat_entry)); |
179 | nat_set_nid(new, nid); | 233 | nat_set_nid(new, nid); |
180 | new->checkpointed = true; | 234 | nat_reset_flag(new); |
181 | list_add_tail(&new->list, &nm_i->nat_entries); | 235 | list_add_tail(&new->list, &nm_i->nat_entries); |
182 | nm_i->nat_cnt++; | 236 | nm_i->nat_cnt++; |
183 | return new; | 237 | return new; |
@@ -216,7 +270,7 @@ retry: | |||
216 | goto retry; | 270 | goto retry; |
217 | } | 271 | } |
218 | e->ni = *ni; | 272 | e->ni = *ni; |
219 | f2fs_bug_on(ni->blk_addr == NEW_ADDR); | 273 | f2fs_bug_on(sbi, ni->blk_addr == NEW_ADDR); |
220 | } else if (new_blkaddr == NEW_ADDR) { | 274 | } else if (new_blkaddr == NEW_ADDR) { |
221 | /* | 275 | /* |
222 | * when nid is reallocated, | 276 | * when nid is reallocated, |
@@ -224,20 +278,20 @@ retry: | |||
224 | * So, reinitialize it with new information. | 278 | * So, reinitialize it with new information. |
225 | */ | 279 | */ |
226 | e->ni = *ni; | 280 | e->ni = *ni; |
227 | f2fs_bug_on(ni->blk_addr != NULL_ADDR); | 281 | f2fs_bug_on(sbi, ni->blk_addr != NULL_ADDR); |
228 | } | 282 | } |
229 | 283 | ||
230 | /* sanity check */ | 284 | /* sanity check */ |
231 | f2fs_bug_on(nat_get_blkaddr(e) != ni->blk_addr); | 285 | f2fs_bug_on(sbi, nat_get_blkaddr(e) != ni->blk_addr); |
232 | f2fs_bug_on(nat_get_blkaddr(e) == NULL_ADDR && | 286 | f2fs_bug_on(sbi, nat_get_blkaddr(e) == NULL_ADDR && |
233 | new_blkaddr == NULL_ADDR); | 287 | new_blkaddr == NULL_ADDR); |
234 | f2fs_bug_on(nat_get_blkaddr(e) == NEW_ADDR && | 288 | f2fs_bug_on(sbi, nat_get_blkaddr(e) == NEW_ADDR && |
235 | new_blkaddr == NEW_ADDR); | 289 | new_blkaddr == NEW_ADDR); |
236 | f2fs_bug_on(nat_get_blkaddr(e) != NEW_ADDR && | 290 | f2fs_bug_on(sbi, nat_get_blkaddr(e) != NEW_ADDR && |
237 | nat_get_blkaddr(e) != NULL_ADDR && | 291 | nat_get_blkaddr(e) != NULL_ADDR && |
238 | new_blkaddr == NEW_ADDR); | 292 | new_blkaddr == NEW_ADDR); |
239 | 293 | ||
240 | /* increament version no as node is removed */ | 294 | /* increment version no as node is removed */ |
241 | if (nat_get_blkaddr(e) != NEW_ADDR && new_blkaddr == NULL_ADDR) { | 295 | if (nat_get_blkaddr(e) != NEW_ADDR && new_blkaddr == NULL_ADDR) { |
242 | unsigned char version = nat_get_version(e); | 296 | unsigned char version = nat_get_version(e); |
243 | nat_set_version(e, inc_node_version(version)); | 297 | nat_set_version(e, inc_node_version(version)); |
@@ -245,12 +299,17 @@ retry: | |||
245 | 299 | ||
246 | /* change address */ | 300 | /* change address */ |
247 | nat_set_blkaddr(e, new_blkaddr); | 301 | nat_set_blkaddr(e, new_blkaddr); |
302 | if (new_blkaddr == NEW_ADDR || new_blkaddr == NULL_ADDR) | ||
303 | set_nat_flag(e, IS_CHECKPOINTED, false); | ||
248 | __set_nat_cache_dirty(nm_i, e); | 304 | __set_nat_cache_dirty(nm_i, e); |
249 | 305 | ||
250 | /* update fsync_mark if its inode nat entry is still alive */ | 306 | /* update fsync_mark if its inode nat entry is still alive */ |
251 | e = __lookup_nat_cache(nm_i, ni->ino); | 307 | e = __lookup_nat_cache(nm_i, ni->ino); |
252 | if (e) | 308 | if (e) { |
253 | e->fsync_done = fsync_done; | 309 | if (fsync_done && ni->nid == ni->ino) |
310 | set_nat_flag(e, HAS_FSYNCED_INODE, true); | ||
311 | set_nat_flag(e, HAS_LAST_FSYNC, fsync_done); | ||
312 | } | ||
254 | write_unlock(&nm_i->nat_tree_lock); | 313 | write_unlock(&nm_i->nat_tree_lock); |
255 | } | 314 | } |
256 | 315 | ||
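The bool pair (checkpointed, fsync_done) on struct nat_entry has been folded into a flag byte queried through get_nat_flag()/set_nat_flag(). A hedged sketch of those accessors as this diff assumes them; the real definitions live in fs/f2fs/node.h, and the flag ordering here is an assumption:

/* Sketch of the accessors assumed above; flag order is an assumption. */
enum {
	IS_CHECKPOINTED,	/* checkpointed before? */
	HAS_FSYNCED_INODE,	/* inode itself fsynced? */
	HAS_LAST_FSYNC,		/* latest node block covered by fsync? */
	IS_DIRTY,		/* entry queued on a dirty set? */
};

static inline void set_nat_flag(struct nat_entry *ne,
				unsigned int type, bool set)
{
	unsigned char mask = 0x01 << type;

	if (set)
		ne->flag |= mask;
	else
		ne->flag &= ~mask;
}

static inline bool get_nat_flag(struct nat_entry *ne, unsigned int type)
{
	unsigned char mask = 0x01 << type;

	return ne->flag & mask;
}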
@@ -274,7 +333,7 @@ int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) | |||
274 | } | 333 | } |
275 | 334 | ||
276 | /* | 335 | /* |
277 | * This function returns always success | 336 | * This function always returns success |
278 | */ | 337 | */ |
279 | void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni) | 338 | void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni) |
280 | { | 339 | { |
@@ -411,7 +470,7 @@ got: | |||
411 | */ | 470 | */ |
412 | int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) | 471 | int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) |
413 | { | 472 | { |
414 | struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); | 473 | struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); |
415 | struct page *npage[4]; | 474 | struct page *npage[4]; |
416 | struct page *parent; | 475 | struct page *parent; |
417 | int offset[4]; | 476 | int offset[4]; |
@@ -504,15 +563,15 @@ release_out: | |||
504 | 563 | ||
505 | static void truncate_node(struct dnode_of_data *dn) | 564 | static void truncate_node(struct dnode_of_data *dn) |
506 | { | 565 | { |
507 | struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); | 566 | struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); |
508 | struct node_info ni; | 567 | struct node_info ni; |
509 | 568 | ||
510 | get_node_info(sbi, dn->nid, &ni); | 569 | get_node_info(sbi, dn->nid, &ni); |
511 | if (dn->inode->i_blocks == 0) { | 570 | if (dn->inode->i_blocks == 0) { |
512 | f2fs_bug_on(ni.blk_addr != NULL_ADDR); | 571 | f2fs_bug_on(sbi, ni.blk_addr != NULL_ADDR); |
513 | goto invalidate; | 572 | goto invalidate; |
514 | } | 573 | } |
515 | f2fs_bug_on(ni.blk_addr == NULL_ADDR); | 574 | f2fs_bug_on(sbi, ni.blk_addr == NULL_ADDR); |
516 | 575 | ||
517 | /* Deallocate node address */ | 576 | /* Deallocate node address */ |
518 | invalidate_blocks(sbi, ni.blk_addr); | 577 | invalidate_blocks(sbi, ni.blk_addr); |
@@ -540,14 +599,13 @@ invalidate: | |||
540 | 599 | ||
541 | static int truncate_dnode(struct dnode_of_data *dn) | 600 | static int truncate_dnode(struct dnode_of_data *dn) |
542 | { | 601 | { |
543 | struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); | ||
544 | struct page *page; | 602 | struct page *page; |
545 | 603 | ||
546 | if (dn->nid == 0) | 604 | if (dn->nid == 0) |
547 | return 1; | 605 | return 1; |
548 | 606 | ||
549 | /* get direct node */ | 607 | /* get direct node */ |
550 | page = get_node_page(sbi, dn->nid); | 608 | page = get_node_page(F2FS_I_SB(dn->inode), dn->nid); |
551 | if (IS_ERR(page) && PTR_ERR(page) == -ENOENT) | 609 | if (IS_ERR(page) && PTR_ERR(page) == -ENOENT) |
552 | return 1; | 610 | return 1; |
553 | else if (IS_ERR(page)) | 611 | else if (IS_ERR(page)) |
@@ -564,7 +622,6 @@ static int truncate_dnode(struct dnode_of_data *dn) | |||
564 | static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs, | 622 | static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs, |
565 | int ofs, int depth) | 623 | int ofs, int depth) |
566 | { | 624 | { |
567 | struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); | ||
568 | struct dnode_of_data rdn = *dn; | 625 | struct dnode_of_data rdn = *dn; |
569 | struct page *page; | 626 | struct page *page; |
570 | struct f2fs_node *rn; | 627 | struct f2fs_node *rn; |
@@ -578,7 +635,7 @@ static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs, | |||
578 | 635 | ||
579 | trace_f2fs_truncate_nodes_enter(dn->inode, dn->nid, dn->data_blkaddr); | 636 | trace_f2fs_truncate_nodes_enter(dn->inode, dn->nid, dn->data_blkaddr); |
580 | 637 | ||
581 | page = get_node_page(sbi, dn->nid); | 638 | page = get_node_page(F2FS_I_SB(dn->inode), dn->nid); |
582 | if (IS_ERR(page)) { | 639 | if (IS_ERR(page)) { |
583 | trace_f2fs_truncate_nodes_exit(dn->inode, PTR_ERR(page)); | 640 | trace_f2fs_truncate_nodes_exit(dn->inode, PTR_ERR(page)); |
584 | return PTR_ERR(page); | 641 | return PTR_ERR(page); |
@@ -636,7 +693,6 @@ out_err: | |||
636 | static int truncate_partial_nodes(struct dnode_of_data *dn, | 693 | static int truncate_partial_nodes(struct dnode_of_data *dn, |
637 | struct f2fs_inode *ri, int *offset, int depth) | 694 | struct f2fs_inode *ri, int *offset, int depth) |
638 | { | 695 | { |
639 | struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); | ||
640 | struct page *pages[2]; | 696 | struct page *pages[2]; |
641 | nid_t nid[3]; | 697 | nid_t nid[3]; |
642 | nid_t child_nid; | 698 | nid_t child_nid; |
@@ -650,8 +706,8 @@ static int truncate_partial_nodes(struct dnode_of_data *dn, | |||
650 | 706 | ||
651 | /* get indirect nodes in the path */ | 707 | /* get indirect nodes in the path */ |
652 | for (i = 0; i < idx + 1; i++) { | 708 | for (i = 0; i < idx + 1; i++) { |
653 | /* refernece count'll be increased */ | 709 | /* reference count'll be increased */ |
654 | pages[i] = get_node_page(sbi, nid[i]); | 710 | pages[i] = get_node_page(F2FS_I_SB(dn->inode), nid[i]); |
655 | if (IS_ERR(pages[i])) { | 711 | if (IS_ERR(pages[i])) { |
656 | err = PTR_ERR(pages[i]); | 712 | err = PTR_ERR(pages[i]); |
657 | idx = i - 1; | 713 | idx = i - 1; |
@@ -696,7 +752,7 @@ fail: | |||
696 | */ | 752 | */ |
697 | int truncate_inode_blocks(struct inode *inode, pgoff_t from) | 753 | int truncate_inode_blocks(struct inode *inode, pgoff_t from) |
698 | { | 754 | { |
699 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | 755 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); |
700 | int err = 0, cont = 1; | 756 | int err = 0, cont = 1; |
701 | int level, offset[4], noffset[4]; | 757 | int level, offset[4], noffset[4]; |
702 | unsigned int nofs = 0; | 758 | unsigned int nofs = 0; |
@@ -792,7 +848,7 @@ fail: | |||
792 | 848 | ||
793 | int truncate_xattr_node(struct inode *inode, struct page *page) | 849 | int truncate_xattr_node(struct inode *inode, struct page *page) |
794 | { | 850 | { |
795 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | 851 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); |
796 | nid_t nid = F2FS_I(inode)->i_xattr_nid; | 852 | nid_t nid = F2FS_I(inode)->i_xattr_nid; |
797 | struct dnode_of_data dn; | 853 | struct dnode_of_data dn; |
798 | struct page *npage; | 854 | struct page *npage; |
@@ -823,22 +879,27 @@ int truncate_xattr_node(struct inode *inode, struct page *page) | |||
823 | */ | 879 | */ |
824 | void remove_inode_page(struct inode *inode) | 880 | void remove_inode_page(struct inode *inode) |
825 | { | 881 | { |
826 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | ||
827 | struct page *page; | ||
828 | nid_t ino = inode->i_ino; | ||
829 | struct dnode_of_data dn; | 882 | struct dnode_of_data dn; |
830 | 883 | ||
831 | page = get_node_page(sbi, ino); | 884 | set_new_dnode(&dn, inode, NULL, NULL, inode->i_ino); |
832 | if (IS_ERR(page)) | 885 | if (get_dnode_of_data(&dn, 0, LOOKUP_NODE)) |
833 | return; | 886 | return; |
834 | 887 | ||
835 | if (truncate_xattr_node(inode, page)) { | 888 | if (truncate_xattr_node(inode, dn.inode_page)) { |
836 | f2fs_put_page(page, 1); | 889 | f2fs_put_dnode(&dn); |
837 | return; | 890 | return; |
838 | } | 891 | } |
839 | /* 0 is possible, after f2fs_new_inode() is failed */ | 892 | |
840 | f2fs_bug_on(inode->i_blocks != 0 && inode->i_blocks != 1); | 893 | /* remove potential inline_data blocks */ |
841 | set_new_dnode(&dn, inode, page, page, ino); | 894 | if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || |
895 | S_ISLNK(inode->i_mode)) | ||
896 | truncate_data_blocks_range(&dn, 1); | ||
897 | |||
898 | /* 0 is possible, after f2fs_new_inode() has failed */ | ||
899 | f2fs_bug_on(F2FS_I_SB(inode), | ||
900 | inode->i_blocks != 0 && inode->i_blocks != 1); | ||
901 | |||
902 | /* will put inode & node pages */ | ||
842 | truncate_node(&dn); | 903 | truncate_node(&dn); |
843 | } | 904 | } |
844 | 905 | ||
@@ -856,7 +917,7 @@ struct page *new_inode_page(struct inode *inode) | |||
856 | struct page *new_node_page(struct dnode_of_data *dn, | 917 | struct page *new_node_page(struct dnode_of_data *dn, |
857 | unsigned int ofs, struct page *ipage) | 918 | unsigned int ofs, struct page *ipage) |
858 | { | 919 | { |
859 | struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); | 920 | struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); |
860 | struct node_info old_ni, new_ni; | 921 | struct node_info old_ni, new_ni; |
861 | struct page *page; | 922 | struct page *page; |
862 | int err; | 923 | int err; |
@@ -876,7 +937,7 @@ struct page *new_node_page(struct dnode_of_data *dn, | |||
876 | get_node_info(sbi, dn->nid, &old_ni); | 937 | get_node_info(sbi, dn->nid, &old_ni); |
877 | 938 | ||
878 | /* Reinitialize old_ni with new node page */ | 939 | /* Reinitialize old_ni with new node page */ |
879 | f2fs_bug_on(old_ni.blk_addr != NULL_ADDR); | 940 | f2fs_bug_on(sbi, old_ni.blk_addr != NULL_ADDR); |
880 | new_ni = old_ni; | 941 | new_ni = old_ni; |
881 | new_ni.ino = dn->inode->i_ino; | 942 | new_ni.ino = dn->inode->i_ino; |
882 | set_node_addr(sbi, &new_ni, NEW_ADDR, false); | 943 | set_node_addr(sbi, &new_ni, NEW_ADDR, false); |
@@ -914,7 +975,7 @@ fail: | |||
914 | */ | 975 | */ |
915 | static int read_node_page(struct page *page, int rw) | 976 | static int read_node_page(struct page *page, int rw) |
916 | { | 977 | { |
917 | struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb); | 978 | struct f2fs_sb_info *sbi = F2FS_P_SB(page); |
918 | struct node_info ni; | 979 | struct node_info ni; |
919 | 980 | ||
920 | get_node_info(sbi, page->index, &ni); | 981 | get_node_info(sbi, page->index, &ni); |
@@ -990,7 +1051,7 @@ got_it: | |||
990 | */ | 1051 | */ |
991 | struct page *get_node_page_ra(struct page *parent, int start) | 1052 | struct page *get_node_page_ra(struct page *parent, int start) |
992 | { | 1053 | { |
993 | struct f2fs_sb_info *sbi = F2FS_SB(parent->mapping->host->i_sb); | 1054 | struct f2fs_sb_info *sbi = F2FS_P_SB(parent); |
994 | struct blk_plug plug; | 1055 | struct blk_plug plug; |
995 | struct page *page; | 1056 | struct page *page; |
996 | int err, i, end; | 1057 | int err, i, end; |
@@ -1120,17 +1181,24 @@ continue_unlock: | |||
1120 | 1181 | ||
1121 | /* called by fsync() */ | 1182 | /* called by fsync() */ |
1122 | if (ino && IS_DNODE(page)) { | 1183 | if (ino && IS_DNODE(page)) { |
1123 | int mark = !is_checkpointed_node(sbi, ino); | ||
1124 | set_fsync_mark(page, 1); | 1184 | set_fsync_mark(page, 1); |
1125 | if (IS_INODE(page)) | 1185 | if (IS_INODE(page)) { |
1126 | set_dentry_mark(page, mark); | 1186 | if (!is_checkpointed_node(sbi, ino) && |
1187 | !has_fsynced_inode(sbi, ino)) | ||
1188 | set_dentry_mark(page, 1); | ||
1189 | else | ||
1190 | set_dentry_mark(page, 0); | ||
1191 | } | ||
1127 | nwritten++; | 1192 | nwritten++; |
1128 | } else { | 1193 | } else { |
1129 | set_fsync_mark(page, 0); | 1194 | set_fsync_mark(page, 0); |
1130 | set_dentry_mark(page, 0); | 1195 | set_dentry_mark(page, 0); |
1131 | } | 1196 | } |
1132 | NODE_MAPPING(sbi)->a_ops->writepage(page, wbc); | 1197 | |
1133 | wrote++; | 1198 | if (NODE_MAPPING(sbi)->a_ops->writepage(page, wbc)) |
1199 | unlock_page(page); | ||
1200 | else | ||
1201 | wrote++; | ||
1134 | 1202 | ||
1135 | if (--wbc->nr_to_write == 0) | 1203 | if (--wbc->nr_to_write == 0) |
1136 | break; | 1204 | break; |
@@ -1199,7 +1267,7 @@ int wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino) | |||
1199 | static int f2fs_write_node_page(struct page *page, | 1267 | static int f2fs_write_node_page(struct page *page, |
1200 | struct writeback_control *wbc) | 1268 | struct writeback_control *wbc) |
1201 | { | 1269 | { |
1202 | struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb); | 1270 | struct f2fs_sb_info *sbi = F2FS_P_SB(page); |
1203 | nid_t nid; | 1271 | nid_t nid; |
1204 | block_t new_addr; | 1272 | block_t new_addr; |
1205 | struct node_info ni; | 1273 | struct node_info ni; |
@@ -1212,12 +1280,14 @@ static int f2fs_write_node_page(struct page *page, | |||
1212 | 1280 | ||
1213 | if (unlikely(sbi->por_doing)) | 1281 | if (unlikely(sbi->por_doing)) |
1214 | goto redirty_out; | 1282 | goto redirty_out; |
1283 | if (unlikely(f2fs_cp_error(sbi))) | ||
1284 | goto redirty_out; | ||
1215 | 1285 | ||
1216 | f2fs_wait_on_page_writeback(page, NODE); | 1286 | f2fs_wait_on_page_writeback(page, NODE); |
1217 | 1287 | ||
1218 | /* get old block addr of this node page */ | 1288 | /* get old block addr of this node page */ |
1219 | nid = nid_of_node(page); | 1289 | nid = nid_of_node(page); |
1220 | f2fs_bug_on(page->index != nid); | 1290 | f2fs_bug_on(sbi, page->index != nid); |
1221 | 1291 | ||
1222 | get_node_info(sbi, nid, &ni); | 1292 | get_node_info(sbi, nid, &ni); |
1223 | 1293 | ||
@@ -1248,7 +1318,7 @@ redirty_out: | |||
1248 | static int f2fs_write_node_pages(struct address_space *mapping, | 1318 | static int f2fs_write_node_pages(struct address_space *mapping, |
1249 | struct writeback_control *wbc) | 1319 | struct writeback_control *wbc) |
1250 | { | 1320 | { |
1251 | struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb); | 1321 | struct f2fs_sb_info *sbi = F2FS_M_SB(mapping); |
1252 | long diff; | 1322 | long diff; |
1253 | 1323 | ||
1254 | trace_f2fs_writepages(mapping->host, wbc, NODE); | 1324 | trace_f2fs_writepages(mapping->host, wbc, NODE); |
@@ -1273,15 +1343,12 @@ skip_write: | |||
1273 | 1343 | ||
1274 | static int f2fs_set_node_page_dirty(struct page *page) | 1344 | static int f2fs_set_node_page_dirty(struct page *page) |
1275 | { | 1345 | { |
1276 | struct address_space *mapping = page->mapping; | ||
1277 | struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb); | ||
1278 | |||
1279 | trace_f2fs_set_page_dirty(page, NODE); | 1346 | trace_f2fs_set_page_dirty(page, NODE); |
1280 | 1347 | ||
1281 | SetPageUptodate(page); | 1348 | SetPageUptodate(page); |
1282 | if (!PageDirty(page)) { | 1349 | if (!PageDirty(page)) { |
1283 | __set_page_dirty_nobuffers(page); | 1350 | __set_page_dirty_nobuffers(page); |
1284 | inc_page_count(sbi, F2FS_DIRTY_NODES); | 1351 | inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_NODES); |
1285 | SetPagePrivate(page); | 1352 | SetPagePrivate(page); |
1286 | return 1; | 1353 | return 1; |
1287 | } | 1354 | } |
@@ -1292,9 +1359,8 @@ static void f2fs_invalidate_node_page(struct page *page, unsigned int offset, | |||
1292 | unsigned int length) | 1359 | unsigned int length) |
1293 | { | 1360 | { |
1294 | struct inode *inode = page->mapping->host; | 1361 | struct inode *inode = page->mapping->host; |
1295 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | ||
1296 | if (PageDirty(page)) | 1362 | if (PageDirty(page)) |
1297 | dec_page_count(sbi, F2FS_DIRTY_NODES); | 1363 | dec_page_count(F2FS_I_SB(inode), F2FS_DIRTY_NODES); |
1298 | ClearPagePrivate(page); | 1364 | ClearPagePrivate(page); |
1299 | } | 1365 | } |
1300 | 1366 | ||
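
A pattern running through these hunks: open-coded F2FS_SB(...->i_sb) chains give way to F2FS_P_SB(page), F2FS_M_SB(mapping) and F2FS_I_SB(inode), which also lets several local sbi/mapping variables disappear. The helpers themselves are not part of this diff; a plausible sketch, with each one peeling a single level of indirection off the next:

    /* Sketch only: assumed definitions of the new superblock accessors,
     * layered on the pre-existing F2FS_SB(sb) helper. */
    static inline struct f2fs_sb_info *F2FS_I_SB(struct inode *inode)
    {
        return F2FS_SB(inode->i_sb);
    }

    static inline struct f2fs_sb_info *F2FS_M_SB(struct address_space *mapping)
    {
        return F2FS_I_SB(mapping->host);
    }

    static inline struct f2fs_sb_info *F2FS_P_SB(struct page *page)
    {
        return F2FS_M_SB(page->mapping);
    }

Keeping the page -> mapping -> host -> sb walk in one place is the point; the shorter call sites are a side effect.
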
@@ -1347,7 +1413,8 @@ static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build) | |||
1347 | read_lock(&nm_i->nat_tree_lock); | 1413 | read_lock(&nm_i->nat_tree_lock); |
1348 | ne = __lookup_nat_cache(nm_i, nid); | 1414 | ne = __lookup_nat_cache(nm_i, nid); |
1349 | if (ne && | 1415 | if (ne && |
1350 | (!ne->checkpointed || nat_get_blkaddr(ne) != NULL_ADDR)) | 1416 | (!get_nat_flag(ne, IS_CHECKPOINTED) || |
1417 | nat_get_blkaddr(ne) != NULL_ADDR)) | ||
1351 | allocated = true; | 1418 | allocated = true; |
1352 | read_unlock(&nm_i->nat_tree_lock); | 1419 | read_unlock(&nm_i->nat_tree_lock); |
1353 | if (allocated) | 1420 | if (allocated) |
@@ -1404,7 +1471,7 @@ static void scan_nat_page(struct f2fs_sb_info *sbi, | |||
1404 | break; | 1471 | break; |
1405 | 1472 | ||
1406 | blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr); | 1473 | blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr); |
1407 | f2fs_bug_on(blk_addr == NEW_ADDR); | 1474 | f2fs_bug_on(sbi, blk_addr == NEW_ADDR); |
1408 | if (blk_addr == NULL_ADDR) { | 1475 | if (blk_addr == NULL_ADDR) { |
1409 | if (add_free_nid(sbi, start_nid, true) < 0) | 1476 | if (add_free_nid(sbi, start_nid, true) < 0) |
1410 | break; | 1477 | break; |
@@ -1474,12 +1541,12 @@ retry: | |||
1474 | 1541 | ||
1475 | /* We should not use stale free nids created by build_free_nids */ | 1542 | /* We should not use stale free nids created by build_free_nids */ |
1476 | if (nm_i->fcnt && !on_build_free_nids(nm_i)) { | 1543 | if (nm_i->fcnt && !on_build_free_nids(nm_i)) { |
1477 | f2fs_bug_on(list_empty(&nm_i->free_nid_list)); | 1544 | f2fs_bug_on(sbi, list_empty(&nm_i->free_nid_list)); |
1478 | list_for_each_entry(i, &nm_i->free_nid_list, list) | 1545 | list_for_each_entry(i, &nm_i->free_nid_list, list) |
1479 | if (i->state == NID_NEW) | 1546 | if (i->state == NID_NEW) |
1480 | break; | 1547 | break; |
1481 | 1548 | ||
1482 | f2fs_bug_on(i->state != NID_NEW); | 1549 | f2fs_bug_on(sbi, i->state != NID_NEW); |
1483 | *nid = i->nid; | 1550 | *nid = i->nid; |
1484 | i->state = NID_ALLOC; | 1551 | i->state = NID_ALLOC; |
1485 | nm_i->fcnt--; | 1552 | nm_i->fcnt--; |
@@ -1505,7 +1572,7 @@ void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid) | |||
1505 | 1572 | ||
1506 | spin_lock(&nm_i->free_nid_list_lock); | 1573 | spin_lock(&nm_i->free_nid_list_lock); |
1507 | i = __lookup_free_nid_list(nm_i, nid); | 1574 | i = __lookup_free_nid_list(nm_i, nid); |
1508 | f2fs_bug_on(!i || i->state != NID_ALLOC); | 1575 | f2fs_bug_on(sbi, !i || i->state != NID_ALLOC); |
1509 | __del_from_free_nid_list(nm_i, i); | 1576 | __del_from_free_nid_list(nm_i, i); |
1510 | spin_unlock(&nm_i->free_nid_list_lock); | 1577 | spin_unlock(&nm_i->free_nid_list_lock); |
1511 | 1578 | ||
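
Every f2fs_bug_on() in this series grows an sbi argument. The macro body is not shown in this diff; the argument only makes sense if non-debug builds now record the inconsistency on the superblock instead of crashing outright, roughly along these lines (the need_fsck field is an assumption):

    /* Sketch: assumed shape of the two-argument f2fs_bug_on(). */
    #ifdef CONFIG_F2FS_CHECK_FS
    #define f2fs_bug_on(sbi, condition)    BUG_ON(condition)
    #else
    #define f2fs_bug_on(sbi, condition)                 \
        do {                                            \
            if (unlikely(condition)) {                  \
                WARN_ON(1);                             \
                (sbi)->need_fsck = true;                \
            }                                           \
        } while (0)
    #endif
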
@@ -1526,7 +1593,7 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid) | |||
1526 | 1593 | ||
1527 | spin_lock(&nm_i->free_nid_list_lock); | 1594 | spin_lock(&nm_i->free_nid_list_lock); |
1528 | i = __lookup_free_nid_list(nm_i, nid); | 1595 | i = __lookup_free_nid_list(nm_i, nid); |
1529 | f2fs_bug_on(!i || i->state != NID_ALLOC); | 1596 | f2fs_bug_on(sbi, !i || i->state != NID_ALLOC); |
1530 | if (!available_free_memory(sbi, FREE_NIDS)) { | 1597 | if (!available_free_memory(sbi, FREE_NIDS)) { |
1531 | __del_from_free_nid_list(nm_i, i); | 1598 | __del_from_free_nid_list(nm_i, i); |
1532 | need_free = true; | 1599 | need_free = true; |
@@ -1540,35 +1607,21 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid) | |||
1540 | kmem_cache_free(free_nid_slab, i); | 1607 | kmem_cache_free(free_nid_slab, i); |
1541 | } | 1608 | } |
1542 | 1609 | ||
1543 | void recover_node_page(struct f2fs_sb_info *sbi, struct page *page, | ||
1544 | struct f2fs_summary *sum, struct node_info *ni, | ||
1545 | block_t new_blkaddr) | ||
1546 | { | ||
1547 | rewrite_node_page(sbi, page, sum, ni->blk_addr, new_blkaddr); | ||
1548 | set_node_addr(sbi, ni, new_blkaddr, false); | ||
1549 | clear_node_page_dirty(page); | ||
1550 | } | ||
1551 | |||
1552 | void recover_inline_xattr(struct inode *inode, struct page *page) | 1610 | void recover_inline_xattr(struct inode *inode, struct page *page) |
1553 | { | 1611 | { |
1554 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | ||
1555 | void *src_addr, *dst_addr; | 1612 | void *src_addr, *dst_addr; |
1556 | size_t inline_size; | 1613 | size_t inline_size; |
1557 | struct page *ipage; | 1614 | struct page *ipage; |
1558 | struct f2fs_inode *ri; | 1615 | struct f2fs_inode *ri; |
1559 | 1616 | ||
1560 | if (!f2fs_has_inline_xattr(inode)) | 1617 | ipage = get_node_page(F2FS_I_SB(inode), inode->i_ino); |
1561 | return; | 1618 | f2fs_bug_on(F2FS_I_SB(inode), IS_ERR(ipage)); |
1562 | |||
1563 | if (!IS_INODE(page)) | ||
1564 | return; | ||
1565 | 1619 | ||
1566 | ri = F2FS_INODE(page); | 1620 | ri = F2FS_INODE(page); |
1567 | if (!(ri->i_inline & F2FS_INLINE_XATTR)) | 1621 | if (!(ri->i_inline & F2FS_INLINE_XATTR)) { |
1568 | return; | 1622 | clear_inode_flag(F2FS_I(inode), FI_INLINE_XATTR); |
1569 | 1623 | goto update_inode; | |
1570 | ipage = get_node_page(sbi, inode->i_ino); | 1624 | } |
1571 | f2fs_bug_on(IS_ERR(ipage)); | ||
1572 | 1625 | ||
1573 | dst_addr = inline_xattr_addr(ipage); | 1626 | dst_addr = inline_xattr_addr(ipage); |
1574 | src_addr = inline_xattr_addr(page); | 1627 | src_addr = inline_xattr_addr(page); |
@@ -1576,28 +1629,25 @@ void recover_inline_xattr(struct inode *inode, struct page *page) | |||
1576 | 1629 | ||
1577 | f2fs_wait_on_page_writeback(ipage, NODE); | 1630 | f2fs_wait_on_page_writeback(ipage, NODE); |
1578 | memcpy(dst_addr, src_addr, inline_size); | 1631 | memcpy(dst_addr, src_addr, inline_size); |
1579 | 1632 | update_inode: | |
1580 | update_inode(inode, ipage); | 1633 | update_inode(inode, ipage); |
1581 | f2fs_put_page(ipage, 1); | 1634 | f2fs_put_page(ipage, 1); |
1582 | } | 1635 | } |
1583 | 1636 | ||
1584 | bool recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr) | 1637 | void recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr) |
1585 | { | 1638 | { |
1586 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | 1639 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); |
1587 | nid_t prev_xnid = F2FS_I(inode)->i_xattr_nid; | 1640 | nid_t prev_xnid = F2FS_I(inode)->i_xattr_nid; |
1588 | nid_t new_xnid = nid_of_node(page); | 1641 | nid_t new_xnid = nid_of_node(page); |
1589 | struct node_info ni; | 1642 | struct node_info ni; |
1590 | 1643 | ||
1591 | if (!f2fs_has_xattr_block(ofs_of_node(page))) | ||
1592 | return false; | ||
1593 | |||
1594 | /* 1: invalidate the previous xattr nid */ | 1644 | /* 1: invalidate the previous xattr nid */ |
1595 | if (!prev_xnid) | 1645 | if (!prev_xnid) |
1596 | goto recover_xnid; | 1646 | goto recover_xnid; |
1597 | 1647 | ||
1598 | /* Deallocate node address */ | 1648 | /* Deallocate node address */ |
1599 | get_node_info(sbi, prev_xnid, &ni); | 1649 | get_node_info(sbi, prev_xnid, &ni); |
1600 | f2fs_bug_on(ni.blk_addr == NULL_ADDR); | 1650 | f2fs_bug_on(sbi, ni.blk_addr == NULL_ADDR); |
1601 | invalidate_blocks(sbi, ni.blk_addr); | 1651 | invalidate_blocks(sbi, ni.blk_addr); |
1602 | dec_valid_node_count(sbi, inode); | 1652 | dec_valid_node_count(sbi, inode); |
1603 | set_node_addr(sbi, &ni, NULL_ADDR, false); | 1653 | set_node_addr(sbi, &ni, NULL_ADDR, false); |
@@ -1605,7 +1655,7 @@ bool recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr) | |||
1605 | recover_xnid: | 1655 | recover_xnid: |
1606 | /* 2: allocate new xattr nid */ | 1656 | /* 2: allocate new xattr nid */ |
1607 | if (unlikely(!inc_valid_node_count(sbi, inode))) | 1657 | if (unlikely(!inc_valid_node_count(sbi, inode))) |
1608 | f2fs_bug_on(1); | 1658 | f2fs_bug_on(sbi, 1); |
1609 | 1659 | ||
1610 | remove_free_nid(NM_I(sbi), new_xnid); | 1660 | remove_free_nid(NM_I(sbi), new_xnid); |
1611 | get_node_info(sbi, new_xnid, &ni); | 1661 | get_node_info(sbi, new_xnid, &ni); |
@@ -1618,7 +1668,6 @@ recover_xnid: | |||
1618 | set_node_addr(sbi, &ni, blkaddr, false); | 1668 | set_node_addr(sbi, &ni, blkaddr, false); |
1619 | 1669 | ||
1620 | update_inode_page(inode); | 1670 | update_inode_page(inode); |
1621 | return true; | ||
1622 | } | 1671 | } |
1623 | 1672 | ||
1624 | int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) | 1673 | int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) |
@@ -1637,7 +1686,7 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) | |||
1637 | if (!ipage) | 1686 | if (!ipage) |
1638 | return -ENOMEM; | 1687 | return -ENOMEM; |
1639 | 1688 | ||
1640 | /* Should not use this inode from free nid list */ | 1689 | /* Should not use this inode from free nid list */ |
1641 | remove_free_nid(NM_I(sbi), ino); | 1690 | remove_free_nid(NM_I(sbi), ino); |
1642 | 1691 | ||
1643 | SetPageUptodate(ipage); | 1692 | SetPageUptodate(ipage); |
@@ -1651,6 +1700,7 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) | |||
1651 | dst->i_blocks = cpu_to_le64(1); | 1700 | dst->i_blocks = cpu_to_le64(1); |
1652 | dst->i_links = cpu_to_le32(1); | 1701 | dst->i_links = cpu_to_le32(1); |
1653 | dst->i_xattr_nid = 0; | 1702 | dst->i_xattr_nid = 0; |
1703 | dst->i_inline = src->i_inline & F2FS_INLINE_XATTR; | ||
1654 | 1704 | ||
1655 | new_ni = old_ni; | 1705 | new_ni = old_ni; |
1656 | new_ni.ino = ino; | 1706 | new_ni.ino = ino; |
@@ -1659,13 +1709,14 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) | |||
1659 | WARN_ON(1); | 1709 | WARN_ON(1); |
1660 | set_node_addr(sbi, &new_ni, NEW_ADDR, false); | 1710 | set_node_addr(sbi, &new_ni, NEW_ADDR, false); |
1661 | inc_valid_inode_count(sbi); | 1711 | inc_valid_inode_count(sbi); |
1712 | set_page_dirty(ipage); | ||
1662 | f2fs_put_page(ipage, 1); | 1713 | f2fs_put_page(ipage, 1); |
1663 | return 0; | 1714 | return 0; |
1664 | } | 1715 | } |
1665 | 1716 | ||
1666 | /* | 1717 | /* |
1667 | * ra_sum_pages() merges contiguous pages into one bio and submits it. | 1718 |
1668 | * these pre-readed pages are alloced in bd_inode's mapping tree. | 1719 | * these pre-read pages are allocated in bd_inode's mapping tree. |
1669 | */ | 1720 | */ |
1670 | static int ra_sum_pages(struct f2fs_sb_info *sbi, struct page **pages, | 1721 | static int ra_sum_pages(struct f2fs_sb_info *sbi, struct page **pages, |
1671 | int start, int nrpages) | 1722 | int start, int nrpages) |
@@ -1697,7 +1748,7 @@ int restore_node_summary(struct f2fs_sb_info *sbi, | |||
1697 | struct f2fs_summary *sum_entry; | 1748 | struct f2fs_summary *sum_entry; |
1698 | struct inode *inode = sbi->sb->s_bdev->bd_inode; | 1749 | struct inode *inode = sbi->sb->s_bdev->bd_inode; |
1699 | block_t addr; | 1750 | block_t addr; |
1700 | int bio_blocks = MAX_BIO_BLOCKS(max_hw_blocks(sbi)); | 1751 | int bio_blocks = MAX_BIO_BLOCKS(sbi); |
1701 | struct page *pages[bio_blocks]; | 1752 | struct page *pages[bio_blocks]; |
1702 | int i, idx, last_offset, nrpages, err = 0; | 1753 | int i, idx, last_offset, nrpages, err = 0; |
1703 | 1754 | ||
@@ -1709,7 +1760,7 @@ int restore_node_summary(struct f2fs_sb_info *sbi, | |||
1709 | for (i = 0; !err && i < last_offset; i += nrpages, addr += nrpages) { | 1760 | for (i = 0; !err && i < last_offset; i += nrpages, addr += nrpages) { |
1710 | nrpages = min(last_offset - i, bio_blocks); | 1761 | nrpages = min(last_offset - i, bio_blocks); |
1711 | 1762 | ||
1712 | /* read ahead node pages */ | 1763 | /* readahead node pages */ |
1713 | nrpages = ra_sum_pages(sbi, pages, addr, nrpages); | 1764 | nrpages = ra_sum_pages(sbi, pages, addr, nrpages); |
1714 | if (!nrpages) | 1765 | if (!nrpages) |
1715 | return -ENOMEM; | 1766 | return -ENOMEM; |
@@ -1739,89 +1790,6 @@ skip: | |||
1739 | return err; | 1790 | return err; |
1740 | } | 1791 | } |
1741 | 1792 | ||
1742 | static struct nat_entry_set *grab_nat_entry_set(void) | ||
1743 | { | ||
1744 | struct nat_entry_set *nes = | ||
1745 | f2fs_kmem_cache_alloc(nat_entry_set_slab, GFP_ATOMIC); | ||
1746 | |||
1747 | nes->entry_cnt = 0; | ||
1748 | INIT_LIST_HEAD(&nes->set_list); | ||
1749 | INIT_LIST_HEAD(&nes->entry_list); | ||
1750 | return nes; | ||
1751 | } | ||
1752 | |||
1753 | static void release_nat_entry_set(struct nat_entry_set *nes, | ||
1754 | struct f2fs_nm_info *nm_i) | ||
1755 | { | ||
1756 | f2fs_bug_on(!list_empty(&nes->entry_list)); | ||
1757 | |||
1758 | nm_i->dirty_nat_cnt -= nes->entry_cnt; | ||
1759 | list_del(&nes->set_list); | ||
1760 | kmem_cache_free(nat_entry_set_slab, nes); | ||
1761 | } | ||
1762 | |||
1763 | static void adjust_nat_entry_set(struct nat_entry_set *nes, | ||
1764 | struct list_head *head) | ||
1765 | { | ||
1766 | struct nat_entry_set *next = nes; | ||
1767 | |||
1768 | if (list_is_last(&nes->set_list, head)) | ||
1769 | return; | ||
1770 | |||
1771 | list_for_each_entry_continue(next, head, set_list) | ||
1772 | if (nes->entry_cnt <= next->entry_cnt) | ||
1773 | break; | ||
1774 | |||
1775 | list_move_tail(&nes->set_list, &next->set_list); | ||
1776 | } | ||
1777 | |||
1778 | static void add_nat_entry(struct nat_entry *ne, struct list_head *head) | ||
1779 | { | ||
1780 | struct nat_entry_set *nes; | ||
1781 | nid_t start_nid = START_NID(ne->ni.nid); | ||
1782 | |||
1783 | list_for_each_entry(nes, head, set_list) { | ||
1784 | if (nes->start_nid == start_nid) { | ||
1785 | list_move_tail(&ne->list, &nes->entry_list); | ||
1786 | nes->entry_cnt++; | ||
1787 | adjust_nat_entry_set(nes, head); | ||
1788 | return; | ||
1789 | } | ||
1790 | } | ||
1791 | |||
1792 | nes = grab_nat_entry_set(); | ||
1793 | |||
1794 | nes->start_nid = start_nid; | ||
1795 | list_move_tail(&ne->list, &nes->entry_list); | ||
1796 | nes->entry_cnt++; | ||
1797 | list_add(&nes->set_list, head); | ||
1798 | } | ||
1799 | |||
1800 | static void merge_nats_in_set(struct f2fs_sb_info *sbi) | ||
1801 | { | ||
1802 | struct f2fs_nm_info *nm_i = NM_I(sbi); | ||
1803 | struct list_head *dirty_list = &nm_i->dirty_nat_entries; | ||
1804 | struct list_head *set_list = &nm_i->nat_entry_set; | ||
1805 | struct nat_entry *ne, *tmp; | ||
1806 | |||
1807 | write_lock(&nm_i->nat_tree_lock); | ||
1808 | list_for_each_entry_safe(ne, tmp, dirty_list, list) { | ||
1809 | if (nat_get_blkaddr(ne) == NEW_ADDR) | ||
1810 | continue; | ||
1811 | add_nat_entry(ne, set_list); | ||
1812 | nm_i->dirty_nat_cnt++; | ||
1813 | } | ||
1814 | write_unlock(&nm_i->nat_tree_lock); | ||
1815 | } | ||
1816 | |||
1817 | static bool __has_cursum_space(struct f2fs_summary_block *sum, int size) | ||
1818 | { | ||
1819 | if (nats_in_cursum(sum) + size <= NAT_JOURNAL_ENTRIES) | ||
1820 | return true; | ||
1821 | else | ||
1822 | return false; | ||
1823 | } | ||
1824 | |||
1825 | static void remove_nats_in_journal(struct f2fs_sb_info *sbi) | 1793 | static void remove_nats_in_journal(struct f2fs_sb_info *sbi) |
1826 | { | 1794 | { |
1827 | struct f2fs_nm_info *nm_i = NM_I(sbi); | 1795 | struct f2fs_nm_info *nm_i = NM_I(sbi); |
@@ -1856,99 +1824,130 @@ found: | |||
1856 | mutex_unlock(&curseg->curseg_mutex); | 1824 | mutex_unlock(&curseg->curseg_mutex); |
1857 | } | 1825 | } |
1858 | 1826 | ||
1859 | /* | 1827 | static void __adjust_nat_entry_set(struct nat_entry_set *nes, |
1860 | * This function is called during the checkpointing process. | 1828 | struct list_head *head, int max) |
1861 | */ | ||
1862 | void flush_nat_entries(struct f2fs_sb_info *sbi) | ||
1863 | { | 1829 | { |
1864 | struct f2fs_nm_info *nm_i = NM_I(sbi); | 1830 | struct nat_entry_set *cur; |
1865 | struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); | ||
1866 | struct f2fs_summary_block *sum = curseg->sum_blk; | ||
1867 | struct nat_entry_set *nes, *tmp; | ||
1868 | struct list_head *head = &nm_i->nat_entry_set; | ||
1869 | bool to_journal = true; | ||
1870 | 1831 | ||
1871 | /* merge nat entries of dirty list to nat entry set temporarily */ | 1832 | if (nes->entry_cnt >= max) |
1872 | merge_nats_in_set(sbi); | 1833 | goto add_out; |
1873 | 1834 | ||
1874 | /* | 1835 | list_for_each_entry(cur, head, set_list) { |
1875 | * if there are no enough space in journal to store dirty nat | 1836 | if (cur->entry_cnt >= nes->entry_cnt) { |
1876 | * entries, remove all entries from journal and merge them | 1837 | list_add(&nes->set_list, cur->set_list.prev); |
1877 | * into nat entry set. | 1838 | return; |
1878 | */ | 1839 | } |
1879 | if (!__has_cursum_space(sum, nm_i->dirty_nat_cnt)) { | ||
1880 | remove_nats_in_journal(sbi); | ||
1881 | |||
1882 | /* | ||
1883 | * merge nat entries of dirty list to nat entry set temporarily | ||
1884 | */ | ||
1885 | merge_nats_in_set(sbi); | ||
1886 | } | 1840 | } |
1841 | add_out: | ||
1842 | list_add_tail(&nes->set_list, head); | ||
1843 | } | ||
1887 | 1844 | ||
1888 | if (!nm_i->dirty_nat_cnt) | 1845 | static void __flush_nat_entry_set(struct f2fs_sb_info *sbi, |
1889 | return; | 1846 | struct nat_entry_set *set) |
1847 | { | ||
1848 | struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); | ||
1849 | struct f2fs_summary_block *sum = curseg->sum_blk; | ||
1850 | nid_t start_nid = set->set * NAT_ENTRY_PER_BLOCK; | ||
1851 | bool to_journal = true; | ||
1852 | struct f2fs_nat_block *nat_blk; | ||
1853 | struct nat_entry *ne, *cur; | ||
1854 | struct page *page = NULL; | ||
1890 | 1855 | ||
1891 | /* | 1856 | /* |
1892 | * there are two steps to flush nat entries: | 1857 | * there are two steps to flush nat entries: |
1893 | * #1, flush nat entries to journal in current hot data summary block. | 1858 | * #1, flush nat entries to journal in current hot data summary block. |
1894 | * #2, flush nat entries to nat page. | 1859 | * #2, flush nat entries to nat page. |
1895 | */ | 1860 | */ |
1896 | list_for_each_entry_safe(nes, tmp, head, set_list) { | 1861 | if (!__has_cursum_space(sum, set->entry_cnt, NAT_JOURNAL)) |
1897 | struct f2fs_nat_block *nat_blk; | 1862 | to_journal = false; |
1898 | struct nat_entry *ne, *cur; | ||
1899 | struct page *page; | ||
1900 | nid_t start_nid = nes->start_nid; | ||
1901 | 1863 | ||
1902 | if (to_journal && !__has_cursum_space(sum, nes->entry_cnt)) | 1864 | if (to_journal) { |
1903 | to_journal = false; | 1865 | mutex_lock(&curseg->curseg_mutex); |
1866 | } else { | ||
1867 | page = get_next_nat_page(sbi, start_nid); | ||
1868 | nat_blk = page_address(page); | ||
1869 | f2fs_bug_on(sbi, !nat_blk); | ||
1870 | } | ||
1871 | |||
1872 | /* flush dirty nats in nat entry set */ | ||
1873 | list_for_each_entry_safe(ne, cur, &set->entry_list, list) { | ||
1874 | struct f2fs_nat_entry *raw_ne; | ||
1875 | nid_t nid = nat_get_nid(ne); | ||
1876 | int offset; | ||
1877 | |||
1878 | if (nat_get_blkaddr(ne) == NEW_ADDR) | ||
1879 | continue; | ||
1904 | 1880 | ||
1905 | if (to_journal) { | 1881 | if (to_journal) { |
1906 | mutex_lock(&curseg->curseg_mutex); | 1882 | offset = lookup_journal_in_cursum(sum, |
1883 | NAT_JOURNAL, nid, 1); | ||
1884 | f2fs_bug_on(sbi, offset < 0); | ||
1885 | raw_ne = &nat_in_journal(sum, offset); | ||
1886 | nid_in_journal(sum, offset) = cpu_to_le32(nid); | ||
1907 | } else { | 1887 | } else { |
1908 | page = get_next_nat_page(sbi, start_nid); | 1888 | raw_ne = &nat_blk->entries[nid - start_nid]; |
1909 | nat_blk = page_address(page); | ||
1910 | f2fs_bug_on(!nat_blk); | ||
1911 | } | 1889 | } |
1890 | raw_nat_from_node_info(raw_ne, &ne->ni); | ||
1912 | 1891 | ||
1913 | /* flush dirty nats in nat entry set */ | 1892 | write_lock(&NM_I(sbi)->nat_tree_lock); |
1914 | list_for_each_entry_safe(ne, cur, &nes->entry_list, list) { | 1893 | nat_reset_flag(ne); |
1915 | struct f2fs_nat_entry *raw_ne; | 1894 | __clear_nat_cache_dirty(NM_I(sbi), ne); |
1916 | nid_t nid = nat_get_nid(ne); | 1895 | write_unlock(&NM_I(sbi)->nat_tree_lock); |
1917 | int offset; | ||
1918 | 1896 | ||
1919 | if (to_journal) { | 1897 | if (nat_get_blkaddr(ne) == NULL_ADDR) |
1920 | offset = lookup_journal_in_cursum(sum, | 1898 | add_free_nid(sbi, nid, false); |
1921 | NAT_JOURNAL, nid, 1); | 1899 | } |
1922 | f2fs_bug_on(offset < 0); | ||
1923 | raw_ne = &nat_in_journal(sum, offset); | ||
1924 | nid_in_journal(sum, offset) = cpu_to_le32(nid); | ||
1925 | } else { | ||
1926 | raw_ne = &nat_blk->entries[nid - start_nid]; | ||
1927 | } | ||
1928 | raw_nat_from_node_info(raw_ne, &ne->ni); | ||
1929 | 1900 | ||
1930 | if (nat_get_blkaddr(ne) == NULL_ADDR && | 1901 | if (to_journal) |
1931 | add_free_nid(sbi, nid, false) <= 0) { | 1902 | mutex_unlock(&curseg->curseg_mutex); |
1932 | write_lock(&nm_i->nat_tree_lock); | 1903 | else |
1933 | __del_from_nat_cache(nm_i, ne); | 1904 | f2fs_put_page(page, 1); |
1934 | write_unlock(&nm_i->nat_tree_lock); | ||
1935 | } else { | ||
1936 | write_lock(&nm_i->nat_tree_lock); | ||
1937 | __clear_nat_cache_dirty(nm_i, ne); | ||
1938 | write_unlock(&nm_i->nat_tree_lock); | ||
1939 | } | ||
1940 | } | ||
1941 | 1905 | ||
1942 | if (to_journal) | 1906 | if (!set->entry_cnt) { |
1943 | mutex_unlock(&curseg->curseg_mutex); | 1907 | radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set); |
1944 | else | 1908 | kmem_cache_free(nat_entry_set_slab, set); |
1945 | f2fs_put_page(page, 1); | 1909 | } |
1910 | } | ||
1946 | 1911 | ||
1947 | release_nat_entry_set(nes, nm_i); | 1912 | /* |
1913 | * This function is called during the checkpointing process. | ||
1914 | */ | ||
1915 | void flush_nat_entries(struct f2fs_sb_info *sbi) | ||
1916 | { | ||
1917 | struct f2fs_nm_info *nm_i = NM_I(sbi); | ||
1918 | struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); | ||
1919 | struct f2fs_summary_block *sum = curseg->sum_blk; | ||
1920 | struct nat_entry_set *setvec[NATVEC_SIZE]; | ||
1921 | struct nat_entry_set *set, *tmp; | ||
1922 | unsigned int found; | ||
1923 | nid_t set_idx = 0; | ||
1924 | LIST_HEAD(sets); | ||
1925 | |||
1926 | /* | ||
1927 | * if there is not enough space in the journal to store dirty nat | ||
1928 | * entries, remove all entries from journal and merge them | ||
1929 | * into nat entry set. | ||
1930 | */ | ||
1931 | if (!__has_cursum_space(sum, nm_i->dirty_nat_cnt, NAT_JOURNAL)) | ||
1932 | remove_nats_in_journal(sbi); | ||
1933 | |||
1934 | if (!nm_i->dirty_nat_cnt) | ||
1935 | return; | ||
1936 | |||
1937 | while ((found = __gang_lookup_nat_set(nm_i, | ||
1938 | set_idx, NATVEC_SIZE, setvec))) { | ||
1939 | unsigned idx; | ||
1940 | set_idx = setvec[found - 1]->set + 1; | ||
1941 | for (idx = 0; idx < found; idx++) | ||
1942 | __adjust_nat_entry_set(setvec[idx], &sets, | ||
1943 | MAX_NAT_JENTRIES(sum)); | ||
1948 | } | 1944 | } |
1949 | 1945 | ||
1950 | f2fs_bug_on(!list_empty(head)); | 1946 | /* flush dirty nats in nat entry set */ |
1951 | f2fs_bug_on(nm_i->dirty_nat_cnt); | 1947 | list_for_each_entry_safe(set, tmp, &sets, set_list) |
1948 | __flush_nat_entry_set(sbi, set); | ||
1949 | |||
1950 | f2fs_bug_on(sbi, nm_i->dirty_nat_cnt); | ||
1952 | } | 1951 | } |
1953 | 1952 | ||
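
The rewritten flush path no longer maintains one global dirty_nat_entries list. Dirty entries live in per-NAT-block nat_entry_set objects indexed by the new nat_set_root radix tree; flush_nat_entries() collects them with a gang lookup, and __adjust_nat_entry_set() insertion-sorts them by entry_cnt so that small sets get first claim on the journal slots. The gang-lookup idiom, sketched under the assumption that __gang_lookup_nat_set() is a thin wrapper around radix_tree_gang_lookup():

    /* Sketch: batched radix-tree iteration, NATVEC_SIZE entries per pass.
     * The callback stands in for the sorting/flush work done above. */
    static void for_each_nat_set(struct f2fs_nm_info *nm_i,
                    void (*fn)(struct nat_entry_set *))
    {
        struct nat_entry_set *setvec[NATVEC_SIZE];
        unsigned int found;
        nid_t set_idx = 0;

        while ((found = radix_tree_gang_lookup(&nm_i->nat_set_root,
                (void **)setvec, set_idx, NATVEC_SIZE))) {
            unsigned int idx;

            /* resume the next batch after the last set seen */
            set_idx = setvec[found - 1]->set + 1;
            for (idx = 0; idx < found; idx++)
                fn(setvec[idx]);
        }
    }
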
1954 | static int init_node_manager(struct f2fs_sb_info *sbi) | 1953 | static int init_node_manager(struct f2fs_sb_info *sbi) |
@@ -1967,7 +1966,7 @@ static int init_node_manager(struct f2fs_sb_info *sbi) | |||
1967 | nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks; | 1966 | nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks; |
1968 | 1967 | ||
1969 | /* not used nids: 0, node, meta, (and root counted as valid node) */ | 1968 | /* not used nids: 0, node, meta, (and root counted as valid node) */ |
1970 | nm_i->available_nids = nm_i->max_nid - 3; | 1969 | nm_i->available_nids = nm_i->max_nid - F2FS_RESERVED_NODE_NUM; |
1971 | nm_i->fcnt = 0; | 1970 | nm_i->fcnt = 0; |
1972 | nm_i->nat_cnt = 0; | 1971 | nm_i->nat_cnt = 0; |
1973 | nm_i->ram_thresh = DEF_RAM_THRESHOLD; | 1972 | nm_i->ram_thresh = DEF_RAM_THRESHOLD; |
@@ -1975,9 +1974,8 @@ static int init_node_manager(struct f2fs_sb_info *sbi) | |||
1975 | INIT_RADIX_TREE(&nm_i->free_nid_root, GFP_ATOMIC); | 1974 | INIT_RADIX_TREE(&nm_i->free_nid_root, GFP_ATOMIC); |
1976 | INIT_LIST_HEAD(&nm_i->free_nid_list); | 1975 | INIT_LIST_HEAD(&nm_i->free_nid_list); |
1977 | INIT_RADIX_TREE(&nm_i->nat_root, GFP_ATOMIC); | 1976 | INIT_RADIX_TREE(&nm_i->nat_root, GFP_ATOMIC); |
1977 | INIT_RADIX_TREE(&nm_i->nat_set_root, GFP_ATOMIC); | ||
1978 | INIT_LIST_HEAD(&nm_i->nat_entries); | 1978 | INIT_LIST_HEAD(&nm_i->nat_entries); |
1979 | INIT_LIST_HEAD(&nm_i->dirty_nat_entries); | ||
1980 | INIT_LIST_HEAD(&nm_i->nat_entry_set); | ||
1981 | 1979 | ||
1982 | mutex_init(&nm_i->build_lock); | 1980 | mutex_init(&nm_i->build_lock); |
1983 | spin_lock_init(&nm_i->free_nid_list_lock); | 1981 | spin_lock_init(&nm_i->free_nid_list_lock); |
@@ -2026,14 +2024,14 @@ void destroy_node_manager(struct f2fs_sb_info *sbi) | |||
2026 | /* destroy free nid list */ | 2024 | /* destroy free nid list */ |
2027 | spin_lock(&nm_i->free_nid_list_lock); | 2025 | spin_lock(&nm_i->free_nid_list_lock); |
2028 | list_for_each_entry_safe(i, next_i, &nm_i->free_nid_list, list) { | 2026 | list_for_each_entry_safe(i, next_i, &nm_i->free_nid_list, list) { |
2029 | f2fs_bug_on(i->state == NID_ALLOC); | 2027 | f2fs_bug_on(sbi, i->state == NID_ALLOC); |
2030 | __del_from_free_nid_list(nm_i, i); | 2028 | __del_from_free_nid_list(nm_i, i); |
2031 | nm_i->fcnt--; | 2029 | nm_i->fcnt--; |
2032 | spin_unlock(&nm_i->free_nid_list_lock); | 2030 | spin_unlock(&nm_i->free_nid_list_lock); |
2033 | kmem_cache_free(free_nid_slab, i); | 2031 | kmem_cache_free(free_nid_slab, i); |
2034 | spin_lock(&nm_i->free_nid_list_lock); | 2032 | spin_lock(&nm_i->free_nid_list_lock); |
2035 | } | 2033 | } |
2036 | f2fs_bug_on(nm_i->fcnt); | 2034 | f2fs_bug_on(sbi, nm_i->fcnt); |
2037 | spin_unlock(&nm_i->free_nid_list_lock); | 2035 | spin_unlock(&nm_i->free_nid_list_lock); |
2038 | 2036 | ||
2039 | /* destroy nat cache */ | 2037 | /* destroy nat cache */ |
@@ -2045,7 +2043,7 @@ void destroy_node_manager(struct f2fs_sb_info *sbi) | |||
2045 | for (idx = 0; idx < found; idx++) | 2043 | for (idx = 0; idx < found; idx++) |
2046 | __del_from_nat_cache(nm_i, natvec[idx]); | 2044 | __del_from_nat_cache(nm_i, natvec[idx]); |
2047 | } | 2045 | } |
2048 | f2fs_bug_on(nm_i->nat_cnt); | 2046 | f2fs_bug_on(sbi, nm_i->nat_cnt); |
2049 | write_unlock(&nm_i->nat_tree_lock); | 2047 | write_unlock(&nm_i->nat_tree_lock); |
2050 | 2048 | ||
2051 | kfree(nm_i->nat_bitmap); | 2049 | kfree(nm_i->nat_bitmap); |
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 8a116a407599..8d5e6e0dd840 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h | |||
@@ -39,10 +39,16 @@ struct node_info { | |||
39 | unsigned char version; /* version of the node */ | 39 | unsigned char version; /* version of the node */ |
40 | }; | 40 | }; |
41 | 41 | ||
42 | enum { | ||
43 | IS_CHECKPOINTED, /* is it checkpointed before? */ | ||
44 | HAS_FSYNCED_INODE, /* is the inode fsynced before? */ | ||
45 | HAS_LAST_FSYNC, /* has the latest node fsync mark? */ | ||
46 | IS_DIRTY, /* this nat entry is dirty? */ | ||
47 | }; | ||
48 | |||
42 | struct nat_entry { | 49 | struct nat_entry { |
43 | struct list_head list; /* for clean or dirty nat list */ | 50 | struct list_head list; /* for clean or dirty nat list */ |
44 | bool checkpointed; /* whether it is checkpointed or not */ | 51 | unsigned char flag; /* for node information bits */ |
45 | bool fsync_done; /* whether the latest node has fsync mark */ | ||
46 | struct node_info ni; /* in-memory node information */ | 52 | struct node_info ni; /* in-memory node information */ |
47 | }; | 53 | }; |
48 | 54 | ||
@@ -55,18 +61,32 @@ struct nat_entry { | |||
55 | #define nat_get_version(nat) (nat->ni.version) | 61 | #define nat_get_version(nat) (nat->ni.version) |
56 | #define nat_set_version(nat, v) (nat->ni.version = v) | 62 | #define nat_set_version(nat, v) (nat->ni.version = v) |
57 | 63 | ||
58 | #define __set_nat_cache_dirty(nm_i, ne) \ | ||
59 | do { \ | ||
60 | ne->checkpointed = false; \ | ||
61 | list_move_tail(&ne->list, &nm_i->dirty_nat_entries); \ | ||
62 | } while (0) | ||
63 | #define __clear_nat_cache_dirty(nm_i, ne) \ | ||
64 | do { \ | ||
65 | ne->checkpointed = true; \ | ||
66 | list_move_tail(&ne->list, &nm_i->nat_entries); \ | ||
67 | } while (0) | ||
68 | #define inc_node_version(version) (++version) | 64 | #define inc_node_version(version) (++version) |
69 | 65 | ||
66 | static inline void set_nat_flag(struct nat_entry *ne, | ||
67 | unsigned int type, bool set) | ||
68 | { | ||
69 | unsigned char mask = 0x01 << type; | ||
70 | if (set) | ||
71 | ne->flag |= mask; | ||
72 | else | ||
73 | ne->flag &= ~mask; | ||
74 | } | ||
75 | |||
76 | static inline bool get_nat_flag(struct nat_entry *ne, unsigned int type) | ||
77 | { | ||
78 | unsigned char mask = 0x01 << type; | ||
79 | return ne->flag & mask; | ||
80 | } | ||
81 | |||
82 | static inline void nat_reset_flag(struct nat_entry *ne) | ||
83 | { | ||
84 | /* these states can be set only after checkpoint was done */ | ||
85 | set_nat_flag(ne, IS_CHECKPOINTED, true); | ||
86 | set_nat_flag(ne, HAS_FSYNCED_INODE, false); | ||
87 | set_nat_flag(ne, HAS_LAST_FSYNC, true); | ||
88 | } | ||
89 | |||
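
Collapsing the old checkpointed/fsync_done bools into one flag byte is free at the call sites: each direct field access becomes a bit test or a bit flip, as in the add_free_nid() hunk earlier where `!ne->checkpointed` turned into `!get_nat_flag(ne, IS_CHECKPOINTED)`. A hypothetical wrapper in the same spirit:

    /* Sketch: a bool store becomes a single-bit update; the spare bits
     * leave room for HAS_FSYNCED_INODE/HAS_LAST_FSYNC/IS_DIRTY without
     * growing struct nat_entry. */
    static inline void nat_mark_dirty(struct nat_entry *ne)
    {
        set_nat_flag(ne, IS_DIRTY, true);
    }
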
70 | static inline void node_info_from_raw_nat(struct node_info *ni, | 90 | static inline void node_info_from_raw_nat(struct node_info *ni, |
71 | struct f2fs_nat_entry *raw_ne) | 91 | struct f2fs_nat_entry *raw_ne) |
72 | { | 92 | { |
@@ -90,9 +110,9 @@ enum mem_type { | |||
90 | }; | 110 | }; |
91 | 111 | ||
92 | struct nat_entry_set { | 112 | struct nat_entry_set { |
93 | struct list_head set_list; /* link with all nat sets */ | 113 | struct list_head set_list; /* link with other nat sets */ |
94 | struct list_head entry_list; /* link with dirty nat entries */ | 114 | struct list_head entry_list; /* link with dirty nat entries */ |
95 | nid_t start_nid; /* start nid of nats in set */ | 115 | nid_t set; /* set number */ |
96 | unsigned int entry_cnt; /* the # of nat entries in set */ | 116 | unsigned int entry_cnt; /* the # of nat entries in set */ |
97 | }; | 117 | }; |
98 | 118 | ||
@@ -110,18 +130,19 @@ struct free_nid { | |||
110 | int state; /* in use or not: NID_NEW or NID_ALLOC */ | 130 | int state; /* in use or not: NID_NEW or NID_ALLOC */ |
111 | }; | 131 | }; |
112 | 132 | ||
113 | static inline int next_free_nid(struct f2fs_sb_info *sbi, nid_t *nid) | 133 | static inline void next_free_nid(struct f2fs_sb_info *sbi, nid_t *nid) |
114 | { | 134 | { |
115 | struct f2fs_nm_info *nm_i = NM_I(sbi); | 135 | struct f2fs_nm_info *nm_i = NM_I(sbi); |
116 | struct free_nid *fnid; | 136 | struct free_nid *fnid; |
117 | 137 | ||
118 | if (nm_i->fcnt <= 0) | ||
119 | return -1; | ||
120 | spin_lock(&nm_i->free_nid_list_lock); | 138 | spin_lock(&nm_i->free_nid_list_lock); |
139 | if (nm_i->fcnt <= 0) { | ||
140 | spin_unlock(&nm_i->free_nid_list_lock); | ||
141 | return; | ||
142 | } | ||
121 | fnid = list_entry(nm_i->free_nid_list.next, struct free_nid, list); | 143 | fnid = list_entry(nm_i->free_nid_list.next, struct free_nid, list); |
122 | *nid = fnid->nid; | 144 | *nid = fnid->nid; |
123 | spin_unlock(&nm_i->free_nid_list_lock); | 145 | spin_unlock(&nm_i->free_nid_list_lock); |
124 | return 0; | ||
125 | } | 146 | } |
126 | 147 | ||
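
The next_free_nid() change closes a check-then-act race: the old code tested nm_i->fcnt before taking free_nid_list_lock, so another CPU could drain the list between the test and the unguarded list_entry(), which would then read the head of an empty list. It also becomes void, since a caller can simply keep its previous *nid when the list is empty. The corrected shape, reduced to its essentials:

    /* Sketch: the emptiness test must run under the same lock that
     * protects the list; testing first, unlocked, is a TOCTOU bug. */
    static void peek_free_nid(struct f2fs_nm_info *nm_i, nid_t *nid)
    {
        struct free_nid *fnid;

        spin_lock(&nm_i->free_nid_list_lock);
        if (nm_i->fcnt <= 0) {      /* checked under the lock */
            spin_unlock(&nm_i->free_nid_list_lock);
            return;
        }
        fnid = list_first_entry(&nm_i->free_nid_list,
                    struct free_nid, list);
        *nid = fnid->nid;
        spin_unlock(&nm_i->free_nid_list_lock);
    }
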
127 | /* | 148 | /* |
@@ -197,8 +218,7 @@ static inline void copy_node_footer(struct page *dst, struct page *src) | |||
197 | 218 | ||
198 | static inline void fill_node_footer_blkaddr(struct page *page, block_t blkaddr) | 219 | static inline void fill_node_footer_blkaddr(struct page *page, block_t blkaddr) |
199 | { | 220 | { |
200 | struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb); | 221 | struct f2fs_checkpoint *ckpt = F2FS_CKPT(F2FS_P_SB(page)); |
201 | struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); | ||
202 | struct f2fs_node *rn = F2FS_NODE(page); | 222 | struct f2fs_node *rn = F2FS_NODE(page); |
203 | 223 | ||
204 | rn->footer.cp_ver = ckpt->checkpoint_ver; | 224 | rn->footer.cp_ver = ckpt->checkpoint_ver; |
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index fe1c6d921ba2..ebd013225788 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c | |||
@@ -14,6 +14,37 @@ | |||
14 | #include "node.h" | 14 | #include "node.h" |
15 | #include "segment.h" | 15 | #include "segment.h" |
16 | 16 | ||
17 | /* | ||
18 | * Roll-forward recovery scenarios. | ||
19 | * | ||
20 | * [Term] F: fsync_mark, D: dentry_mark | ||
21 | * | ||
22 | * 1. inode(x) | CP | inode(x) | dnode(F) | ||
23 | * -> Update the latest inode(x). | ||
24 | * | ||
25 | * 2. inode(x) | CP | inode(F) | dnode(F) | ||
26 | * -> No problem. | ||
27 | * | ||
28 | * 3. inode(x) | CP | dnode(F) | inode(x) | ||
29 | * -> Recover to the latest dnode(F), and drop the last inode(x) | ||
30 | * | ||
31 | * 4. inode(x) | CP | dnode(F) | inode(F) | ||
32 | * -> No problem. | ||
33 | * | ||
34 | * 5. CP | inode(x) | dnode(F) | ||
35 | * -> The inode(DF) was missing. Should drop this dnode(F). | ||
36 | * | ||
37 | * 6. CP | inode(DF) | dnode(F) | ||
38 | * -> No problem. | ||
39 | * | ||
40 | * 7. CP | dnode(F) | inode(DF) | ||
41 | * -> If f2fs_iget fails, then goto next to find inode(DF). | ||
42 | * | ||
43 | * 8. CP | dnode(F) | inode(x) | ||
44 | * -> If f2fs_iget fails, then goto next to find inode(DF). | ||
45 | * But it will fail due to no inode(DF). | ||
46 | */ | ||
47 | |||
17 | static struct kmem_cache *fsync_entry_slab; | 48 | static struct kmem_cache *fsync_entry_slab; |
18 | 49 | ||
19 | bool space_for_roll_forward(struct f2fs_sb_info *sbi) | 50 | bool space_for_roll_forward(struct f2fs_sb_info *sbi) |
@@ -36,7 +67,7 @@ static struct fsync_inode_entry *get_fsync_inode(struct list_head *head, | |||
36 | return NULL; | 67 | return NULL; |
37 | } | 68 | } |
38 | 69 | ||
39 | static int recover_dentry(struct page *ipage, struct inode *inode) | 70 | static int recover_dentry(struct inode *inode, struct page *ipage) |
40 | { | 71 | { |
41 | struct f2fs_inode *raw_inode = F2FS_INODE(ipage); | 72 | struct f2fs_inode *raw_inode = F2FS_INODE(ipage); |
42 | nid_t pino = le32_to_cpu(raw_inode->i_pino); | 73 | nid_t pino = le32_to_cpu(raw_inode->i_pino); |
@@ -62,8 +93,10 @@ static int recover_dentry(struct page *ipage, struct inode *inode) | |||
62 | } | 93 | } |
63 | retry: | 94 | retry: |
64 | de = f2fs_find_entry(dir, &name, &page); | 95 | de = f2fs_find_entry(dir, &name, &page); |
65 | if (de && inode->i_ino == le32_to_cpu(de->ino)) | 96 | if (de && inode->i_ino == le32_to_cpu(de->ino)) { |
97 | clear_inode_flag(F2FS_I(inode), FI_INC_LINK); | ||
66 | goto out_unmap_put; | 98 | goto out_unmap_put; |
99 | } | ||
67 | if (de) { | 100 | if (de) { |
68 | einode = f2fs_iget(inode->i_sb, le32_to_cpu(de->ino)); | 101 | einode = f2fs_iget(inode->i_sb, le32_to_cpu(de->ino)); |
69 | if (IS_ERR(einode)) { | 102 | if (IS_ERR(einode)) { |
@@ -73,7 +106,7 @@ retry: | |||
73 | err = -EEXIST; | 106 | err = -EEXIST; |
74 | goto out_unmap_put; | 107 | goto out_unmap_put; |
75 | } | 108 | } |
76 | err = acquire_orphan_inode(F2FS_SB(inode->i_sb)); | 109 | err = acquire_orphan_inode(F2FS_I_SB(inode)); |
77 | if (err) { | 110 | if (err) { |
78 | iput(einode); | 111 | iput(einode); |
79 | goto out_unmap_put; | 112 | goto out_unmap_put; |
@@ -108,35 +141,28 @@ out: | |||
108 | return err; | 141 | return err; |
109 | } | 142 | } |
110 | 143 | ||
111 | static int recover_inode(struct inode *inode, struct page *node_page) | 144 | static void recover_inode(struct inode *inode, struct page *page) |
112 | { | 145 | { |
113 | struct f2fs_inode *raw_inode = F2FS_INODE(node_page); | 146 | struct f2fs_inode *raw = F2FS_INODE(page); |
114 | 147 | ||
115 | if (!IS_INODE(node_page)) | 148 | inode->i_mode = le16_to_cpu(raw->i_mode); |
116 | return 0; | 149 | i_size_write(inode, le64_to_cpu(raw->i_size)); |
117 | 150 | inode->i_atime.tv_sec = le64_to_cpu(raw->i_mtime); | |
118 | inode->i_mode = le16_to_cpu(raw_inode->i_mode); | 151 | inode->i_ctime.tv_sec = le64_to_cpu(raw->i_ctime); |
119 | i_size_write(inode, le64_to_cpu(raw_inode->i_size)); | 152 | inode->i_mtime.tv_sec = le64_to_cpu(raw->i_mtime); |
120 | inode->i_atime.tv_sec = le64_to_cpu(raw_inode->i_mtime); | 153 | inode->i_atime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec); |
121 | inode->i_ctime.tv_sec = le64_to_cpu(raw_inode->i_ctime); | 154 | inode->i_ctime.tv_nsec = le32_to_cpu(raw->i_ctime_nsec); |
122 | inode->i_mtime.tv_sec = le64_to_cpu(raw_inode->i_mtime); | 155 | inode->i_mtime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec); |
123 | inode->i_atime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec); | ||
124 | inode->i_ctime.tv_nsec = le32_to_cpu(raw_inode->i_ctime_nsec); | ||
125 | inode->i_mtime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec); | ||
126 | |||
127 | if (is_dent_dnode(node_page)) | ||
128 | return recover_dentry(node_page, inode); | ||
129 | 156 | ||
130 | f2fs_msg(inode->i_sb, KERN_NOTICE, "recover_inode: ino = %x, name = %s", | 157 | f2fs_msg(inode->i_sb, KERN_NOTICE, "recover_inode: ino = %x, name = %s", |
131 | ino_of_node(node_page), raw_inode->i_name); | 158 | ino_of_node(page), F2FS_INODE(page)->i_name); |
132 | return 0; | ||
133 | } | 159 | } |
134 | 160 | ||
135 | static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) | 161 | static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) |
136 | { | 162 | { |
137 | unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi)); | 163 | unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi)); |
138 | struct curseg_info *curseg; | 164 | struct curseg_info *curseg; |
139 | struct page *page; | 165 | struct page *page = NULL; |
140 | block_t blkaddr; | 166 | block_t blkaddr; |
141 | int err = 0; | 167 | int err = 0; |
142 | 168 | ||
@@ -144,20 +170,13 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) | |||
144 | curseg = CURSEG_I(sbi, CURSEG_WARM_NODE); | 170 | curseg = CURSEG_I(sbi, CURSEG_WARM_NODE); |
145 | blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); | 171 | blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); |
146 | 172 | ||
147 | /* read node page */ | ||
148 | page = alloc_page(GFP_F2FS_ZERO); | ||
149 | if (!page) | ||
150 | return -ENOMEM; | ||
151 | lock_page(page); | ||
152 | |||
153 | while (1) { | 173 | while (1) { |
154 | struct fsync_inode_entry *entry; | 174 | struct fsync_inode_entry *entry; |
155 | 175 | ||
156 | err = f2fs_submit_page_bio(sbi, page, blkaddr, READ_SYNC); | 176 | if (blkaddr < MAIN_BLKADDR(sbi) || blkaddr >= MAX_BLKADDR(sbi)) |
157 | if (err) | 177 | return 0; |
158 | return err; | ||
159 | 178 | ||
160 | lock_page(page); | 179 | page = get_meta_page_ra(sbi, blkaddr); |
161 | 180 | ||
162 | if (cp_ver != cpver_of_node(page)) | 181 | if (cp_ver != cpver_of_node(page)) |
163 | break; | 182 | break; |
@@ -178,33 +197,38 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) | |||
178 | } | 197 | } |
179 | 198 | ||
180 | /* add this fsync inode to the list */ | 199 | /* add this fsync inode to the list */ |
181 | entry = kmem_cache_alloc(fsync_entry_slab, GFP_NOFS); | 200 | entry = kmem_cache_alloc(fsync_entry_slab, GFP_F2FS_ZERO); |
182 | if (!entry) { | 201 | if (!entry) { |
183 | err = -ENOMEM; | 202 | err = -ENOMEM; |
184 | break; | 203 | break; |
185 | } | 204 | } |
186 | 205 | /* | |
206 | * CP | dnode(F) | inode(DF) | ||
207 | * For this case, we should not give up now. | ||
208 | */ | ||
187 | entry->inode = f2fs_iget(sbi->sb, ino_of_node(page)); | 209 | entry->inode = f2fs_iget(sbi->sb, ino_of_node(page)); |
188 | if (IS_ERR(entry->inode)) { | 210 | if (IS_ERR(entry->inode)) { |
189 | err = PTR_ERR(entry->inode); | 211 | err = PTR_ERR(entry->inode); |
190 | kmem_cache_free(fsync_entry_slab, entry); | 212 | kmem_cache_free(fsync_entry_slab, entry); |
213 | if (err == -ENOENT) | ||
214 | goto next; | ||
191 | break; | 215 | break; |
192 | } | 216 | } |
193 | list_add_tail(&entry->list, head); | 217 | list_add_tail(&entry->list, head); |
194 | } | 218 | } |
195 | entry->blkaddr = blkaddr; | 219 | entry->blkaddr = blkaddr; |
196 | 220 | ||
197 | err = recover_inode(entry->inode, page); | 221 | if (IS_INODE(page)) { |
198 | if (err && err != -ENOENT) | 222 | entry->last_inode = blkaddr; |
199 | break; | 223 | if (is_dent_dnode(page)) |
224 | entry->last_dentry = blkaddr; | ||
225 | } | ||
200 | next: | 226 | next: |
201 | /* check next segment */ | 227 | /* check next segment */ |
202 | blkaddr = next_blkaddr_of_node(page); | 228 | blkaddr = next_blkaddr_of_node(page); |
229 | f2fs_put_page(page, 1); | ||
203 | } | 230 | } |
204 | 231 | f2fs_put_page(page, 1); | |
205 | unlock_page(page); | ||
206 | __free_pages(page, 0); | ||
207 | |||
208 | return err; | 232 | return err; |
209 | } | 233 | } |
210 | 234 | ||
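
find_fsync_dnodes() drops its private page and synchronous f2fs_submit_page_bio() reads; it now walks the warm-node chain through the meta inode's page cache via get_meta_page_ra(), which presumably behaves like get_meta_page() plus readahead of the blocks that follow, and the explicit MAIN_BLKADDR()/MAX_BLKADDR() bounds test ends the walk when a next-pointer leaves the main area. The loop skeleton:

    /* Sketch of the chain walk; the per-block recovery work is elided. */
    static int walk_warm_node_chain(struct f2fs_sb_info *sbi,
                    block_t blkaddr,
                    unsigned long long cp_ver)
    {
        struct page *page;

        while (1) {
            /* a pointer outside the main area ends the chain */
            if (blkaddr < MAIN_BLKADDR(sbi) ||
                    blkaddr >= MAX_BLKADDR(sbi))
                return 0;

            page = get_meta_page_ra(sbi, blkaddr);  /* cached + RA */

            if (cp_ver != cpver_of_node(page)) {
                f2fs_put_page(page, 1); /* pre-checkpoint block */
                return 0;
            }

            /* ... inspect or recover this node block here ... */

            blkaddr = next_blkaddr_of_node(page);
            f2fs_put_page(page, 1);
        }
    }
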
@@ -277,16 +301,30 @@ got_it: | |||
277 | ino = ino_of_node(node_page); | 301 | ino = ino_of_node(node_page); |
278 | f2fs_put_page(node_page, 1); | 302 | f2fs_put_page(node_page, 1); |
279 | 303 | ||
280 | /* Deallocate previous index in the node page */ | 304 | if (ino != dn->inode->i_ino) { |
281 | inode = f2fs_iget(sbi->sb, ino); | 305 | /* Deallocate previous index in the node page */ |
282 | if (IS_ERR(inode)) | 306 | inode = f2fs_iget(sbi->sb, ino); |
283 | return PTR_ERR(inode); | 307 | if (IS_ERR(inode)) |
308 | return PTR_ERR(inode); | ||
309 | } else { | ||
310 | inode = dn->inode; | ||
311 | } | ||
284 | 312 | ||
285 | bidx = start_bidx_of_node(offset, F2FS_I(inode)) + | 313 | bidx = start_bidx_of_node(offset, F2FS_I(inode)) + |
286 | le16_to_cpu(sum.ofs_in_node); | 314 | le16_to_cpu(sum.ofs_in_node); |
287 | 315 | ||
288 | truncate_hole(inode, bidx, bidx + 1); | 316 | if (ino != dn->inode->i_ino) { |
289 | iput(inode); | 317 | truncate_hole(inode, bidx, bidx + 1); |
318 | iput(inode); | ||
319 | } else { | ||
320 | struct dnode_of_data tdn; | ||
321 | set_new_dnode(&tdn, inode, dn->inode_page, NULL, 0); | ||
322 | if (get_dnode_of_data(&tdn, bidx, LOOKUP_NODE)) | ||
323 | return 0; | ||
324 | if (tdn.data_blkaddr != NULL_ADDR) | ||
325 | truncate_data_blocks_range(&tdn, 1); | ||
326 | f2fs_put_page(tdn.node_page, 1); | ||
327 | } | ||
290 | return 0; | 328 | return 0; |
291 | } | 329 | } |
292 | 330 | ||
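
check_index_in_prev_nodes() now special-cases ino == dn->inode->i_ino. Calling f2fs_iget() on the inode that is currently being recovered is unsafe, since the caller already holds that inode's pages, so the in-flight inode and its locked node page are reused through a temporary dnode instead. The same-inode branch, isolated as a sketch:

    /* Sketch: truncate a stale block of the inode under recovery
     * without taking a second reference via f2fs_iget(). */
    static void truncate_in_flight_block(struct dnode_of_data *dn,
                        pgoff_t bidx)
    {
        struct dnode_of_data tdn;

        /* borrow the caller's locked inode page; no new locks taken */
        set_new_dnode(&tdn, dn->inode, dn->inode_page, NULL, 0);
        if (get_dnode_of_data(&tdn, bidx, LOOKUP_NODE))
            return;
        if (tdn.data_blkaddr != NULL_ADDR)
            truncate_data_blocks_range(&tdn, 1);
        f2fs_put_page(tdn.node_page, 1);
    }
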
@@ -300,14 +338,19 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, | |||
300 | struct node_info ni; | 338 | struct node_info ni; |
301 | int err = 0, recovered = 0; | 339 | int err = 0, recovered = 0; |
302 | 340 | ||
303 | recover_inline_xattr(inode, page); | 341 | /* step 1: recover xattr */ |
304 | 342 | if (IS_INODE(page)) { | |
305 | if (recover_inline_data(inode, page)) | 343 | recover_inline_xattr(inode, page); |
344 | } else if (f2fs_has_xattr_block(ofs_of_node(page))) { | ||
345 | recover_xattr_data(inode, page, blkaddr); | ||
306 | goto out; | 346 | goto out; |
347 | } | ||
307 | 348 | ||
308 | if (recover_xattr_data(inode, page, blkaddr)) | 349 | /* step 2: recover inline data */ |
350 | if (recover_inline_data(inode, page)) | ||
309 | goto out; | 351 | goto out; |
310 | 352 | ||
353 | /* step 3: recover data indices */ | ||
311 | start = start_bidx_of_node(ofs_of_node(page), fi); | 354 | start = start_bidx_of_node(ofs_of_node(page), fi); |
312 | end = start + ADDRS_PER_PAGE(page, fi); | 355 | end = start + ADDRS_PER_PAGE(page, fi); |
313 | 356 | ||
@@ -324,8 +367,8 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, | |||
324 | f2fs_wait_on_page_writeback(dn.node_page, NODE); | 367 | f2fs_wait_on_page_writeback(dn.node_page, NODE); |
325 | 368 | ||
326 | get_node_info(sbi, dn.nid, &ni); | 369 | get_node_info(sbi, dn.nid, &ni); |
327 | f2fs_bug_on(ni.ino != ino_of_node(page)); | 370 | f2fs_bug_on(sbi, ni.ino != ino_of_node(page)); |
328 | f2fs_bug_on(ofs_of_node(dn.node_page) != ofs_of_node(page)); | 371 | f2fs_bug_on(sbi, ofs_of_node(dn.node_page) != ofs_of_node(page)); |
329 | 372 | ||
330 | for (; start < end; start++) { | 373 | for (; start < end; start++) { |
331 | block_t src, dest; | 374 | block_t src, dest; |
@@ -337,7 +380,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, | |||
337 | if (src == NULL_ADDR) { | 380 | if (src == NULL_ADDR) { |
338 | err = reserve_new_block(&dn); | 381 | err = reserve_new_block(&dn); |
339 | /* We should not get -ENOSPC */ | 382 | /* We should not get -ENOSPC */ |
340 | f2fs_bug_on(err); | 383 | f2fs_bug_on(sbi, err); |
341 | } | 384 | } |
342 | 385 | ||
343 | /* Check the previous node page having this index */ | 386 | /* Check the previous node page having this index */ |
@@ -364,8 +407,6 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, | |||
364 | fill_node_footer(dn.node_page, dn.nid, ni.ino, | 407 | fill_node_footer(dn.node_page, dn.nid, ni.ino, |
365 | ofs_of_node(page), false); | 408 | ofs_of_node(page), false); |
366 | set_page_dirty(dn.node_page); | 409 | set_page_dirty(dn.node_page); |
367 | |||
368 | recover_node_page(sbi, dn.node_page, &sum, &ni, blkaddr); | ||
369 | err: | 410 | err: |
370 | f2fs_put_dnode(&dn); | 411 | f2fs_put_dnode(&dn); |
371 | f2fs_unlock_op(sbi); | 412 | f2fs_unlock_op(sbi); |
@@ -381,7 +422,7 @@ static int recover_data(struct f2fs_sb_info *sbi, | |||
381 | { | 422 | { |
382 | unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi)); | 423 | unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi)); |
383 | struct curseg_info *curseg; | 424 | struct curseg_info *curseg; |
384 | struct page *page; | 425 | struct page *page = NULL; |
385 | int err = 0; | 426 | int err = 0; |
386 | block_t blkaddr; | 427 | block_t blkaddr; |
387 | 428 | ||
@@ -389,32 +430,41 @@ static int recover_data(struct f2fs_sb_info *sbi, | |||
389 | curseg = CURSEG_I(sbi, type); | 430 | curseg = CURSEG_I(sbi, type); |
390 | blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); | 431 | blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); |
391 | 432 | ||
392 | /* read node page */ | ||
393 | page = alloc_page(GFP_F2FS_ZERO); | ||
394 | if (!page) | ||
395 | return -ENOMEM; | ||
396 | |||
397 | lock_page(page); | ||
398 | |||
399 | while (1) { | 433 | while (1) { |
400 | struct fsync_inode_entry *entry; | 434 | struct fsync_inode_entry *entry; |
401 | 435 | ||
402 | err = f2fs_submit_page_bio(sbi, page, blkaddr, READ_SYNC); | 436 | if (blkaddr < MAIN_BLKADDR(sbi) || blkaddr >= MAX_BLKADDR(sbi)) |
403 | if (err) | 437 | break; |
404 | return err; | ||
405 | 438 | ||
406 | lock_page(page); | 439 | page = get_meta_page_ra(sbi, blkaddr); |
407 | 440 | ||
408 | if (cp_ver != cpver_of_node(page)) | 441 | if (cp_ver != cpver_of_node(page)) { |
442 | f2fs_put_page(page, 1); | ||
409 | break; | 443 | break; |
444 | } | ||
410 | 445 | ||
411 | entry = get_fsync_inode(head, ino_of_node(page)); | 446 | entry = get_fsync_inode(head, ino_of_node(page)); |
412 | if (!entry) | 447 | if (!entry) |
413 | goto next; | 448 | goto next; |
414 | 449 | /* | |
450 | * inode(x) | CP | inode(x) | dnode(F) | ||
451 | * In this case, we can lose the latest inode(x). | ||
452 | * So, call recover_inode for the inode update. | ||
453 | */ | ||
454 | if (entry->last_inode == blkaddr) | ||
455 | recover_inode(entry->inode, page); | ||
456 | if (entry->last_dentry == blkaddr) { | ||
457 | err = recover_dentry(entry->inode, page); | ||
458 | if (err) { | ||
459 | f2fs_put_page(page, 1); | ||
460 | break; | ||
461 | } | ||
462 | } | ||
415 | err = do_recover_data(sbi, entry->inode, page, blkaddr); | 463 | err = do_recover_data(sbi, entry->inode, page, blkaddr); |
416 | if (err) | 464 | if (err) { |
465 | f2fs_put_page(page, 1); | ||
417 | break; | 466 | break; |
467 | } | ||
418 | 468 | ||
419 | if (entry->blkaddr == blkaddr) { | 469 | if (entry->blkaddr == blkaddr) { |
420 | iput(entry->inode); | 470 | iput(entry->inode); |
@@ -424,11 +474,8 @@ static int recover_data(struct f2fs_sb_info *sbi, | |||
424 | next: | 474 | next: |
425 | /* check next segment */ | 475 | /* check next segment */ |
426 | blkaddr = next_blkaddr_of_node(page); | 476 | blkaddr = next_blkaddr_of_node(page); |
477 | f2fs_put_page(page, 1); | ||
427 | } | 478 | } |
428 | |||
429 | unlock_page(page); | ||
430 | __free_pages(page, 0); | ||
431 | |||
432 | if (!err) | 479 | if (!err) |
433 | allocate_new_segments(sbi); | 480 | allocate_new_segments(sbi); |
434 | return err; | 481 | return err; |
@@ -452,6 +499,9 @@ int recover_fsync_data(struct f2fs_sb_info *sbi) | |||
452 | /* step #1: find fsynced inode numbers */ | 499 | /* step #1: find fsynced inode numbers */ |
453 | sbi->por_doing = true; | 500 | sbi->por_doing = true; |
454 | 501 | ||
502 | /* prevent checkpoint */ | ||
503 | mutex_lock(&sbi->cp_mutex); | ||
504 | |||
455 | blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); | 505 | blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); |
456 | 506 | ||
457 | err = find_fsync_dnodes(sbi, &inode_list); | 507 | err = find_fsync_dnodes(sbi, &inode_list); |
@@ -465,11 +515,16 @@ int recover_fsync_data(struct f2fs_sb_info *sbi) | |||
465 | 515 | ||
466 | /* step #2: recover data */ | 516 | /* step #2: recover data */ |
467 | err = recover_data(sbi, &inode_list, CURSEG_WARM_NODE); | 517 | err = recover_data(sbi, &inode_list, CURSEG_WARM_NODE); |
468 | f2fs_bug_on(!list_empty(&inode_list)); | 518 | if (!err) |
519 | f2fs_bug_on(sbi, !list_empty(&inode_list)); | ||
469 | out: | 520 | out: |
470 | destroy_fsync_dnodes(&inode_list); | 521 | destroy_fsync_dnodes(&inode_list); |
471 | kmem_cache_destroy(fsync_entry_slab); | 522 | kmem_cache_destroy(fsync_entry_slab); |
472 | 523 | ||
524 | /* truncate meta pages to be used by the recovery */ | ||
525 | truncate_inode_pages_range(META_MAPPING(sbi), | ||
526 | MAIN_BLKADDR(sbi) << PAGE_CACHE_SHIFT, -1); | ||
527 | |||
473 | if (err) { | 528 | if (err) { |
474 | truncate_inode_pages_final(NODE_MAPPING(sbi)); | 529 | truncate_inode_pages_final(NODE_MAPPING(sbi)); |
475 | truncate_inode_pages_final(META_MAPPING(sbi)); | 530 | truncate_inode_pages_final(META_MAPPING(sbi)); |
@@ -482,8 +537,16 @@ out: | |||
482 | /* Flush all the NAT/SIT pages */ | 537 | /* Flush all the NAT/SIT pages */ |
483 | while (get_pages(sbi, F2FS_DIRTY_META)) | 538 | while (get_pages(sbi, F2FS_DIRTY_META)) |
484 | sync_meta_pages(sbi, META, LONG_MAX); | 539 | sync_meta_pages(sbi, META, LONG_MAX); |
540 | set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG); | ||
541 | mutex_unlock(&sbi->cp_mutex); | ||
485 | } else if (need_writecp) { | 542 | } else if (need_writecp) { |
486 | write_checkpoint(sbi, false); | 543 | struct cp_control cpc = { |
544 | .reason = CP_SYNC, | ||
545 | }; | ||
546 | mutex_unlock(&sbi->cp_mutex); | ||
547 | write_checkpoint(sbi, &cpc); | ||
548 | } else { | ||
549 | mutex_unlock(&sbi->cp_mutex); | ||
487 | } | 550 | } |
488 | return err; | 551 | return err; |
489 | } | 552 | } |
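
Two related changes close out recovery.c: the whole roll-forward now runs under sbi->cp_mutex, so a checkpoint can never interleave with it (note that every exit path releases the mutex, including the new error branch that sets CP_ERROR_FLAG), and write_checkpoint() takes a struct cp_control instead of a bare bool. The struct is not defined in this hunk; its shape is presumably along these lines, with field names inferred from the CP_SYNC use above and the trim handling visible in segment.c below:

    /* Sketch: assumed layout of cp_control plus the call made on the
     * successful-recovery path once cp_mutex is released. */
    struct cp_control {
        int reason;             /* CP_SYNC, CP_DISCARD, ... */
        __u64 trim_start;       /* CP_DISCARD only */
        __u64 trim_end;
        __u64 trim_minlen;
    };

    static void checkpoint_after_recovery(struct f2fs_sb_info *sbi)
    {
        struct cp_control cpc = {
            .reason = CP_SYNC,
        };

        mutex_unlock(&sbi->cp_mutex);
        write_checkpoint(sbi, &cpc);
    }
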
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 0dfeebae2a50..923cb76fdc46 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c | |||
@@ -25,6 +25,8 @@ | |||
25 | #define __reverse_ffz(x) __reverse_ffs(~(x)) | 25 | #define __reverse_ffz(x) __reverse_ffs(~(x)) |
26 | 26 | ||
27 | static struct kmem_cache *discard_entry_slab; | 27 | static struct kmem_cache *discard_entry_slab; |
28 | static struct kmem_cache *sit_entry_set_slab; | ||
29 | static struct kmem_cache *inmem_entry_slab; | ||
28 | 30 | ||
29 | /* | 31 | /* |
30 | * __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since | 32 | * __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since |
@@ -62,7 +64,7 @@ static inline unsigned long __reverse_ffs(unsigned long word) | |||
62 | } | 64 | } |
63 | 65 | ||
64 | /* | 66 | /* |
65 | * __find_rev_next(_zero)_bit is copied from lib/find_next_bit.c becasue | 67 | * __find_rev_next(_zero)_bit is copied from lib/find_next_bit.c because |
66 | * f2fs_set_bit makes MSB and LSB reversed in a byte. | 68 | * f2fs_set_bit makes MSB and LSB reversed in a byte. |
67 | * Example: | 69 | * Example: |
68 | * LSB <--> MSB | 70 | * LSB <--> MSB |
@@ -172,6 +174,60 @@ found_middle: | |||
172 | return result + __reverse_ffz(tmp); | 174 | return result + __reverse_ffz(tmp); |
173 | } | 175 | } |
174 | 176 | ||
177 | void register_inmem_page(struct inode *inode, struct page *page) | ||
178 | { | ||
179 | struct f2fs_inode_info *fi = F2FS_I(inode); | ||
180 | struct inmem_pages *new; | ||
181 | |||
182 | new = f2fs_kmem_cache_alloc(inmem_entry_slab, GFP_NOFS); | ||
183 | |||
184 | /* add atomic page indices to the list */ | ||
185 | new->page = page; | ||
186 | INIT_LIST_HEAD(&new->list); | ||
187 | |||
188 | /* increase reference count with clean state */ | ||
189 | mutex_lock(&fi->inmem_lock); | ||
190 | get_page(page); | ||
191 | list_add_tail(&new->list, &fi->inmem_pages); | ||
192 | mutex_unlock(&fi->inmem_lock); | ||
193 | } | ||
194 | |||
195 | void commit_inmem_pages(struct inode *inode, bool abort) | ||
196 | { | ||
197 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); | ||
198 | struct f2fs_inode_info *fi = F2FS_I(inode); | ||
199 | struct inmem_pages *cur, *tmp; | ||
200 | bool submit_bio = false; | ||
201 | struct f2fs_io_info fio = { | ||
202 | .type = DATA, | ||
203 | .rw = WRITE_SYNC, | ||
204 | }; | ||
205 | |||
206 | f2fs_balance_fs(sbi); | ||
207 | f2fs_lock_op(sbi); | ||
208 | |||
209 | mutex_lock(&fi->inmem_lock); | ||
210 | list_for_each_entry_safe(cur, tmp, &fi->inmem_pages, list) { | ||
211 | lock_page(cur->page); | ||
212 | if (!abort && cur->page->mapping == inode->i_mapping) { | ||
213 | f2fs_wait_on_page_writeback(cur->page, DATA); | ||
214 | if (clear_page_dirty_for_io(cur->page)) | ||
215 | inode_dec_dirty_pages(inode); | ||
216 | do_write_data_page(cur->page, &fio); | ||
217 | submit_bio = true; | ||
218 | } | ||
219 | f2fs_put_page(cur->page, 1); | ||
220 | list_del(&cur->list); | ||
221 | kmem_cache_free(inmem_entry_slab, cur); | ||
222 | } | ||
223 | if (submit_bio) | ||
224 | f2fs_submit_merged_bio(sbi, DATA, WRITE); | ||
225 | mutex_unlock(&fi->inmem_lock); | ||
226 | |||
227 | filemap_fdatawait_range(inode->i_mapping, 0, LLONG_MAX); | ||
228 | f2fs_unlock_op(sbi); | ||
229 | } | ||
230 | |||
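
register_inmem_page() and commit_inmem_pages() form an atomic-write staging area: dirtied data pages are pinned (get_page()) on a per-inode list instead of being written back, and they only reach disk, under f2fs_lock_op(), when the caller commits; with abort true the pins are simply dropped. The expected driver is a begin/commit ioctl pair, sketched below; the function names and the FI_ATOMIC_FILE flag are assumptions, not part of this diff:

    /* Sketch: hypothetical begin/commit flow around the new helpers. */
    static int f2fs_begin_atomic_write(struct inode *inode)
    {
        set_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE);  /* assumed flag */
        return 0;
    }

    /* write path, while the file is in atomic mode: pin, don't write */
    static void f2fs_stage_atomic_page(struct inode *inode, struct page *page)
    {
        register_inmem_page(inode, page);
    }

    static int f2fs_end_atomic_write(struct inode *inode, bool abort)
    {
        commit_inmem_pages(inode, abort);
        clear_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE);
        return 0;
    }
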
175 | /* | 231 | /* |
176 | * This function balances dirty node and dentry pages. | 232 | * This function balances dirty node and dentry pages. |
177 | * In addition, it controls garbage collection. | 233 | * In addition, it controls garbage collection. |
@@ -205,24 +261,20 @@ repeat: | |||
205 | if (kthread_should_stop()) | 261 | if (kthread_should_stop()) |
206 | return 0; | 262 | return 0; |
207 | 263 | ||
208 | spin_lock(&fcc->issue_lock); | 264 | if (!llist_empty(&fcc->issue_list)) { |
209 | if (fcc->issue_list) { | ||
210 | fcc->dispatch_list = fcc->issue_list; | ||
211 | fcc->issue_list = fcc->issue_tail = NULL; | ||
212 | } | ||
213 | spin_unlock(&fcc->issue_lock); | ||
214 | |||
215 | if (fcc->dispatch_list) { | ||
216 | struct bio *bio = bio_alloc(GFP_NOIO, 0); | 265 | struct bio *bio = bio_alloc(GFP_NOIO, 0); |
217 | struct flush_cmd *cmd, *next; | 266 | struct flush_cmd *cmd, *next; |
218 | int ret; | 267 | int ret; |
219 | 268 | ||
269 | fcc->dispatch_list = llist_del_all(&fcc->issue_list); | ||
270 | fcc->dispatch_list = llist_reverse_order(fcc->dispatch_list); | ||
271 | |||
220 | bio->bi_bdev = sbi->sb->s_bdev; | 272 | bio->bi_bdev = sbi->sb->s_bdev; |
221 | ret = submit_bio_wait(WRITE_FLUSH, bio); | 273 | ret = submit_bio_wait(WRITE_FLUSH, bio); |
222 | 274 | ||
223 | for (cmd = fcc->dispatch_list; cmd; cmd = next) { | 275 | llist_for_each_entry_safe(cmd, next, |
276 | fcc->dispatch_list, llnode) { | ||
224 | cmd->ret = ret; | 277 | cmd->ret = ret; |
225 | next = cmd->next; | ||
226 | complete(&cmd->wait); | 278 | complete(&cmd->wait); |
227 | } | 279 | } |
228 | bio_put(bio); | 280 | bio_put(bio); |
@@ -230,7 +282,7 @@ repeat: | |||
230 | } | 282 | } |
231 | 283 | ||
232 | wait_event_interruptible(*q, | 284 | wait_event_interruptible(*q, |
233 | kthread_should_stop() || fcc->issue_list); | 285 | kthread_should_stop() || !llist_empty(&fcc->issue_list)); |
234 | goto repeat; | 286 | goto repeat; |
235 | } | 287 | } |
236 | 288 | ||
@@ -249,15 +301,8 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi) | |||
249 | return blkdev_issue_flush(sbi->sb->s_bdev, GFP_KERNEL, NULL); | 301 | return blkdev_issue_flush(sbi->sb->s_bdev, GFP_KERNEL, NULL); |
250 | 302 | ||
251 | init_completion(&cmd.wait); | 303 | init_completion(&cmd.wait); |
252 | cmd.next = NULL; | ||
253 | 304 | ||
254 | spin_lock(&fcc->issue_lock); | 305 | llist_add(&cmd.llnode, &fcc->issue_list); |
255 | if (fcc->issue_list) | ||
256 | fcc->issue_tail->next = &cmd; | ||
257 | else | ||
258 | fcc->issue_list = &cmd; | ||
259 | fcc->issue_tail = &cmd; | ||
260 | spin_unlock(&fcc->issue_lock); | ||
261 | 306 | ||
262 | if (!fcc->dispatch_list) | 307 | if (!fcc->dispatch_list) |
263 | wake_up(&fcc->flush_wait_queue); | 308 | wake_up(&fcc->flush_wait_queue); |
@@ -276,8 +321,8 @@ int create_flush_cmd_control(struct f2fs_sb_info *sbi) | |||
276 | fcc = kzalloc(sizeof(struct flush_cmd_control), GFP_KERNEL); | 321 | fcc = kzalloc(sizeof(struct flush_cmd_control), GFP_KERNEL); |
277 | if (!fcc) | 322 | if (!fcc) |
278 | return -ENOMEM; | 323 | return -ENOMEM; |
279 | spin_lock_init(&fcc->issue_lock); | ||
280 | init_waitqueue_head(&fcc->flush_wait_queue); | 324 | init_waitqueue_head(&fcc->flush_wait_queue); |
325 | init_llist_head(&fcc->issue_list); | ||
281 | SM_I(sbi)->cmd_control_info = fcc; | 326 | SM_I(sbi)->cmd_control_info = fcc; |
282 | fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi, | 327 | fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi, |
283 | "f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev)); | 328 | "f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev)); |
@@ -317,6 +362,10 @@ static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, | |||
317 | struct seg_entry *sentry = get_seg_entry(sbi, segno); | 362 | struct seg_entry *sentry = get_seg_entry(sbi, segno); |
318 | enum dirty_type t = sentry->type; | 363 | enum dirty_type t = sentry->type; |
319 | 364 | ||
365 | if (unlikely(t >= DIRTY)) { | ||
366 | f2fs_bug_on(sbi, 1); | ||
367 | return; | ||
368 | } | ||
320 | if (!test_and_set_bit(segno, dirty_i->dirty_segmap[t])) | 369 | if (!test_and_set_bit(segno, dirty_i->dirty_segmap[t])) |
321 | dirty_i->nr_dirty[t]++; | 370 | dirty_i->nr_dirty[t]++; |
322 | } | 371 | } |
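The new bound check aside, __locate_dirty_segment() leans on test_and_set_bit() returning the bit's previous value: nr_dirty[t] is bumped only on a 0 -> 1 transition, so marking the same segment dirty twice cannot inflate the count. A single-threaded sketch of that counting idiom (the kernel primitive is atomic; this stand-in is not):

#include <stdbool.h>
#include <stdio.h>

#define BITS_PER_WORD (8 * sizeof(unsigned long))

/* return the previous value of the bit, like test_and_set_bit() */
static bool test_and_set(unsigned long *map, unsigned int nr)
{
        unsigned long mask = 1UL << (nr % BITS_PER_WORD);
        unsigned long *word = map + nr / BITS_PER_WORD;
        bool old = *word & mask;

        *word |= mask;
        return old;
}

int main(void)
{
        unsigned long dirty_map[4] = {0};
        int nr_dirty = 0;

        /* marking segment 5 dirty twice still counts it once */
        for (int pass = 0; pass < 2; pass++)
                if (!test_and_set(dirty_map, 5))
                        nr_dirty++;
        printf("nr_dirty = %d\n", nr_dirty);    /* 1 */
        return 0;
}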
@@ -376,8 +425,8 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno) | |||
376 | static int f2fs_issue_discard(struct f2fs_sb_info *sbi, | 425 | static int f2fs_issue_discard(struct f2fs_sb_info *sbi, |
377 | block_t blkstart, block_t blklen) | 426 | block_t blkstart, block_t blklen) |
378 | { | 427 | { |
379 | sector_t start = SECTOR_FROM_BLOCK(sbi, blkstart); | 428 | sector_t start = SECTOR_FROM_BLOCK(blkstart); |
380 | sector_t len = SECTOR_FROM_BLOCK(sbi, blklen); | 429 | sector_t len = SECTOR_FROM_BLOCK(blklen); |
381 | trace_f2fs_issue_discard(sbi->sb, blkstart, blklen); | 430 | trace_f2fs_issue_discard(sbi->sb, blkstart, blklen); |
382 | return blkdev_issue_discard(sbi->sb->s_bdev, start, len, GFP_NOFS, 0); | 431 | return blkdev_issue_discard(sbi->sb->s_bdev, start, len, GFP_NOFS, 0); |
383 | } | 432 | } |
@@ -392,21 +441,47 @@ void discard_next_dnode(struct f2fs_sb_info *sbi, block_t blkaddr) | |||
392 | } | 441 | } |
393 | } | 442 | } |
394 | 443 | ||
395 | static void add_discard_addrs(struct f2fs_sb_info *sbi, | 444 | static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc) |
396 | unsigned int segno, struct seg_entry *se) | ||
397 | { | 445 | { |
398 | struct list_head *head = &SM_I(sbi)->discard_list; | 446 | struct list_head *head = &SM_I(sbi)->discard_list; |
399 | struct discard_entry *new; | 447 | struct discard_entry *new; |
400 | int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long); | 448 | int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long); |
401 | int max_blocks = sbi->blocks_per_seg; | 449 | int max_blocks = sbi->blocks_per_seg; |
450 | struct seg_entry *se = get_seg_entry(sbi, cpc->trim_start); | ||
402 | unsigned long *cur_map = (unsigned long *)se->cur_valid_map; | 451 | unsigned long *cur_map = (unsigned long *)se->cur_valid_map; |
403 | unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map; | 452 | unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map; |
404 | unsigned long dmap[entries]; | 453 | unsigned long dmap[entries]; |
405 | unsigned int start = 0, end = -1; | 454 | unsigned int start = 0, end = -1; |
455 | bool force = (cpc->reason == CP_DISCARD); | ||
406 | int i; | 456 | int i; |
407 | 457 | ||
408 | if (!test_opt(sbi, DISCARD)) | 458 | if (!force && !test_opt(sbi, DISCARD)) |
459 | return; | ||
460 | |||
461 | if (force && !se->valid_blocks) { | ||
462 | struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); | ||
463 | /* | ||
464 | * if this segment is registered in the prefree list, then | ||
465 | * we should skip adding a discard candidate, and let the | ||
466 | * checkpoint do that later. | ||
467 | */ | ||
468 | mutex_lock(&dirty_i->seglist_lock); | ||
469 | if (test_bit(cpc->trim_start, dirty_i->dirty_segmap[PRE])) { | ||
470 | mutex_unlock(&dirty_i->seglist_lock); | ||
471 | cpc->trimmed += sbi->blocks_per_seg; | ||
472 | return; | ||
473 | } | ||
474 | mutex_unlock(&dirty_i->seglist_lock); | ||
475 | |||
476 | new = f2fs_kmem_cache_alloc(discard_entry_slab, GFP_NOFS); | ||
477 | INIT_LIST_HEAD(&new->list); | ||
478 | new->blkaddr = START_BLOCK(sbi, cpc->trim_start); | ||
479 | new->len = sbi->blocks_per_seg; | ||
480 | list_add_tail(&new->list, head); | ||
481 | SM_I(sbi)->nr_discards += sbi->blocks_per_seg; | ||
482 | cpc->trimmed += sbi->blocks_per_seg; | ||
409 | return; | 483 | return; |
484 | } | ||
410 | 485 | ||
411 | /* zero block will be discarded through the prefree list */ | 486 | /* zero block will be discarded through the prefree list */ |
412 | if (!se->valid_blocks || se->valid_blocks == max_blocks) | 487 | if (!se->valid_blocks || se->valid_blocks == max_blocks) |
@@ -416,23 +491,39 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi, | |||
416 | for (i = 0; i < entries; i++) | 491 | for (i = 0; i < entries; i++) |
417 | dmap[i] = (cur_map[i] ^ ckpt_map[i]) & ckpt_map[i]; | 492 | dmap[i] = (cur_map[i] ^ ckpt_map[i]) & ckpt_map[i]; |
418 | 493 | ||
419 | while (SM_I(sbi)->nr_discards <= SM_I(sbi)->max_discards) { | 494 | while (force || SM_I(sbi)->nr_discards <= SM_I(sbi)->max_discards) { |
420 | start = __find_rev_next_bit(dmap, max_blocks, end + 1); | 495 | start = __find_rev_next_bit(dmap, max_blocks, end + 1); |
421 | if (start >= max_blocks) | 496 | if (start >= max_blocks) |
422 | break; | 497 | break; |
423 | 498 | ||
424 | end = __find_rev_next_zero_bit(dmap, max_blocks, start + 1); | 499 | end = __find_rev_next_zero_bit(dmap, max_blocks, start + 1); |
425 | 500 | ||
501 | if (end - start < cpc->trim_minlen) | ||
502 | continue; | ||
503 | |||
426 | new = f2fs_kmem_cache_alloc(discard_entry_slab, GFP_NOFS); | 504 | new = f2fs_kmem_cache_alloc(discard_entry_slab, GFP_NOFS); |
427 | INIT_LIST_HEAD(&new->list); | 505 | INIT_LIST_HEAD(&new->list); |
428 | new->blkaddr = START_BLOCK(sbi, segno) + start; | 506 | new->blkaddr = START_BLOCK(sbi, cpc->trim_start) + start; |
429 | new->len = end - start; | 507 | new->len = end - start; |
508 | cpc->trimmed += end - start; | ||
430 | 509 | ||
431 | list_add_tail(&new->list, head); | 510 | list_add_tail(&new->list, head); |
432 | SM_I(sbi)->nr_discards += end - start; | 511 | SM_I(sbi)->nr_discards += end - start; |
433 | } | 512 | } |
434 | } | 513 | } |
435 | 514 | ||
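The candidates themselves come from dmap[i] = (cur_map[i] ^ ckpt_map[i]) & ckpt_map[i]: a bit survives only if the block was valid at the last checkpoint (set in ckpt_map) and has been invalidated since (cleared in cur_map), which is exactly the set that is safe to discard. A one-word worked example:

#include <stdio.h>

int main(void)
{
        /* one 8-bit slice of the per-segment validity maps */
        unsigned ckpt_map = 0xF0;       /* valid at the last checkpoint */
        unsigned cur_map  = 0x90;       /* still valid now */

        /* valid then, freed since: discardable */
        unsigned dmap = (cur_map ^ ckpt_map) & ckpt_map;

        printf("dmap = 0x%02X\n", dmap);        /* 0x60: blocks 5 and 6 */
        return 0;
}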
515 | void release_discard_addrs(struct f2fs_sb_info *sbi) | ||
516 | { | ||
517 | struct list_head *head = &(SM_I(sbi)->discard_list); | ||
518 | struct discard_entry *entry, *this; | ||
519 | |||
520 | /* drop caches */ | ||
521 | list_for_each_entry_safe(entry, this, head, list) { | ||
522 | list_del(&entry->list); | ||
523 | kmem_cache_free(discard_entry_slab, entry); | ||
524 | } | ||
525 | } | ||
526 | |||
436 | /* | 527 | /* |
437 | * Should call clear_prefree_segments after checkpoint is done. | 528 | * Should call clear_prefree_segments after checkpoint is done. |
438 | */ | 529 | */ |
@@ -440,10 +531,9 @@ static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi) | |||
440 | { | 531 | { |
441 | struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); | 532 | struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); |
442 | unsigned int segno; | 533 | unsigned int segno; |
443 | unsigned int total_segs = TOTAL_SEGS(sbi); | ||
444 | 534 | ||
445 | mutex_lock(&dirty_i->seglist_lock); | 535 | mutex_lock(&dirty_i->seglist_lock); |
446 | for_each_set_bit(segno, dirty_i->dirty_segmap[PRE], total_segs) | 536 | for_each_set_bit(segno, dirty_i->dirty_segmap[PRE], MAIN_SEGS(sbi)) |
447 | __set_test_and_free(sbi, segno); | 537 | __set_test_and_free(sbi, segno); |
448 | mutex_unlock(&dirty_i->seglist_lock); | 538 | mutex_unlock(&dirty_i->seglist_lock); |
449 | } | 539 | } |
@@ -454,17 +544,17 @@ void clear_prefree_segments(struct f2fs_sb_info *sbi) | |||
454 | struct discard_entry *entry, *this; | 544 | struct discard_entry *entry, *this; |
455 | struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); | 545 | struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); |
456 | unsigned long *prefree_map = dirty_i->dirty_segmap[PRE]; | 546 | unsigned long *prefree_map = dirty_i->dirty_segmap[PRE]; |
457 | unsigned int total_segs = TOTAL_SEGS(sbi); | ||
458 | unsigned int start = 0, end = -1; | 547 | unsigned int start = 0, end = -1; |
459 | 548 | ||
460 | mutex_lock(&dirty_i->seglist_lock); | 549 | mutex_lock(&dirty_i->seglist_lock); |
461 | 550 | ||
462 | while (1) { | 551 | while (1) { |
463 | int i; | 552 | int i; |
464 | start = find_next_bit(prefree_map, total_segs, end + 1); | 553 | start = find_next_bit(prefree_map, MAIN_SEGS(sbi), end + 1); |
465 | if (start >= total_segs) | 554 | if (start >= MAIN_SEGS(sbi)) |
466 | break; | 555 | break; |
467 | end = find_next_zero_bit(prefree_map, total_segs, start + 1); | 556 | end = find_next_zero_bit(prefree_map, MAIN_SEGS(sbi), |
557 | start + 1); | ||
468 | 558 | ||
469 | for (i = start; i < end; i++) | 559 | for (i = start; i < end; i++) |
470 | clear_bit(i, prefree_map); | 560 | clear_bit(i, prefree_map); |
@@ -488,11 +578,16 @@ void clear_prefree_segments(struct f2fs_sb_info *sbi) | |||
488 | } | 578 | } |
489 | } | 579 | } |
490 | 580 | ||
491 | static void __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno) | 581 | static bool __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno) |
492 | { | 582 | { |
493 | struct sit_info *sit_i = SIT_I(sbi); | 583 | struct sit_info *sit_i = SIT_I(sbi); |
494 | if (!__test_and_set_bit(segno, sit_i->dirty_sentries_bitmap)) | 584 | |
585 | if (!__test_and_set_bit(segno, sit_i->dirty_sentries_bitmap)) { | ||
495 | sit_i->dirty_sentries++; | 586 | sit_i->dirty_sentries++; |
587 | return false; | ||
588 | } | ||
589 | |||
590 | return true; | ||
496 | } | 591 | } |
497 | 592 | ||
498 | static void __set_sit_entry_type(struct f2fs_sb_info *sbi, int type, | 593 | static void __set_sit_entry_type(struct f2fs_sb_info *sbi, int type, |
@@ -516,7 +611,7 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) | |||
516 | new_vblocks = se->valid_blocks + del; | 611 | new_vblocks = se->valid_blocks + del; |
517 | offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr); | 612 | offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr); |
518 | 613 | ||
519 | f2fs_bug_on((new_vblocks >> (sizeof(unsigned short) << 3) || | 614 | f2fs_bug_on(sbi, (new_vblocks >> (sizeof(unsigned short) << 3) || |
520 | (new_vblocks > sbi->blocks_per_seg))); | 615 | (new_vblocks > sbi->blocks_per_seg))); |
521 | 616 | ||
522 | se->valid_blocks = new_vblocks; | 617 | se->valid_blocks = new_vblocks; |
@@ -526,10 +621,10 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) | |||
526 | /* Update valid block bitmap */ | 621 | /* Update valid block bitmap */ |
527 | if (del > 0) { | 622 | if (del > 0) { |
528 | if (f2fs_set_bit(offset, se->cur_valid_map)) | 623 | if (f2fs_set_bit(offset, se->cur_valid_map)) |
529 | BUG(); | 624 | f2fs_bug_on(sbi, 1); |
530 | } else { | 625 | } else { |
531 | if (!f2fs_clear_bit(offset, se->cur_valid_map)) | 626 | if (!f2fs_clear_bit(offset, se->cur_valid_map)) |
532 | BUG(); | 627 | f2fs_bug_on(sbi, 1); |
533 | } | 628 | } |
534 | if (!f2fs_test_bit(offset, se->ckpt_valid_map)) | 629 | if (!f2fs_test_bit(offset, se->ckpt_valid_map)) |
535 | se->ckpt_valid_blocks += del; | 630 | se->ckpt_valid_blocks += del; |
@@ -558,7 +653,7 @@ void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr) | |||
558 | unsigned int segno = GET_SEGNO(sbi, addr); | 653 | unsigned int segno = GET_SEGNO(sbi, addr); |
559 | struct sit_info *sit_i = SIT_I(sbi); | 654 | struct sit_info *sit_i = SIT_I(sbi); |
560 | 655 | ||
561 | f2fs_bug_on(addr == NULL_ADDR); | 656 | f2fs_bug_on(sbi, addr == NULL_ADDR); |
562 | if (addr == NEW_ADDR) | 657 | if (addr == NEW_ADDR) |
563 | return; | 658 | return; |
564 | 659 | ||
@@ -634,7 +729,7 @@ static int is_next_segment_free(struct f2fs_sb_info *sbi, int type) | |||
634 | unsigned int segno = curseg->segno + 1; | 729 | unsigned int segno = curseg->segno + 1; |
635 | struct free_segmap_info *free_i = FREE_I(sbi); | 730 | struct free_segmap_info *free_i = FREE_I(sbi); |
636 | 731 | ||
637 | if (segno < TOTAL_SEGS(sbi) && segno % sbi->segs_per_sec) | 732 | if (segno < MAIN_SEGS(sbi) && segno % sbi->segs_per_sec) |
638 | return !test_bit(segno, free_i->free_segmap); | 733 | return !test_bit(segno, free_i->free_segmap); |
639 | return 0; | 734 | return 0; |
640 | } | 735 | } |
@@ -648,7 +743,7 @@ static void get_new_segment(struct f2fs_sb_info *sbi, | |||
648 | { | 743 | { |
649 | struct free_segmap_info *free_i = FREE_I(sbi); | 744 | struct free_segmap_info *free_i = FREE_I(sbi); |
650 | unsigned int segno, secno, zoneno; | 745 | unsigned int segno, secno, zoneno; |
651 | unsigned int total_zones = TOTAL_SECS(sbi) / sbi->secs_per_zone; | 746 | unsigned int total_zones = MAIN_SECS(sbi) / sbi->secs_per_zone; |
652 | unsigned int hint = *newseg / sbi->segs_per_sec; | 747 | unsigned int hint = *newseg / sbi->segs_per_sec; |
653 | unsigned int old_zoneno = GET_ZONENO_FROM_SEGNO(sbi, *newseg); | 748 | unsigned int old_zoneno = GET_ZONENO_FROM_SEGNO(sbi, *newseg); |
654 | unsigned int left_start = hint; | 749 | unsigned int left_start = hint; |
@@ -660,18 +755,18 @@ static void get_new_segment(struct f2fs_sb_info *sbi, | |||
660 | 755 | ||
661 | if (!new_sec && ((*newseg + 1) % sbi->segs_per_sec)) { | 756 | if (!new_sec && ((*newseg + 1) % sbi->segs_per_sec)) { |
662 | segno = find_next_zero_bit(free_i->free_segmap, | 757 | segno = find_next_zero_bit(free_i->free_segmap, |
663 | TOTAL_SEGS(sbi), *newseg + 1); | 758 | MAIN_SEGS(sbi), *newseg + 1); |
664 | if (segno - *newseg < sbi->segs_per_sec - | 759 | if (segno - *newseg < sbi->segs_per_sec - |
665 | (*newseg % sbi->segs_per_sec)) | 760 | (*newseg % sbi->segs_per_sec)) |
666 | goto got_it; | 761 | goto got_it; |
667 | } | 762 | } |
668 | find_other_zone: | 763 | find_other_zone: |
669 | secno = find_next_zero_bit(free_i->free_secmap, TOTAL_SECS(sbi), hint); | 764 | secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint); |
670 | if (secno >= TOTAL_SECS(sbi)) { | 765 | if (secno >= MAIN_SECS(sbi)) { |
671 | if (dir == ALLOC_RIGHT) { | 766 | if (dir == ALLOC_RIGHT) { |
672 | secno = find_next_zero_bit(free_i->free_secmap, | 767 | secno = find_next_zero_bit(free_i->free_secmap, |
673 | TOTAL_SECS(sbi), 0); | 768 | MAIN_SECS(sbi), 0); |
674 | f2fs_bug_on(secno >= TOTAL_SECS(sbi)); | 769 | f2fs_bug_on(sbi, secno >= MAIN_SECS(sbi)); |
675 | } else { | 770 | } else { |
676 | go_left = 1; | 771 | go_left = 1; |
677 | left_start = hint - 1; | 772 | left_start = hint - 1; |
@@ -686,8 +781,8 @@ find_other_zone: | |||
686 | continue; | 781 | continue; |
687 | } | 782 | } |
688 | left_start = find_next_zero_bit(free_i->free_secmap, | 783 | left_start = find_next_zero_bit(free_i->free_secmap, |
689 | TOTAL_SECS(sbi), 0); | 784 | MAIN_SECS(sbi), 0); |
690 | f2fs_bug_on(left_start >= TOTAL_SECS(sbi)); | 785 | f2fs_bug_on(sbi, left_start >= MAIN_SECS(sbi)); |
691 | break; | 786 | break; |
692 | } | 787 | } |
693 | secno = left_start; | 788 | secno = left_start; |
@@ -726,7 +821,7 @@ skip_left: | |||
726 | } | 821 | } |
727 | got_it: | 822 | got_it: |
728 | /* set it as dirty segment in free segmap */ | 823 | /* set it as dirty segment in free segmap */ |
729 | f2fs_bug_on(test_bit(segno, free_i->free_segmap)); | 824 | f2fs_bug_on(sbi, test_bit(segno, free_i->free_segmap)); |
730 | __set_inuse(sbi, segno); | 825 | __set_inuse(sbi, segno); |
731 | *newseg = segno; | 826 | *newseg = segno; |
732 | write_unlock(&free_i->segmap_lock); | 827 | write_unlock(&free_i->segmap_lock); |
@@ -808,7 +903,7 @@ static void __refresh_next_blkoff(struct f2fs_sb_info *sbi, | |||
808 | } | 903 | } |
809 | 904 | ||
810 | /* | 905 | /* |
811 | * This function always allocates a used segment (from dirty seglist) by SSR | 906 | * This function always allocates a used segment(from dirty seglist) by SSR |
812 | * manner, so it should recover the existing segment information of valid blocks | 907 | * manner, so it should recover the existing segment information of valid blocks |
813 | */ | 908 | */ |
814 | static void change_curseg(struct f2fs_sb_info *sbi, int type, bool reuse) | 909 | static void change_curseg(struct f2fs_sb_info *sbi, int type, bool reuse) |
@@ -898,6 +993,37 @@ static const struct segment_allocation default_salloc_ops = { | |||
898 | .allocate_segment = allocate_segment_by_default, | 993 | .allocate_segment = allocate_segment_by_default, |
899 | }; | 994 | }; |
900 | 995 | ||
996 | int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) | ||
997 | { | ||
998 | __u64 start = range->start >> sbi->log_blocksize; | ||
999 | __u64 end = start + (range->len >> sbi->log_blocksize) - 1; | ||
1000 | unsigned int start_segno, end_segno; | ||
1001 | struct cp_control cpc = {0}; | ||
1002 | |||
1003 | if (range->minlen > SEGMENT_SIZE(sbi) || start >= MAX_BLKADDR(sbi) || | ||
1004 | range->len < sbi->blocksize) | ||
1005 | return -EINVAL; | ||
1006 | |||
1007 | if (end <= MAIN_BLKADDR(sbi)) | ||
1008 | goto out; | ||
1009 | |||
1010 | /* start/end segment number in main_area */ | ||
1011 | start_segno = (start <= MAIN_BLKADDR(sbi)) ? 0 : GET_SEGNO(sbi, start); | ||
1012 | end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 : | ||
1013 | GET_SEGNO(sbi, end); | ||
1014 | cpc.reason = CP_DISCARD; | ||
1015 | cpc.trim_start = start_segno; | ||
1016 | cpc.trim_end = end_segno; | ||
1017 | cpc.trim_minlen = range->minlen >> sbi->log_blocksize; | ||
1018 | cpc.trimmed = 0; | ||
1019 | |||
1020 | /* do checkpoint to issue discard commands safely */ | ||
1021 | write_checkpoint(sbi, &cpc); | ||
1022 | out: | ||
1023 | range->len = cpc.trimmed << sbi->log_blocksize; | ||
1024 | return 0; | ||
1025 | } | ||
1026 | |||
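f2fs_trim_fs() reduces the FITRIM byte range to blocks by shifting with log_blocksize, then to segment numbers with log_blocks_per_seg (the real code additionally clamps against MAIN_BLKADDR/MAX_BLKADDR and uses GET_SEGNO). A standalone sketch of the arithmetic under an invented geometry:

#include <stdio.h>

int main(void)
{
        unsigned log_blocksize = 12;            /* 4 KB blocks */
        unsigned log_blocks_per_seg = 9;        /* 512 blocks = 2 MB segments */
        unsigned long long range_start = 6ULL << 20;    /* trim from 6 MB... */
        unsigned long long range_len   = 8ULL << 20;    /* ...for 8 MB */

        unsigned long long start = range_start >> log_blocksize;
        unsigned long long end   = start + (range_len >> log_blocksize) - 1;

        printf("blocks %llu..%llu, segments %llu..%llu\n",
               start, end,                              /* 1536..3583 */
               start >> log_blocks_per_seg,             /* 3 */
               end >> log_blocks_per_seg);              /* 6 */
        return 0;
}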
901 | static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type) | 1027 | static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type) |
902 | { | 1028 | { |
903 | struct curseg_info *curseg = CURSEG_I(sbi, type); | 1029 | struct curseg_info *curseg = CURSEG_I(sbi, type); |
@@ -953,15 +1079,15 @@ static int __get_segment_type_6(struct page *page, enum page_type p_type) | |||
953 | 1079 | ||
954 | static int __get_segment_type(struct page *page, enum page_type p_type) | 1080 | static int __get_segment_type(struct page *page, enum page_type p_type) |
955 | { | 1081 | { |
956 | struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb); | 1082 | switch (F2FS_P_SB(page)->active_logs) { |
957 | switch (sbi->active_logs) { | ||
958 | case 2: | 1083 | case 2: |
959 | return __get_segment_type_2(page, p_type); | 1084 | return __get_segment_type_2(page, p_type); |
960 | case 4: | 1085 | case 4: |
961 | return __get_segment_type_4(page, p_type); | 1086 | return __get_segment_type_4(page, p_type); |
962 | } | 1087 | } |
963 | /* NR_CURSEG_TYPE(6) logs by default */ | 1088 | /* NR_CURSEG_TYPE(6) logs by default */ |
964 | f2fs_bug_on(sbi->active_logs != NR_CURSEG_TYPE); | 1089 | f2fs_bug_on(F2FS_P_SB(page), |
1090 | F2FS_P_SB(page)->active_logs != NR_CURSEG_TYPE); | ||
965 | return __get_segment_type_6(page, p_type); | 1091 | return __get_segment_type_6(page, p_type); |
966 | } | 1092 | } |
967 | 1093 | ||
@@ -1041,11 +1167,11 @@ void write_node_page(struct f2fs_sb_info *sbi, struct page *page, | |||
1041 | void write_data_page(struct page *page, struct dnode_of_data *dn, | 1167 | void write_data_page(struct page *page, struct dnode_of_data *dn, |
1042 | block_t *new_blkaddr, struct f2fs_io_info *fio) | 1168 | block_t *new_blkaddr, struct f2fs_io_info *fio) |
1043 | { | 1169 | { |
1044 | struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); | 1170 | struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); |
1045 | struct f2fs_summary sum; | 1171 | struct f2fs_summary sum; |
1046 | struct node_info ni; | 1172 | struct node_info ni; |
1047 | 1173 | ||
1048 | f2fs_bug_on(dn->data_blkaddr == NULL_ADDR); | 1174 | f2fs_bug_on(sbi, dn->data_blkaddr == NULL_ADDR); |
1049 | get_node_info(sbi, dn->nid, &ni); | 1175 | get_node_info(sbi, dn->nid, &ni); |
1050 | set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version); | 1176 | set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version); |
1051 | 1177 | ||
@@ -1055,9 +1181,7 @@ void write_data_page(struct page *page, struct dnode_of_data *dn, | |||
1055 | void rewrite_data_page(struct page *page, block_t old_blkaddr, | 1181 | void rewrite_data_page(struct page *page, block_t old_blkaddr, |
1056 | struct f2fs_io_info *fio) | 1182 | struct f2fs_io_info *fio) |
1057 | { | 1183 | { |
1058 | struct inode *inode = page->mapping->host; | 1184 | f2fs_submit_page_mbio(F2FS_P_SB(page), page, old_blkaddr, fio); |
1059 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | ||
1060 | f2fs_submit_page_mbio(sbi, page, old_blkaddr, fio); | ||
1061 | } | 1185 | } |
1062 | 1186 | ||
1063 | void recover_data_page(struct f2fs_sb_info *sbi, | 1187 | void recover_data_page(struct f2fs_sb_info *sbi, |
@@ -1103,55 +1227,6 @@ void recover_data_page(struct f2fs_sb_info *sbi, | |||
1103 | mutex_unlock(&curseg->curseg_mutex); | 1227 | mutex_unlock(&curseg->curseg_mutex); |
1104 | } | 1228 | } |
1105 | 1229 | ||
1106 | void rewrite_node_page(struct f2fs_sb_info *sbi, | ||
1107 | struct page *page, struct f2fs_summary *sum, | ||
1108 | block_t old_blkaddr, block_t new_blkaddr) | ||
1109 | { | ||
1110 | struct sit_info *sit_i = SIT_I(sbi); | ||
1111 | int type = CURSEG_WARM_NODE; | ||
1112 | struct curseg_info *curseg; | ||
1113 | unsigned int segno, old_cursegno; | ||
1114 | block_t next_blkaddr = next_blkaddr_of_node(page); | ||
1115 | unsigned int next_segno = GET_SEGNO(sbi, next_blkaddr); | ||
1116 | struct f2fs_io_info fio = { | ||
1117 | .type = NODE, | ||
1118 | .rw = WRITE_SYNC, | ||
1119 | }; | ||
1120 | |||
1121 | curseg = CURSEG_I(sbi, type); | ||
1122 | |||
1123 | mutex_lock(&curseg->curseg_mutex); | ||
1124 | mutex_lock(&sit_i->sentry_lock); | ||
1125 | |||
1126 | segno = GET_SEGNO(sbi, new_blkaddr); | ||
1127 | old_cursegno = curseg->segno; | ||
1128 | |||
1129 | /* change the current segment */ | ||
1130 | if (segno != curseg->segno) { | ||
1131 | curseg->next_segno = segno; | ||
1132 | change_curseg(sbi, type, true); | ||
1133 | } | ||
1134 | curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr); | ||
1135 | __add_sum_entry(sbi, type, sum); | ||
1136 | |||
1137 | /* change the current log to the next block addr in advance */ | ||
1138 | if (next_segno != segno) { | ||
1139 | curseg->next_segno = next_segno; | ||
1140 | change_curseg(sbi, type, true); | ||
1141 | } | ||
1142 | curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, next_blkaddr); | ||
1143 | |||
1144 | /* rewrite node page */ | ||
1145 | set_page_writeback(page); | ||
1146 | f2fs_submit_page_mbio(sbi, page, new_blkaddr, &fio); | ||
1147 | f2fs_submit_merged_bio(sbi, NODE, WRITE); | ||
1148 | refresh_sit_entry(sbi, old_blkaddr, new_blkaddr); | ||
1149 | locate_dirty_segment(sbi, old_cursegno); | ||
1150 | |||
1151 | mutex_unlock(&sit_i->sentry_lock); | ||
1152 | mutex_unlock(&curseg->curseg_mutex); | ||
1153 | } | ||
1154 | |||
1155 | static inline bool is_merged_page(struct f2fs_sb_info *sbi, | 1230 | static inline bool is_merged_page(struct f2fs_sb_info *sbi, |
1156 | struct page *page, enum page_type type) | 1231 | struct page *page, enum page_type type) |
1157 | { | 1232 | { |
@@ -1179,8 +1254,9 @@ out: | |||
1179 | void f2fs_wait_on_page_writeback(struct page *page, | 1254 | void f2fs_wait_on_page_writeback(struct page *page, |
1180 | enum page_type type) | 1255 | enum page_type type) |
1181 | { | 1256 | { |
1182 | struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb); | ||
1183 | if (PageWriteback(page)) { | 1257 | if (PageWriteback(page)) { |
1258 | struct f2fs_sb_info *sbi = F2FS_P_SB(page); | ||
1259 | |||
1184 | if (is_merged_page(sbi, page, type)) | 1260 | if (is_merged_page(sbi, page, type)) |
1185 | f2fs_submit_merged_bio(sbi, type, WRITE); | 1261 | f2fs_submit_merged_bio(sbi, type, WRITE); |
1186 | wait_on_page_writeback(page); | 1262 | wait_on_page_writeback(page); |
@@ -1449,7 +1525,7 @@ static struct page *get_current_sit_page(struct f2fs_sb_info *sbi, | |||
1449 | unsigned int segno) | 1525 | unsigned int segno) |
1450 | { | 1526 | { |
1451 | struct sit_info *sit_i = SIT_I(sbi); | 1527 | struct sit_info *sit_i = SIT_I(sbi); |
1452 | unsigned int offset = SIT_BLOCK_OFFSET(sit_i, segno); | 1528 | unsigned int offset = SIT_BLOCK_OFFSET(segno); |
1453 | block_t blk_addr = sit_i->sit_base_addr + offset; | 1529 | block_t blk_addr = sit_i->sit_base_addr + offset; |
1454 | 1530 | ||
1455 | check_seg_range(sbi, segno); | 1531 | check_seg_range(sbi, segno); |
@@ -1475,7 +1551,7 @@ static struct page *get_next_sit_page(struct f2fs_sb_info *sbi, | |||
1475 | /* get current sit block page without lock */ | 1551 | /* get current sit block page without lock */ |
1476 | src_page = get_meta_page(sbi, src_off); | 1552 | src_page = get_meta_page(sbi, src_off); |
1477 | dst_page = grab_meta_page(sbi, dst_off); | 1553 | dst_page = grab_meta_page(sbi, dst_off); |
1478 | f2fs_bug_on(PageDirty(src_page)); | 1554 | f2fs_bug_on(sbi, PageDirty(src_page)); |
1479 | 1555 | ||
1480 | src_addr = page_address(src_page); | 1556 | src_addr = page_address(src_page); |
1481 | dst_addr = page_address(dst_page); | 1557 | dst_addr = page_address(dst_page); |
@@ -1489,101 +1565,192 @@ static struct page *get_next_sit_page(struct f2fs_sb_info *sbi, | |||
1489 | return dst_page; | 1565 | return dst_page; |
1490 | } | 1566 | } |
1491 | 1567 | ||
1492 | static bool flush_sits_in_journal(struct f2fs_sb_info *sbi) | 1568 | static struct sit_entry_set *grab_sit_entry_set(void) |
1569 | { | ||
1570 | struct sit_entry_set *ses = | ||
1571 | f2fs_kmem_cache_alloc(sit_entry_set_slab, GFP_ATOMIC); | ||
1572 | |||
1573 | ses->entry_cnt = 0; | ||
1574 | INIT_LIST_HEAD(&ses->set_list); | ||
1575 | return ses; | ||
1576 | } | ||
1577 | |||
1578 | static void release_sit_entry_set(struct sit_entry_set *ses) | ||
1579 | { | ||
1580 | list_del(&ses->set_list); | ||
1581 | kmem_cache_free(sit_entry_set_slab, ses); | ||
1582 | } | ||
1583 | |||
1584 | static void adjust_sit_entry_set(struct sit_entry_set *ses, | ||
1585 | struct list_head *head) | ||
1586 | { | ||
1587 | struct sit_entry_set *next = ses; | ||
1588 | |||
1589 | if (list_is_last(&ses->set_list, head)) | ||
1590 | return; | ||
1591 | |||
1592 | list_for_each_entry_continue(next, head, set_list) | ||
1593 | if (ses->entry_cnt <= next->entry_cnt) | ||
1594 | break; | ||
1595 | |||
1596 | list_move_tail(&ses->set_list, &next->set_list); | ||
1597 | } | ||
1598 | |||
1599 | static void add_sit_entry(unsigned int segno, struct list_head *head) | ||
1600 | { | ||
1601 | struct sit_entry_set *ses; | ||
1602 | unsigned int start_segno = START_SEGNO(segno); | ||
1603 | |||
1604 | list_for_each_entry(ses, head, set_list) { | ||
1605 | if (ses->start_segno == start_segno) { | ||
1606 | ses->entry_cnt++; | ||
1607 | adjust_sit_entry_set(ses, head); | ||
1608 | return; | ||
1609 | } | ||
1610 | } | ||
1611 | |||
1612 | ses = grab_sit_entry_set(); | ||
1613 | |||
1614 | ses->start_segno = start_segno; | ||
1615 | ses->entry_cnt++; | ||
1616 | list_add(&ses->set_list, head); | ||
1617 | } | ||
1618 | |||
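add_sit_entry() keeps sm_info->sit_entry_set ordered by entry_cnt: after a set's count grows, adjust_sit_entry_set() walks forward and re-inserts the set before the first neighbor with an equal or larger count, so the list stays sorted ascending. An array-based sketch of that one-element re-sort (illustrative, not the kernel list API):

#include <stdio.h>

/* bubble one grown count rightward until order is restored,
 * like adjust_sit_entry_set() moving a set toward the tail */
static void adjust(int *cnt, int n, int i)
{
        while (i + 1 < n && cnt[i] > cnt[i + 1]) {
                int t = cnt[i];

                cnt[i] = cnt[i + 1];
                cnt[i + 1] = t;
                i++;
        }
}

int main(void)
{
        int cnt[] = {1, 2, 2, 5};

        cnt[0] += 4;                    /* set 0 gained entries: now 5 */
        adjust(cnt, 4, 0);
        for (int i = 0; i < 4; i++)
                printf("%d ", cnt[i]);  /* 2 2 5 5: ascending again */
        printf("\n");
        return 0;
}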
1619 | static void add_sits_in_set(struct f2fs_sb_info *sbi) | ||
1620 | { | ||
1621 | struct f2fs_sm_info *sm_info = SM_I(sbi); | ||
1622 | struct list_head *set_list = &sm_info->sit_entry_set; | ||
1623 | unsigned long *bitmap = SIT_I(sbi)->dirty_sentries_bitmap; | ||
1624 | unsigned int segno; | ||
1625 | |||
1626 | for_each_set_bit(segno, bitmap, MAIN_SEGS(sbi)) | ||
1627 | add_sit_entry(segno, set_list); | ||
1628 | } | ||
1629 | |||
1630 | static void remove_sits_in_journal(struct f2fs_sb_info *sbi) | ||
1493 | { | 1631 | { |
1494 | struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA); | 1632 | struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA); |
1495 | struct f2fs_summary_block *sum = curseg->sum_blk; | 1633 | struct f2fs_summary_block *sum = curseg->sum_blk; |
1496 | int i; | 1634 | int i; |
1497 | 1635 | ||
1498 | /* | 1636 | for (i = sits_in_cursum(sum) - 1; i >= 0; i--) { |
1499 | * If the journal area in the current summary is full of sit entries, | 1637 | unsigned int segno; |
1500 | * all the sit entries will be flushed. Otherwise the sit entries | 1638 | bool dirtied; |
1501 | * are not able to replace with newly hot sit entries. | 1639 | |
1502 | */ | 1640 | segno = le32_to_cpu(segno_in_journal(sum, i)); |
1503 | if (sits_in_cursum(sum) >= SIT_JOURNAL_ENTRIES) { | 1641 | dirtied = __mark_sit_entry_dirty(sbi, segno); |
1504 | for (i = sits_in_cursum(sum) - 1; i >= 0; i--) { | 1642 | |
1505 | unsigned int segno; | 1643 | if (!dirtied) |
1506 | segno = le32_to_cpu(segno_in_journal(sum, i)); | 1644 | add_sit_entry(segno, &SM_I(sbi)->sit_entry_set); |
1507 | __mark_sit_entry_dirty(sbi, segno); | ||
1508 | } | ||
1509 | update_sits_in_cursum(sum, -sits_in_cursum(sum)); | ||
1510 | return true; | ||
1511 | } | 1645 | } |
1512 | return false; | 1646 | update_sits_in_cursum(sum, -sits_in_cursum(sum)); |
1513 | } | 1647 | } |
1514 | 1648 | ||
1515 | /* | 1649 | /* |
1516 | * CP calls this function, which flushes SIT entries including sit_journal, | 1650 | * CP calls this function, which flushes SIT entries including sit_journal, |
1517 | * and moves prefree segs to free segs. | 1651 | * and moves prefree segs to free segs. |
1518 | */ | 1652 | */ |
1519 | void flush_sit_entries(struct f2fs_sb_info *sbi) | 1653 | void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) |
1520 | { | 1654 | { |
1521 | struct sit_info *sit_i = SIT_I(sbi); | 1655 | struct sit_info *sit_i = SIT_I(sbi); |
1522 | unsigned long *bitmap = sit_i->dirty_sentries_bitmap; | 1656 | unsigned long *bitmap = sit_i->dirty_sentries_bitmap; |
1523 | struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA); | 1657 | struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA); |
1524 | struct f2fs_summary_block *sum = curseg->sum_blk; | 1658 | struct f2fs_summary_block *sum = curseg->sum_blk; |
1525 | unsigned long nsegs = TOTAL_SEGS(sbi); | 1659 | struct sit_entry_set *ses, *tmp; |
1526 | struct page *page = NULL; | 1660 | struct list_head *head = &SM_I(sbi)->sit_entry_set; |
1527 | struct f2fs_sit_block *raw_sit = NULL; | 1661 | bool to_journal = true; |
1528 | unsigned int start = 0, end = 0; | 1662 | struct seg_entry *se; |
1529 | unsigned int segno; | ||
1530 | bool flushed; | ||
1531 | 1663 | ||
1532 | mutex_lock(&curseg->curseg_mutex); | 1664 | mutex_lock(&curseg->curseg_mutex); |
1533 | mutex_lock(&sit_i->sentry_lock); | 1665 | mutex_lock(&sit_i->sentry_lock); |
1534 | 1666 | ||
1535 | /* | 1667 | /* |
1536 | * "flushed" indicates whether sit entries in journal are flushed | 1668 | * add and account sit entries of dirty bitmap in sit entry |
1537 | * to the SIT area or not. | 1669 | * set temporarily |
1538 | */ | 1670 | */ |
1539 | flushed = flush_sits_in_journal(sbi); | 1671 | add_sits_in_set(sbi); |
1540 | 1672 | ||
1541 | for_each_set_bit(segno, bitmap, nsegs) { | 1673 | /* |
1542 | struct seg_entry *se = get_seg_entry(sbi, segno); | 1674 | * if there is not enough space in the journal to store dirty sit |
1543 | int sit_offset, offset; | 1675 | * entries, remove all entries from journal and add and account |
1676 | * them in sit entry set. | ||
1677 | */ | ||
1678 | if (!__has_cursum_space(sum, sit_i->dirty_sentries, SIT_JOURNAL)) | ||
1679 | remove_sits_in_journal(sbi); | ||
1544 | 1680 | ||
1545 | sit_offset = SIT_ENTRY_OFFSET(sit_i, segno); | 1681 | if (!sit_i->dirty_sentries) |
1682 | goto out; | ||
1546 | 1683 | ||
1547 | /* add discard candidates */ | 1684 | /* |
1548 | if (SM_I(sbi)->nr_discards < SM_I(sbi)->max_discards) | 1685 | * there are two steps to flush sit entries: |
1549 | add_discard_addrs(sbi, segno, se); | 1686 | * #1, flush sit entries to journal in current cold data summary block. |
1687 | * #2, flush sit entries to sit page. | ||
1688 | */ | ||
1689 | list_for_each_entry_safe(ses, tmp, head, set_list) { | ||
1690 | struct page *page; | ||
1691 | struct f2fs_sit_block *raw_sit = NULL; | ||
1692 | unsigned int start_segno = ses->start_segno; | ||
1693 | unsigned int end = min(start_segno + SIT_ENTRY_PER_BLOCK, | ||
1694 | (unsigned long)MAIN_SEGS(sbi)); | ||
1695 | unsigned int segno = start_segno; | ||
1696 | |||
1697 | if (to_journal && | ||
1698 | !__has_cursum_space(sum, ses->entry_cnt, SIT_JOURNAL)) | ||
1699 | to_journal = false; | ||
1700 | |||
1701 | if (!to_journal) { | ||
1702 | page = get_next_sit_page(sbi, start_segno); | ||
1703 | raw_sit = page_address(page); | ||
1704 | } | ||
1550 | 1705 | ||
1551 | if (flushed) | 1706 | /* flush dirty sit entries in region of current sit set */ |
1552 | goto to_sit_page; | 1707 | for_each_set_bit_from(segno, bitmap, end) { |
1708 | int offset, sit_offset; | ||
1553 | 1709 | ||
1554 | offset = lookup_journal_in_cursum(sum, SIT_JOURNAL, segno, 1); | 1710 | se = get_seg_entry(sbi, segno); |
1555 | if (offset >= 0) { | 1711 | |
1556 | segno_in_journal(sum, offset) = cpu_to_le32(segno); | 1712 | /* add discard candidates */ |
1557 | seg_info_to_raw_sit(se, &sit_in_journal(sum, offset)); | 1713 | if (SM_I(sbi)->nr_discards < SM_I(sbi)->max_discards) { |
1558 | goto flush_done; | 1714 | cpc->trim_start = segno; |
1559 | } | 1715 | add_discard_addrs(sbi, cpc); |
1560 | to_sit_page: | ||
1561 | if (!page || (start > segno) || (segno > end)) { | ||
1562 | if (page) { | ||
1563 | f2fs_put_page(page, 1); | ||
1564 | page = NULL; | ||
1565 | } | 1716 | } |
1566 | 1717 | ||
1567 | start = START_SEGNO(sit_i, segno); | 1718 | if (to_journal) { |
1568 | end = start + SIT_ENTRY_PER_BLOCK - 1; | 1719 | offset = lookup_journal_in_cursum(sum, |
1720 | SIT_JOURNAL, segno, 1); | ||
1721 | f2fs_bug_on(sbi, offset < 0); | ||
1722 | segno_in_journal(sum, offset) = | ||
1723 | cpu_to_le32(segno); | ||
1724 | seg_info_to_raw_sit(se, | ||
1725 | &sit_in_journal(sum, offset)); | ||
1726 | } else { | ||
1727 | sit_offset = SIT_ENTRY_OFFSET(sit_i, segno); | ||
1728 | seg_info_to_raw_sit(se, | ||
1729 | &raw_sit->entries[sit_offset]); | ||
1730 | } | ||
1569 | 1731 | ||
1570 | /* read sit block that will be updated */ | 1732 | __clear_bit(segno, bitmap); |
1571 | page = get_next_sit_page(sbi, start); | 1733 | sit_i->dirty_sentries--; |
1572 | raw_sit = page_address(page); | 1734 | ses->entry_cnt--; |
1573 | } | 1735 | } |
1574 | 1736 | ||
1575 | /* udpate entry in SIT block */ | 1737 | if (!to_journal) |
1576 | seg_info_to_raw_sit(se, &raw_sit->entries[sit_offset]); | 1738 | f2fs_put_page(page, 1); |
1577 | flush_done: | 1739 | |
1578 | __clear_bit(segno, bitmap); | 1740 | f2fs_bug_on(sbi, ses->entry_cnt); |
1579 | sit_i->dirty_sentries--; | 1741 | release_sit_entry_set(ses); |
1742 | } | ||
1743 | |||
1744 | f2fs_bug_on(sbi, !list_empty(head)); | ||
1745 | f2fs_bug_on(sbi, sit_i->dirty_sentries); | ||
1746 | out: | ||
1747 | if (cpc->reason == CP_DISCARD) { | ||
1748 | for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++) | ||
1749 | add_discard_addrs(sbi, cpc); | ||
1580 | } | 1750 | } |
1581 | mutex_unlock(&sit_i->sentry_lock); | 1751 | mutex_unlock(&sit_i->sentry_lock); |
1582 | mutex_unlock(&curseg->curseg_mutex); | 1752 | mutex_unlock(&curseg->curseg_mutex); |
1583 | 1753 | ||
1584 | /* writeout last modified SIT block */ | ||
1585 | f2fs_put_page(page, 1); | ||
1586 | |||
1587 | set_prefree_as_free_segments(sbi); | 1754 | set_prefree_as_free_segments(sbi); |
1588 | } | 1755 | } |
1589 | 1756 | ||
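With the sets kept smallest-first, flush_sit_entries() journals each one while __has_cursum_space() reports room in the cold-data summary; the first set that does not fit flips to_journal to false, and that set plus everything after it is written through SIT pages instead. A toy model of the capacity decision (JOURNAL_SLOTS is invented; the real limit comes from the summary block):

#include <stdbool.h>
#include <stdio.h>

#define JOURNAL_SLOTS 6

int main(void)
{
        int free_slots = JOURNAL_SLOTS;
        int set_cnt[] = {1, 2, 4};      /* ascending, like the set list */
        bool to_journal = true;

        for (int i = 0; i < 3; i++) {
                /* once one set no longer fits, all later sets go to pages */
                if (to_journal && set_cnt[i] > free_slots)
                        to_journal = false;
                if (to_journal) {
                        free_slots -= set_cnt[i];
                        printf("set %d -> journal (%d slots left)\n",
                               i, free_slots);
                } else {
                        printf("set %d -> sit page\n", i);
                }
        }
        return 0;
}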
@@ -1603,16 +1770,16 @@ static int build_sit_info(struct f2fs_sb_info *sbi) | |||
1603 | 1770 | ||
1604 | SM_I(sbi)->sit_info = sit_i; | 1771 | SM_I(sbi)->sit_info = sit_i; |
1605 | 1772 | ||
1606 | sit_i->sentries = vzalloc(TOTAL_SEGS(sbi) * sizeof(struct seg_entry)); | 1773 | sit_i->sentries = vzalloc(MAIN_SEGS(sbi) * sizeof(struct seg_entry)); |
1607 | if (!sit_i->sentries) | 1774 | if (!sit_i->sentries) |
1608 | return -ENOMEM; | 1775 | return -ENOMEM; |
1609 | 1776 | ||
1610 | bitmap_size = f2fs_bitmap_size(TOTAL_SEGS(sbi)); | 1777 | bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi)); |
1611 | sit_i->dirty_sentries_bitmap = kzalloc(bitmap_size, GFP_KERNEL); | 1778 | sit_i->dirty_sentries_bitmap = kzalloc(bitmap_size, GFP_KERNEL); |
1612 | if (!sit_i->dirty_sentries_bitmap) | 1779 | if (!sit_i->dirty_sentries_bitmap) |
1613 | return -ENOMEM; | 1780 | return -ENOMEM; |
1614 | 1781 | ||
1615 | for (start = 0; start < TOTAL_SEGS(sbi); start++) { | 1782 | for (start = 0; start < MAIN_SEGS(sbi); start++) { |
1616 | sit_i->sentries[start].cur_valid_map | 1783 | sit_i->sentries[start].cur_valid_map |
1617 | = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL); | 1784 | = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL); |
1618 | sit_i->sentries[start].ckpt_valid_map | 1785 | sit_i->sentries[start].ckpt_valid_map |
@@ -1623,7 +1790,7 @@ static int build_sit_info(struct f2fs_sb_info *sbi) | |||
1623 | } | 1790 | } |
1624 | 1791 | ||
1625 | if (sbi->segs_per_sec > 1) { | 1792 | if (sbi->segs_per_sec > 1) { |
1626 | sit_i->sec_entries = vzalloc(TOTAL_SECS(sbi) * | 1793 | sit_i->sec_entries = vzalloc(MAIN_SECS(sbi) * |
1627 | sizeof(struct sec_entry)); | 1794 | sizeof(struct sec_entry)); |
1628 | if (!sit_i->sec_entries) | 1795 | if (!sit_i->sec_entries) |
1629 | return -ENOMEM; | 1796 | return -ENOMEM; |
@@ -1658,7 +1825,6 @@ static int build_sit_info(struct f2fs_sb_info *sbi) | |||
1658 | 1825 | ||
1659 | static int build_free_segmap(struct f2fs_sb_info *sbi) | 1826 | static int build_free_segmap(struct f2fs_sb_info *sbi) |
1660 | { | 1827 | { |
1661 | struct f2fs_sm_info *sm_info = SM_I(sbi); | ||
1662 | struct free_segmap_info *free_i; | 1828 | struct free_segmap_info *free_i; |
1663 | unsigned int bitmap_size, sec_bitmap_size; | 1829 | unsigned int bitmap_size, sec_bitmap_size; |
1664 | 1830 | ||
@@ -1669,12 +1835,12 @@ static int build_free_segmap(struct f2fs_sb_info *sbi) | |||
1669 | 1835 | ||
1670 | SM_I(sbi)->free_info = free_i; | 1836 | SM_I(sbi)->free_info = free_i; |
1671 | 1837 | ||
1672 | bitmap_size = f2fs_bitmap_size(TOTAL_SEGS(sbi)); | 1838 | bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi)); |
1673 | free_i->free_segmap = kmalloc(bitmap_size, GFP_KERNEL); | 1839 | free_i->free_segmap = kmalloc(bitmap_size, GFP_KERNEL); |
1674 | if (!free_i->free_segmap) | 1840 | if (!free_i->free_segmap) |
1675 | return -ENOMEM; | 1841 | return -ENOMEM; |
1676 | 1842 | ||
1677 | sec_bitmap_size = f2fs_bitmap_size(TOTAL_SECS(sbi)); | 1843 | sec_bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi)); |
1678 | free_i->free_secmap = kmalloc(sec_bitmap_size, GFP_KERNEL); | 1844 | free_i->free_secmap = kmalloc(sec_bitmap_size, GFP_KERNEL); |
1679 | if (!free_i->free_secmap) | 1845 | if (!free_i->free_secmap) |
1680 | return -ENOMEM; | 1846 | return -ENOMEM; |
@@ -1684,8 +1850,7 @@ static int build_free_segmap(struct f2fs_sb_info *sbi) | |||
1684 | memset(free_i->free_secmap, 0xff, sec_bitmap_size); | 1850 | memset(free_i->free_secmap, 0xff, sec_bitmap_size); |
1685 | 1851 | ||
1686 | /* init free segmap information */ | 1852 | /* init free segmap information */ |
1687 | free_i->start_segno = | 1853 | free_i->start_segno = GET_SEGNO_FROM_SEG0(sbi, MAIN_BLKADDR(sbi)); |
1688 | (unsigned int) GET_SEGNO_FROM_SEG0(sbi, sm_info->main_blkaddr); | ||
1689 | free_i->free_segments = 0; | 1854 | free_i->free_segments = 0; |
1690 | free_i->free_sections = 0; | 1855 | free_i->free_sections = 0; |
1691 | rwlock_init(&free_i->segmap_lock); | 1856 | rwlock_init(&free_i->segmap_lock); |
@@ -1722,7 +1887,7 @@ static void build_sit_entries(struct f2fs_sb_info *sbi) | |||
1722 | int sit_blk_cnt = SIT_BLK_CNT(sbi); | 1887 | int sit_blk_cnt = SIT_BLK_CNT(sbi); |
1723 | unsigned int i, start, end; | 1888 | unsigned int i, start, end; |
1724 | unsigned int readed, start_blk = 0; | 1889 | unsigned int readed, start_blk = 0; |
1725 | int nrpages = MAX_BIO_BLOCKS(max_hw_blocks(sbi)); | 1890 | int nrpages = MAX_BIO_BLOCKS(sbi); |
1726 | 1891 | ||
1727 | do { | 1892 | do { |
1728 | readed = ra_meta_pages(sbi, start_blk, nrpages, META_SIT); | 1893 | readed = ra_meta_pages(sbi, start_blk, nrpages, META_SIT); |
@@ -1730,7 +1895,7 @@ static void build_sit_entries(struct f2fs_sb_info *sbi) | |||
1730 | start = start_blk * sit_i->sents_per_block; | 1895 | start = start_blk * sit_i->sents_per_block; |
1731 | end = (start_blk + readed) * sit_i->sents_per_block; | 1896 | end = (start_blk + readed) * sit_i->sents_per_block; |
1732 | 1897 | ||
1733 | for (; start < end && start < TOTAL_SEGS(sbi); start++) { | 1898 | for (; start < end && start < MAIN_SEGS(sbi); start++) { |
1734 | struct seg_entry *se = &sit_i->sentries[start]; | 1899 | struct seg_entry *se = &sit_i->sentries[start]; |
1735 | struct f2fs_sit_block *sit_blk; | 1900 | struct f2fs_sit_block *sit_blk; |
1736 | struct f2fs_sit_entry sit; | 1901 | struct f2fs_sit_entry sit; |
@@ -1768,7 +1933,7 @@ static void init_free_segmap(struct f2fs_sb_info *sbi) | |||
1768 | unsigned int start; | 1933 | unsigned int start; |
1769 | int type; | 1934 | int type; |
1770 | 1935 | ||
1771 | for (start = 0; start < TOTAL_SEGS(sbi); start++) { | 1936 | for (start = 0; start < MAIN_SEGS(sbi); start++) { |
1772 | struct seg_entry *sentry = get_seg_entry(sbi, start); | 1937 | struct seg_entry *sentry = get_seg_entry(sbi, start); |
1773 | if (!sentry->valid_blocks) | 1938 | if (!sentry->valid_blocks) |
1774 | __set_free(sbi, start); | 1939 | __set_free(sbi, start); |
@@ -1785,18 +1950,22 @@ static void init_dirty_segmap(struct f2fs_sb_info *sbi) | |||
1785 | { | 1950 | { |
1786 | struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); | 1951 | struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); |
1787 | struct free_segmap_info *free_i = FREE_I(sbi); | 1952 | struct free_segmap_info *free_i = FREE_I(sbi); |
1788 | unsigned int segno = 0, offset = 0, total_segs = TOTAL_SEGS(sbi); | 1953 | unsigned int segno = 0, offset = 0; |
1789 | unsigned short valid_blocks; | 1954 | unsigned short valid_blocks; |
1790 | 1955 | ||
1791 | while (1) { | 1956 | while (1) { |
1792 | /* find dirty segment based on free segmap */ | 1957 | /* find dirty segment based on free segmap */ |
1793 | segno = find_next_inuse(free_i, total_segs, offset); | 1958 | segno = find_next_inuse(free_i, MAIN_SEGS(sbi), offset); |
1794 | if (segno >= total_segs) | 1959 | if (segno >= MAIN_SEGS(sbi)) |
1795 | break; | 1960 | break; |
1796 | offset = segno + 1; | 1961 | offset = segno + 1; |
1797 | valid_blocks = get_valid_blocks(sbi, segno, 0); | 1962 | valid_blocks = get_valid_blocks(sbi, segno, 0); |
1798 | if (valid_blocks >= sbi->blocks_per_seg || !valid_blocks) | 1963 | if (valid_blocks == sbi->blocks_per_seg || !valid_blocks) |
1964 | continue; | ||
1965 | if (valid_blocks > sbi->blocks_per_seg) { | ||
1966 | f2fs_bug_on(sbi, 1); | ||
1799 | continue; | 1967 | continue; |
1968 | } | ||
1800 | mutex_lock(&dirty_i->seglist_lock); | 1969 | mutex_lock(&dirty_i->seglist_lock); |
1801 | __locate_dirty_segment(sbi, segno, DIRTY); | 1970 | __locate_dirty_segment(sbi, segno, DIRTY); |
1802 | mutex_unlock(&dirty_i->seglist_lock); | 1971 | mutex_unlock(&dirty_i->seglist_lock); |
@@ -1806,7 +1975,7 @@ static void init_dirty_segmap(struct f2fs_sb_info *sbi) | |||
1806 | static int init_victim_secmap(struct f2fs_sb_info *sbi) | 1975 | static int init_victim_secmap(struct f2fs_sb_info *sbi) |
1807 | { | 1976 | { |
1808 | struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); | 1977 | struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); |
1809 | unsigned int bitmap_size = f2fs_bitmap_size(TOTAL_SECS(sbi)); | 1978 | unsigned int bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi)); |
1810 | 1979 | ||
1811 | dirty_i->victim_secmap = kzalloc(bitmap_size, GFP_KERNEL); | 1980 | dirty_i->victim_secmap = kzalloc(bitmap_size, GFP_KERNEL); |
1812 | if (!dirty_i->victim_secmap) | 1981 | if (!dirty_i->victim_secmap) |
@@ -1827,7 +1996,7 @@ static int build_dirty_segmap(struct f2fs_sb_info *sbi) | |||
1827 | SM_I(sbi)->dirty_info = dirty_i; | 1996 | SM_I(sbi)->dirty_info = dirty_i; |
1828 | mutex_init(&dirty_i->seglist_lock); | 1997 | mutex_init(&dirty_i->seglist_lock); |
1829 | 1998 | ||
1830 | bitmap_size = f2fs_bitmap_size(TOTAL_SEGS(sbi)); | 1999 | bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi)); |
1831 | 2000 | ||
1832 | for (i = 0; i < NR_DIRTY_TYPE; i++) { | 2001 | for (i = 0; i < NR_DIRTY_TYPE; i++) { |
1833 | dirty_i->dirty_segmap[i] = kzalloc(bitmap_size, GFP_KERNEL); | 2002 | dirty_i->dirty_segmap[i] = kzalloc(bitmap_size, GFP_KERNEL); |
@@ -1851,7 +2020,7 @@ static void init_min_max_mtime(struct f2fs_sb_info *sbi) | |||
1851 | 2020 | ||
1852 | sit_i->min_mtime = LLONG_MAX; | 2021 | sit_i->min_mtime = LLONG_MAX; |
1853 | 2022 | ||
1854 | for (segno = 0; segno < TOTAL_SEGS(sbi); segno += sbi->segs_per_sec) { | 2023 | for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) { |
1855 | unsigned int i; | 2024 | unsigned int i; |
1856 | unsigned long long mtime = 0; | 2025 | unsigned long long mtime = 0; |
1857 | 2026 | ||
@@ -1889,13 +2058,16 @@ int build_segment_manager(struct f2fs_sb_info *sbi) | |||
1889 | sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr); | 2058 | sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr); |
1890 | sm_info->rec_prefree_segments = sm_info->main_segments * | 2059 | sm_info->rec_prefree_segments = sm_info->main_segments * |
1891 | DEF_RECLAIM_PREFREE_SEGMENTS / 100; | 2060 | DEF_RECLAIM_PREFREE_SEGMENTS / 100; |
1892 | sm_info->ipu_policy = F2FS_IPU_DISABLE; | 2061 | sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC; |
1893 | sm_info->min_ipu_util = DEF_MIN_IPU_UTIL; | 2062 | sm_info->min_ipu_util = DEF_MIN_IPU_UTIL; |
2063 | sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS; | ||
1894 | 2064 | ||
1895 | INIT_LIST_HEAD(&sm_info->discard_list); | 2065 | INIT_LIST_HEAD(&sm_info->discard_list); |
1896 | sm_info->nr_discards = 0; | 2066 | sm_info->nr_discards = 0; |
1897 | sm_info->max_discards = 0; | 2067 | sm_info->max_discards = 0; |
1898 | 2068 | ||
2069 | INIT_LIST_HEAD(&sm_info->sit_entry_set); | ||
2070 | |||
1899 | if (test_opt(sbi, FLUSH_MERGE) && !f2fs_readonly(sbi->sb)) { | 2071 | if (test_opt(sbi, FLUSH_MERGE) && !f2fs_readonly(sbi->sb)) { |
1900 | err = create_flush_cmd_control(sbi); | 2072 | err = create_flush_cmd_control(sbi); |
1901 | if (err) | 2073 | if (err) |
@@ -1991,7 +2163,7 @@ static void destroy_sit_info(struct f2fs_sb_info *sbi) | |||
1991 | return; | 2163 | return; |
1992 | 2164 | ||
1993 | if (sit_i->sentries) { | 2165 | if (sit_i->sentries) { |
1994 | for (start = 0; start < TOTAL_SEGS(sbi); start++) { | 2166 | for (start = 0; start < MAIN_SEGS(sbi); start++) { |
1995 | kfree(sit_i->sentries[start].cur_valid_map); | 2167 | kfree(sit_i->sentries[start].cur_valid_map); |
1996 | kfree(sit_i->sentries[start].ckpt_valid_map); | 2168 | kfree(sit_i->sentries[start].ckpt_valid_map); |
1997 | } | 2169 | } |
@@ -2025,11 +2197,30 @@ int __init create_segment_manager_caches(void) | |||
2025 | discard_entry_slab = f2fs_kmem_cache_create("discard_entry", | 2197 | discard_entry_slab = f2fs_kmem_cache_create("discard_entry", |
2026 | sizeof(struct discard_entry)); | 2198 | sizeof(struct discard_entry)); |
2027 | if (!discard_entry_slab) | 2199 | if (!discard_entry_slab) |
2028 | return -ENOMEM; | 2200 | goto fail; |
2201 | |||
2202 | sit_entry_set_slab = f2fs_kmem_cache_create("sit_entry_set", | ||
2203 | sizeof(struct sit_entry_set)); | ||
2204 | if (!sit_entry_set_slab) | ||
2205 | goto destroy_discard_entry; | ||
2206 | |||
2207 | inmem_entry_slab = f2fs_kmem_cache_create("inmem_page_entry", | ||
2208 | sizeof(struct inmem_pages)); | ||
2209 | if (!inmem_entry_slab) | ||
2210 | goto destroy_sit_entry_set; | ||
2029 | return 0; | 2211 | return 0; |
2212 | |||
2213 | destroy_sit_entry_set: | ||
2214 | kmem_cache_destroy(sit_entry_set_slab); | ||
2215 | destroy_discard_entry: | ||
2216 | kmem_cache_destroy(discard_entry_slab); | ||
2217 | fail: | ||
2218 | return -ENOMEM; | ||
2030 | } | 2219 | } |
2031 | 2220 | ||
2032 | void destroy_segment_manager_caches(void) | 2221 | void destroy_segment_manager_caches(void) |
2033 | { | 2222 | { |
2223 | kmem_cache_destroy(sit_entry_set_slab); | ||
2034 | kmem_cache_destroy(discard_entry_slab); | 2224 | kmem_cache_destroy(discard_entry_slab); |
2225 | kmem_cache_destroy(inmem_entry_slab); | ||
2035 | } | 2226 | } |
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 55973f7b0330..2495bec1c621 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h | |||
@@ -45,16 +45,26 @@ | |||
45 | (secno == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno / \ | 45 | (secno == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno / \ |
46 | sbi->segs_per_sec)) \ | 46 | sbi->segs_per_sec)) \ |
47 | 47 | ||
48 | #define START_BLOCK(sbi, segno) \ | 48 | #define MAIN_BLKADDR(sbi) (SM_I(sbi)->main_blkaddr) |
49 | (SM_I(sbi)->seg0_blkaddr + \ | 49 | #define SEG0_BLKADDR(sbi) (SM_I(sbi)->seg0_blkaddr) |
50 | |||
51 | #define MAIN_SEGS(sbi) (SM_I(sbi)->main_segments) | ||
52 | #define MAIN_SECS(sbi) (sbi->total_sections) | ||
53 | |||
54 | #define TOTAL_SEGS(sbi) (SM_I(sbi)->segment_count) | ||
55 | #define TOTAL_BLKS(sbi) (TOTAL_SEGS(sbi) << sbi->log_blocks_per_seg) | ||
56 | |||
57 | #define MAX_BLKADDR(sbi) (SEG0_BLKADDR(sbi) + TOTAL_BLKS(sbi)) | ||
58 | #define SEGMENT_SIZE(sbi) (1ULL << (sbi->log_blocksize + \ | ||
59 | sbi->log_blocks_per_seg)) | ||
60 | |||
61 | #define START_BLOCK(sbi, segno) (SEG0_BLKADDR(sbi) + \ | ||
50 | (GET_R2L_SEGNO(FREE_I(sbi), segno) << sbi->log_blocks_per_seg)) | 62 | (GET_R2L_SEGNO(FREE_I(sbi), segno) << sbi->log_blocks_per_seg)) |
63 | |||
51 | #define NEXT_FREE_BLKADDR(sbi, curseg) \ | 64 | #define NEXT_FREE_BLKADDR(sbi, curseg) \ |
52 | (START_BLOCK(sbi, curseg->segno) + curseg->next_blkoff) | 65 | (START_BLOCK(sbi, curseg->segno) + curseg->next_blkoff) |
53 | 66 | ||
54 | #define MAIN_BASE_BLOCK(sbi) (SM_I(sbi)->main_blkaddr) | 67 | #define GET_SEGOFF_FROM_SEG0(sbi, blk_addr) ((blk_addr) - SEG0_BLKADDR(sbi)) |
55 | |||
56 | #define GET_SEGOFF_FROM_SEG0(sbi, blk_addr) \ | ||
57 | ((blk_addr) - SM_I(sbi)->seg0_blkaddr) | ||
58 | #define GET_SEGNO_FROM_SEG0(sbi, blk_addr) \ | 68 | #define GET_SEGNO_FROM_SEG0(sbi, blk_addr) \ |
59 | (GET_SEGOFF_FROM_SEG0(sbi, blk_addr) >> sbi->log_blocks_per_seg) | 69 | (GET_SEGOFF_FROM_SEG0(sbi, blk_addr) >> sbi->log_blocks_per_seg) |
60 | #define GET_BLKOFF_FROM_SEG0(sbi, blk_addr) \ | 70 | #define GET_BLKOFF_FROM_SEG0(sbi, blk_addr) \ |
@@ -77,23 +87,21 @@ | |||
77 | 87 | ||
78 | #define SIT_ENTRY_OFFSET(sit_i, segno) \ | 88 | #define SIT_ENTRY_OFFSET(sit_i, segno) \ |
79 | (segno % sit_i->sents_per_block) | 89 | (segno % sit_i->sents_per_block) |
80 | #define SIT_BLOCK_OFFSET(sit_i, segno) \ | 90 | #define SIT_BLOCK_OFFSET(segno) \ |
81 | (segno / SIT_ENTRY_PER_BLOCK) | 91 | (segno / SIT_ENTRY_PER_BLOCK) |
82 | #define START_SEGNO(sit_i, segno) \ | 92 | #define START_SEGNO(segno) \ |
83 | (SIT_BLOCK_OFFSET(sit_i, segno) * SIT_ENTRY_PER_BLOCK) | 93 | (SIT_BLOCK_OFFSET(segno) * SIT_ENTRY_PER_BLOCK) |
84 | #define SIT_BLK_CNT(sbi) \ | 94 | #define SIT_BLK_CNT(sbi) \ |
85 | ((TOTAL_SEGS(sbi) + SIT_ENTRY_PER_BLOCK - 1) / SIT_ENTRY_PER_BLOCK) | 95 | ((MAIN_SEGS(sbi) + SIT_ENTRY_PER_BLOCK - 1) / SIT_ENTRY_PER_BLOCK) |
86 | #define f2fs_bitmap_size(nr) \ | 96 | #define f2fs_bitmap_size(nr) \ |
87 | (BITS_TO_LONGS(nr) * sizeof(unsigned long)) | 97 | (BITS_TO_LONGS(nr) * sizeof(unsigned long)) |
88 | #define TOTAL_SEGS(sbi) (SM_I(sbi)->main_segments) | ||
89 | #define TOTAL_SECS(sbi) (sbi->total_sections) | ||
90 | 98 | ||
91 | #define SECTOR_FROM_BLOCK(sbi, blk_addr) \ | 99 | #define SECTOR_FROM_BLOCK(blk_addr) \ |
92 | (((sector_t)blk_addr) << (sbi)->log_sectors_per_block) | 100 | (((sector_t)blk_addr) << F2FS_LOG_SECTORS_PER_BLOCK) |
93 | #define SECTOR_TO_BLOCK(sbi, sectors) \ | 101 | #define SECTOR_TO_BLOCK(sectors) \ |
94 | (sectors >> (sbi)->log_sectors_per_block) | 102 | (sectors >> F2FS_LOG_SECTORS_PER_BLOCK) |
95 | #define MAX_BIO_BLOCKS(max_hw_blocks) \ | 103 | #define MAX_BIO_BLOCKS(sbi) \ |
96 | (min((int)max_hw_blocks, BIO_MAX_PAGES)) | 104 | ((int)min((int)max_hw_blocks(sbi), BIO_MAX_PAGES)) |
97 | 105 | ||
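SECTOR_FROM_BLOCK/SECTOR_TO_BLOCK lose their sbi argument because the sector-per-block ratio is now the compile-time constant F2FS_LOG_SECTORS_PER_BLOCK rather than a per-superblock field. Assuming the usual 4096-byte block over 512-byte sectors (log ratio 3), the conversion is a plain shift:

#include <stdio.h>

#define F2FS_LOG_SECTORS_PER_BLOCK 3    /* 4096 / 512 */

int main(void)
{
        unsigned long long blk = 1536;
        unsigned long long sect = blk << F2FS_LOG_SECTORS_PER_BLOCK;

        printf("block %llu -> sector %llu\n", blk, sect);       /* 12288 */
        printf("sector %llu -> block %llu\n", sect,
               sect >> F2FS_LOG_SECTORS_PER_BLOCK);             /* 1536 */
        return 0;
}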
98 | /* | 106 | /* |
99 | * indicate a block allocation direction: RIGHT and LEFT. | 107 | * indicate a block allocation direction: RIGHT and LEFT. |
@@ -167,6 +175,11 @@ struct segment_allocation { | |||
167 | void (*allocate_segment)(struct f2fs_sb_info *, int, bool); | 175 | void (*allocate_segment)(struct f2fs_sb_info *, int, bool); |
168 | }; | 176 | }; |
169 | 177 | ||
178 | struct inmem_pages { | ||
179 | struct list_head list; | ||
180 | struct page *page; | ||
181 | }; | ||
182 | |||
170 | struct sit_info { | 183 | struct sit_info { |
171 | const struct segment_allocation *s_ops; | 184 | const struct segment_allocation *s_ops; |
172 | 185 | ||
@@ -237,6 +250,12 @@ struct curseg_info { | |||
237 | unsigned int next_segno; /* preallocated segment */ | 250 | unsigned int next_segno; /* preallocated segment */ |
238 | }; | 251 | }; |
239 | 252 | ||
253 | struct sit_entry_set { | ||
254 | struct list_head set_list; /* link with all sit sets */ | ||
255 | unsigned int start_segno; /* start segno of sits in set */ | ||
256 | unsigned int entry_cnt; /* the # of sit entries in set */ | ||
257 | }; | ||
258 | |||
240 | /* | 259 | /* |
241 | * inline functions | 260 | * inline functions |
242 | */ | 261 | */ |
@@ -316,7 +335,7 @@ static inline void __set_free(struct f2fs_sb_info *sbi, unsigned int segno) | |||
316 | clear_bit(segno, free_i->free_segmap); | 335 | clear_bit(segno, free_i->free_segmap); |
317 | free_i->free_segments++; | 336 | free_i->free_segments++; |
318 | 337 | ||
319 | next = find_next_bit(free_i->free_segmap, TOTAL_SEGS(sbi), start_segno); | 338 | next = find_next_bit(free_i->free_segmap, MAIN_SEGS(sbi), start_segno); |
320 | if (next >= start_segno + sbi->segs_per_sec) { | 339 | if (next >= start_segno + sbi->segs_per_sec) { |
321 | clear_bit(secno, free_i->free_secmap); | 340 | clear_bit(secno, free_i->free_secmap); |
322 | free_i->free_sections++; | 341 | free_i->free_sections++; |
@@ -430,8 +449,10 @@ static inline int reserved_sections(struct f2fs_sb_info *sbi) | |||
430 | 449 | ||
431 | static inline bool need_SSR(struct f2fs_sb_info *sbi) | 450 | static inline bool need_SSR(struct f2fs_sb_info *sbi) |
432 | { | 451 | { |
433 | return (prefree_segments(sbi) / sbi->segs_per_sec) | 452 | int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES); |
434 | + free_sections(sbi) < overprovision_sections(sbi); | 453 | int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS); |
454 | return free_sections(sbi) <= (node_secs + 2 * dent_secs + | ||
455 | reserved_sections(sbi) + 1); | ||
435 | } | 456 | } |
436 | 457 | ||
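The new need_SSR() predicate is plain arithmetic: SSR is triggered once the free sections can no longer absorb the dirty-node backlog, twice the dirty-dentry backlog, the reserved sections, and one spare. A sketch with invented numbers:

#include <stdbool.h>
#include <stdio.h>

static bool need_ssr(int free_secs, int node_secs, int dent_secs,
                     int reserved_secs)
{
        return free_secs <= node_secs + 2 * dent_secs + reserved_secs + 1;
}

int main(void)
{
        /* 10 free sections vs 2 dirty-node + 2*1 dirty-dentry
         * + 6 reserved + 1 spare = 11, so SSR kicks in */
        printf("need_SSR = %d\n", need_ssr(10, 2, 1, 6));       /* 1 */
        return 0;
}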
437 | static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, int freed) | 458 | static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, int freed) |
@@ -466,48 +487,47 @@ static inline int utilization(struct f2fs_sb_info *sbi) | |||
466 | * F2FS_IPU_UTIL - if FS utilization is over threashold, | 487 | * F2FS_IPU_UTIL - if FS utilization is over threashold, |
467 | * F2FS_IPU_SSR_UTIL - if SSR mode is activated and FS utilization is over | 488 | * F2FS_IPU_SSR_UTIL - if SSR mode is activated and FS utilization is over |
468 | * threashold, | 489 | * threashold, |
490 | * F2FS_IPU_FSYNC - activated in fsync path only for high performance flash | ||
491 | * storages. IPU will be triggered only if the # of dirty | ||
492 | * pages over min_fsync_blocks. | ||
469 | * F2FS_IPUT_DISABLE - disable IPU. (=default option) | 493 | * F2FS_IPUT_DISABLE - disable IPU. (=default option) |
470 | */ | 494 | */ |
471 | #define DEF_MIN_IPU_UTIL 70 | 495 | #define DEF_MIN_IPU_UTIL 70 |
496 | #define DEF_MIN_FSYNC_BLOCKS 8 | ||
472 | 497 | ||
473 | enum { | 498 | enum { |
474 | F2FS_IPU_FORCE, | 499 | F2FS_IPU_FORCE, |
475 | F2FS_IPU_SSR, | 500 | F2FS_IPU_SSR, |
476 | F2FS_IPU_UTIL, | 501 | F2FS_IPU_UTIL, |
477 | F2FS_IPU_SSR_UTIL, | 502 | F2FS_IPU_SSR_UTIL, |
478 | F2FS_IPU_DISABLE, | 503 | F2FS_IPU_FSYNC, |
479 | }; | 504 | }; |
480 | 505 | ||
481 | static inline bool need_inplace_update(struct inode *inode) | 506 | static inline bool need_inplace_update(struct inode *inode) |
482 | { | 507 | { |
483 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | 508 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); |
509 | unsigned int policy = SM_I(sbi)->ipu_policy; | ||
484 | 510 | ||
485 | /* IPU can be done only for the user data */ | 511 | /* IPU can be done only for the user data */ |
486 | if (S_ISDIR(inode->i_mode)) | 512 | if (S_ISDIR(inode->i_mode) || f2fs_is_atomic_file(inode)) |
487 | return false; | 513 | return false; |
488 | 514 | ||
489 | /* this is only set during fdatasync */ | 515 | if (policy & (0x1 << F2FS_IPU_FORCE)) |
490 | if (is_inode_flag_set(F2FS_I(inode), FI_NEED_IPU)) | 516 | return true; |
517 | if (policy & (0x1 << F2FS_IPU_SSR) && need_SSR(sbi)) | ||
518 | return true; | ||
519 | if (policy & (0x1 << F2FS_IPU_UTIL) && | ||
520 | utilization(sbi) > SM_I(sbi)->min_ipu_util) | ||
521 | return true; | ||
522 | if (policy & (0x1 << F2FS_IPU_SSR_UTIL) && need_SSR(sbi) && | ||
523 | utilization(sbi) > SM_I(sbi)->min_ipu_util) | ||
491 | return true; | 524 | return true; |
492 | 525 | ||
493 | switch (SM_I(sbi)->ipu_policy) { | 526 | /* this is only set during fdatasync */ |
494 | case F2FS_IPU_FORCE: | 527 | if (policy & (0x1 << F2FS_IPU_FSYNC) && |
528 | is_inode_flag_set(F2FS_I(inode), FI_NEED_IPU)) | ||
495 | return true; | 529 | return true; |
496 | case F2FS_IPU_SSR: | 530 | |
497 | if (need_SSR(sbi)) | ||
498 | return true; | ||
499 | break; | ||
500 | case F2FS_IPU_UTIL: | ||
501 | if (utilization(sbi) > SM_I(sbi)->min_ipu_util) | ||
502 | return true; | ||
503 | break; | ||
504 | case F2FS_IPU_SSR_UTIL: | ||
505 | if (need_SSR(sbi) && utilization(sbi) > SM_I(sbi)->min_ipu_util) | ||
506 | return true; | ||
507 | break; | ||
508 | case F2FS_IPU_DISABLE: | ||
509 | break; | ||
510 | } | ||
511 | return false; | 531 | return false; |
512 | } | 532 | } |
513 | 533 | ||
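With ipu_policy now treated as a bitmask rather than a single enum value, several in-place-update triggers can be active at once, where the old switch allowed exactly one. A compact sketch of the new check; the enum mirrors the one above, and the three predicate inputs are parameters standing in for sbi/inode state:

```c
/*
 * Sketch of the bitmask-style IPU policy test that replaces the
 * old switch statement.
 */
enum { IPU_FORCE, IPU_SSR, IPU_UTIL, IPU_SSR_UTIL, IPU_FSYNC };

static int need_ipu(unsigned int policy, int ssr, int util_high,
		    int fsync_marked)
{
	if (policy & (1u << IPU_FORCE))
		return 1;
	if ((policy & (1u << IPU_SSR)) && ssr)
		return 1;
	if ((policy & (1u << IPU_UTIL)) && util_high)
		return 1;
	if ((policy & (1u << IPU_SSR_UTIL)) && ssr && util_high)
		return 1;
	if ((policy & (1u << IPU_FSYNC)) && fsync_marked)
		return 1;
	return 0;
}
```

Writing e.g. `(1 << IPU_SSR) | (1 << IPU_FSYNC)` to the sysfs knob enables both triggers, and 0 now means disabled, which is why F2FS_IPU_DISABLE drops out of the enum in favour of F2FS_IPU_FSYNC.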
@@ -534,28 +554,21 @@ static inline unsigned short curseg_blkoff(struct f2fs_sb_info *sbi, int type) | |||
534 | #ifdef CONFIG_F2FS_CHECK_FS | 554 | #ifdef CONFIG_F2FS_CHECK_FS |
535 | static inline void check_seg_range(struct f2fs_sb_info *sbi, unsigned int segno) | 555 | static inline void check_seg_range(struct f2fs_sb_info *sbi, unsigned int segno) |
536 | { | 556 | { |
537 | unsigned int end_segno = SM_I(sbi)->segment_count - 1; | 557 | BUG_ON(segno > TOTAL_SEGS(sbi) - 1); |
538 | BUG_ON(segno > end_segno); | ||
539 | } | 558 | } |
540 | 559 | ||
541 | static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t blk_addr) | 560 | static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t blk_addr) |
542 | { | 561 | { |
543 | struct f2fs_sm_info *sm_info = SM_I(sbi); | 562 | BUG_ON(blk_addr < SEG0_BLKADDR(sbi)); |
544 | block_t total_blks = sm_info->segment_count << sbi->log_blocks_per_seg; | 563 | BUG_ON(blk_addr >= MAX_BLKADDR(sbi)); |
545 | block_t start_addr = sm_info->seg0_blkaddr; | ||
546 | block_t end_addr = start_addr + total_blks - 1; | ||
547 | BUG_ON(blk_addr < start_addr); | ||
548 | BUG_ON(blk_addr > end_addr); | ||
549 | } | 564 | } |
550 | 565 | ||
551 | /* | 566 | /* |
552 | * Summary block is always treated as invalid block | 567 | * Summary block is always treated as an invalid block |
553 | */ | 568 | */ |
554 | static inline void check_block_count(struct f2fs_sb_info *sbi, | 569 | static inline void check_block_count(struct f2fs_sb_info *sbi, |
555 | int segno, struct f2fs_sit_entry *raw_sit) | 570 | int segno, struct f2fs_sit_entry *raw_sit) |
556 | { | 571 | { |
557 | struct f2fs_sm_info *sm_info = SM_I(sbi); | ||
558 | unsigned int end_segno = sm_info->segment_count - 1; | ||
559 | bool is_valid = test_bit_le(0, raw_sit->valid_map) ? true : false; | 572 | bool is_valid = test_bit_le(0, raw_sit->valid_map) ? true : false; |
560 | int valid_blocks = 0; | 573 | int valid_blocks = 0; |
561 | int cur_pos = 0, next_pos; | 574 | int cur_pos = 0, next_pos; |
@@ -564,7 +577,7 @@ static inline void check_block_count(struct f2fs_sb_info *sbi, | |||
564 | BUG_ON(GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg); | 577 | BUG_ON(GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg); |
565 | 578 | ||
566 | /* check boundary of a given segment number */ | 579 | /* check boundary of a given segment number */ |
567 | BUG_ON(segno > end_segno); | 580 | BUG_ON(segno > TOTAL_SEGS(sbi) - 1); |
568 | 581 | ||
569 | /* check bitmap with valid block count */ | 582 | /* check bitmap with valid block count */ |
570 | do { | 583 | do { |
@@ -583,16 +596,39 @@ static inline void check_block_count(struct f2fs_sb_info *sbi, | |||
583 | BUG_ON(GET_SIT_VBLOCKS(raw_sit) != valid_blocks); | 596 | BUG_ON(GET_SIT_VBLOCKS(raw_sit) != valid_blocks); |
584 | } | 597 | } |
585 | #else | 598 | #else |
586 | #define check_seg_range(sbi, segno) | 599 | static inline void check_seg_range(struct f2fs_sb_info *sbi, unsigned int segno) |
587 | #define verify_block_addr(sbi, blk_addr) | 600 | { |
588 | #define check_block_count(sbi, segno, raw_sit) | 601 | if (segno > TOTAL_SEGS(sbi) - 1) |
602 | sbi->need_fsck = true; | ||
603 | } | ||
604 | |||
605 | static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t blk_addr) | ||
606 | { | ||
607 | if (blk_addr < SEG0_BLKADDR(sbi) || blk_addr >= MAX_BLKADDR(sbi)) | ||
608 | sbi->need_fsck = true; | ||
609 | } | ||
610 | |||
611 | /* | ||
612 | * Summary block is always treated as an invalid block | ||
613 | */ | ||
614 | static inline void check_block_count(struct f2fs_sb_info *sbi, | ||
615 | int segno, struct f2fs_sit_entry *raw_sit) | ||
616 | { | ||
617 | /* check segment usage */ | ||
618 | if (GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg) | ||
619 | sbi->need_fsck = true; | ||
620 | |||
621 | /* check boundary of a given segment number */ | ||
622 | if (segno > TOTAL_SEGS(sbi) - 1) | ||
623 | sbi->need_fsck = true; | ||
624 | } | ||
589 | #endif | 625 | #endif |
590 | 626 | ||
591 | static inline pgoff_t current_sit_addr(struct f2fs_sb_info *sbi, | 627 | static inline pgoff_t current_sit_addr(struct f2fs_sb_info *sbi, |
592 | unsigned int start) | 628 | unsigned int start) |
593 | { | 629 | { |
594 | struct sit_info *sit_i = SIT_I(sbi); | 630 | struct sit_info *sit_i = SIT_I(sbi); |
595 | unsigned int offset = SIT_BLOCK_OFFSET(sit_i, start); | 631 | unsigned int offset = SIT_BLOCK_OFFSET(start); |
596 | block_t blk_addr = sit_i->sit_base_addr + offset; | 632 | block_t blk_addr = sit_i->sit_base_addr + offset; |
597 | 633 | ||
598 | check_seg_range(sbi, start); | 634 | check_seg_range(sbi, start); |
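Outside CONFIG_F2FS_CHECK_FS the old macros compiled to nothing, so a corrupt SIT went entirely unnoticed; the new inline fallbacks above record the inconsistency in sbi->need_fsck instead of crashing. A small model of the debug/production split, with `CHECK_FS` standing in for the Kconfig option:

```c
#include <assert.h>
#include <stdbool.h>

struct sb_model { bool need_fsck; };

#ifdef CHECK_FS
/* Debug builds: an out-of-range segment is a hard bug. */
static void check_seg_range_model(struct sb_model *sbi,
				  unsigned segno, unsigned total)
{
	(void)sbi;
	assert(segno < total);          /* BUG_ON in the kernel */
}
#else
/* Production builds: latch a "run fsck" flag and keep going. */
static void check_seg_range_model(struct sb_model *sbi,
				  unsigned segno, unsigned total)
{
	if (segno >= total)
		sbi->need_fsck = true;
}
#endif
```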
@@ -619,7 +655,7 @@ static inline pgoff_t next_sit_addr(struct f2fs_sb_info *sbi, | |||
619 | 655 | ||
620 | static inline void set_to_next_sit(struct sit_info *sit_i, unsigned int start) | 656 | static inline void set_to_next_sit(struct sit_info *sit_i, unsigned int start) |
621 | { | 657 | { |
622 | unsigned int block_off = SIT_BLOCK_OFFSET(sit_i, start); | 658 | unsigned int block_off = SIT_BLOCK_OFFSET(start); |
623 | 659 | ||
624 | if (f2fs_test_bit(block_off, sit_i->sit_bitmap)) | 660 | if (f2fs_test_bit(block_off, sit_i->sit_bitmap)) |
625 | f2fs_clear_bit(block_off, sit_i->sit_bitmap); | 661 | f2fs_clear_bit(block_off, sit_i->sit_bitmap); |
@@ -666,7 +702,7 @@ static inline unsigned int max_hw_blocks(struct f2fs_sb_info *sbi) | |||
666 | { | 702 | { |
667 | struct block_device *bdev = sbi->sb->s_bdev; | 703 | struct block_device *bdev = sbi->sb->s_bdev; |
668 | struct request_queue *q = bdev_get_queue(bdev); | 704 | struct request_queue *q = bdev_get_queue(bdev); |
669 | return SECTOR_TO_BLOCK(sbi, queue_max_sectors(q)); | 705 | return SECTOR_TO_BLOCK(queue_max_sectors(q)); |
670 | } | 706 | } |
671 | 707 | ||
672 | /* | 708 | /* |
@@ -683,7 +719,7 @@ static inline int nr_pages_to_skip(struct f2fs_sb_info *sbi, int type) | |||
683 | else if (type == NODE) | 719 | else if (type == NODE) |
684 | return 3 * sbi->blocks_per_seg; | 720 | return 3 * sbi->blocks_per_seg; |
685 | else if (type == META) | 721 | else if (type == META) |
686 | return MAX_BIO_BLOCKS(max_hw_blocks(sbi)); | 722 | return MAX_BIO_BLOCKS(sbi); |
687 | else | 723 | else |
688 | return 0; | 724 | return 0; |
689 | } | 725 | } |
@@ -706,7 +742,7 @@ static inline long nr_pages_to_write(struct f2fs_sb_info *sbi, int type, | |||
706 | else if (type == NODE) | 742 | else if (type == NODE) |
707 | desired = 3 * max_hw_blocks(sbi); | 743 | desired = 3 * max_hw_blocks(sbi); |
708 | else | 744 | else |
709 | desired = MAX_BIO_BLOCKS(max_hw_blocks(sbi)); | 745 | desired = MAX_BIO_BLOCKS(sbi); |
710 | 746 | ||
711 | wbc->nr_to_write = desired; | 747 | wbc->nr_to_write = desired; |
712 | return desired - nr_to_write; | 748 | return desired - nr_to_write; |
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 657582fc7601..41d6f700f4ee 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c | |||
@@ -190,6 +190,7 @@ F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments); | |||
190 | F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, max_small_discards, max_discards); | 190 | F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, max_small_discards, max_discards); |
191 | F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy); | 191 | F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy); |
192 | F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util); | 192 | F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util); |
193 | F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks); | ||
193 | F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh); | 194 | F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh); |
194 | F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search); | 195 | F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search); |
195 | F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, dir_level, dir_level); | 196 | F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, dir_level, dir_level); |
@@ -204,6 +205,7 @@ static struct attribute *f2fs_attrs[] = { | |||
204 | ATTR_LIST(max_small_discards), | 205 | ATTR_LIST(max_small_discards), |
205 | ATTR_LIST(ipu_policy), | 206 | ATTR_LIST(ipu_policy), |
206 | ATTR_LIST(min_ipu_util), | 207 | ATTR_LIST(min_ipu_util), |
208 | ATTR_LIST(min_fsync_blocks), | ||
207 | ATTR_LIST(max_victim_search), | 209 | ATTR_LIST(max_victim_search), |
208 | ATTR_LIST(dir_level), | 210 | ATTR_LIST(dir_level), |
209 | ATTR_LIST(ram_thresh), | 211 | ATTR_LIST(ram_thresh), |
@@ -366,11 +368,13 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb) | |||
366 | 368 | ||
367 | /* Initialize f2fs-specific inode info */ | 369 | /* Initialize f2fs-specific inode info */ |
368 | fi->vfs_inode.i_version = 1; | 370 | fi->vfs_inode.i_version = 1; |
369 | atomic_set(&fi->dirty_dents, 0); | 371 | atomic_set(&fi->dirty_pages, 0); |
370 | fi->i_current_depth = 1; | 372 | fi->i_current_depth = 1; |
371 | fi->i_advise = 0; | 373 | fi->i_advise = 0; |
372 | rwlock_init(&fi->ext.ext_lock); | 374 | rwlock_init(&fi->ext.ext_lock); |
373 | init_rwsem(&fi->i_sem); | 375 | init_rwsem(&fi->i_sem); |
376 | INIT_LIST_HEAD(&fi->inmem_pages); | ||
377 | mutex_init(&fi->inmem_lock); | ||
374 | 378 | ||
375 | set_inode_flag(fi, FI_NEW_INODE); | 379 | set_inode_flag(fi, FI_NEW_INODE); |
376 | 380 | ||
@@ -432,8 +436,19 @@ static void f2fs_put_super(struct super_block *sb) | |||
432 | stop_gc_thread(sbi); | 436 | stop_gc_thread(sbi); |
433 | 437 | ||
434 | /* We don't need to do checkpoint when it's clean */ | 438 | /* We don't need to do checkpoint when it's clean */ |
435 | if (sbi->s_dirty && get_pages(sbi, F2FS_DIRTY_NODES)) | 439 | if (sbi->s_dirty) { |
436 | write_checkpoint(sbi, true); | 440 | struct cp_control cpc = { |
441 | .reason = CP_UMOUNT, | ||
442 | }; | ||
443 | write_checkpoint(sbi, &cpc); | ||
444 | } | ||
445 | |||
446 | /* | ||
447 | * Normally the superblock is clean, so we need to release this. | ||
448 | * In addition, an EIO will skip the checkpoint, so we need this as well. | ||
449 | */ | ||
450 | release_dirty_inode(sbi); | ||
451 | release_discard_addrs(sbi); | ||
437 | 452 | ||
438 | iput(sbi->node_inode); | 453 | iput(sbi->node_inode); |
439 | iput(sbi->meta_inode); | 454 | iput(sbi->meta_inode); |
@@ -457,12 +472,12 @@ int f2fs_sync_fs(struct super_block *sb, int sync) | |||
457 | 472 | ||
458 | trace_f2fs_sync_fs(sb, sync); | 473 | trace_f2fs_sync_fs(sb, sync); |
459 | 474 | ||
460 | if (!sbi->s_dirty && !get_pages(sbi, F2FS_DIRTY_NODES)) | ||
461 | return 0; | ||
462 | |||
463 | if (sync) { | 475 | if (sync) { |
476 | struct cp_control cpc = { | ||
477 | .reason = CP_SYNC, | ||
478 | }; | ||
464 | mutex_lock(&sbi->gc_mutex); | 479 | mutex_lock(&sbi->gc_mutex); |
465 | write_checkpoint(sbi, false); | 480 | write_checkpoint(sbi, &cpc); |
466 | mutex_unlock(&sbi->gc_mutex); | 481 | mutex_unlock(&sbi->gc_mutex); |
467 | } else { | 482 | } else { |
468 | f2fs_balance_fs(sbi); | 483 | f2fs_balance_fs(sbi); |
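write_checkpoint() now takes a cp_control carrying the reason (CP_UMOUNT in the unmount hunk above, CP_SYNC here) instead of a bare bool, so the checkpoint path can branch on intent and grow new parameters without another signature change. A sketch of the pattern; write_checkpoint_model() is a hypothetical stand-in for the real function:

```c
enum cp_reason { CP_UMOUNT, CP_SYNC };

struct cp_control {
	enum cp_reason reason;
	/* future checkpoint parameters can be added here without
	 * touching every write_checkpoint() caller again */
};

static void write_checkpoint_model(const struct cp_control *cpc)
{
	if (cpc->reason == CP_UMOUNT) {
		/* final checkpoint: also flush discard/orphan state */
	}
	/* ... write the checkpoint itself ... */
}
```

Callers build the struct on the stack, e.g. `struct cp_control cpc = { .reason = CP_SYNC };`, and pass its address, exactly as the hunks above do.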
@@ -505,8 +520,8 @@ static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
505 | buf->f_bfree = buf->f_blocks - valid_user_blocks(sbi) - ovp_count; | 520 | buf->f_bfree = buf->f_blocks - valid_user_blocks(sbi) - ovp_count; |
506 | buf->f_bavail = user_block_count - valid_user_blocks(sbi); | 521 | buf->f_bavail = user_block_count - valid_user_blocks(sbi); |
507 | 522 | ||
508 | buf->f_files = sbi->total_node_count; | 523 | buf->f_files = sbi->total_node_count - F2FS_RESERVED_NODE_NUM; |
509 | buf->f_ffree = sbi->total_node_count - valid_inode_count(sbi); | 524 | buf->f_ffree = buf->f_files - valid_inode_count(sbi); |
510 | 525 | ||
511 | buf->f_namelen = F2FS_NAME_LEN; | 526 | buf->f_namelen = F2FS_NAME_LEN; |
512 | buf->f_fsid.val[0] = (u32)id; | 527 | buf->f_fsid.val[0] = (u32)id; |
@@ -613,6 +628,9 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) | |||
613 | org_mount_opt = sbi->mount_opt; | 628 | org_mount_opt = sbi->mount_opt; |
614 | active_logs = sbi->active_logs; | 629 | active_logs = sbi->active_logs; |
615 | 630 | ||
631 | sbi->mount_opt.opt = 0; | ||
632 | sbi->active_logs = NR_CURSEG_TYPE; | ||
633 | |||
616 | /* parse mount options */ | 634 | /* parse mount options */ |
617 | err = parse_options(sb, data); | 635 | err = parse_options(sb, data); |
618 | if (err) | 636 | if (err) |
@@ -663,7 +681,7 @@ restore_gc: | |||
663 | if (need_restart_gc) { | 681 | if (need_restart_gc) { |
664 | if (start_gc_thread(sbi)) | 682 | if (start_gc_thread(sbi)) |
665 | f2fs_msg(sbi->sb, KERN_WARNING, | 683 | f2fs_msg(sbi->sb, KERN_WARNING, |
666 | "background gc thread is stop"); | 684 | "background gc thread has stopped"); |
667 | } else if (need_stop_gc) { | 685 | } else if (need_stop_gc) { |
668 | stop_gc_thread(sbi); | 686 | stop_gc_thread(sbi); |
669 | } | 687 | } |
@@ -783,14 +801,22 @@ static int sanity_check_raw_super(struct super_block *sb, | |||
783 | return 1; | 801 | return 1; |
784 | } | 802 | } |
785 | 803 | ||
786 | if (le32_to_cpu(raw_super->log_sectorsize) != | 804 | /* Currently, support 512/1024/2048/4096 bytes sector size */ |
787 | F2FS_LOG_SECTOR_SIZE) { | 805 | if (le32_to_cpu(raw_super->log_sectorsize) > |
788 | f2fs_msg(sb, KERN_INFO, "Invalid log sectorsize"); | 806 | F2FS_MAX_LOG_SECTOR_SIZE || |
807 | le32_to_cpu(raw_super->log_sectorsize) < | ||
808 | F2FS_MIN_LOG_SECTOR_SIZE) { | ||
809 | f2fs_msg(sb, KERN_INFO, "Invalid log sectorsize (%u)", | ||
810 | le32_to_cpu(raw_super->log_sectorsize)); | ||
789 | return 1; | 811 | return 1; |
790 | } | 812 | } |
791 | if (le32_to_cpu(raw_super->log_sectors_per_block) != | 813 | if (le32_to_cpu(raw_super->log_sectors_per_block) + |
792 | F2FS_LOG_SECTORS_PER_BLOCK) { | 814 | le32_to_cpu(raw_super->log_sectorsize) != |
793 | f2fs_msg(sb, KERN_INFO, "Invalid log sectors per block"); | 815 | F2FS_MAX_LOG_SECTOR_SIZE) { |
816 | f2fs_msg(sb, KERN_INFO, | ||
817 | "Invalid log sectors per block(%u) log sectorsize(%u)", | ||
818 | le32_to_cpu(raw_super->log_sectors_per_block), | ||
819 | le32_to_cpu(raw_super->log_sectorsize)); | ||
794 | return 1; | 820 | return 1; |
795 | } | 821 | } |
796 | return 0; | 822 | return 0; |
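The relaxed check accepts any sector size from 512 to 4096 bytes, provided sectors-per-block still multiplies out to the fixed 4KB f2fs block; in log2 terms the two on-disk fields must sum to 12. A userspace model, with the constants assumed to match F2FS_MIN_LOG_SECTOR_SIZE and F2FS_MAX_LOG_SECTOR_SIZE:

```c
#define MIN_LOG_SECTOR_SIZE 9    /* 512B  */
#define MAX_LOG_SECTOR_SIZE 12   /* 4096B */

/* Return nonzero if the superblock's sector geometry is sane. */
static int sector_geometry_ok(unsigned log_sectorsize,
			      unsigned log_sectors_per_block)
{
	if (log_sectorsize < MIN_LOG_SECTOR_SIZE ||
	    log_sectorsize > MAX_LOG_SECTOR_SIZE)
		return 0;
	/* sector_size * sectors_per_block must equal the 4KB block */
	return log_sectorsize + log_sectors_per_block == MAX_LOG_SECTOR_SIZE;
}
/* e.g. 512B sectors: (9, 3) -> ok; 4KB sectors: (12, 0) -> ok. */
```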
@@ -812,7 +838,7 @@ static int sanity_check_ckpt(struct f2fs_sb_info *sbi) | |||
812 | if (unlikely(fsmeta >= total)) | 838 | if (unlikely(fsmeta >= total)) |
813 | return 1; | 839 | return 1; |
814 | 840 | ||
815 | if (unlikely(is_set_ckpt_flags(ckpt, CP_ERROR_FLAG))) { | 841 | if (unlikely(f2fs_cp_error(sbi))) { |
816 | f2fs_msg(sbi->sb, KERN_ERR, "A bug case: need to run fsck"); | 842 | f2fs_msg(sbi->sb, KERN_ERR, "A bug case: need to run fsck"); |
817 | return 1; | 843 | return 1; |
818 | } | 844 | } |
@@ -846,6 +872,7 @@ static void init_sb_info(struct f2fs_sb_info *sbi) | |||
846 | atomic_set(&sbi->nr_pages[i], 0); | 872 | atomic_set(&sbi->nr_pages[i], 0); |
847 | 873 | ||
848 | sbi->dir_level = DEF_DIR_LEVEL; | 874 | sbi->dir_level = DEF_DIR_LEVEL; |
875 | sbi->need_fsck = false; | ||
849 | } | 876 | } |
850 | 877 | ||
851 | /* | 878 | /* |
@@ -899,8 +926,10 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) | |||
899 | struct buffer_head *raw_super_buf; | 926 | struct buffer_head *raw_super_buf; |
900 | struct inode *root; | 927 | struct inode *root; |
901 | long err = -EINVAL; | 928 | long err = -EINVAL; |
929 | bool retry = true; | ||
902 | int i; | 930 | int i; |
903 | 931 | ||
932 | try_onemore: | ||
904 | /* allocate memory for f2fs-specific super block info */ | 933 | /* allocate memory for f2fs-specific super block info */ |
905 | sbi = kzalloc(sizeof(struct f2fs_sb_info), GFP_KERNEL); | 934 | sbi = kzalloc(sizeof(struct f2fs_sb_info), GFP_KERNEL); |
906 | if (!sbi) | 935 | if (!sbi) |
@@ -1077,12 +1106,17 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) | |||
1077 | if (err) | 1106 | if (err) |
1078 | goto free_proc; | 1107 | goto free_proc; |
1079 | 1108 | ||
1109 | if (!retry) | ||
1110 | sbi->need_fsck = true; | ||
1111 | |||
1080 | /* recover fsynced data */ | 1112 | /* recover fsynced data */ |
1081 | if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) { | 1113 | if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) { |
1082 | err = recover_fsync_data(sbi); | 1114 | err = recover_fsync_data(sbi); |
1083 | if (err) | 1115 | if (err) { |
1084 | f2fs_msg(sb, KERN_ERR, | 1116 | f2fs_msg(sb, KERN_ERR, |
1085 | "Cannot recover all fsync data errno=%ld", err); | 1117 | "Cannot recover all fsync data errno=%ld", err); |
1118 | goto free_kobj; | ||
1119 | } | ||
1086 | } | 1120 | } |
1087 | 1121 | ||
1088 | /* | 1122 | /* |
@@ -1123,6 +1157,13 @@ free_sb_buf: | |||
1123 | brelse(raw_super_buf); | 1157 | brelse(raw_super_buf); |
1124 | free_sbi: | 1158 | free_sbi: |
1125 | kfree(sbi); | 1159 | kfree(sbi); |
1160 | |||
1161 | /* give only one more chance */ | ||
1162 | if (retry) { | ||
1163 | retry = 0; | ||
1164 | shrink_dcache_sb(sb); | ||
1165 | goto try_onemore; | ||
1166 | } | ||
1126 | return err; | 1167 | return err; |
1127 | } | 1168 | } |
1128 | 1169 | ||
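The new tail of f2fs_fill_super() retries the whole mount once after any failure, dropping cached dentries first, and the earlier hunk marks the filesystem for fsck when it is the second attempt that sticks. The control flow, reduced to its skeleton; try_fill() is a hypothetical stand-in for the real fill path:

```c
/*
 * Retry-once mount pattern: one extra attempt, and the retry
 * attempt records that recovery was needed.
 */
static int mount_with_retry(int (*try_fill)(int set_need_fsck))
{
	int retry = 1, err;

try_onemore:
	err = try_fill(/* set_need_fsck = */ !retry);
	if (err && retry) {
		retry = 0;
		/* shrink_dcache_sb() in the kernel: drop cached dentries */
		goto try_onemore;
	}
	return err;
}
```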
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 8bea941ee309..deca8728117b 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c | |||
@@ -266,7 +266,7 @@ static struct f2fs_xattr_entry *__find_xattr(void *base_addr, int index, | |||
266 | 266 | ||
267 | static void *read_all_xattrs(struct inode *inode, struct page *ipage) | 267 | static void *read_all_xattrs(struct inode *inode, struct page *ipage) |
268 | { | 268 | { |
269 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | 269 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); |
270 | struct f2fs_xattr_header *header; | 270 | struct f2fs_xattr_header *header; |
271 | size_t size = PAGE_SIZE, inline_size = 0; | 271 | size_t size = PAGE_SIZE, inline_size = 0; |
272 | void *txattr_addr; | 272 | void *txattr_addr; |
@@ -325,7 +325,7 @@ fail: | |||
325 | static inline int write_all_xattrs(struct inode *inode, __u32 hsize, | 325 | static inline int write_all_xattrs(struct inode *inode, __u32 hsize, |
326 | void *txattr_addr, struct page *ipage) | 326 | void *txattr_addr, struct page *ipage) |
327 | { | 327 | { |
328 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | 328 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); |
329 | size_t inline_size = 0; | 329 | size_t inline_size = 0; |
330 | void *xattr_addr; | 330 | void *xattr_addr; |
331 | struct page *xpage; | 331 | struct page *xpage; |
@@ -373,7 +373,7 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize, | |||
373 | alloc_nid_failed(sbi, new_nid); | 373 | alloc_nid_failed(sbi, new_nid); |
374 | return PTR_ERR(xpage); | 374 | return PTR_ERR(xpage); |
375 | } | 375 | } |
376 | f2fs_bug_on(new_nid); | 376 | f2fs_bug_on(sbi, new_nid); |
377 | f2fs_wait_on_page_writeback(xpage, NODE); | 377 | f2fs_wait_on_page_writeback(xpage, NODE); |
378 | } else { | 378 | } else { |
379 | struct dnode_of_data dn; | 379 | struct dnode_of_data dn; |
@@ -528,7 +528,7 @@ static int __f2fs_setxattr(struct inode *inode, int index, | |||
528 | int free; | 528 | int free; |
529 | /* | 529 | /* |
530 | * If value is NULL, it is remove operation. | 530 | * If value is NULL, it is remove operation. |
531 | * In case of update operation, we caculate free. | 531 | * In case of update operation, we calculate free. |
532 | */ | 532 | */ |
533 | free = MIN_OFFSET(inode) - ((char *)last - (char *)base_addr); | 533 | free = MIN_OFFSET(inode) - ((char *)last - (char *)base_addr); |
534 | if (found) | 534 | if (found) |
@@ -596,7 +596,7 @@ int f2fs_setxattr(struct inode *inode, int index, const char *name, | |||
596 | const void *value, size_t size, | 596 | const void *value, size_t size, |
597 | struct page *ipage, int flags) | 597 | struct page *ipage, int flags) |
598 | { | 598 | { |
599 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | 599 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); |
600 | int err; | 600 | int err; |
601 | 601 | ||
602 | /* this case is only from init_inode_metadata */ | 602 | /* this case is only from init_inode_metadata */ |
diff --git a/fs/file_table.c b/fs/file_table.c index 385bfd31512a..0bab12b20460 100644 --- a/fs/file_table.c +++ b/fs/file_table.c | |||
@@ -331,5 +331,5 @@ void __init files_init(unsigned long mempages) | |||
331 | 331 | ||
332 | n = (mempages * (PAGE_SIZE / 1024)) / 10; | 332 | n = (mempages * (PAGE_SIZE / 1024)) / 10; |
333 | files_stat.max_files = max_t(unsigned long, n, NR_FILE); | 333 | files_stat.max_files = max_t(unsigned long, n, NR_FILE); |
334 | percpu_counter_init(&nr_files, 0); | 334 | percpu_counter_init(&nr_files, 0, GFP_KERNEL); |
335 | } | 335 | } |
diff --git a/fs/fscache/object.c b/fs/fscache/object.c index d3b4539f1651..da032daf0e0d 100644 --- a/fs/fscache/object.c +++ b/fs/fscache/object.c | |||
@@ -982,6 +982,7 @@ nomem: | |||
982 | submit_op_failed: | 982 | submit_op_failed: |
983 | clear_bit(FSCACHE_OBJECT_IS_LIVE, &object->flags); | 983 | clear_bit(FSCACHE_OBJECT_IS_LIVE, &object->flags); |
984 | spin_unlock(&cookie->lock); | 984 | spin_unlock(&cookie->lock); |
985 | fscache_unuse_cookie(object); | ||
985 | kfree(op); | 986 | kfree(op); |
986 | _leave(" [EIO]"); | 987 | _leave(" [EIO]"); |
987 | return transit_to(KILL_OBJECT); | 988 | return transit_to(KILL_OBJECT); |
diff --git a/fs/fscache/page.c b/fs/fscache/page.c index 85332b9d19d1..de33b3fccca6 100644 --- a/fs/fscache/page.c +++ b/fs/fscache/page.c | |||
@@ -44,6 +44,19 @@ void __fscache_wait_on_page_write(struct fscache_cookie *cookie, struct page *pa | |||
44 | EXPORT_SYMBOL(__fscache_wait_on_page_write); | 44 | EXPORT_SYMBOL(__fscache_wait_on_page_write); |
45 | 45 | ||
46 | /* | 46 | /* |
47 | * wait for a page to finish being written to the cache. Put a timeout here | ||
48 | * since we might be called recursively via parent fs. | ||
49 | */ | ||
50 | static | ||
51 | bool release_page_wait_timeout(struct fscache_cookie *cookie, struct page *page) | ||
52 | { | ||
53 | wait_queue_head_t *wq = bit_waitqueue(&cookie->flags, 0); | ||
54 | |||
55 | return wait_event_timeout(*wq, !__fscache_check_page_write(cookie, page), | ||
56 | HZ); | ||
57 | } | ||
58 | |||
59 | /* | ||
47 | * decide whether a page can be released, possibly by cancelling a store to it | 60 | * decide whether a page can be released, possibly by cancelling a store to it |
48 | * - we're allowed to sleep if __GFP_WAIT is flagged | 61 | * - we're allowed to sleep if __GFP_WAIT is flagged |
49 | */ | 62 | */ |
@@ -115,7 +128,10 @@ page_busy: | |||
115 | } | 128 | } |
116 | 129 | ||
117 | fscache_stat(&fscache_n_store_vmscan_wait); | 130 | fscache_stat(&fscache_n_store_vmscan_wait); |
118 | __fscache_wait_on_page_write(cookie, page); | 131 | if (!release_page_wait_timeout(cookie, page)) |
132 | _debug("fscache writeout timeout page: %p{%lx}", | ||
133 | page, page->index); | ||
134 | |||
119 | gfp &= ~__GFP_WAIT; | 135 | gfp &= ~__GFP_WAIT; |
120 | goto try_again; | 136 | goto try_again; |
121 | } | 137 | } |
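releasepage previously waited without bound for fscache to finish writing the page out, which can deadlock when the wait re-enters the parent filesystem; the new helper bounds the wait at roughly one second (HZ jiffies) and lets the caller loop. A rough userspace model of the bounded wait — note the kernel sleeps on a wait queue rather than polling:

```c
#include <stdbool.h>
#include <time.h>

/*
 * Poll the "page still being written" condition with a ~1s budget
 * instead of blocking forever; still_writing() is a stand-in for
 * __fscache_check_page_write().
 */
static bool wait_write_timeout(bool (*still_writing)(void))
{
	time_t deadline = time(NULL) + 1;

	while (still_writing()) {
		if (time(NULL) >= deadline)
			return false;   /* timed out; caller retries */
		/* kernel version sleeps on a waitqueue here */
	}
	return true;
}
```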
@@ -182,7 +198,7 @@ int __fscache_attr_changed(struct fscache_cookie *cookie) | |||
182 | { | 198 | { |
183 | struct fscache_operation *op; | 199 | struct fscache_operation *op; |
184 | struct fscache_object *object; | 200 | struct fscache_object *object; |
185 | bool wake_cookie; | 201 | bool wake_cookie = false; |
186 | 202 | ||
187 | _enter("%p", cookie); | 203 | _enter("%p", cookie); |
188 | 204 | ||
@@ -212,15 +228,16 @@ int __fscache_attr_changed(struct fscache_cookie *cookie) | |||
212 | 228 | ||
213 | __fscache_use_cookie(cookie); | 229 | __fscache_use_cookie(cookie); |
214 | if (fscache_submit_exclusive_op(object, op) < 0) | 230 | if (fscache_submit_exclusive_op(object, op) < 0) |
215 | goto nobufs; | 231 | goto nobufs_dec; |
216 | spin_unlock(&cookie->lock); | 232 | spin_unlock(&cookie->lock); |
217 | fscache_stat(&fscache_n_attr_changed_ok); | 233 | fscache_stat(&fscache_n_attr_changed_ok); |
218 | fscache_put_operation(op); | 234 | fscache_put_operation(op); |
219 | _leave(" = 0"); | 235 | _leave(" = 0"); |
220 | return 0; | 236 | return 0; |
221 | 237 | ||
222 | nobufs: | 238 | nobufs_dec: |
223 | wake_cookie = __fscache_unuse_cookie(cookie); | 239 | wake_cookie = __fscache_unuse_cookie(cookie); |
240 | nobufs: | ||
224 | spin_unlock(&cookie->lock); | 241 | spin_unlock(&cookie->lock); |
225 | kfree(op); | 242 | kfree(op); |
226 | if (wake_cookie) | 243 | if (wake_cookie) |
diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 912061ac4baf..caa8d95b24e8 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c | |||
@@ -1305,6 +1305,7 @@ static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii, | |||
1305 | size_t start; | 1305 | size_t start; |
1306 | ssize_t ret = iov_iter_get_pages(ii, | 1306 | ssize_t ret = iov_iter_get_pages(ii, |
1307 | &req->pages[req->num_pages], | 1307 | &req->pages[req->num_pages], |
1308 | *nbytesp - nbytes, | ||
1308 | req->max_pages - req->num_pages, | 1309 | req->max_pages - req->num_pages, |
1309 | &start); | 1310 | &start); |
1310 | if (ret < 0) | 1311 | if (ret < 0) |
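The fuse fix threads a maxsize of `*nbytesp - nbytes` into iov_iter_get_pages(), so the iterator can never pin more data than the request still has room for. A sketch of the clamping idea, with a hypothetical grab_pages() in place of the real API:

```c
/*
 * When filling a fixed-size request from an iterator, bound each
 * grab by both the bytes the request still wants and the page
 * slots it has left; without the byte bound, pages could be pinned
 * that the request cannot describe.
 */
static long fill_request(long want_bytes, long have_bytes,
			 unsigned free_slots,
			 long (*grab_pages)(long maxsize, unsigned maxpages))
{
	long remaining = want_bytes - have_bytes;

	return grab_pages(remaining, free_slots);
}
```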
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index e6ee5b6e8d99..f0b945ab853e 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c | |||
@@ -359,7 +359,7 @@ static inline void release_metapath(struct metapath *mp) | |||
359 | * Returns: The length of the extent (minimum of one block) | 359 | * Returns: The length of the extent (minimum of one block) |
360 | */ | 360 | */ |
361 | 361 | ||
362 | static inline unsigned int gfs2_extent_length(void *start, unsigned int len, __be64 *ptr, unsigned limit, int *eob) | 362 | static inline unsigned int gfs2_extent_length(void *start, unsigned int len, __be64 *ptr, size_t limit, int *eob) |
363 | { | 363 | { |
364 | const __be64 *end = (start + len); | 364 | const __be64 *end = (start + len); |
365 | const __be64 *first = ptr; | 365 | const __be64 *first = ptr; |
@@ -449,7 +449,7 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock, | |||
449 | struct buffer_head *bh_map, struct metapath *mp, | 449 | struct buffer_head *bh_map, struct metapath *mp, |
450 | const unsigned int sheight, | 450 | const unsigned int sheight, |
451 | const unsigned int height, | 451 | const unsigned int height, |
452 | const unsigned int maxlen) | 452 | const size_t maxlen) |
453 | { | 453 | { |
454 | struct gfs2_inode *ip = GFS2_I(inode); | 454 | struct gfs2_inode *ip = GFS2_I(inode); |
455 | struct gfs2_sbd *sdp = GFS2_SB(inode); | 455 | struct gfs2_sbd *sdp = GFS2_SB(inode); |
@@ -483,7 +483,8 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock, | |||
483 | } else { | 483 | } else { |
484 | /* Need to allocate indirect blocks */ | 484 | /* Need to allocate indirect blocks */ |
485 | ptrs_per_blk = height > 1 ? sdp->sd_inptrs : sdp->sd_diptrs; | 485 | ptrs_per_blk = height > 1 ? sdp->sd_inptrs : sdp->sd_diptrs; |
486 | dblks = min(maxlen, ptrs_per_blk - mp->mp_list[end_of_metadata]); | 486 | dblks = min(maxlen, (size_t)(ptrs_per_blk - |
487 | mp->mp_list[end_of_metadata])); | ||
487 | if (height == ip->i_height) { | 488 | if (height == ip->i_height) { |
488 | /* Writing into existing tree, extend tree down */ | 489 | /* Writing into existing tree, extend tree down */ |
489 | iblks = height - sheight; | 490 | iblks = height - sheight; |
@@ -605,7 +606,7 @@ int gfs2_block_map(struct inode *inode, sector_t lblock, | |||
605 | struct gfs2_inode *ip = GFS2_I(inode); | 606 | struct gfs2_inode *ip = GFS2_I(inode); |
606 | struct gfs2_sbd *sdp = GFS2_SB(inode); | 607 | struct gfs2_sbd *sdp = GFS2_SB(inode); |
607 | unsigned int bsize = sdp->sd_sb.sb_bsize; | 608 | unsigned int bsize = sdp->sd_sb.sb_bsize; |
608 | const unsigned int maxlen = bh_map->b_size >> inode->i_blkbits; | 609 | const size_t maxlen = bh_map->b_size >> inode->i_blkbits; |
609 | const u64 *arr = sdp->sd_heightsize; | 610 | const u64 *arr = sdp->sd_heightsize; |
610 | __be64 *ptr; | 611 | __be64 *ptr; |
611 | u64 size; | 612 | u64 size; |
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index 1a349f9a9685..5d4261ff5d23 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c | |||
@@ -2100,8 +2100,13 @@ int gfs2_diradd_alloc_required(struct inode *inode, const struct qstr *name, | |||
2100 | } | 2100 | } |
2101 | if (IS_ERR(dent)) | 2101 | if (IS_ERR(dent)) |
2102 | return PTR_ERR(dent); | 2102 | return PTR_ERR(dent); |
2103 | da->bh = bh; | 2103 | |
2104 | da->dent = dent; | 2104 | if (da->save_loc) { |
2105 | da->bh = bh; | ||
2106 | da->dent = dent; | ||
2107 | } else { | ||
2108 | brelse(bh); | ||
2109 | } | ||
2105 | return 0; | 2110 | return 0; |
2106 | } | 2111 | } |
2107 | 2112 | ||
diff --git a/fs/gfs2/dir.h b/fs/gfs2/dir.h index 126c65dda028..e1b309c24dab 100644 --- a/fs/gfs2/dir.h +++ b/fs/gfs2/dir.h | |||
@@ -23,6 +23,7 @@ struct gfs2_diradd { | |||
23 | unsigned nr_blocks; | 23 | unsigned nr_blocks; |
24 | struct gfs2_dirent *dent; | 24 | struct gfs2_dirent *dent; |
25 | struct buffer_head *bh; | 25 | struct buffer_head *bh; |
26 | int save_loc; | ||
26 | }; | 27 | }; |
27 | 28 | ||
28 | extern struct inode *gfs2_dir_search(struct inode *dir, | 29 | extern struct inode *gfs2_dir_search(struct inode *dir, |
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 2c02478a86b0..80dd44dca028 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c | |||
@@ -26,6 +26,7 @@ | |||
26 | #include <linux/dlm.h> | 26 | #include <linux/dlm.h> |
27 | #include <linux/dlm_plock.h> | 27 | #include <linux/dlm_plock.h> |
28 | #include <linux/aio.h> | 28 | #include <linux/aio.h> |
29 | #include <linux/delay.h> | ||
29 | 30 | ||
30 | #include "gfs2.h" | 31 | #include "gfs2.h" |
31 | #include "incore.h" | 32 | #include "incore.h" |
@@ -959,9 +960,10 @@ static int do_flock(struct file *file, int cmd, struct file_lock *fl) | |||
959 | unsigned int state; | 960 | unsigned int state; |
960 | int flags; | 961 | int flags; |
961 | int error = 0; | 962 | int error = 0; |
963 | int sleeptime; | ||
962 | 964 | ||
963 | state = (fl->fl_type == F_WRLCK) ? LM_ST_EXCLUSIVE : LM_ST_SHARED; | 965 | state = (fl->fl_type == F_WRLCK) ? LM_ST_EXCLUSIVE : LM_ST_SHARED; |
964 | flags = (IS_SETLKW(cmd) ? 0 : LM_FLAG_TRY) | GL_EXACT; | 966 | flags = (IS_SETLKW(cmd) ? 0 : LM_FLAG_TRY_1CB) | GL_EXACT; |
965 | 967 | ||
966 | mutex_lock(&fp->f_fl_mutex); | 968 | mutex_lock(&fp->f_fl_mutex); |
967 | 969 | ||
@@ -981,7 +983,14 @@ static int do_flock(struct file *file, int cmd, struct file_lock *fl) | |||
981 | gfs2_holder_init(gl, state, flags, fl_gh); | 983 | gfs2_holder_init(gl, state, flags, fl_gh); |
982 | gfs2_glock_put(gl); | 984 | gfs2_glock_put(gl); |
983 | } | 985 | } |
984 | error = gfs2_glock_nq(fl_gh); | 986 | for (sleeptime = 1; sleeptime <= 4; sleeptime <<= 1) { |
987 | error = gfs2_glock_nq(fl_gh); | ||
988 | if (error != GLR_TRYFAILED) | ||
989 | break; | ||
990 | fl_gh->gh_flags = LM_FLAG_TRY | GL_EXACT; | ||
991 | fl_gh->gh_error = 0; | ||
992 | msleep(sleeptime); | ||
993 | } | ||
985 | if (error) { | 994 | if (error) { |
986 | gfs2_holder_uninit(fl_gh); | 995 | gfs2_holder_uninit(fl_gh); |
987 | if (error == GLR_TRYFAILED) | 996 | if (error == GLR_TRYFAILED) |
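Instead of issuing a single lock request, do_flock() now retries with 1/2/4 ms backoff: the first attempt uses LM_FLAG_TRY_1CB (try, but ask the current holder to demote), and later attempts fall back to a plain LM_FLAG_TRY. A model of the loop; try_lock() and sleep_ms() are stand-ins for gfs2_glock_nq() and msleep(), and TRYFAILED mirrors GLR_TRYFAILED:

```c
#define TRYFAILED 1

static int flock_with_backoff(int (*try_lock)(int first_attempt),
			      void (*sleep_ms)(int ms))
{
	int sleeptime, error = TRYFAILED;

	for (sleeptime = 1; sleeptime <= 4; sleeptime <<= 1) {
		error = try_lock(sleeptime == 1);
		if (error != TRYFAILED)
			break;
		sleep_ms(sleeptime);	/* 1ms, 2ms, 4ms */
	}
	return error;	/* still TRYFAILED if every attempt lost */
}
```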
@@ -1004,7 +1013,7 @@ static void do_unflock(struct file *file, struct file_lock *fl) | |||
1004 | mutex_lock(&fp->f_fl_mutex); | 1013 | mutex_lock(&fp->f_fl_mutex); |
1005 | flock_lock_file_wait(file, fl); | 1014 | flock_lock_file_wait(file, fl); |
1006 | if (fl_gh->gh_gl) { | 1015 | if (fl_gh->gh_gl) { |
1007 | gfs2_glock_dq_wait(fl_gh); | 1016 | gfs2_glock_dq(fl_gh); |
1008 | gfs2_holder_uninit(fl_gh); | 1017 | gfs2_holder_uninit(fl_gh); |
1009 | } | 1018 | } |
1010 | mutex_unlock(&fp->f_fl_mutex); | 1019 | mutex_unlock(&fp->f_fl_mutex); |
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 7f513b1ceb2c..8f0c19d1d943 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c | |||
@@ -811,7 +811,7 @@ void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, unsigned flags, | |||
811 | { | 811 | { |
812 | INIT_LIST_HEAD(&gh->gh_list); | 812 | INIT_LIST_HEAD(&gh->gh_list); |
813 | gh->gh_gl = gl; | 813 | gh->gh_gl = gl; |
814 | gh->gh_ip = (unsigned long)__builtin_return_address(0); | 814 | gh->gh_ip = _RET_IP_; |
815 | gh->gh_owner_pid = get_pid(task_pid(current)); | 815 | gh->gh_owner_pid = get_pid(task_pid(current)); |
816 | gh->gh_state = state; | 816 | gh->gh_state = state; |
817 | gh->gh_flags = flags; | 817 | gh->gh_flags = flags; |
@@ -835,7 +835,7 @@ void gfs2_holder_reinit(unsigned int state, unsigned flags, struct gfs2_holder * | |||
835 | gh->gh_state = state; | 835 | gh->gh_state = state; |
836 | gh->gh_flags = flags; | 836 | gh->gh_flags = flags; |
837 | gh->gh_iflags = 0; | 837 | gh->gh_iflags = 0; |
838 | gh->gh_ip = (unsigned long)__builtin_return_address(0); | 838 | gh->gh_ip = _RET_IP_; |
839 | if (gh->gh_owner_pid) | 839 | if (gh->gh_owner_pid) |
840 | put_pid(gh->gh_owner_pid); | 840 | put_pid(gh->gh_owner_pid); |
841 | gh->gh_owner_pid = get_pid(task_pid(current)); | 841 | gh->gh_owner_pid = get_pid(task_pid(current)); |
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 2ffc67dce87f..1cc0bba6313f 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c | |||
@@ -93,7 +93,7 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl) | |||
93 | * tr->alloced is not set since the transaction structure is | 93 | * tr->alloced is not set since the transaction structure is |
94 | * on the stack */ | 94 | * on the stack */ |
95 | tr.tr_reserved = 1 + gfs2_struct2blk(sdp, tr.tr_revokes, sizeof(u64)); | 95 | tr.tr_reserved = 1 + gfs2_struct2blk(sdp, tr.tr_revokes, sizeof(u64)); |
96 | tr.tr_ip = (unsigned long)__builtin_return_address(0); | 96 | tr.tr_ip = _RET_IP_; |
97 | sb_start_intwrite(sdp->sd_vfs); | 97 | sb_start_intwrite(sdp->sd_vfs); |
98 | if (gfs2_log_reserve(sdp, tr.tr_reserved) < 0) { | 98 | if (gfs2_log_reserve(sdp, tr.tr_reserved) < 0) { |
99 | sb_end_intwrite(sdp->sd_vfs); | 99 | sb_end_intwrite(sdp->sd_vfs); |
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 67d310c9ada3..39e7e9959b74 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h | |||
@@ -262,6 +262,9 @@ struct gfs2_holder { | |||
262 | unsigned long gh_ip; | 262 | unsigned long gh_ip; |
263 | }; | 263 | }; |
264 | 264 | ||
265 | /* Number of quota types we support */ | ||
266 | #define GFS2_MAXQUOTAS 2 | ||
267 | |||
265 | /* Resource group multi-block reservation, in order of appearance: | 268 | /* Resource group multi-block reservation, in order of appearance: |
266 | 269 | ||
267 | Step 1. Function prepares to write, allocates a mb, sets the size hint. | 270 | Step 1. Function prepares to write, allocates a mb, sets the size hint. |
@@ -282,8 +285,8 @@ struct gfs2_blkreserv { | |||
282 | u64 rs_inum; /* Inode number for reservation */ | 285 | u64 rs_inum; /* Inode number for reservation */ |
283 | 286 | ||
284 | /* ancillary quota stuff */ | 287 | /* ancillary quota stuff */ |
285 | struct gfs2_quota_data *rs_qa_qd[2 * MAXQUOTAS]; | 288 | struct gfs2_quota_data *rs_qa_qd[2 * GFS2_MAXQUOTAS]; |
286 | struct gfs2_holder rs_qa_qd_ghs[2 * MAXQUOTAS]; | 289 | struct gfs2_holder rs_qa_qd_ghs[2 * GFS2_MAXQUOTAS]; |
287 | unsigned int rs_qa_qd_num; | 290 | unsigned int rs_qa_qd_num; |
288 | }; | 291 | }; |
289 | 292 | ||
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index e62e59477884..fcf42eadb69c 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c | |||
@@ -600,7 +600,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, | |||
600 | int error, free_vfs_inode = 0; | 600 | int error, free_vfs_inode = 0; |
601 | u32 aflags = 0; | 601 | u32 aflags = 0; |
602 | unsigned blocks = 1; | 602 | unsigned blocks = 1; |
603 | struct gfs2_diradd da = { .bh = NULL, }; | 603 | struct gfs2_diradd da = { .bh = NULL, .save_loc = 1, }; |
604 | 604 | ||
605 | if (!name->len || name->len > GFS2_FNAMESIZE) | 605 | if (!name->len || name->len > GFS2_FNAMESIZE) |
606 | return -ENAMETOOLONG; | 606 | return -ENAMETOOLONG; |
@@ -626,8 +626,10 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, | |||
626 | if (!IS_ERR(inode)) { | 626 | if (!IS_ERR(inode)) { |
627 | d = d_splice_alias(inode, dentry); | 627 | d = d_splice_alias(inode, dentry); |
628 | error = PTR_ERR(d); | 628 | error = PTR_ERR(d); |
629 | if (IS_ERR(d)) | 629 | if (IS_ERR(d)) { |
630 | inode = ERR_CAST(d); | ||
630 | goto fail_gunlock; | 631 | goto fail_gunlock; |
632 | } | ||
631 | error = 0; | 633 | error = 0; |
632 | if (file) { | 634 | if (file) { |
633 | if (S_ISREG(inode->i_mode)) { | 635 | if (S_ISREG(inode->i_mode)) { |
@@ -670,6 +672,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, | |||
670 | inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; | 672 | inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; |
671 | gfs2_set_inode_blocks(inode, 1); | 673 | gfs2_set_inode_blocks(inode, 1); |
672 | munge_mode_uid_gid(dip, inode); | 674 | munge_mode_uid_gid(dip, inode); |
675 | check_and_update_goal(dip); | ||
673 | ip->i_goal = dip->i_goal; | 676 | ip->i_goal = dip->i_goal; |
674 | ip->i_diskflags = 0; | 677 | ip->i_diskflags = 0; |
675 | ip->i_eattr = 0; | 678 | ip->i_eattr = 0; |
@@ -840,8 +843,10 @@ static struct dentry *__gfs2_lookup(struct inode *dir, struct dentry *dentry, | |||
840 | int error; | 843 | int error; |
841 | 844 | ||
842 | inode = gfs2_lookupi(dir, &dentry->d_name, 0); | 845 | inode = gfs2_lookupi(dir, &dentry->d_name, 0); |
843 | if (!inode) | 846 | if (inode == NULL) { |
847 | d_add(dentry, NULL); | ||
844 | return NULL; | 848 | return NULL; |
849 | } | ||
845 | if (IS_ERR(inode)) | 850 | if (IS_ERR(inode)) |
846 | return ERR_CAST(inode); | 851 | return ERR_CAST(inode); |
847 | 852 | ||
@@ -854,7 +859,6 @@ static struct dentry *__gfs2_lookup(struct inode *dir, struct dentry *dentry, | |||
854 | 859 | ||
855 | d = d_splice_alias(inode, dentry); | 860 | d = d_splice_alias(inode, dentry); |
856 | if (IS_ERR(d)) { | 861 | if (IS_ERR(d)) { |
857 | iput(inode); | ||
858 | gfs2_glock_dq_uninit(&gh); | 862 | gfs2_glock_dq_uninit(&gh); |
859 | return d; | 863 | return d; |
860 | } | 864 | } |
@@ -896,7 +900,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir, | |||
896 | struct gfs2_inode *ip = GFS2_I(inode); | 900 | struct gfs2_inode *ip = GFS2_I(inode); |
897 | struct gfs2_holder ghs[2]; | 901 | struct gfs2_holder ghs[2]; |
898 | struct buffer_head *dibh; | 902 | struct buffer_head *dibh; |
899 | struct gfs2_diradd da = { .bh = NULL, }; | 903 | struct gfs2_diradd da = { .bh = NULL, .save_loc = 1, }; |
900 | int error; | 904 | int error; |
901 | 905 | ||
902 | if (S_ISDIR(inode->i_mode)) | 906 | if (S_ISDIR(inode->i_mode)) |
@@ -1334,7 +1338,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, | |||
1334 | struct gfs2_rgrpd *nrgd; | 1338 | struct gfs2_rgrpd *nrgd; |
1335 | unsigned int num_gh; | 1339 | unsigned int num_gh; |
1336 | int dir_rename = 0; | 1340 | int dir_rename = 0; |
1337 | struct gfs2_diradd da = { .nr_blocks = 0, }; | 1341 | struct gfs2_diradd da = { .nr_blocks = 0, .save_loc = 0, }; |
1338 | unsigned int x; | 1342 | unsigned int x; |
1339 | int error; | 1343 | int error; |
1340 | 1344 | ||
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index f4cb9c0d6bbd..7474c413ffd1 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c | |||
@@ -577,6 +577,13 @@ struct gfs2_rgrpd *gfs2_rgrpd_get_next(struct gfs2_rgrpd *rgd) | |||
577 | return rgd; | 577 | return rgd; |
578 | } | 578 | } |
579 | 579 | ||
580 | void check_and_update_goal(struct gfs2_inode *ip) | ||
581 | { | ||
582 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | ||
583 | if (!ip->i_goal || gfs2_blk2rgrpd(sdp, ip->i_goal, 1) == NULL) | ||
584 | ip->i_goal = ip->i_no_addr; | ||
585 | } | ||
586 | |||
580 | void gfs2_free_clones(struct gfs2_rgrpd *rgd) | 587 | void gfs2_free_clones(struct gfs2_rgrpd *rgd) |
581 | { | 588 | { |
582 | int x; | 589 | int x; |
@@ -1910,6 +1917,7 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, const struct gfs2_alloc_parms *a | |||
1910 | } else if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal)) { | 1917 | } else if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal)) { |
1911 | rs->rs_rbm.rgd = begin = ip->i_rgd; | 1918 | rs->rs_rbm.rgd = begin = ip->i_rgd; |
1912 | } else { | 1919 | } else { |
1920 | check_and_update_goal(ip); | ||
1913 | rs->rs_rbm.rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1); | 1921 | rs->rs_rbm.rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1); |
1914 | } | 1922 | } |
1915 | if (S_ISDIR(ip->i_inode.i_mode) && (ap->aflags & GFS2_AF_ORLOV)) | 1923 | if (S_ISDIR(ip->i_inode.i_mode) && (ap->aflags & GFS2_AF_ORLOV)) |
@@ -2089,7 +2097,7 @@ static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart, | |||
2089 | u32 blen, unsigned char new_state) | 2097 | u32 blen, unsigned char new_state) |
2090 | { | 2098 | { |
2091 | struct gfs2_rbm rbm; | 2099 | struct gfs2_rbm rbm; |
2092 | struct gfs2_bitmap *bi; | 2100 | struct gfs2_bitmap *bi, *bi_prev = NULL; |
2093 | 2101 | ||
2094 | rbm.rgd = gfs2_blk2rgrpd(sdp, bstart, 1); | 2102 | rbm.rgd = gfs2_blk2rgrpd(sdp, bstart, 1); |
2095 | if (!rbm.rgd) { | 2103 | if (!rbm.rgd) { |
@@ -2098,18 +2106,22 @@ static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart, | |||
2098 | return NULL; | 2106 | return NULL; |
2099 | } | 2107 | } |
2100 | 2108 | ||
2109 | gfs2_rbm_from_block(&rbm, bstart); | ||
2101 | while (blen--) { | 2110 | while (blen--) { |
2102 | gfs2_rbm_from_block(&rbm, bstart); | ||
2103 | bi = rbm_bi(&rbm); | 2111 | bi = rbm_bi(&rbm); |
2104 | bstart++; | 2112 | if (bi != bi_prev) { |
2105 | if (!bi->bi_clone) { | 2113 | if (!bi->bi_clone) { |
2106 | bi->bi_clone = kmalloc(bi->bi_bh->b_size, | 2114 | bi->bi_clone = kmalloc(bi->bi_bh->b_size, |
2107 | GFP_NOFS | __GFP_NOFAIL); | 2115 | GFP_NOFS | __GFP_NOFAIL); |
2108 | memcpy(bi->bi_clone + bi->bi_offset, | 2116 | memcpy(bi->bi_clone + bi->bi_offset, |
2109 | bi->bi_bh->b_data + bi->bi_offset, bi->bi_len); | 2117 | bi->bi_bh->b_data + bi->bi_offset, |
2118 | bi->bi_len); | ||
2119 | } | ||
2120 | gfs2_trans_add_meta(rbm.rgd->rd_gl, bi->bi_bh); | ||
2121 | bi_prev = bi; | ||
2110 | } | 2122 | } |
2111 | gfs2_trans_add_meta(rbm.rgd->rd_gl, bi->bi_bh); | ||
2112 | gfs2_setbit(&rbm, false, new_state); | 2123 | gfs2_setbit(&rbm, false, new_state); |
2124 | gfs2_rbm_incr(&rbm); | ||
2113 | } | 2125 | } |
2114 | 2126 | ||
2115 | return rbm.rgd; | 2127 | return rbm.rgd; |
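The reworked loop walks the run of blocks with gfs2_rbm_incr() instead of recomputing the position from the block number every iteration, and only clones and journals a bitmap buffer when the walk crosses into a new one. The dedup pattern in isolation, with callbacks standing in for the gfs2 helpers:

```c
struct bitbuf;	/* stands in for struct gfs2_bitmap */

/*
 * Clear a run of `blen` bits, doing the expensive per-buffer work
 * (clone + add to transaction) only when the current buffer changes.
 */
static void free_run(struct bitbuf *(*cur_buf)(void),
		     void (*prepare_buf)(struct bitbuf *),
		     void (*clear_bit_and_advance)(void),
		     unsigned blen)
{
	struct bitbuf *bi, *bi_prev = NULL;

	while (blen--) {
		bi = cur_buf();
		if (bi != bi_prev) {	/* new buffer: clone + journal once */
			prepare_buf(bi);
			bi_prev = bi;
		}
		clear_bit_and_advance();	/* gfs2_setbit + gfs2_rbm_incr */
	}
}
```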
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h index 463ab2e95d1c..5d8f085f7ade 100644 --- a/fs/gfs2/rgrp.h +++ b/fs/gfs2/rgrp.h | |||
@@ -80,4 +80,5 @@ static inline bool gfs2_rs_active(struct gfs2_blkreserv *rs) | |||
80 | return rs && !RB_EMPTY_NODE(&rs->rs_node); | 80 | return rs && !RB_EMPTY_NODE(&rs->rs_node); |
81 | } | 81 | } |
82 | 82 | ||
83 | extern void check_and_update_goal(struct gfs2_inode *ip); | ||
83 | #endif /* __RGRP_DOT_H__ */ | 84 | #endif /* __RGRP_DOT_H__ */ |
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 2607ff13d486..a346f56c4c6d 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c | |||
@@ -1294,7 +1294,7 @@ static int gfs2_show_options(struct seq_file *s, struct dentry *root) | |||
1294 | int val; | 1294 | int val; |
1295 | 1295 | ||
1296 | if (is_ancestor(root, sdp->sd_master_dir)) | 1296 | if (is_ancestor(root, sdp->sd_master_dir)) |
1297 | seq_printf(s, ",meta"); | 1297 | seq_puts(s, ",meta"); |
1298 | if (args->ar_lockproto[0]) | 1298 | if (args->ar_lockproto[0]) |
1299 | seq_printf(s, ",lockproto=%s", args->ar_lockproto); | 1299 | seq_printf(s, ",lockproto=%s", args->ar_lockproto); |
1300 | if (args->ar_locktable[0]) | 1300 | if (args->ar_locktable[0]) |
@@ -1302,13 +1302,13 @@ static int gfs2_show_options(struct seq_file *s, struct dentry *root) | |||
1302 | if (args->ar_hostdata[0]) | 1302 | if (args->ar_hostdata[0]) |
1303 | seq_printf(s, ",hostdata=%s", args->ar_hostdata); | 1303 | seq_printf(s, ",hostdata=%s", args->ar_hostdata); |
1304 | if (args->ar_spectator) | 1304 | if (args->ar_spectator) |
1305 | seq_printf(s, ",spectator"); | 1305 | seq_puts(s, ",spectator"); |
1306 | if (args->ar_localflocks) | 1306 | if (args->ar_localflocks) |
1307 | seq_printf(s, ",localflocks"); | 1307 | seq_puts(s, ",localflocks"); |
1308 | if (args->ar_debug) | 1308 | if (args->ar_debug) |
1309 | seq_printf(s, ",debug"); | 1309 | seq_puts(s, ",debug"); |
1310 | if (args->ar_posix_acl) | 1310 | if (args->ar_posix_acl) |
1311 | seq_printf(s, ",acl"); | 1311 | seq_puts(s, ",acl"); |
1312 | if (args->ar_quota != GFS2_QUOTA_DEFAULT) { | 1312 | if (args->ar_quota != GFS2_QUOTA_DEFAULT) { |
1313 | char *state; | 1313 | char *state; |
1314 | switch (args->ar_quota) { | 1314 | switch (args->ar_quota) { |
@@ -1328,7 +1328,7 @@ static int gfs2_show_options(struct seq_file *s, struct dentry *root) | |||
1328 | seq_printf(s, ",quota=%s", state); | 1328 | seq_printf(s, ",quota=%s", state); |
1329 | } | 1329 | } |
1330 | if (args->ar_suiddir) | 1330 | if (args->ar_suiddir) |
1331 | seq_printf(s, ",suiddir"); | 1331 | seq_puts(s, ",suiddir"); |
1332 | if (args->ar_data != GFS2_DATA_DEFAULT) { | 1332 | if (args->ar_data != GFS2_DATA_DEFAULT) { |
1333 | char *state; | 1333 | char *state; |
1334 | switch (args->ar_data) { | 1334 | switch (args->ar_data) { |
@@ -1345,7 +1345,7 @@ static int gfs2_show_options(struct seq_file *s, struct dentry *root) | |||
1345 | seq_printf(s, ",data=%s", state); | 1345 | seq_printf(s, ",data=%s", state); |
1346 | } | 1346 | } |
1347 | if (args->ar_discard) | 1347 | if (args->ar_discard) |
1348 | seq_printf(s, ",discard"); | 1348 | seq_puts(s, ",discard"); |
1349 | val = sdp->sd_tune.gt_logd_secs; | 1349 | val = sdp->sd_tune.gt_logd_secs; |
1350 | if (val != 30) | 1350 | if (val != 30) |
1351 | seq_printf(s, ",commit=%d", val); | 1351 | seq_printf(s, ",commit=%d", val); |
@@ -1376,11 +1376,11 @@ static int gfs2_show_options(struct seq_file *s, struct dentry *root) | |||
1376 | seq_printf(s, ",errors=%s", state); | 1376 | seq_printf(s, ",errors=%s", state); |
1377 | } | 1377 | } |
1378 | if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags)) | 1378 | if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags)) |
1379 | seq_printf(s, ",nobarrier"); | 1379 | seq_puts(s, ",nobarrier"); |
1380 | if (test_bit(SDF_DEMOTE, &sdp->sd_flags)) | 1380 | if (test_bit(SDF_DEMOTE, &sdp->sd_flags)) |
1381 | seq_printf(s, ",demote_interface_used"); | 1381 | seq_puts(s, ",demote_interface_used"); |
1382 | if (args->ar_rgrplvb) | 1382 | if (args->ar_rgrplvb) |
1383 | seq_printf(s, ",rgrplvb"); | 1383 | seq_puts(s, ",rgrplvb"); |
1384 | return 0; | 1384 | return 0; |
1385 | } | 1385 | } |
1386 | 1386 | ||
diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c index 0546ab4e28e8..42bfd3361979 100644 --- a/fs/gfs2/trans.c +++ b/fs/gfs2/trans.c | |||
@@ -44,7 +44,7 @@ int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks, | |||
44 | if (!tr) | 44 | if (!tr) |
45 | return -ENOMEM; | 45 | return -ENOMEM; |
46 | 46 | ||
47 | tr->tr_ip = (unsigned long)__builtin_return_address(0); | 47 | tr->tr_ip = _RET_IP_; |
48 | tr->tr_blocks = blocks; | 48 | tr->tr_blocks = blocks; |
49 | tr->tr_revokes = revokes; | 49 | tr->tr_revokes = revokes; |
50 | tr->tr_reserved = 1; | 50 | tr->tr_reserved = 1; |
diff --git a/fs/internal.h b/fs/internal.h index e325b4f9c799..b2623200107b 100644 --- a/fs/internal.h +++ b/fs/internal.h | |||
@@ -35,6 +35,11 @@ static inline int __sync_blockdev(struct block_device *bdev, int wait) | |||
35 | #endif | 35 | #endif |
36 | 36 | ||
37 | /* | 37 | /* |
38 | * buffer.c | ||
39 | */ | ||
40 | extern void guard_bio_eod(int rw, struct bio *bio); | ||
41 | |||
42 | /* | ||
38 | * char_dev.c | 43 | * char_dev.c |
39 | */ | 44 | */ |
40 | extern void __init chrdev_init(void); | 45 | extern void __init chrdev_init(void); |
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index 4556ce1af5b0..5ddaf8625d3b 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c | |||
@@ -61,7 +61,7 @@ static void isofs_put_super(struct super_block *sb) | |||
61 | return; | 61 | return; |
62 | } | 62 | } |
63 | 63 | ||
64 | static int isofs_read_inode(struct inode *); | 64 | static int isofs_read_inode(struct inode *, int relocated); |
65 | static int isofs_statfs (struct dentry *, struct kstatfs *); | 65 | static int isofs_statfs (struct dentry *, struct kstatfs *); |
66 | 66 | ||
67 | static struct kmem_cache *isofs_inode_cachep; | 67 | static struct kmem_cache *isofs_inode_cachep; |
@@ -1259,7 +1259,7 @@ out_toomany: | |||
1259 | goto out; | 1259 | goto out; |
1260 | } | 1260 | } |
1261 | 1261 | ||
1262 | static int isofs_read_inode(struct inode *inode) | 1262 | static int isofs_read_inode(struct inode *inode, int relocated) |
1263 | { | 1263 | { |
1264 | struct super_block *sb = inode->i_sb; | 1264 | struct super_block *sb = inode->i_sb; |
1265 | struct isofs_sb_info *sbi = ISOFS_SB(sb); | 1265 | struct isofs_sb_info *sbi = ISOFS_SB(sb); |
@@ -1404,7 +1404,7 @@ static int isofs_read_inode(struct inode *inode) | |||
1404 | */ | 1404 | */ |
1405 | 1405 | ||
1406 | if (!high_sierra) { | 1406 | if (!high_sierra) { |
1407 | parse_rock_ridge_inode(de, inode); | 1407 | parse_rock_ridge_inode(de, inode, relocated); |
1408 | /* if we want uid/gid set, override the rock ridge setting */ | 1408 | /* if we want uid/gid set, override the rock ridge setting */ |
1409 | if (sbi->s_uid_set) | 1409 | if (sbi->s_uid_set) |
1410 | inode->i_uid = sbi->s_uid; | 1410 | inode->i_uid = sbi->s_uid; |
@@ -1483,9 +1483,10 @@ static int isofs_iget5_set(struct inode *ino, void *data) | |||
1483 | * offset that point to the underlying meta-data for the inode. The | 1483 | * offset that point to the underlying meta-data for the inode. The |
1484 | * code below is otherwise similar to the iget() code in | 1484 | * code below is otherwise similar to the iget() code in |
1485 | * include/linux/fs.h */ | 1485 | * include/linux/fs.h */ |
1486 | struct inode *isofs_iget(struct super_block *sb, | 1486 | struct inode *__isofs_iget(struct super_block *sb, |
1487 | unsigned long block, | 1487 | unsigned long block, |
1488 | unsigned long offset) | 1488 | unsigned long offset, |
1489 | int relocated) | ||
1489 | { | 1490 | { |
1490 | unsigned long hashval; | 1491 | unsigned long hashval; |
1491 | struct inode *inode; | 1492 | struct inode *inode; |
@@ -1507,7 +1508,7 @@ struct inode *isofs_iget(struct super_block *sb, | |||
1507 | return ERR_PTR(-ENOMEM); | 1508 | return ERR_PTR(-ENOMEM); |
1508 | 1509 | ||
1509 | if (inode->i_state & I_NEW) { | 1510 | if (inode->i_state & I_NEW) { |
1510 | ret = isofs_read_inode(inode); | 1511 | ret = isofs_read_inode(inode, relocated); |
1511 | if (ret < 0) { | 1512 | if (ret < 0) { |
1512 | iget_failed(inode); | 1513 | iget_failed(inode); |
1513 | inode = ERR_PTR(ret); | 1514 | inode = ERR_PTR(ret); |
diff --git a/fs/isofs/isofs.h b/fs/isofs/isofs.h index 99167238518d..0ac4c1f73fbd 100644 --- a/fs/isofs/isofs.h +++ b/fs/isofs/isofs.h | |||
@@ -107,7 +107,7 @@ extern int iso_date(char *, int); | |||
107 | 107 | ||
108 | struct inode; /* To make gcc happy */ | 108 | struct inode; /* To make gcc happy */ |
109 | 109 | ||
110 | extern int parse_rock_ridge_inode(struct iso_directory_record *, struct inode *); | 110 | extern int parse_rock_ridge_inode(struct iso_directory_record *, struct inode *, int relocated); |
111 | extern int get_rock_ridge_filename(struct iso_directory_record *, char *, struct inode *); | 111 | extern int get_rock_ridge_filename(struct iso_directory_record *, char *, struct inode *); |
112 | extern int isofs_name_translate(struct iso_directory_record *, char *, struct inode *); | 112 | extern int isofs_name_translate(struct iso_directory_record *, char *, struct inode *); |
113 | 113 | ||
@@ -118,9 +118,24 @@ extern struct dentry *isofs_lookup(struct inode *, struct dentry *, unsigned int | |||
118 | extern struct buffer_head *isofs_bread(struct inode *, sector_t); | 118 | extern struct buffer_head *isofs_bread(struct inode *, sector_t); |
119 | extern int isofs_get_blocks(struct inode *, sector_t, struct buffer_head **, unsigned long); | 119 | extern int isofs_get_blocks(struct inode *, sector_t, struct buffer_head **, unsigned long); |
120 | 120 | ||
121 | extern struct inode *isofs_iget(struct super_block *sb, | 121 | struct inode *__isofs_iget(struct super_block *sb, |
122 | unsigned long block, | 122 | unsigned long block, |
123 | unsigned long offset); | 123 | unsigned long offset, |
124 | int relocated); | ||
125 | |||
126 | static inline struct inode *isofs_iget(struct super_block *sb, | ||
127 | unsigned long block, | ||
128 | unsigned long offset) | ||
129 | { | ||
130 | return __isofs_iget(sb, block, offset, 0); | ||
131 | } | ||
132 | |||
133 | static inline struct inode *isofs_iget_reloc(struct super_block *sb, | ||
134 | unsigned long block, | ||
135 | unsigned long offset) | ||
136 | { | ||
137 | return __isofs_iget(sb, block, offset, 1); | ||
138 | } | ||
124 | 139 | ||
125 | /* Because the inode number is no longer relevant to finding the | 140 | /* Because the inode number is no longer relevant to finding the |
126 | * underlying meta-data for an inode, we are free to choose a more | 141 | * underlying meta-data for an inode, we are free to choose a more |
diff --git a/fs/isofs/rock.c b/fs/isofs/rock.c index c0bf42472e40..f488bbae541a 100644 --- a/fs/isofs/rock.c +++ b/fs/isofs/rock.c | |||
@@ -288,12 +288,16 @@ eio: | |||
288 | goto out; | 288 | goto out; |
289 | } | 289 | } |
290 | 290 | ||
291 | #define RR_REGARD_XA 1 | ||
292 | #define RR_RELOC_DE 2 | ||
293 | |||
291 | static int | 294 | static int |
292 | parse_rock_ridge_inode_internal(struct iso_directory_record *de, | 295 | parse_rock_ridge_inode_internal(struct iso_directory_record *de, |
293 | struct inode *inode, int regard_xa) | 296 | struct inode *inode, int flags) |
294 | { | 297 | { |
295 | int symlink_len = 0; | 298 | int symlink_len = 0; |
296 | int cnt, sig; | 299 | int cnt, sig; |
300 | unsigned int reloc_block; | ||
297 | struct inode *reloc; | 301 | struct inode *reloc; |
298 | struct rock_ridge *rr; | 302 | struct rock_ridge *rr; |
299 | int rootflag; | 303 | int rootflag; |
@@ -305,7 +309,7 @@ parse_rock_ridge_inode_internal(struct iso_directory_record *de, | |||
305 | 309 | ||
306 | init_rock_state(&rs, inode); | 310 | init_rock_state(&rs, inode); |
307 | setup_rock_ridge(de, inode, &rs); | 311 | setup_rock_ridge(de, inode, &rs); |
308 | if (regard_xa) { | 312 | if (flags & RR_REGARD_XA) { |
309 | rs.chr += 14; | 313 | rs.chr += 14; |
310 | rs.len -= 14; | 314 | rs.len -= 14; |
311 | if (rs.len < 0) | 315 | if (rs.len < 0) |
@@ -485,12 +489,22 @@ repeat: | |||
485 | "relocated directory\n"); | 489 | "relocated directory\n"); |
486 | goto out; | 490 | goto out; |
487 | case SIG('C', 'L'): | 491 | case SIG('C', 'L'): |
488 | ISOFS_I(inode)->i_first_extent = | 492 | if (flags & RR_RELOC_DE) { |
489 | isonum_733(rr->u.CL.location); | 493 | printk(KERN_ERR |
490 | reloc = | 494 | "ISOFS: Recursive directory relocation " |
491 | isofs_iget(inode->i_sb, | 495 | "is not supported\n"); |
492 | ISOFS_I(inode)->i_first_extent, | 496 | goto eio; |
493 | 0); | 497 | } |
498 | reloc_block = isonum_733(rr->u.CL.location); | ||
499 | if (reloc_block == ISOFS_I(inode)->i_iget5_block && | ||
500 | ISOFS_I(inode)->i_iget5_offset == 0) { | ||
501 | printk(KERN_ERR | ||
502 | "ISOFS: Directory relocation points to " | ||
503 | "itself\n"); | ||
504 | goto eio; | ||
505 | } | ||
506 | ISOFS_I(inode)->i_first_extent = reloc_block; | ||
507 | reloc = isofs_iget_reloc(inode->i_sb, reloc_block, 0); | ||
494 | if (IS_ERR(reloc)) { | 508 | if (IS_ERR(reloc)) { |
495 | ret = PTR_ERR(reloc); | 509 | ret = PTR_ERR(reloc); |
496 | goto out; | 510 | goto out; |
@@ -637,9 +651,11 @@ static char *get_symlink_chunk(char *rpnt, struct rock_ridge *rr, char *plimit) | |||
637 | return rpnt; | 651 | return rpnt; |
638 | } | 652 | } |
639 | 653 | ||
640 | int parse_rock_ridge_inode(struct iso_directory_record *de, struct inode *inode) | 654 | int parse_rock_ridge_inode(struct iso_directory_record *de, struct inode *inode, |
655 | int relocated) | ||
641 | { | 656 | { |
642 | int result = parse_rock_ridge_inode_internal(de, inode, 0); | 657 | int flags = relocated ? RR_RELOC_DE : 0; |
658 | int result = parse_rock_ridge_inode_internal(de, inode, flags); | ||
643 | 659 | ||
644 | /* | 660 | /* |
645 | * if rockridge flag was reset and we didn't look for attributes | 661 | * if rockridge flag was reset and we didn't look for attributes |
@@ -647,7 +663,8 @@ int parse_rock_ridge_inode(struct iso_directory_record *de, struct inode *inode) | |||
647 | */ | 663 | */ |
648 | if ((ISOFS_SB(inode->i_sb)->s_rock_offset == -1) | 664 | if ((ISOFS_SB(inode->i_sb)->s_rock_offset == -1) |
649 | && (ISOFS_SB(inode->i_sb)->s_rock == 2)) { | 665 | && (ISOFS_SB(inode->i_sb)->s_rock == 2)) { |
650 | result = parse_rock_ridge_inode_internal(de, inode, 14); | 666 | result = parse_rock_ridge_inode_internal(de, inode, |
667 | flags | RR_REGARD_XA); | ||
651 | } | 668 | } |
652 | return result; | 669 | return result; |
653 | } | 670 | } |
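
The rock.c change closes a recursion hole that a crafted image could exploit: a CL ("child link") relocation found while already parsing a relocated directory entry is rejected (the RR_RELOC_DE flag), and so is a relocation that points back at the inode being parsed, so the lookup can no longer recurse without bound. A hedged sketch of the two guards with simplified types:

    #include <stdio.h>

    struct node { unsigned long block; unsigned long offset; };

    /* Returns 0 if following the relocation is safe, -1 otherwise. */
    static int check_reloc(const struct node *cur, unsigned long reloc_block,
                           int reached_via_reloc)
    {
            if (reached_via_reloc)          /* relocation of a relocation */
                    return -1;
            if (reloc_block == cur->block && cur->offset == 0)
                    return -1;              /* relocation points to itself */
            return 0;
    }

    int main(void)
    {
            struct node n = { .block = 100, .offset = 0 };
            printf("%d\n", check_reloc(&n, 200, 0));  /*  0: ok        */
            printf("%d\n", check_reloc(&n, 100, 0));  /* -1: self-loop */
            printf("%d\n", check_reloc(&n, 200, 1));  /* -1: nested    */
            return 0;
    }
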
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 6fac74349856..b73e0215baa7 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c | |||
@@ -97,7 +97,7 @@ static void jbd2_commit_block_csum_set(journal_t *j, struct buffer_head *bh) | |||
97 | struct commit_header *h; | 97 | struct commit_header *h; |
98 | __u32 csum; | 98 | __u32 csum; |
99 | 99 | ||
100 | if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) | 100 | if (!jbd2_journal_has_csum_v2or3(j)) |
101 | return; | 101 | return; |
102 | 102 | ||
103 | h = (struct commit_header *)(bh->b_data); | 103 | h = (struct commit_header *)(bh->b_data); |
@@ -313,11 +313,11 @@ static __u32 jbd2_checksum_data(__u32 crc32_sum, struct buffer_head *bh) | |||
313 | return checksum; | 313 | return checksum; |
314 | } | 314 | } |
315 | 315 | ||
316 | static void write_tag_block(int tag_bytes, journal_block_tag_t *tag, | 316 | static void write_tag_block(journal_t *j, journal_block_tag_t *tag, |
317 | unsigned long long block) | 317 | unsigned long long block) |
318 | { | 318 | { |
319 | tag->t_blocknr = cpu_to_be32(block & (u32)~0); | 319 | tag->t_blocknr = cpu_to_be32(block & (u32)~0); |
320 | if (tag_bytes > JBD2_TAG_SIZE32) | 320 | if (JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_64BIT)) |
321 | tag->t_blocknr_high = cpu_to_be32((block >> 31) >> 1); | 321 | tag->t_blocknr_high = cpu_to_be32((block >> 31) >> 1); |
322 | } | 322 | } |
323 | 323 | ||
@@ -327,7 +327,7 @@ static void jbd2_descr_block_csum_set(journal_t *j, | |||
327 | struct jbd2_journal_block_tail *tail; | 327 | struct jbd2_journal_block_tail *tail; |
328 | __u32 csum; | 328 | __u32 csum; |
329 | 329 | ||
330 | if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) | 330 | if (!jbd2_journal_has_csum_v2or3(j)) |
331 | return; | 331 | return; |
332 | 332 | ||
333 | tail = (struct jbd2_journal_block_tail *)(bh->b_data + j->j_blocksize - | 333 | tail = (struct jbd2_journal_block_tail *)(bh->b_data + j->j_blocksize - |
@@ -340,12 +340,13 @@ static void jbd2_descr_block_csum_set(journal_t *j, | |||
340 | static void jbd2_block_tag_csum_set(journal_t *j, journal_block_tag_t *tag, | 340 | static void jbd2_block_tag_csum_set(journal_t *j, journal_block_tag_t *tag, |
341 | struct buffer_head *bh, __u32 sequence) | 341 | struct buffer_head *bh, __u32 sequence) |
342 | { | 342 | { |
343 | journal_block_tag3_t *tag3 = (journal_block_tag3_t *)tag; | ||
343 | struct page *page = bh->b_page; | 344 | struct page *page = bh->b_page; |
344 | __u8 *addr; | 345 | __u8 *addr; |
345 | __u32 csum32; | 346 | __u32 csum32; |
346 | __be32 seq; | 347 | __be32 seq; |
347 | 348 | ||
348 | if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) | 349 | if (!jbd2_journal_has_csum_v2or3(j)) |
349 | return; | 350 | return; |
350 | 351 | ||
351 | seq = cpu_to_be32(sequence); | 352 | seq = cpu_to_be32(sequence); |
@@ -355,8 +356,10 @@ static void jbd2_block_tag_csum_set(journal_t *j, journal_block_tag_t *tag, | |||
355 | bh->b_size); | 356 | bh->b_size); |
356 | kunmap_atomic(addr); | 357 | kunmap_atomic(addr); |
357 | 358 | ||
358 | /* We only have space to store the lower 16 bits of the crc32c. */ | 359 | if (JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V3)) |
359 | tag->t_checksum = cpu_to_be16(csum32); | 360 | tag3->t_checksum = cpu_to_be32(csum32); |
361 | else | ||
362 | tag->t_checksum = cpu_to_be16(csum32); | ||
360 | } | 363 | } |
361 | /* | 364 | /* |
362 | * jbd2_journal_commit_transaction | 365 | * jbd2_journal_commit_transaction |
@@ -396,7 +399,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
396 | LIST_HEAD(io_bufs); | 399 | LIST_HEAD(io_bufs); |
397 | LIST_HEAD(log_bufs); | 400 | LIST_HEAD(log_bufs); |
398 | 401 | ||
399 | if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) | 402 | if (jbd2_journal_has_csum_v2or3(journal)) |
400 | csum_size = sizeof(struct jbd2_journal_block_tail); | 403 | csum_size = sizeof(struct jbd2_journal_block_tail); |
401 | 404 | ||
402 | /* | 405 | /* |
@@ -690,7 +693,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
690 | tag_flag |= JBD2_FLAG_SAME_UUID; | 693 | tag_flag |= JBD2_FLAG_SAME_UUID; |
691 | 694 | ||
692 | tag = (journal_block_tag_t *) tagp; | 695 | tag = (journal_block_tag_t *) tagp; |
693 | write_tag_block(tag_bytes, tag, jh2bh(jh)->b_blocknr); | 696 | write_tag_block(journal, tag, jh2bh(jh)->b_blocknr); |
694 | tag->t_flags = cpu_to_be16(tag_flag); | 697 | tag->t_flags = cpu_to_be16(tag_flag); |
695 | jbd2_block_tag_csum_set(journal, tag, wbuf[bufs], | 698 | jbd2_block_tag_csum_set(journal, tag, wbuf[bufs], |
696 | commit_transaction->t_tid); | 699 | commit_transaction->t_tid); |
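
The commit-path change stores the per-block tag checksum at its natural width: the v2 on-disk tag only has a 16-bit slot, so the crc32c is truncated, while the new v3 tag keeps all 32 bits. A compilable sketch of the two encodings (plain host-order integers; the kernel stores these fields big-endian):

    #include <stdint.h>
    #include <stdio.h>

    /* v2 tag: 16-bit checksum slot; v3 tag: full 32-bit slot. */
    struct tag_v2 { uint16_t t_checksum; };
    struct tag_v3 { uint32_t t_checksum; };

    static void store_csum(void *tag, int v3, uint32_t csum32)
    {
            if (v3)
                    ((struct tag_v3 *)tag)->t_checksum = csum32;
            else    /* only room for the low 16 bits of the crc32c */
                    ((struct tag_v2 *)tag)->t_checksum = (uint16_t)csum32;
    }

    int main(void)
    {
            struct tag_v2 t2; struct tag_v3 t3;
            store_csum(&t2, 0, 0xdeadbeef);
            store_csum(&t3, 1, 0xdeadbeef);
            printf("v2=%#x v3=%#x\n", t2.t_checksum, t3.t_checksum);
            return 0;   /* prints: v2=0xbeef v3=0xdeadbeef */
    }
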
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 67b8e303946c..19d74d86d99c 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c | |||
@@ -124,7 +124,7 @@ EXPORT_SYMBOL(__jbd2_debug); | |||
124 | /* Checksumming functions */ | 124 | /* Checksumming functions */ |
125 | static int jbd2_verify_csum_type(journal_t *j, journal_superblock_t *sb) | 125 | static int jbd2_verify_csum_type(journal_t *j, journal_superblock_t *sb) |
126 | { | 126 | { |
127 | if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) | 127 | if (!jbd2_journal_has_csum_v2or3(j)) |
128 | return 1; | 128 | return 1; |
129 | 129 | ||
130 | return sb->s_checksum_type == JBD2_CRC32C_CHKSUM; | 130 | return sb->s_checksum_type == JBD2_CRC32C_CHKSUM; |
@@ -145,7 +145,7 @@ static __be32 jbd2_superblock_csum(journal_t *j, journal_superblock_t *sb) | |||
145 | 145 | ||
146 | static int jbd2_superblock_csum_verify(journal_t *j, journal_superblock_t *sb) | 146 | static int jbd2_superblock_csum_verify(journal_t *j, journal_superblock_t *sb) |
147 | { | 147 | { |
148 | if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) | 148 | if (!jbd2_journal_has_csum_v2or3(j)) |
149 | return 1; | 149 | return 1; |
150 | 150 | ||
151 | return sb->s_checksum == jbd2_superblock_csum(j, sb); | 151 | return sb->s_checksum == jbd2_superblock_csum(j, sb); |
@@ -153,7 +153,7 @@ static int jbd2_superblock_csum_verify(journal_t *j, journal_superblock_t *sb) | |||
153 | 153 | ||
154 | static void jbd2_superblock_csum_set(journal_t *j, journal_superblock_t *sb) | 154 | static void jbd2_superblock_csum_set(journal_t *j, journal_superblock_t *sb) |
155 | { | 155 | { |
156 | if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) | 156 | if (!jbd2_journal_has_csum_v2or3(j)) |
157 | return; | 157 | return; |
158 | 158 | ||
159 | sb->s_checksum = jbd2_superblock_csum(j, sb); | 159 | sb->s_checksum = jbd2_superblock_csum(j, sb); |
@@ -1522,21 +1522,29 @@ static int journal_get_superblock(journal_t *journal) | |||
1522 | goto out; | 1522 | goto out; |
1523 | } | 1523 | } |
1524 | 1524 | ||
1525 | if (JBD2_HAS_COMPAT_FEATURE(journal, JBD2_FEATURE_COMPAT_CHECKSUM) && | 1525 | if (jbd2_journal_has_csum_v2or3(journal) && |
1526 | JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) { | 1526 | JBD2_HAS_COMPAT_FEATURE(journal, JBD2_FEATURE_COMPAT_CHECKSUM)) { |
1527 | /* Can't have checksum v1 and v2 on at the same time! */ | 1527 | /* Can't have checksum v1 and v2 on at the same time! */ |
1528 | printk(KERN_ERR "JBD2: Can't enable checksumming v1 and v2 " | 1528 | printk(KERN_ERR "JBD2: Can't enable checksumming v1 and v2 " |
1529 | "at the same time!\n"); | 1529 | "at the same time!\n"); |
1530 | goto out; | 1530 | goto out; |
1531 | } | 1531 | } |
1532 | 1532 | ||
1533 | if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2) && | ||
1534 | JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V3)) { | ||
1535 | /* Can't have checksum v2 and v3 at the same time! */ | ||
1536 | printk(KERN_ERR "JBD2: Can't enable checksumming v2 and v3 " | ||
1537 | "at the same time!\n"); | ||
1538 | goto out; | ||
1539 | } | ||
1540 | |||
1533 | if (!jbd2_verify_csum_type(journal, sb)) { | 1541 | if (!jbd2_verify_csum_type(journal, sb)) { |
1534 | printk(KERN_ERR "JBD2: Unknown checksum type\n"); | 1542 | printk(KERN_ERR "JBD2: Unknown checksum type\n"); |
1535 | goto out; | 1543 | goto out; |
1536 | } | 1544 | } |
1537 | 1545 | ||
1538 | /* Load the checksum driver */ | 1546 | /* Load the checksum driver */ |
1539 | if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) { | 1547 | if (jbd2_journal_has_csum_v2or3(journal)) { |
1540 | journal->j_chksum_driver = crypto_alloc_shash("crc32c", 0, 0); | 1548 | journal->j_chksum_driver = crypto_alloc_shash("crc32c", 0, 0); |
1541 | if (IS_ERR(journal->j_chksum_driver)) { | 1549 | if (IS_ERR(journal->j_chksum_driver)) { |
1542 | printk(KERN_ERR "JBD2: Cannot load crc32c driver.\n"); | 1550 | printk(KERN_ERR "JBD2: Cannot load crc32c driver.\n"); |
@@ -1553,7 +1561,7 @@ static int journal_get_superblock(journal_t *journal) | |||
1553 | } | 1561 | } |
1554 | 1562 | ||
1555 | /* Precompute checksum seed for all metadata */ | 1563 | /* Precompute checksum seed for all metadata */ |
1556 | if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) | 1564 | if (jbd2_journal_has_csum_v2or3(journal)) |
1557 | journal->j_csum_seed = jbd2_chksum(journal, ~0, sb->s_uuid, | 1565 | journal->j_csum_seed = jbd2_chksum(journal, ~0, sb->s_uuid, |
1558 | sizeof(sb->s_uuid)); | 1566 | sizeof(sb->s_uuid)); |
1559 | 1567 | ||
@@ -1813,8 +1821,14 @@ int jbd2_journal_set_features (journal_t *journal, unsigned long compat, | |||
1813 | if (!jbd2_journal_check_available_features(journal, compat, ro, incompat)) | 1821 | if (!jbd2_journal_check_available_features(journal, compat, ro, incompat)) |
1814 | return 0; | 1822 | return 0; |
1815 | 1823 | ||
1816 | /* Asking for checksumming v2 and v1? Only give them v2. */ | 1824 | /* If enabling v2 checksums, turn on v3 instead */ |
1817 | if (incompat & JBD2_FEATURE_INCOMPAT_CSUM_V2 && | 1825 | if (incompat & JBD2_FEATURE_INCOMPAT_CSUM_V2) { |
1826 | incompat &= ~JBD2_FEATURE_INCOMPAT_CSUM_V2; | ||
1827 | incompat |= JBD2_FEATURE_INCOMPAT_CSUM_V3; | ||
1828 | } | ||
1829 | |||
1830 | /* Asking for checksumming v3 and v1? Only give them v3. */ | ||
1831 | if (incompat & JBD2_FEATURE_INCOMPAT_CSUM_V3 && | ||
1818 | compat & JBD2_FEATURE_COMPAT_CHECKSUM) | 1832 | compat & JBD2_FEATURE_COMPAT_CHECKSUM) |
1819 | compat &= ~JBD2_FEATURE_COMPAT_CHECKSUM; | 1833 | compat &= ~JBD2_FEATURE_COMPAT_CHECKSUM; |
1820 | 1834 | ||
@@ -1823,8 +1837,8 @@ int jbd2_journal_set_features (journal_t *journal, unsigned long compat, | |||
1823 | 1837 | ||
1824 | sb = journal->j_superblock; | 1838 | sb = journal->j_superblock; |
1825 | 1839 | ||
1826 | /* If enabling v2 checksums, update superblock */ | 1840 | /* If enabling v3 checksums, update superblock */ |
1827 | if (INCOMPAT_FEATURE_ON(JBD2_FEATURE_INCOMPAT_CSUM_V2)) { | 1841 | if (INCOMPAT_FEATURE_ON(JBD2_FEATURE_INCOMPAT_CSUM_V3)) { |
1828 | sb->s_checksum_type = JBD2_CRC32C_CHKSUM; | 1842 | sb->s_checksum_type = JBD2_CRC32C_CHKSUM; |
1829 | sb->s_feature_compat &= | 1843 | sb->s_feature_compat &= |
1830 | ~cpu_to_be32(JBD2_FEATURE_COMPAT_CHECKSUM); | 1844 | ~cpu_to_be32(JBD2_FEATURE_COMPAT_CHECKSUM); |
@@ -1842,8 +1856,7 @@ int jbd2_journal_set_features (journal_t *journal, unsigned long compat, | |||
1842 | } | 1856 | } |
1843 | 1857 | ||
1844 | /* Precompute checksum seed for all metadata */ | 1858 | /* Precompute checksum seed for all metadata */ |
1845 | if (JBD2_HAS_INCOMPAT_FEATURE(journal, | 1859 | if (jbd2_journal_has_csum_v2or3(journal)) |
1846 | JBD2_FEATURE_INCOMPAT_CSUM_V2)) | ||
1847 | journal->j_csum_seed = jbd2_chksum(journal, ~0, | 1860 | journal->j_csum_seed = jbd2_chksum(journal, ~0, |
1848 | sb->s_uuid, | 1861 | sb->s_uuid, |
1849 | sizeof(sb->s_uuid)); | 1862 | sizeof(sb->s_uuid)); |
@@ -1852,7 +1865,8 @@ int jbd2_journal_set_features (journal_t *journal, unsigned long compat, | |||
1852 | /* If enabling v1 checksums, downgrade superblock */ | 1865 | /* If enabling v1 checksums, downgrade superblock */ |
1853 | if (COMPAT_FEATURE_ON(JBD2_FEATURE_COMPAT_CHECKSUM)) | 1866 | if (COMPAT_FEATURE_ON(JBD2_FEATURE_COMPAT_CHECKSUM)) |
1854 | sb->s_feature_incompat &= | 1867 | sb->s_feature_incompat &= |
1855 | ~cpu_to_be32(JBD2_FEATURE_INCOMPAT_CSUM_V2); | 1868 | ~cpu_to_be32(JBD2_FEATURE_INCOMPAT_CSUM_V2 | |
1869 | JBD2_FEATURE_INCOMPAT_CSUM_V3); | ||
1856 | 1870 | ||
1857 | sb->s_feature_compat |= cpu_to_be32(compat); | 1871 | sb->s_feature_compat |= cpu_to_be32(compat); |
1858 | sb->s_feature_ro_compat |= cpu_to_be32(ro); | 1872 | sb->s_feature_ro_compat |= cpu_to_be32(ro); |
@@ -2165,16 +2179,20 @@ int jbd2_journal_blocks_per_page(struct inode *inode) | |||
2165 | */ | 2179 | */ |
2166 | size_t journal_tag_bytes(journal_t *journal) | 2180 | size_t journal_tag_bytes(journal_t *journal) |
2167 | { | 2181 | { |
2168 | journal_block_tag_t tag; | 2182 | size_t sz; |
2169 | size_t x = 0; | 2183 | |
2184 | if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V3)) | ||
2185 | return sizeof(journal_block_tag3_t); | ||
2186 | |||
2187 | sz = sizeof(journal_block_tag_t); | ||
2170 | 2188 | ||
2171 | if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) | 2189 | if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) |
2172 | x += sizeof(tag.t_checksum); | 2190 | sz += sizeof(__u16); |
2173 | 2191 | ||
2174 | if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT)) | 2192 | if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT)) |
2175 | return x + JBD2_TAG_SIZE64; | 2193 | return sz; |
2176 | else | 2194 | else |
2177 | return x + JBD2_TAG_SIZE32; | 2195 | return sz - sizeof(__u32); |
2178 | } | 2196 | } |
2179 | 2197 | ||
2180 | /* | 2198 | /* |
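
journal_tag_bytes() now derives the on-disk tag size directly from the feature bits: v3 journals use the fixed-size journal_block_tag3_t; otherwise the base v2 size grows by two bytes when the 16-bit checksum is present and shrinks by four when the journal lacks 64-bit block numbers. A userspace sketch of the same arithmetic; the 12- and 16-byte constants are assumptions mirroring the patched headers:

    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    #define TAG_V2_SIZE 12   /* blocknr + checksum + flags + blocknr_high */
    #define TAG3_SIZE   16

    static size_t tag_bytes(int csum_v3, int csum_v2, int feat_64bit)
    {
            size_t sz;

            if (csum_v3)
                    return TAG3_SIZE;          /* fixed-size v3 tag */

            sz = TAG_V2_SIZE;
            if (csum_v2)
                    sz += sizeof(uint16_t);    /* room for the 16-bit csum */

            /* 32-bit journals drop the high half of the block number. */
            return feat_64bit ? sz : sz - sizeof(uint32_t);
    }

    int main(void)
    {
            printf("%zu %zu %zu\n",
                   tag_bytes(0, 0, 0),   /*  8: classic 32-bit tag */
                   tag_bytes(0, 1, 1),   /* 14: csum v2 + 64bit    */
                   tag_bytes(1, 0, 1));  /* 16: csum v3            */
            return 0;
    }
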
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c index 3b6bb19d60b1..9b329b55ffe3 100644 --- a/fs/jbd2/recovery.c +++ b/fs/jbd2/recovery.c | |||
@@ -181,7 +181,7 @@ static int jbd2_descr_block_csum_verify(journal_t *j, | |||
181 | __be32 provided; | 181 | __be32 provided; |
182 | __u32 calculated; | 182 | __u32 calculated; |
183 | 183 | ||
184 | if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) | 184 | if (!jbd2_journal_has_csum_v2or3(j)) |
185 | return 1; | 185 | return 1; |
186 | 186 | ||
187 | tail = (struct jbd2_journal_block_tail *)(buf + j->j_blocksize - | 187 | tail = (struct jbd2_journal_block_tail *)(buf + j->j_blocksize - |
@@ -205,7 +205,7 @@ static int count_tags(journal_t *journal, struct buffer_head *bh) | |||
205 | int nr = 0, size = journal->j_blocksize; | 205 | int nr = 0, size = journal->j_blocksize; |
206 | int tag_bytes = journal_tag_bytes(journal); | 206 | int tag_bytes = journal_tag_bytes(journal); |
207 | 207 | ||
208 | if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) | 208 | if (jbd2_journal_has_csum_v2or3(journal)) |
209 | size -= sizeof(struct jbd2_journal_block_tail); | 209 | size -= sizeof(struct jbd2_journal_block_tail); |
210 | 210 | ||
211 | tagp = &bh->b_data[sizeof(journal_header_t)]; | 211 | tagp = &bh->b_data[sizeof(journal_header_t)]; |
@@ -338,10 +338,11 @@ int jbd2_journal_skip_recovery(journal_t *journal) | |||
338 | return err; | 338 | return err; |
339 | } | 339 | } |
340 | 340 | ||
341 | static inline unsigned long long read_tag_block(int tag_bytes, journal_block_tag_t *tag) | 341 | static inline unsigned long long read_tag_block(journal_t *journal, |
342 | journal_block_tag_t *tag) | ||
342 | { | 343 | { |
343 | unsigned long long block = be32_to_cpu(tag->t_blocknr); | 344 | unsigned long long block = be32_to_cpu(tag->t_blocknr); |
344 | if (tag_bytes > JBD2_TAG_SIZE32) | 345 | if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT)) |
345 | block |= (u64)be32_to_cpu(tag->t_blocknr_high) << 32; | 346 | block |= (u64)be32_to_cpu(tag->t_blocknr_high) << 32; |
346 | return block; | 347 | return block; |
347 | } | 348 | } |
@@ -384,7 +385,7 @@ static int jbd2_commit_block_csum_verify(journal_t *j, void *buf) | |||
384 | __be32 provided; | 385 | __be32 provided; |
385 | __u32 calculated; | 386 | __u32 calculated; |
386 | 387 | ||
387 | if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) | 388 | if (!jbd2_journal_has_csum_v2or3(j)) |
388 | return 1; | 389 | return 1; |
389 | 390 | ||
390 | h = buf; | 391 | h = buf; |
@@ -399,17 +400,21 @@ static int jbd2_commit_block_csum_verify(journal_t *j, void *buf) | |||
399 | static int jbd2_block_tag_csum_verify(journal_t *j, journal_block_tag_t *tag, | 400 | static int jbd2_block_tag_csum_verify(journal_t *j, journal_block_tag_t *tag, |
400 | void *buf, __u32 sequence) | 401 | void *buf, __u32 sequence) |
401 | { | 402 | { |
403 | journal_block_tag3_t *tag3 = (journal_block_tag3_t *)tag; | ||
402 | __u32 csum32; | 404 | __u32 csum32; |
403 | __be32 seq; | 405 | __be32 seq; |
404 | 406 | ||
405 | if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) | 407 | if (!jbd2_journal_has_csum_v2or3(j)) |
406 | return 1; | 408 | return 1; |
407 | 409 | ||
408 | seq = cpu_to_be32(sequence); | 410 | seq = cpu_to_be32(sequence); |
409 | csum32 = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&seq, sizeof(seq)); | 411 | csum32 = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&seq, sizeof(seq)); |
410 | csum32 = jbd2_chksum(j, csum32, buf, j->j_blocksize); | 412 | csum32 = jbd2_chksum(j, csum32, buf, j->j_blocksize); |
411 | 413 | ||
412 | return tag->t_checksum == cpu_to_be16(csum32); | 414 | if (JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V3)) |
415 | return tag3->t_checksum == cpu_to_be32(csum32); | ||
416 | else | ||
417 | return tag->t_checksum == cpu_to_be16(csum32); | ||
413 | } | 418 | } |
414 | 419 | ||
415 | static int do_one_pass(journal_t *journal, | 420 | static int do_one_pass(journal_t *journal, |
@@ -426,6 +431,7 @@ static int do_one_pass(journal_t *journal, | |||
426 | int tag_bytes = journal_tag_bytes(journal); | 431 | int tag_bytes = journal_tag_bytes(journal); |
427 | __u32 crc32_sum = ~0; /* Transactional Checksums */ | 432 | __u32 crc32_sum = ~0; /* Transactional Checksums */ |
428 | int descr_csum_size = 0; | 433 | int descr_csum_size = 0; |
434 | int block_error = 0; | ||
429 | 435 | ||
430 | /* | 436 | /* |
431 | * First thing is to establish what we expect to find in the log | 437 | * First thing is to establish what we expect to find in the log |
@@ -512,8 +518,7 @@ static int do_one_pass(journal_t *journal, | |||
512 | switch(blocktype) { | 518 | switch(blocktype) { |
513 | case JBD2_DESCRIPTOR_BLOCK: | 519 | case JBD2_DESCRIPTOR_BLOCK: |
514 | /* Verify checksum first */ | 520 | /* Verify checksum first */ |
515 | if (JBD2_HAS_INCOMPAT_FEATURE(journal, | 521 | if (jbd2_journal_has_csum_v2or3(journal)) |
516 | JBD2_FEATURE_INCOMPAT_CSUM_V2)) | ||
517 | descr_csum_size = | 522 | descr_csum_size = |
518 | sizeof(struct jbd2_journal_block_tail); | 523 | sizeof(struct jbd2_journal_block_tail); |
519 | if (descr_csum_size > 0 && | 524 | if (descr_csum_size > 0 && |
@@ -574,7 +579,7 @@ static int do_one_pass(journal_t *journal, | |||
574 | unsigned long long blocknr; | 579 | unsigned long long blocknr; |
575 | 580 | ||
576 | J_ASSERT(obh != NULL); | 581 | J_ASSERT(obh != NULL); |
577 | blocknr = read_tag_block(tag_bytes, | 582 | blocknr = read_tag_block(journal, |
578 | tag); | 583 | tag); |
579 | 584 | ||
580 | /* If the block has been | 585 | /* If the block has been |
@@ -598,7 +603,8 @@ static int do_one_pass(journal_t *journal, | |||
598 | "checksum recovering " | 603 | "checksum recovering " |
599 | "block %llu in log\n", | 604 | "block %llu in log\n", |
600 | blocknr); | 605 | blocknr); |
601 | continue; | 606 | block_error = 1; |
607 | goto skip_write; | ||
602 | } | 608 | } |
603 | 609 | ||
604 | /* Find a buffer for the new | 610 | /* Find a buffer for the new |
@@ -797,7 +803,8 @@ static int do_one_pass(journal_t *journal, | |||
797 | success = -EIO; | 803 | success = -EIO; |
798 | } | 804 | } |
799 | } | 805 | } |
800 | 806 | if (block_error && success == 0) | |
807 | success = -EIO; | ||
801 | return success; | 808 | return success; |
802 | 809 | ||
803 | failed: | 810 | failed: |
@@ -811,7 +818,7 @@ static int jbd2_revoke_block_csum_verify(journal_t *j, | |||
811 | __be32 provided; | 818 | __be32 provided; |
812 | __u32 calculated; | 819 | __u32 calculated; |
813 | 820 | ||
814 | if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) | 821 | if (!jbd2_journal_has_csum_v2or3(j)) |
815 | return 1; | 822 | return 1; |
816 | 823 | ||
817 | tail = (struct jbd2_journal_revoke_tail *)(buf + j->j_blocksize - | 824 | tail = (struct jbd2_journal_revoke_tail *)(buf + j->j_blocksize - |
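
In the recovery pass, a block whose tag checksum failed used to be skipped with a bare continue, letting replay report success while quietly dropping data. The change records the failure in block_error, keeps replaying the rest of the log, and folds the error into the final return value. The pattern in isolation:

    #include <errno.h>
    #include <stdio.h>

    static int process(int i) { return (i == 2) ? -1 : 0; /* block 2 is bad */ }

    static int replay(int nblocks)
    {
            int i, block_error = 0, success = 0;

            for (i = 0; i < nblocks; i++) {
                    if (process(i)) {
                            fprintf(stderr, "bad checksum on block %d\n", i);
                            block_error = 1;   /* remember, but keep going */
                            continue;
                    }
                    /* ...write the replayed block... */
            }
            if (block_error && success == 0)
                    success = -EIO;            /* surface the failure once */
            return success;
    }

    int main(void) { printf("%d\n", replay(4)); return 0; }
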
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c index 198c9c10276d..d5e95a175c92 100644 --- a/fs/jbd2/revoke.c +++ b/fs/jbd2/revoke.c | |||
@@ -91,8 +91,8 @@ | |||
91 | #include <linux/list.h> | 91 | #include <linux/list.h> |
92 | #include <linux/init.h> | 92 | #include <linux/init.h> |
93 | #include <linux/bio.h> | 93 | #include <linux/bio.h> |
94 | #endif | ||
95 | #include <linux/log2.h> | 94 | #include <linux/log2.h> |
95 | #endif | ||
96 | 96 | ||
97 | static struct kmem_cache *jbd2_revoke_record_cache; | 97 | static struct kmem_cache *jbd2_revoke_record_cache; |
98 | static struct kmem_cache *jbd2_revoke_table_cache; | 98 | static struct kmem_cache *jbd2_revoke_table_cache; |
@@ -597,7 +597,7 @@ static void write_one_revoke_record(journal_t *journal, | |||
597 | offset = *offsetp; | 597 | offset = *offsetp; |
598 | 598 | ||
599 | /* Do we need to leave space at the end for a checksum? */ | 599 | /* Do we need to leave space at the end for a checksum? */ |
600 | if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) | 600 | if (jbd2_journal_has_csum_v2or3(journal)) |
601 | csum_size = sizeof(struct jbd2_journal_revoke_tail); | 601 | csum_size = sizeof(struct jbd2_journal_revoke_tail); |
602 | 602 | ||
603 | /* Make sure we have a descriptor with space left for the record */ | 603 | /* Make sure we have a descriptor with space left for the record */ |
@@ -644,7 +644,7 @@ static void jbd2_revoke_csum_set(journal_t *j, struct buffer_head *bh) | |||
644 | struct jbd2_journal_revoke_tail *tail; | 644 | struct jbd2_journal_revoke_tail *tail; |
645 | __u32 csum; | 645 | __u32 csum; |
646 | 646 | ||
647 | if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) | 647 | if (!jbd2_journal_has_csum_v2or3(j)) |
648 | return; | 648 | return; |
649 | 649 | ||
650 | tail = (struct jbd2_journal_revoke_tail *)(bh->b_data + j->j_blocksize - | 650 | tail = (struct jbd2_journal_revoke_tail *)(bh->b_data + j->j_blocksize - |
diff --git a/fs/lockd/Makefile b/fs/lockd/Makefile index ca58d64374ca..9b320cc2a8cf 100644 --- a/fs/lockd/Makefile +++ b/fs/lockd/Makefile | |||
@@ -5,6 +5,7 @@ | |||
5 | obj-$(CONFIG_LOCKD) += lockd.o | 5 | obj-$(CONFIG_LOCKD) += lockd.o |
6 | 6 | ||
7 | lockd-objs-y := clntlock.o clntproc.o clntxdr.o host.o svc.o svclock.o \ | 7 | lockd-objs-y := clntlock.o clntproc.o clntxdr.o host.o svc.o svclock.o \ |
8 | svcshare.o svcproc.o svcsubs.o mon.o xdr.o grace.o | 8 | svcshare.o svcproc.o svcsubs.o mon.o xdr.o |
9 | lockd-objs-$(CONFIG_LOCKD_V4) += clnt4xdr.o xdr4.o svc4proc.o | 9 | lockd-objs-$(CONFIG_LOCKD_V4) += clnt4xdr.o xdr4.o svc4proc.o |
10 | lockd-objs-$(CONFIG_PROC_FS) += procfs.o | ||
10 | lockd-objs := $(lockd-objs-y) | 11 | lockd-objs := $(lockd-objs-y) |
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c index daa8e7514eae..9106f42c472c 100644 --- a/fs/lockd/mon.c +++ b/fs/lockd/mon.c | |||
@@ -159,6 +159,12 @@ static int nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res, | |||
159 | 159 | ||
160 | msg.rpc_proc = &clnt->cl_procinfo[proc]; | 160 | msg.rpc_proc = &clnt->cl_procinfo[proc]; |
161 | status = rpc_call_sync(clnt, &msg, RPC_TASK_SOFTCONN); | 161 | status = rpc_call_sync(clnt, &msg, RPC_TASK_SOFTCONN); |
162 | if (status == -ECONNREFUSED) { | ||
163 | dprintk("lockd: NSM upcall RPC failed, status=%d, forcing rebind\n", | ||
164 | status); | ||
165 | rpc_force_rebind(clnt); | ||
166 | status = rpc_call_sync(clnt, &msg, RPC_TASK_SOFTCONN); | ||
167 | } | ||
162 | if (status < 0) | 168 | if (status < 0) |
163 | dprintk("lockd: NSM upcall RPC failed, status=%d\n", | 169 | dprintk("lockd: NSM upcall RPC failed, status=%d\n", |
164 | status); | 170 | status); |
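
If statd restarted, it may have re-registered on a different port, so an upcall through a cached binding fails with ECONNREFUSED. The mon.c change forces a portmap rebind and retries the call once before giving up. A generic retry-once sketch (the stub names are illustrative):

    #include <errno.h>
    #include <stdio.h>

    static int attempts;
    static int rpc_call(void)      { return (attempts++ == 0) ? -ECONNREFUSED : 0; }
    static void force_rebind(void) { fprintf(stderr, "rebinding to new port\n"); }

    /* Retry exactly once after refreshing the binding; any other error,
     * or a second refusal, is returned to the caller. */
    static int call_with_rebind(void)
    {
            int status = rpc_call();

            if (status == -ECONNREFUSED) {
                    force_rebind();
                    status = rpc_call();
            }
            return status;
    }

    int main(void) { printf("%d\n", call_with_rebind()); return 0; }
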
diff --git a/fs/lockd/netns.h b/fs/lockd/netns.h index 5010b55628b4..097bfa3adb1c 100644 --- a/fs/lockd/netns.h +++ b/fs/lockd/netns.h | |||
@@ -11,7 +11,6 @@ struct lockd_net { | |||
11 | 11 | ||
12 | struct delayed_work grace_period_end; | 12 | struct delayed_work grace_period_end; |
13 | struct lock_manager lockd_manager; | 13 | struct lock_manager lockd_manager; |
14 | struct list_head grace_list; | ||
15 | 14 | ||
16 | spinlock_t nsm_clnt_lock; | 15 | spinlock_t nsm_clnt_lock; |
17 | unsigned int nsm_users; | 16 | unsigned int nsm_users; |
diff --git a/fs/lockd/procfs.c b/fs/lockd/procfs.c new file mode 100644 index 000000000000..2a0a98480e39 --- /dev/null +++ b/fs/lockd/procfs.c | |||
@@ -0,0 +1,92 @@ | |||
1 | /* | ||
2 | * Procfs support for lockd | ||
3 | * | ||
4 | * Copyright (c) 2014 Jeff Layton <jlayton@primarydata.com> | ||
5 | */ | ||
6 | |||
7 | #include <linux/fs.h> | ||
8 | #include <linux/proc_fs.h> | ||
9 | #include <linux/module.h> | ||
10 | #include <linux/nsproxy.h> | ||
11 | #include <net/net_namespace.h> | ||
12 | |||
13 | #include "netns.h" | ||
14 | #include "procfs.h" | ||
15 | |||
16 | /* | ||
17 | * We only allow strings that start with 'Y', 'y', or '1'. | ||
18 | */ | ||
19 | static ssize_t | ||
20 | nlm_end_grace_write(struct file *file, const char __user *buf, size_t size, | ||
21 | loff_t *pos) | ||
22 | { | ||
23 | char *data; | ||
24 | struct lockd_net *ln = net_generic(current->nsproxy->net_ns, | ||
25 | lockd_net_id); | ||
26 | |||
27 | if (size < 1) | ||
28 | return -EINVAL; | ||
29 | |||
30 | data = simple_transaction_get(file, buf, size); | ||
31 | if (IS_ERR(data)) | ||
32 | return PTR_ERR(data); | ||
33 | |||
34 | switch(data[0]) { | ||
35 | case 'Y': | ||
36 | case 'y': | ||
37 | case '1': | ||
38 | locks_end_grace(&ln->lockd_manager); | ||
39 | break; | ||
40 | default: | ||
41 | return -EINVAL; | ||
42 | } | ||
43 | |||
44 | return size; | ||
45 | } | ||
46 | |||
47 | static ssize_t | ||
48 | nlm_end_grace_read(struct file *file, char __user *buf, size_t size, | ||
49 | loff_t *pos) | ||
50 | { | ||
51 | struct lockd_net *ln = net_generic(current->nsproxy->net_ns, | ||
52 | lockd_net_id); | ||
53 | char resp[3]; | ||
54 | |||
55 | resp[0] = list_empty(&ln->lockd_manager.list) ? 'Y' : 'N'; | ||
56 | resp[1] = '\n'; | ||
57 | resp[2] = '\0'; | ||
58 | |||
59 | return simple_read_from_buffer(buf, size, pos, resp, sizeof(resp)); | ||
60 | } | ||
61 | |||
62 | static const struct file_operations lockd_end_grace_operations = { | ||
63 | .write = nlm_end_grace_write, | ||
64 | .read = nlm_end_grace_read, | ||
65 | .llseek = default_llseek, | ||
66 | .release = simple_transaction_release, | ||
67 | .owner = THIS_MODULE, | ||
68 | }; | ||
69 | |||
70 | int __init | ||
71 | lockd_create_procfs(void) | ||
72 | { | ||
73 | struct proc_dir_entry *entry; | ||
74 | |||
75 | entry = proc_mkdir("fs/lockd", NULL); | ||
76 | if (!entry) | ||
77 | return -ENOMEM; | ||
78 | entry = proc_create("nlm_end_grace", S_IRUGO|S_IWUSR, entry, | ||
79 | &lockd_end_grace_operations); | ||
80 | if (!entry) { | ||
81 | remove_proc_entry("fs/lockd", NULL); | ||
82 | return -ENOMEM; | ||
83 | } | ||
84 | return 0; | ||
85 | } | ||
86 | |||
87 | void __exit | ||
88 | lockd_remove_procfs(void) | ||
89 | { | ||
90 | remove_proc_entry("fs/lockd/nlm_end_grace", NULL); | ||
91 | remove_proc_entry("fs/lockd", NULL); | ||
92 | } | ||
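
The new /proc/fs/lockd/nlm_end_grace file lets userspace, for example an HA failover agent, end lockd's grace period early by writing a string that starts with 'Y', 'y', or '1'; reading it reports 'Y' or 'N' depending on whether the grace period has already ended. A minimal userspace writer, assuming the file exists as created above:

    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
            /* Path created by lockd_create_procfs() above. */
            int fd = open("/proc/fs/lockd/nlm_end_grace", O_WRONLY);

            if (fd < 0) {
                    perror("open");
                    return 1;
            }
            /* Any string starting with 'Y', 'y' or '1' ends the grace
             * period; anything else gets -EINVAL back. */
            if (write(fd, "Y\n", 2) < 0)
                    perror("write");
            close(fd);
            return 0;
    }
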
diff --git a/fs/lockd/procfs.h b/fs/lockd/procfs.h new file mode 100644 index 000000000000..2257a1311027 --- /dev/null +++ b/fs/lockd/procfs.h | |||
@@ -0,0 +1,28 @@ | |||
1 | /* | ||
2 | * Procfs support for lockd | ||
3 | * | ||
4 | * Copyright (c) 2014 Jeff Layton <jlayton@primarydata.com> | ||
5 | */ | ||
6 | #ifndef _LOCKD_PROCFS_H | ||
7 | #define _LOCKD_PROCFS_H | ||
8 | |||
9 | #include <linux/kconfig.h> | ||
10 | |||
11 | #if IS_ENABLED(CONFIG_PROC_FS) | ||
12 | int lockd_create_procfs(void); | ||
13 | void lockd_remove_procfs(void); | ||
14 | #else | ||
15 | static inline int | ||
16 | lockd_create_procfs(void) | ||
17 | { | ||
18 | return 0; | ||
19 | } | ||
20 | |||
21 | static inline void | ||
22 | lockd_remove_procfs(void) | ||
23 | { | ||
24 | return; | ||
25 | } | ||
26 | #endif /* IS_ENABLED(CONFIG_PROC_FS) */ | ||
27 | |||
28 | #endif /* _LOCKD_PROCFS_H */ | ||
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 09857b48d0c3..d1bb7ecfd201 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c | |||
@@ -36,6 +36,7 @@ | |||
36 | #include <linux/nfs.h> | 36 | #include <linux/nfs.h> |
37 | 37 | ||
38 | #include "netns.h" | 38 | #include "netns.h" |
39 | #include "procfs.h" | ||
39 | 40 | ||
40 | #define NLMDBG_FACILITY NLMDBG_SVC | 41 | #define NLMDBG_FACILITY NLMDBG_SVC |
41 | #define LOCKD_BUFSIZE (1024 + NLMSVC_XDRSIZE) | 42 | #define LOCKD_BUFSIZE (1024 + NLMSVC_XDRSIZE) |
@@ -253,13 +254,11 @@ static int lockd_up_net(struct svc_serv *serv, struct net *net) | |||
253 | 254 | ||
254 | error = make_socks(serv, net); | 255 | error = make_socks(serv, net); |
255 | if (error < 0) | 256 | if (error < 0) |
256 | goto err_socks; | 257 | goto err_bind; |
257 | set_grace_period(net); | 258 | set_grace_period(net); |
258 | dprintk("lockd_up_net: per-net data created; net=%p\n", net); | 259 | dprintk("lockd_up_net: per-net data created; net=%p\n", net); |
259 | return 0; | 260 | return 0; |
260 | 261 | ||
261 | err_socks: | ||
262 | svc_rpcb_cleanup(serv, net); | ||
263 | err_bind: | 262 | err_bind: |
264 | ln->nlmsvc_users--; | 263 | ln->nlmsvc_users--; |
265 | return error; | 264 | return error; |
@@ -586,7 +585,7 @@ static int lockd_init_net(struct net *net) | |||
586 | struct lockd_net *ln = net_generic(net, lockd_net_id); | 585 | struct lockd_net *ln = net_generic(net, lockd_net_id); |
587 | 586 | ||
588 | INIT_DELAYED_WORK(&ln->grace_period_end, grace_ender); | 587 | INIT_DELAYED_WORK(&ln->grace_period_end, grace_ender); |
589 | INIT_LIST_HEAD(&ln->grace_list); | 588 | INIT_LIST_HEAD(&ln->lockd_manager.list); |
590 | spin_lock_init(&ln->nsm_clnt_lock); | 589 | spin_lock_init(&ln->nsm_clnt_lock); |
591 | return 0; | 590 | return 0; |
592 | } | 591 | } |
@@ -620,8 +619,15 @@ static int __init init_nlm(void) | |||
620 | err = register_pernet_subsys(&lockd_net_ops); | 619 | err = register_pernet_subsys(&lockd_net_ops); |
621 | if (err) | 620 | if (err) |
622 | goto err_pernet; | 621 | goto err_pernet; |
622 | |||
623 | err = lockd_create_procfs(); | ||
624 | if (err) | ||
625 | goto err_procfs; | ||
626 | |||
623 | return 0; | 627 | return 0; |
624 | 628 | ||
629 | err_procfs: | ||
630 | unregister_pernet_subsys(&lockd_net_ops); | ||
625 | err_pernet: | 631 | err_pernet: |
626 | #ifdef CONFIG_SYSCTL | 632 | #ifdef CONFIG_SYSCTL |
627 | unregister_sysctl_table(nlm_sysctl_table); | 633 | unregister_sysctl_table(nlm_sysctl_table); |
@@ -634,6 +640,7 @@ static void __exit exit_nlm(void) | |||
634 | { | 640 | { |
635 | /* FIXME: delete all NLM clients */ | 641 | /* FIXME: delete all NLM clients */ |
636 | nlm_shutdown_hosts(); | 642 | nlm_shutdown_hosts(); |
643 | lockd_remove_procfs(); | ||
637 | unregister_pernet_subsys(&lockd_net_ops); | 644 | unregister_pernet_subsys(&lockd_net_ops); |
638 | #ifdef CONFIG_SYSCTL | 645 | #ifdef CONFIG_SYSCTL |
639 | unregister_sysctl_table(nlm_sysctl_table); | 646 | unregister_sysctl_table(nlm_sysctl_table); |
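
The procfs hookup slots into init_nlm()'s existing goto-based unwind: each failure label releases exactly what was set up before it, in reverse order, so a failed lockd_create_procfs() unregisters the pernet subsystem and so on down the chain. The idiom in miniature:

    #include <stdio.h>

    static int step_a(void)  { puts("a up");     return 0; }
    static int step_b(void)  { puts("b failed"); return -1; /* simulate */ }
    static void undo_a(void) { puts("a down"); }

    static int init(void)
    {
            int err;

            err = step_a();
            if (err)
                    goto err_a;
            err = step_b();
            if (err)
                    goto err_b;     /* unwind everything before b */
            return 0;

    err_b:
            undo_a();
    err_a:
            return err;
    }

    int main(void) { return init() ? 1 : 0; }
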
diff --git a/fs/mpage.c b/fs/mpage.c index 5f9ed622274f..3e79220babac 100644 --- a/fs/mpage.c +++ b/fs/mpage.c | |||
@@ -28,6 +28,7 @@ | |||
28 | #include <linux/backing-dev.h> | 28 | #include <linux/backing-dev.h> |
29 | #include <linux/pagevec.h> | 29 | #include <linux/pagevec.h> |
30 | #include <linux/cleancache.h> | 30 | #include <linux/cleancache.h> |
31 | #include "internal.h" | ||
31 | 32 | ||
32 | /* | 33 | /* |
33 | * I/O completion handler for multipage BIOs. | 34 | * I/O completion handler for multipage BIOs. |
@@ -57,6 +58,7 @@ static void mpage_end_io(struct bio *bio, int err) | |||
57 | static struct bio *mpage_bio_submit(int rw, struct bio *bio) | 58 | static struct bio *mpage_bio_submit(int rw, struct bio *bio) |
58 | { | 59 | { |
59 | bio->bi_end_io = mpage_end_io; | 60 | bio->bi_end_io = mpage_end_io; |
61 | guard_bio_eod(rw, bio); | ||
60 | submit_bio(rw, bio); | 62 | submit_bio(rw, bio); |
61 | return NULL; | 63 | return NULL; |
62 | } | 64 | } |
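
mpage_bio_submit() now calls guard_bio_eod(), reached through fs/internal.h, so a bio built over the last partial block of a device is trimmed at the end of the device rather than failing in the block layer. A hedged sketch of the clamping arithmetic, assuming 512-byte sectors (in the kernel the truncated tail of a read would also be zeroed):

    #include <stdint.h>
    #include <stdio.h>

    /* Clamp an I/O of `len` bytes starting at sector `start` so it
     * does not run past a device of `maxsector` sectors. */
    static uint32_t clamp_to_eod(uint64_t start, uint32_t len, uint64_t maxsector)
    {
            uint64_t end = start + (len >> 9);

            if (end <= maxsector)
                    return len;                       /* fits entirely   */
            if (start >= maxsector)
                    return 0;                         /* wholly past EOD */
            return (uint32_t)((maxsector - start) << 9);
    }

    int main(void)
    {
            printf("%u\n", clamp_to_eod(90, 8192, 100)); /* 5120: trimmed */
            return 0;
    }
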
diff --git a/fs/namei.c b/fs/namei.c index a996bb48dfab..a7b05bf82d31 100644 --- a/fs/namei.c +++ b/fs/namei.c | |||
@@ -34,6 +34,7 @@ | |||
34 | #include <linux/device_cgroup.h> | 34 | #include <linux/device_cgroup.h> |
35 | #include <linux/fs_struct.h> | 35 | #include <linux/fs_struct.h> |
36 | #include <linux/posix_acl.h> | 36 | #include <linux/posix_acl.h> |
37 | #include <linux/hash.h> | ||
37 | #include <asm/uaccess.h> | 38 | #include <asm/uaccess.h> |
38 | 39 | ||
39 | #include "internal.h" | 40 | #include "internal.h" |
@@ -643,24 +644,22 @@ static int complete_walk(struct nameidata *nd) | |||
643 | 644 | ||
644 | static __always_inline void set_root(struct nameidata *nd) | 645 | static __always_inline void set_root(struct nameidata *nd) |
645 | { | 646 | { |
646 | if (!nd->root.mnt) | 647 | get_fs_root(current->fs, &nd->root); |
647 | get_fs_root(current->fs, &nd->root); | ||
648 | } | 648 | } |
649 | 649 | ||
650 | static int link_path_walk(const char *, struct nameidata *); | 650 | static int link_path_walk(const char *, struct nameidata *); |
651 | 651 | ||
652 | static __always_inline void set_root_rcu(struct nameidata *nd) | 652 | static __always_inline unsigned set_root_rcu(struct nameidata *nd) |
653 | { | 653 | { |
654 | if (!nd->root.mnt) { | 654 | struct fs_struct *fs = current->fs; |
655 | struct fs_struct *fs = current->fs; | 655 | unsigned seq, res; |
656 | unsigned seq; | ||
657 | 656 | ||
658 | do { | 657 | do { |
659 | seq = read_seqcount_begin(&fs->seq); | 658 | seq = read_seqcount_begin(&fs->seq); |
660 | nd->root = fs->root; | 659 | nd->root = fs->root; |
661 | nd->seq = __read_seqcount_begin(&nd->root.dentry->d_seq); | 660 | res = __read_seqcount_begin(&nd->root.dentry->d_seq); |
662 | } while (read_seqcount_retry(&fs->seq, seq)); | 661 | } while (read_seqcount_retry(&fs->seq, seq)); |
663 | } | 662 | return res; |
664 | } | 663 | } |
665 | 664 | ||
666 | static void path_put_conditional(struct path *path, struct nameidata *nd) | 665 | static void path_put_conditional(struct path *path, struct nameidata *nd) |
@@ -860,7 +859,8 @@ follow_link(struct path *link, struct nameidata *nd, void **p) | |||
860 | return PTR_ERR(s); | 859 | return PTR_ERR(s); |
861 | } | 860 | } |
862 | if (*s == '/') { | 861 | if (*s == '/') { |
863 | set_root(nd); | 862 | if (!nd->root.mnt) |
863 | set_root(nd); | ||
864 | path_put(&nd->path); | 864 | path_put(&nd->path); |
865 | nd->path = nd->root; | 865 | nd->path = nd->root; |
866 | path_get(&nd->root); | 866 | path_get(&nd->root); |
@@ -1137,13 +1137,15 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path, | |||
1137 | */ | 1137 | */ |
1138 | *inode = path->dentry->d_inode; | 1138 | *inode = path->dentry->d_inode; |
1139 | } | 1139 | } |
1140 | return read_seqretry(&mount_lock, nd->m_seq) && | 1140 | return !read_seqretry(&mount_lock, nd->m_seq) && |
1141 | !(path->dentry->d_flags & DCACHE_NEED_AUTOMOUNT); | 1141 | !(path->dentry->d_flags & DCACHE_NEED_AUTOMOUNT); |
1142 | } | 1142 | } |
1143 | 1143 | ||
1144 | static int follow_dotdot_rcu(struct nameidata *nd) | 1144 | static int follow_dotdot_rcu(struct nameidata *nd) |
1145 | { | 1145 | { |
1146 | set_root_rcu(nd); | 1146 | struct inode *inode = nd->inode; |
1147 | if (!nd->root.mnt) | ||
1148 | set_root_rcu(nd); | ||
1147 | 1149 | ||
1148 | while (1) { | 1150 | while (1) { |
1149 | if (nd->path.dentry == nd->root.dentry && | 1151 | if (nd->path.dentry == nd->root.dentry && |
@@ -1155,6 +1157,7 @@ static int follow_dotdot_rcu(struct nameidata *nd) | |||
1155 | struct dentry *parent = old->d_parent; | 1157 | struct dentry *parent = old->d_parent; |
1156 | unsigned seq; | 1158 | unsigned seq; |
1157 | 1159 | ||
1160 | inode = parent->d_inode; | ||
1158 | seq = read_seqcount_begin(&parent->d_seq); | 1161 | seq = read_seqcount_begin(&parent->d_seq); |
1159 | if (read_seqcount_retry(&old->d_seq, nd->seq)) | 1162 | if (read_seqcount_retry(&old->d_seq, nd->seq)) |
1160 | goto failed; | 1163 | goto failed; |
@@ -1164,6 +1167,7 @@ static int follow_dotdot_rcu(struct nameidata *nd) | |||
1164 | } | 1167 | } |
1165 | if (!follow_up_rcu(&nd->path)) | 1168 | if (!follow_up_rcu(&nd->path)) |
1166 | break; | 1169 | break; |
1170 | inode = nd->path.dentry->d_inode; | ||
1167 | nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq); | 1171 | nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq); |
1168 | } | 1172 | } |
1169 | while (d_mountpoint(nd->path.dentry)) { | 1173 | while (d_mountpoint(nd->path.dentry)) { |
@@ -1173,11 +1177,12 @@ static int follow_dotdot_rcu(struct nameidata *nd) | |||
1173 | break; | 1177 | break; |
1174 | nd->path.mnt = &mounted->mnt; | 1178 | nd->path.mnt = &mounted->mnt; |
1175 | nd->path.dentry = mounted->mnt.mnt_root; | 1179 | nd->path.dentry = mounted->mnt.mnt_root; |
1180 | inode = nd->path.dentry->d_inode; | ||
1176 | nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq); | 1181 | nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq); |
1177 | if (!read_seqretry(&mount_lock, nd->m_seq)) | 1182 | if (read_seqretry(&mount_lock, nd->m_seq)) |
1178 | goto failed; | 1183 | goto failed; |
1179 | } | 1184 | } |
1180 | nd->inode = nd->path.dentry->d_inode; | 1185 | nd->inode = inode; |
1181 | return 0; | 1186 | return 0; |
1182 | 1187 | ||
1183 | failed: | 1188 | failed: |
@@ -1256,7 +1261,8 @@ static void follow_mount(struct path *path) | |||
1256 | 1261 | ||
1257 | static void follow_dotdot(struct nameidata *nd) | 1262 | static void follow_dotdot(struct nameidata *nd) |
1258 | { | 1263 | { |
1259 | set_root(nd); | 1264 | if (!nd->root.mnt) |
1265 | set_root(nd); | ||
1260 | 1266 | ||
1261 | while(1) { | 1267 | while(1) { |
1262 | struct dentry *old = nd->path.dentry; | 1268 | struct dentry *old = nd->path.dentry; |
@@ -1634,8 +1640,7 @@ static inline int nested_symlink(struct path *path, struct nameidata *nd) | |||
1634 | 1640 | ||
1635 | static inline unsigned int fold_hash(unsigned long hash) | 1641 | static inline unsigned int fold_hash(unsigned long hash) |
1636 | { | 1642 | { |
1637 | hash += hash >> (8*sizeof(int)); | 1643 | return hash_64(hash, 32); |
1638 | return hash; | ||
1639 | } | 1644 | } |
1640 | 1645 | ||
1641 | #else /* 32-bit case */ | 1646 | #else /* 32-bit case */ |
@@ -1669,9 +1674,9 @@ EXPORT_SYMBOL(full_name_hash); | |||
1669 | 1674 | ||
1670 | /* | 1675 | /* |
1671 | * Calculate the length and hash of the path component, and | 1676 | * Calculate the length and hash of the path component, and |
1672 | * return the length of the component; | 1677 | * return the "hash_len" as the result. |
1673 | */ | 1678 | */ |
1674 | static inline unsigned long hash_name(const char *name, unsigned int *hashp) | 1679 | static inline u64 hash_name(const char *name) |
1675 | { | 1680 | { |
1676 | unsigned long a, b, adata, bdata, mask, hash, len; | 1681 | unsigned long a, b, adata, bdata, mask, hash, len; |
1677 | const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS; | 1682 | const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS; |
@@ -1691,9 +1696,8 @@ static inline unsigned long hash_name(const char *name, unsigned int *hashp) | |||
1691 | mask = create_zero_mask(adata | bdata); | 1696 | mask = create_zero_mask(adata | bdata); |
1692 | 1697 | ||
1693 | hash += a & zero_bytemask(mask); | 1698 | hash += a & zero_bytemask(mask); |
1694 | *hashp = fold_hash(hash); | 1699 | len += find_zero(mask); |
1695 | 1700 | return hashlen_create(fold_hash(hash), len); | |
1696 | return len + find_zero(mask); | ||
1697 | } | 1701 | } |
1698 | 1702 | ||
1699 | #else | 1703 | #else |
@@ -1711,7 +1715,7 @@ EXPORT_SYMBOL(full_name_hash); | |||
1711 | * We know there's a real path component here of at least | 1715 | * We know there's a real path component here of at least |
1712 | * one character. | 1716 | * one character. |
1713 | */ | 1717 | */ |
1714 | static inline unsigned long hash_name(const char *name, unsigned int *hashp) | 1718 | static inline u64 hash_name(const char *name) |
1715 | { | 1719 | { |
1716 | unsigned long hash = init_name_hash(); | 1720 | unsigned long hash = init_name_hash(); |
1717 | unsigned long len = 0, c; | 1721 | unsigned long len = 0, c; |
@@ -1722,8 +1726,7 @@ static inline unsigned long hash_name(const char *name, unsigned int *hashp) | |||
1722 | hash = partial_name_hash(c, hash); | 1726 | hash = partial_name_hash(c, hash); |
1723 | c = (unsigned char)name[len]; | 1727 | c = (unsigned char)name[len]; |
1724 | } while (c && c != '/'); | 1728 | } while (c && c != '/'); |
1725 | *hashp = end_name_hash(hash); | 1729 | return hashlen_create(end_name_hash(hash), len); |
1726 | return len; | ||
1727 | } | 1730 | } |
1728 | 1731 | ||
1729 | #endif | 1732 | #endif |
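
Both variants of hash_name() now return a single u64 that packs the component's 32-bit hash together with its 32-bit length, matching the qstr hash_len union so link_path_walk() can carry and assign both in one operation. The packing helpers, mirrored in userspace:

    #include <stdint.h>
    #include <stdio.h>

    /* Same packing as the kernel's hashlen helpers: low 32 bits hold
     * the hash, high 32 bits hold the component length. */
    #define hashlen_create(hash, len) ((uint64_t)(len) << 32 | (uint32_t)(hash))
    #define hashlen_hash(hl)          ((uint32_t)(hl))
    #define hashlen_len(hl)           ((uint32_t)((hl) >> 32))

    int main(void)
    {
            uint64_t hl = hashlen_create(0xdeadbeef, 7);

            printf("hash=%#x len=%u\n", hashlen_hash(hl), hashlen_len(hl));
            return 0;   /* prints: hash=0xdeadbeef len=7 */
    }
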
@@ -1748,20 +1751,17 @@ static int link_path_walk(const char *name, struct nameidata *nd) | |||
1748 | 1751 | ||
1749 | /* At this point we know we have a real path component. */ | 1752 | /* At this point we know we have a real path component. */ |
1750 | for(;;) { | 1753 | for(;;) { |
1751 | struct qstr this; | 1754 | u64 hash_len; |
1752 | long len; | ||
1753 | int type; | 1755 | int type; |
1754 | 1756 | ||
1755 | err = may_lookup(nd); | 1757 | err = may_lookup(nd); |
1756 | if (err) | 1758 | if (err) |
1757 | break; | 1759 | break; |
1758 | 1760 | ||
1759 | len = hash_name(name, &this.hash); | 1761 | hash_len = hash_name(name); |
1760 | this.name = name; | ||
1761 | this.len = len; | ||
1762 | 1762 | ||
1763 | type = LAST_NORM; | 1763 | type = LAST_NORM; |
1764 | if (name[0] == '.') switch (len) { | 1764 | if (name[0] == '.') switch (hashlen_len(hash_len)) { |
1765 | case 2: | 1765 | case 2: |
1766 | if (name[1] == '.') { | 1766 | if (name[1] == '.') { |
1767 | type = LAST_DOTDOT; | 1767 | type = LAST_DOTDOT; |
@@ -1775,29 +1775,32 @@ static int link_path_walk(const char *name, struct nameidata *nd) | |||
1775 | struct dentry *parent = nd->path.dentry; | 1775 | struct dentry *parent = nd->path.dentry; |
1776 | nd->flags &= ~LOOKUP_JUMPED; | 1776 | nd->flags &= ~LOOKUP_JUMPED; |
1777 | if (unlikely(parent->d_flags & DCACHE_OP_HASH)) { | 1777 | if (unlikely(parent->d_flags & DCACHE_OP_HASH)) { |
1778 | struct qstr this = { { .hash_len = hash_len }, .name = name }; | ||
1778 | err = parent->d_op->d_hash(parent, &this); | 1779 | err = parent->d_op->d_hash(parent, &this); |
1779 | if (err < 0) | 1780 | if (err < 0) |
1780 | break; | 1781 | break; |
1782 | hash_len = this.hash_len; | ||
1783 | name = this.name; | ||
1781 | } | 1784 | } |
1782 | } | 1785 | } |
1783 | 1786 | ||
1784 | nd->last = this; | 1787 | nd->last.hash_len = hash_len; |
1788 | nd->last.name = name; | ||
1785 | nd->last_type = type; | 1789 | nd->last_type = type; |
1786 | 1790 | ||
1787 | if (!name[len]) | 1791 | name += hashlen_len(hash_len); |
1792 | if (!*name) | ||
1788 | return 0; | 1793 | return 0; |
1789 | /* | 1794 | /* |
1790 | * If it wasn't NUL, we know it was '/'. Skip that | 1795 | * If it wasn't NUL, we know it was '/'. Skip that |
1791 | * slash, and continue until no more slashes. | 1796 | * slash, and continue until no more slashes. |
1792 | */ | 1797 | */ |
1793 | do { | 1798 | do { |
1794 | len++; | 1799 | name++; |
1795 | } while (unlikely(name[len] == '/')); | 1800 | } while (unlikely(*name == '/')); |
1796 | if (!name[len]) | 1801 | if (!*name) |
1797 | return 0; | 1802 | return 0; |
1798 | 1803 | ||
1799 | name += len; | ||
1800 | |||
1801 | err = walk_component(nd, &next, LOOKUP_FOLLOW); | 1804 | err = walk_component(nd, &next, LOOKUP_FOLLOW); |
1802 | if (err < 0) | 1805 | if (err < 0) |
1803 | return err; | 1806 | return err; |
@@ -1852,7 +1855,7 @@ static int path_init(int dfd, const char *name, unsigned int flags, | |||
1852 | if (*name=='/') { | 1855 | if (*name=='/') { |
1853 | if (flags & LOOKUP_RCU) { | 1856 | if (flags & LOOKUP_RCU) { |
1854 | rcu_read_lock(); | 1857 | rcu_read_lock(); |
1855 | set_root_rcu(nd); | 1858 | nd->seq = set_root_rcu(nd); |
1856 | } else { | 1859 | } else { |
1857 | set_root(nd); | 1860 | set_root(nd); |
1858 | path_get(&nd->root); | 1861 | path_get(&nd->root); |
@@ -1903,7 +1906,14 @@ static int path_init(int dfd, const char *name, unsigned int flags, | |||
1903 | } | 1906 | } |
1904 | 1907 | ||
1905 | nd->inode = nd->path.dentry->d_inode; | 1908 | nd->inode = nd->path.dentry->d_inode; |
1906 | return 0; | 1909 | if (!(flags & LOOKUP_RCU)) |
1910 | return 0; | ||
1911 | if (likely(!read_seqcount_retry(&nd->path.dentry->d_seq, nd->seq))) | ||
1912 | return 0; | ||
1913 | if (!(nd->flags & LOOKUP_ROOT)) | ||
1914 | nd->root.mnt = NULL; | ||
1915 | rcu_read_unlock(); | ||
1916 | return -ECHILD; | ||
1907 | } | 1917 | } |
1908 | 1918 | ||
1909 | static inline int lookup_last(struct nameidata *nd, struct path *path) | 1919 | static inline int lookup_last(struct nameidata *nd, struct path *path) |
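
path_init() in RCU-walk mode now validates the starting dentry's d_seq before returning; if the sequence moved underneath it, the walk drops out with -ECHILD so the caller can retry in ref-walk mode. The underlying read-retry discipline, sketched as a toy without the memory barriers a real seqcount needs:

    #include <stdio.h>

    /* Toy seqcount: a writer would make `seq` odd while updating and
     * bump it when done; readers snapshot it, read the payload, and
     * retry if the count changed in between. */
    static unsigned seq;
    static int data;

    static unsigned read_begin(void)
    {
            unsigned s;
            do { s = seq; } while (s & 1);   /* wait out a writer */
            return s;
    }
    static int read_retry(unsigned s) { return seq != s; }

    int main(void)
    {
            unsigned s;
            int v;

            do {
                    s = read_begin();
                    v = data;             /* lockless read of the payload */
            } while (read_retry(s));      /* writer raced: try again      */
            printf("%d\n", v);
            return 0;
    }
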
diff --git a/fs/namespace.c b/fs/namespace.c index a01c7730e9af..ef42d9bee212 100644 --- a/fs/namespace.c +++ b/fs/namespace.c | |||
@@ -1217,6 +1217,11 @@ static void namespace_unlock(void) | |||
1217 | head.first->pprev = &head.first; | 1217 | head.first->pprev = &head.first; |
1218 | INIT_HLIST_HEAD(&unmounted); | 1218 | INIT_HLIST_HEAD(&unmounted); |
1219 | 1219 | ||
1220 | /* undo decrements we'd done in umount_tree() */ | ||
1221 | hlist_for_each_entry(mnt, &head, mnt_hash) | ||
1222 | if (mnt->mnt_ex_mountpoint.mnt) | ||
1223 | mntget(mnt->mnt_ex_mountpoint.mnt); | ||
1224 | |||
1220 | up_write(&namespace_sem); | 1225 | up_write(&namespace_sem); |
1221 | 1226 | ||
1222 | synchronize_rcu(); | 1227 | synchronize_rcu(); |
@@ -1253,6 +1258,9 @@ void umount_tree(struct mount *mnt, int how) | |||
1253 | hlist_add_head(&p->mnt_hash, &tmp_list); | 1258 | hlist_add_head(&p->mnt_hash, &tmp_list); |
1254 | } | 1259 | } |
1255 | 1260 | ||
1261 | hlist_for_each_entry(p, &tmp_list, mnt_hash) | ||
1262 | list_del_init(&p->mnt_child); | ||
1263 | |||
1256 | if (how) | 1264 | if (how) |
1257 | propagate_umount(&tmp_list); | 1265 | propagate_umount(&tmp_list); |
1258 | 1266 | ||
@@ -1263,9 +1271,9 @@ void umount_tree(struct mount *mnt, int how) | |||
1263 | p->mnt_ns = NULL; | 1271 | p->mnt_ns = NULL; |
1264 | if (how < 2) | 1272 | if (how < 2) |
1265 | p->mnt.mnt_flags |= MNT_SYNC_UMOUNT; | 1273 | p->mnt.mnt_flags |= MNT_SYNC_UMOUNT; |
1266 | list_del_init(&p->mnt_child); | ||
1267 | if (mnt_has_parent(p)) { | 1274 | if (mnt_has_parent(p)) { |
1268 | put_mountpoint(p->mnt_mp); | 1275 | put_mountpoint(p->mnt_mp); |
1276 | mnt_add_count(p->mnt_parent, -1); | ||
1269 | /* move the reference to mountpoint into ->mnt_ex_mountpoint */ | 1277 | /* move the reference to mountpoint into ->mnt_ex_mountpoint */ |
1270 | p->mnt_ex_mountpoint.dentry = p->mnt_mountpoint; | 1278 | p->mnt_ex_mountpoint.dentry = p->mnt_mountpoint; |
1271 | p->mnt_ex_mountpoint.mnt = &p->mnt_parent->mnt; | 1279 | p->mnt_ex_mountpoint.mnt = &p->mnt_parent->mnt; |
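
The namespace.c changes rebalance mount refcounting during umount propagation: umount_tree() now drops the parent's count when a mount is detached, and namespace_unlock() re-takes a reference on each ex-mountpoint that will still be touched after namespace_sem is released. The general rule, take a reference under the lock for anything used after dropping it, in miniature:

    #include <pthread.h>
    #include <stdio.h>

    struct obj { int refcount; };

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

    static void get(struct obj *o) { o->refcount++; }
    static void put(struct obj *o) { if (--o->refcount == 0) puts("freed"); }

    int main(void)
    {
            struct obj mp = { .refcount = 2 };   /* e.g. a mountpoint */

            pthread_mutex_lock(&lock);
            put(&mp);     /* detach from the tree: drops one reference */
            get(&mp);     /* but we still need it after the unlock...  */
            pthread_mutex_unlock(&lock);

            /* ...late cleanup outside the lock, then the final drops. */
            put(&mp);
            put(&mp);     /* now it really goes away */
            return 0;
    }
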
diff --git a/fs/nfs/blocklayout/Makefile b/fs/nfs/blocklayout/Makefile index d5815505c020..3ca14c36d08b 100644 --- a/fs/nfs/blocklayout/Makefile +++ b/fs/nfs/blocklayout/Makefile | |||
@@ -2,4 +2,5 @@ | |||
2 | # Makefile for the pNFS block layout driver kernel module | 2 | # Makefile for the pNFS block layout driver kernel module |
3 | # | 3 | # |
4 | obj-$(CONFIG_PNFS_BLOCK) += blocklayoutdriver.o | 4 | obj-$(CONFIG_PNFS_BLOCK) += blocklayoutdriver.o |
5 | blocklayoutdriver-objs := blocklayout.o extents.o blocklayoutdev.o blocklayoutdm.o | 5 | |
6 | blocklayoutdriver-y += blocklayout.o dev.o extent_tree.o rpc_pipefs.o | ||
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index cbb1797149d5..5228f201d3d5 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c | |||
@@ -35,7 +35,6 @@ | |||
35 | #include <linux/mount.h> | 35 | #include <linux/mount.h> |
36 | #include <linux/namei.h> | 36 | #include <linux/namei.h> |
37 | #include <linux/bio.h> /* struct bio */ | 37 | #include <linux/bio.h> /* struct bio */ |
38 | #include <linux/buffer_head.h> /* various write calls */ | ||
39 | #include <linux/prefetch.h> | 38 | #include <linux/prefetch.h> |
40 | #include <linux/pagevec.h> | 39 | #include <linux/pagevec.h> |
41 | 40 | ||
@@ -50,40 +49,16 @@ MODULE_LICENSE("GPL"); | |||
50 | MODULE_AUTHOR("Andy Adamson <andros@citi.umich.edu>"); | 49 | MODULE_AUTHOR("Andy Adamson <andros@citi.umich.edu>"); |
51 | MODULE_DESCRIPTION("The NFSv4.1 pNFS Block layout driver"); | 50 | MODULE_DESCRIPTION("The NFSv4.1 pNFS Block layout driver"); |
52 | 51 | ||
53 | static void print_page(struct page *page) | 52 | static bool is_hole(struct pnfs_block_extent *be) |
54 | { | 53 | { |
55 | dprintk("PRINTPAGE page %p\n", page); | 54 | switch (be->be_state) { |
56 | dprintk(" PagePrivate %d\n", PagePrivate(page)); | 55 | case PNFS_BLOCK_NONE_DATA: |
57 | dprintk(" PageUptodate %d\n", PageUptodate(page)); | 56 | return true; |
58 | dprintk(" PageError %d\n", PageError(page)); | 57 | case PNFS_BLOCK_INVALID_DATA: |
59 | dprintk(" PageDirty %d\n", PageDirty(page)); | 58 | return be->be_tag ? false : true; |
60 | dprintk(" PageReferenced %d\n", PageReferenced(page)); | 59 | default: |
61 | dprintk(" PageLocked %d\n", PageLocked(page)); | 60 | return false; |
62 | dprintk(" PageWriteback %d\n", PageWriteback(page)); | 61 | } |
63 | dprintk(" PageMappedToDisk %d\n", PageMappedToDisk(page)); | ||
64 | dprintk("\n"); | ||
65 | } | ||
66 | |||
67 | /* Given the be associated with isect, determine if page data needs to be | ||
68 | * initialized. | ||
69 | */ | ||
70 | static int is_hole(struct pnfs_block_extent *be, sector_t isect) | ||
71 | { | ||
72 | if (be->be_state == PNFS_BLOCK_NONE_DATA) | ||
73 | return 1; | ||
74 | else if (be->be_state != PNFS_BLOCK_INVALID_DATA) | ||
75 | return 0; | ||
76 | else | ||
77 | return !bl_is_sector_init(be->be_inval, isect); | ||
78 | } | ||
79 | |||
80 | /* Given the be associated with isect, determine if page data can be | ||
81 | * written to disk. | ||
82 | */ | ||
83 | static int is_writable(struct pnfs_block_extent *be, sector_t isect) | ||
84 | { | ||
85 | return (be->be_state == PNFS_BLOCK_READWRITE_DATA || | ||
86 | be->be_state == PNFS_BLOCK_INVALID_DATA); | ||
87 | } | 62 | } |
88 | 63 | ||
89 | /* The data we are handed might be spread across several bios. We need | 64 | /* The data we are handed might be spread across several bios. We need |
@@ -91,9 +66,8 @@ static int is_writable(struct pnfs_block_extent *be, sector_t isect) | |||
91 | */ | 66 | */ |
92 | struct parallel_io { | 67 | struct parallel_io { |
93 | struct kref refcnt; | 68 | struct kref refcnt; |
94 | void (*pnfs_callback) (void *data, int num_se); | 69 | void (*pnfs_callback) (void *data); |
95 | void *data; | 70 | void *data; |
96 | int bse_count; | ||
97 | }; | 71 | }; |
98 | 72 | ||
99 | static inline struct parallel_io *alloc_parallel(void *data) | 73 | static inline struct parallel_io *alloc_parallel(void *data) |
@@ -104,7 +78,6 @@ static inline struct parallel_io *alloc_parallel(void *data) | |||
104 | if (rv) { | 78 | if (rv) { |
105 | rv->data = data; | 79 | rv->data = data; |
106 | kref_init(&rv->refcnt); | 80 | kref_init(&rv->refcnt); |
107 | rv->bse_count = 0; | ||
108 | } | 81 | } |
109 | return rv; | 82 | return rv; |
110 | } | 83 | } |
@@ -119,7 +92,7 @@ static void destroy_parallel(struct kref *kref) | |||
119 | struct parallel_io *p = container_of(kref, struct parallel_io, refcnt); | 92 | struct parallel_io *p = container_of(kref, struct parallel_io, refcnt); |
120 | 93 | ||
121 | dprintk("%s enter\n", __func__); | 94 | dprintk("%s enter\n", __func__); |
122 | p->pnfs_callback(p->data, p->bse_count); | 95 | p->pnfs_callback(p->data); |
123 | kfree(p); | 96 | kfree(p); |
124 | } | 97 | } |
125 | 98 | ||
@@ -141,10 +114,9 @@ bl_submit_bio(int rw, struct bio *bio) | |||
141 | return NULL; | 114 | return NULL; |
142 | } | 115 | } |
143 | 116 | ||
144 | static struct bio *bl_alloc_init_bio(int npg, sector_t isect, | 117 | static struct bio * |
145 | struct pnfs_block_extent *be, | 118 | bl_alloc_init_bio(int npg, struct block_device *bdev, sector_t disk_sector, |
146 | void (*end_io)(struct bio *, int err), | 119 | void (*end_io)(struct bio *, int err), struct parallel_io *par) |
147 | struct parallel_io *par) | ||
148 | { | 120 | { |
149 | struct bio *bio; | 121 | struct bio *bio; |
150 | 122 | ||
@@ -156,58 +128,64 @@ static struct bio *bl_alloc_init_bio(int npg, sector_t isect, | |||
156 | } | 128 | } |
157 | 129 | ||
158 | if (bio) { | 130 | if (bio) { |
159 | bio->bi_iter.bi_sector = isect - be->be_f_offset + | 131 | bio->bi_iter.bi_sector = disk_sector; |
160 | be->be_v_offset; | 132 | bio->bi_bdev = bdev; |
161 | bio->bi_bdev = be->be_mdev; | ||
162 | bio->bi_end_io = end_io; | 133 | bio->bi_end_io = end_io; |
163 | bio->bi_private = par; | 134 | bio->bi_private = par; |
164 | } | 135 | } |
165 | return bio; | 136 | return bio; |
166 | } | 137 | } |
167 | 138 | ||
168 | static struct bio *do_add_page_to_bio(struct bio *bio, int npg, int rw, | 139 | static struct bio * |
169 | sector_t isect, struct page *page, | 140 | do_add_page_to_bio(struct bio *bio, int npg, int rw, sector_t isect, |
170 | struct pnfs_block_extent *be, | 141 | struct page *page, struct pnfs_block_dev_map *map, |
171 | void (*end_io)(struct bio *, int err), | 142 | struct pnfs_block_extent *be, |
172 | struct parallel_io *par, | 143 | void (*end_io)(struct bio *, int err), |
173 | unsigned int offset, int len) | 144 | struct parallel_io *par, unsigned int offset, int *len) |
174 | { | 145 | { |
175 | isect = isect + (offset >> SECTOR_SHIFT); | 146 | struct pnfs_block_dev *dev = |
147 | container_of(be->be_device, struct pnfs_block_dev, node); | ||
148 | u64 disk_addr, end; | ||
149 | |||
176 | dprintk("%s: npg %d rw %d isect %llu offset %u len %d\n", __func__, | 150 | dprintk("%s: npg %d rw %d isect %llu offset %u len %d\n", __func__, |
177 | npg, rw, (unsigned long long)isect, offset, len); | 151 | npg, rw, (unsigned long long)isect, offset, *len); |
152 | |||
153 | /* translate to device offset */ | ||
154 | isect += be->be_v_offset; | ||
155 | isect -= be->be_f_offset; | ||
156 | |||
157 | /* translate to physical disk offset */ | ||
158 | disk_addr = (u64)isect << SECTOR_SHIFT; | ||
159 | if (disk_addr < map->start || disk_addr >= map->start + map->len) { | ||
160 | if (!dev->map(dev, disk_addr, map)) | ||
161 | return ERR_PTR(-EIO); | ||
162 | bio = bl_submit_bio(rw, bio); | ||
163 | } | ||
164 | disk_addr += map->disk_offset; | ||
165 | disk_addr -= map->start; | ||
166 | |||
167 | /* limit length to what the device mapping allows */ | ||
168 | end = disk_addr + *len; | ||
169 | if (end >= map->start + map->len) | ||
170 | *len = map->start + map->len - disk_addr; | ||
171 | |||
178 | retry: | 172 | retry: |
179 | if (!bio) { | 173 | if (!bio) { |
180 | bio = bl_alloc_init_bio(npg, isect, be, end_io, par); | 174 | bio = bl_alloc_init_bio(npg, map->bdev, |
175 | disk_addr >> SECTOR_SHIFT, end_io, par); | ||
181 | if (!bio) | 176 | if (!bio) |
182 | return ERR_PTR(-ENOMEM); | 177 | return ERR_PTR(-ENOMEM); |
183 | } | 178 | } |
184 | if (bio_add_page(bio, page, len, offset) < len) { | 179 | if (bio_add_page(bio, page, *len, offset) < *len) { |
185 | bio = bl_submit_bio(rw, bio); | 180 | bio = bl_submit_bio(rw, bio); |
186 | goto retry; | 181 | goto retry; |
187 | } | 182 | } |
188 | return bio; | 183 | return bio; |
189 | } | 184 | } |
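The translation chain above is the core of the new I/O path: a file-relative sector is first rebased into the volume's address space via the extent (be_v_offset - be_f_offset), converted to a byte address, rebased again through the cached per-device mapping (disk_offset - start), and finally has its length clamped to what that mapping covers. A minimal userspace sketch of the same arithmetic, with SECTOR_SHIFT and the struct fields assumed to match the kernel's definitions (the dev->map() refresh on a cache miss is elided):

#include <stdint.h>

#define SECTOR_SHIFT 9 /* assumed: 512-byte sectors, as in the kernel */

/* Mirrors struct pnfs_block_dev_map: one cached slice of the device. */
struct dev_map { uint64_t start, len, disk_offset; };

/* File sector -> volume sector (extent) -> physical disk byte address
 * (device map), clamping *len to the cached mapping, following the same
 * steps as do_add_page_to_bio() above. */
static uint64_t map_to_disk(uint64_t isect, uint64_t be_f_offset,
			    uint64_t be_v_offset, const struct dev_map *map,
			    unsigned int *len)
{
	uint64_t disk_addr, end;

	isect += be_v_offset;			/* extent: file -> volume */
	isect -= be_f_offset;
	disk_addr = isect << SECTOR_SHIFT;	/* volume byte address */

	disk_addr += map->disk_offset;		/* map: volume -> disk */
	disk_addr -= map->start;

	end = disk_addr + *len;			/* stay inside the mapping */
	if (end >= map->start + map->len)
		*len = map->start + map->len - disk_addr;
	return disk_addr;
}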
190 | 185 | ||
191 | static struct bio *bl_add_page_to_bio(struct bio *bio, int npg, int rw, | ||
192 | sector_t isect, struct page *page, | ||
193 | struct pnfs_block_extent *be, | ||
194 | void (*end_io)(struct bio *, int err), | ||
195 | struct parallel_io *par) | ||
196 | { | ||
197 | return do_add_page_to_bio(bio, npg, rw, isect, page, be, | ||
198 | end_io, par, 0, PAGE_CACHE_SIZE); | ||
199 | } | ||
200 | |||
201 | /* This is basically copied from mpage_end_io_read */ | ||
202 | static void bl_end_io_read(struct bio *bio, int err) | 186 | static void bl_end_io_read(struct bio *bio, int err) |
203 | { | 187 | { |
204 | struct parallel_io *par = bio->bi_private; | 188 | struct parallel_io *par = bio->bi_private; |
205 | struct bio_vec *bvec; | ||
206 | int i; | ||
207 | |||
208 | if (!err) | ||
209 | bio_for_each_segment_all(bvec, bio, i) | ||
210 | SetPageUptodate(bvec->bv_page); | ||
211 | 189 | ||
212 | if (err) { | 190 | if (err) { |
213 | struct nfs_pgio_header *header = par->data; | 191 | struct nfs_pgio_header *header = par->data; |
@@ -216,6 +194,7 @@ static void bl_end_io_read(struct bio *bio, int err) | |||
216 | header->pnfs_error = -EIO; | 194 | header->pnfs_error = -EIO; |
217 | pnfs_set_lo_fail(header->lseg); | 195 | pnfs_set_lo_fail(header->lseg); |
218 | } | 196 | } |
197 | |||
219 | bio_put(bio); | 198 | bio_put(bio); |
220 | put_parallel(par); | 199 | put_parallel(par); |
221 | } | 200 | } |
@@ -231,7 +210,7 @@ static void bl_read_cleanup(struct work_struct *work) | |||
231 | } | 210 | } |
232 | 211 | ||
233 | static void | 212 | static void |
234 | bl_end_par_io_read(void *data, int unused) | 213 | bl_end_par_io_read(void *data) |
235 | { | 214 | { |
236 | struct nfs_pgio_header *hdr = data; | 215 | struct nfs_pgio_header *hdr = data; |
237 | 216 | ||
@@ -241,88 +220,78 @@ bl_end_par_io_read(void *data, int unused) | |||
241 | } | 220 | } |
242 | 221 | ||
243 | static enum pnfs_try_status | 222 | static enum pnfs_try_status |
244 | bl_read_pagelist(struct nfs_pgio_header *hdr) | 223 | bl_read_pagelist(struct nfs_pgio_header *header) |
245 | { | 224 | { |
246 | struct nfs_pgio_header *header = hdr; | 225 | struct pnfs_block_layout *bl = BLK_LSEG2EXT(header->lseg); |
247 | int i, hole; | 226 | struct pnfs_block_dev_map map = { .start = NFS4_MAX_UINT64 }; |
248 | struct bio *bio = NULL; | 227 | struct bio *bio = NULL; |
249 | struct pnfs_block_extent *be = NULL, *cow_read = NULL; | 228 | struct pnfs_block_extent be; |
250 | sector_t isect, extent_length = 0; | 229 | sector_t isect, extent_length = 0; |
251 | struct parallel_io *par; | 230 | struct parallel_io *par; |
252 | loff_t f_offset = hdr->args.offset; | 231 | loff_t f_offset = header->args.offset; |
253 | size_t bytes_left = hdr->args.count; | 232 | size_t bytes_left = header->args.count; |
254 | unsigned int pg_offset, pg_len; | 233 | unsigned int pg_offset, pg_len; |
255 | struct page **pages = hdr->args.pages; | 234 | struct page **pages = header->args.pages; |
256 | int pg_index = hdr->args.pgbase >> PAGE_CACHE_SHIFT; | 235 | int pg_index = header->args.pgbase >> PAGE_CACHE_SHIFT; |
257 | const bool is_dio = (header->dreq != NULL); | 236 | const bool is_dio = (header->dreq != NULL); |
237 | struct blk_plug plug; | ||
238 | int i; | ||
258 | 239 | ||
259 | dprintk("%s enter nr_pages %u offset %lld count %u\n", __func__, | 240 | dprintk("%s enter nr_pages %u offset %lld count %u\n", __func__, |
260 | hdr->page_array.npages, f_offset, | 241 | header->page_array.npages, f_offset, |
261 | (unsigned int)hdr->args.count); | 242 | (unsigned int)header->args.count); |
262 | 243 | ||
263 | par = alloc_parallel(hdr); | 244 | par = alloc_parallel(header); |
264 | if (!par) | 245 | if (!par) |
265 | goto use_mds; | 246 | return PNFS_NOT_ATTEMPTED; |
266 | par->pnfs_callback = bl_end_par_io_read; | 247 | par->pnfs_callback = bl_end_par_io_read; |
267 | /* At this point, we can no longer jump to use_mds */ | 248 | |
249 | blk_start_plug(&plug); | ||
268 | 250 | ||
269 | isect = (sector_t) (f_offset >> SECTOR_SHIFT); | 251 | isect = (sector_t) (f_offset >> SECTOR_SHIFT); |
270 | /* Code assumes extents are page-aligned */ | 252 | /* Code assumes extents are page-aligned */ |
271 | for (i = pg_index; i < hdr->page_array.npages; i++) { | 253 | for (i = pg_index; i < header->page_array.npages; i++) { |
272 | if (!extent_length) { | 254 | if (extent_length <= 0) { |
273 | /* We've used up the previous extent */ | 255 | /* We've used up the previous extent */ |
274 | bl_put_extent(be); | ||
275 | bl_put_extent(cow_read); | ||
276 | bio = bl_submit_bio(READ, bio); | 256 | bio = bl_submit_bio(READ, bio); |
257 | |||
277 | /* Get the next one */ | 258 | /* Get the next one */ |
278 | be = bl_find_get_extent(BLK_LSEG2EXT(header->lseg), | 259 | if (!ext_tree_lookup(bl, isect, &be, false)) { |
279 | isect, &cow_read); | ||
280 | if (!be) { | ||
281 | header->pnfs_error = -EIO; | 260 | header->pnfs_error = -EIO; |
282 | goto out; | 261 | goto out; |
283 | } | 262 | } |
284 | extent_length = be->be_length - | 263 | extent_length = be.be_length - (isect - be.be_f_offset); |
285 | (isect - be->be_f_offset); | ||
286 | if (cow_read) { | ||
287 | sector_t cow_length = cow_read->be_length - | ||
288 | (isect - cow_read->be_f_offset); | ||
289 | extent_length = min(extent_length, cow_length); | ||
290 | } | ||
291 | } | 264 | } |
292 | 265 | ||
266 | pg_offset = f_offset & ~PAGE_CACHE_MASK; | ||
293 | if (is_dio) { | 267 | if (is_dio) { |
294 | pg_offset = f_offset & ~PAGE_CACHE_MASK; | ||
295 | if (pg_offset + bytes_left > PAGE_CACHE_SIZE) | 268 | if (pg_offset + bytes_left > PAGE_CACHE_SIZE) |
296 | pg_len = PAGE_CACHE_SIZE - pg_offset; | 269 | pg_len = PAGE_CACHE_SIZE - pg_offset; |
297 | else | 270 | else |
298 | pg_len = bytes_left; | 271 | pg_len = bytes_left; |
299 | |||
300 | f_offset += pg_len; | ||
301 | bytes_left -= pg_len; | ||
302 | isect += (pg_offset >> SECTOR_SHIFT); | ||
303 | } else { | 272 | } else { |
304 | pg_offset = 0; | 273 | BUG_ON(pg_offset != 0); |
305 | pg_len = PAGE_CACHE_SIZE; | 274 | pg_len = PAGE_CACHE_SIZE; |
306 | } | 275 | } |
307 | 276 | ||
308 | hole = is_hole(be, isect); | 277 | isect += (pg_offset >> SECTOR_SHIFT); |
309 | if (hole && !cow_read) { | 278 | extent_length -= (pg_offset >> SECTOR_SHIFT); |
279 | |||
280 | if (is_hole(&be)) { | ||
310 | bio = bl_submit_bio(READ, bio); | 281 | bio = bl_submit_bio(READ, bio); |
311 | /* Fill hole w/ zeroes w/o accessing device */ | 282 | /* Fill hole w/ zeroes w/o accessing device */ |
312 | dprintk("%s Zeroing page for hole\n", __func__); | 283 | dprintk("%s Zeroing page for hole\n", __func__); |
313 | zero_user_segment(pages[i], pg_offset, pg_len); | 284 | zero_user_segment(pages[i], pg_offset, pg_len); |
314 | print_page(pages[i]); | ||
315 | SetPageUptodate(pages[i]); | ||
316 | } else { | ||
317 | struct pnfs_block_extent *be_read; | ||
318 | 285 | ||
319 | be_read = (hole && cow_read) ? cow_read : be; | 286 | /* invalidate map */ |
287 | map.start = NFS4_MAX_UINT64; | ||
288 | } else { | ||
320 | bio = do_add_page_to_bio(bio, | 289 | bio = do_add_page_to_bio(bio, |
321 | hdr->page_array.npages - i, | 290 | header->page_array.npages - i, |
322 | READ, | 291 | READ, |
323 | isect, pages[i], be_read, | 292 | isect, pages[i], &map, &be, |
324 | bl_end_io_read, par, | 293 | bl_end_io_read, par, |
325 | pg_offset, pg_len); | 294 | pg_offset, &pg_len); |
326 | if (IS_ERR(bio)) { | 295 | if (IS_ERR(bio)) { |
327 | header->pnfs_error = PTR_ERR(bio); | 296 | header->pnfs_error = PTR_ERR(bio); |
328 | bio = NULL; | 297 | bio = NULL; |
@@ -330,75 +299,21 @@ bl_read_pagelist(struct nfs_pgio_header *hdr) | |||
330 | } | 299 | } |
331 | } | 300 | } |
332 | isect += (pg_len >> SECTOR_SHIFT); | 301 | isect += (pg_len >> SECTOR_SHIFT); |
333 | extent_length -= PAGE_CACHE_SECTORS; | 302 | extent_length -= (pg_len >> SECTOR_SHIFT); |
303 | f_offset += pg_len; | ||
304 | bytes_left -= pg_len; | ||
334 | } | 305 | } |
335 | if ((isect << SECTOR_SHIFT) >= header->inode->i_size) { | 306 | if ((isect << SECTOR_SHIFT) >= header->inode->i_size) { |
336 | hdr->res.eof = 1; | 307 | header->res.eof = 1; |
337 | hdr->res.count = header->inode->i_size - hdr->args.offset; | 308 | header->res.count = header->inode->i_size - header->args.offset; |
338 | } else { | 309 | } else { |
339 | hdr->res.count = (isect << SECTOR_SHIFT) - hdr->args.offset; | 310 | header->res.count = (isect << SECTOR_SHIFT) - header->args.offset; |
340 | } | 311 | } |
341 | out: | 312 | out: |
342 | bl_put_extent(be); | ||
343 | bl_put_extent(cow_read); | ||
344 | bl_submit_bio(READ, bio); | 313 | bl_submit_bio(READ, bio); |
314 | blk_finish_plug(&plug); | ||
345 | put_parallel(par); | 315 | put_parallel(par); |
346 | return PNFS_ATTEMPTED; | 316 | return PNFS_ATTEMPTED; |
347 | |||
348 | use_mds: | ||
349 | dprintk("Giving up and using normal NFS\n"); | ||
350 | return PNFS_NOT_ATTEMPTED; | ||
351 | } | ||
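In the rewritten read path, direct I/O is the only case that must cope with partial pages: the first page may start mid-page (pg_offset) and the last may be short, and isect is advanced past the unread head before the hole/extent decision is made. A standalone sketch of just that accounting, with PAGE_SIZE and SECTOR_SHIFT assumed and the hole-zeroing/bio work reduced to a comment:

#include <stdint.h>

#define PAGE_SIZE 4096u	/* assumed */
#define SECTOR_SHIFT 9	/* assumed: 512-byte sectors */

/* Per-page accounting of the direct-I/O read case in bl_read_pagelist
 * above; buffered reads always cover whole pages (pg_offset must be 0). */
static void dio_read_accounting(uint64_t f_offset, uint64_t bytes_left)
{
	uint64_t isect = f_offset >> SECTOR_SHIFT;

	while (bytes_left) {
		unsigned int pg_offset = f_offset & (PAGE_SIZE - 1);
		unsigned int pg_len = pg_offset + bytes_left > PAGE_SIZE ?
			PAGE_SIZE - pg_offset : (unsigned int)bytes_left;

		isect += pg_offset >> SECTOR_SHIFT;	/* skip unread head */
		/* ...zero pages[i] for a hole, or add it to the bio... */
		isect += pg_len >> SECTOR_SHIFT;
		f_offset += pg_len;
		bytes_left -= pg_len;
	}
}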
352 | |||
353 | static void mark_extents_written(struct pnfs_block_layout *bl, | ||
354 | __u64 offset, __u32 count) | ||
355 | { | ||
356 | sector_t isect, end; | ||
357 | struct pnfs_block_extent *be; | ||
358 | struct pnfs_block_short_extent *se; | ||
359 | |||
360 | dprintk("%s(%llu, %u)\n", __func__, offset, count); | ||
361 | if (count == 0) | ||
362 | return; | ||
363 | isect = (offset & (long)(PAGE_CACHE_MASK)) >> SECTOR_SHIFT; | ||
364 | end = (offset + count + PAGE_CACHE_SIZE - 1) & (long)(PAGE_CACHE_MASK); | ||
365 | end >>= SECTOR_SHIFT; | ||
366 | while (isect < end) { | ||
367 | sector_t len; | ||
368 | be = bl_find_get_extent(bl, isect, NULL); | ||
369 | BUG_ON(!be); /* FIXME */ | ||
370 | len = min(end, be->be_f_offset + be->be_length) - isect; | ||
371 | if (be->be_state == PNFS_BLOCK_INVALID_DATA) { | ||
372 | se = bl_pop_one_short_extent(be->be_inval); | ||
373 | BUG_ON(!se); | ||
374 | bl_mark_for_commit(be, isect, len, se); | ||
375 | } | ||
376 | isect += len; | ||
377 | bl_put_extent(be); | ||
378 | } | ||
379 | } | ||
380 | |||
381 | static void bl_end_io_write_zero(struct bio *bio, int err) | ||
382 | { | ||
383 | struct parallel_io *par = bio->bi_private; | ||
384 | struct bio_vec *bvec; | ||
385 | int i; | ||
386 | |||
387 | bio_for_each_segment_all(bvec, bio, i) { | ||
388 | /* This is the zeroing page we added */ | ||
389 | end_page_writeback(bvec->bv_page); | ||
390 | page_cache_release(bvec->bv_page); | ||
391 | } | ||
392 | |||
393 | if (unlikely(err)) { | ||
394 | struct nfs_pgio_header *header = par->data; | ||
395 | |||
396 | if (!header->pnfs_error) | ||
397 | header->pnfs_error = -EIO; | ||
398 | pnfs_set_lo_fail(header->lseg); | ||
399 | } | ||
400 | bio_put(bio); | ||
401 | put_parallel(par); | ||
402 | } | 317 | } |
403 | 318 | ||
404 | static void bl_end_io_write(struct bio *bio, int err) | 319 | static void bl_end_io_write(struct bio *bio, int err) |
@@ -421,533 +336,118 @@ static void bl_end_io_write(struct bio *bio, int err) | |||
421 | */ | 336 | */ |
422 | static void bl_write_cleanup(struct work_struct *work) | 337 | static void bl_write_cleanup(struct work_struct *work) |
423 | { | 338 | { |
424 | struct rpc_task *task; | 339 | struct rpc_task *task = container_of(work, struct rpc_task, u.tk_work); |
425 | struct nfs_pgio_header *hdr; | 340 | struct nfs_pgio_header *hdr = |
341 | container_of(task, struct nfs_pgio_header, task); | ||
342 | |||
426 | dprintk("%s enter\n", __func__); | 343 | dprintk("%s enter\n", __func__); |
427 | task = container_of(work, struct rpc_task, u.tk_work); | 344 | |
428 | hdr = container_of(task, struct nfs_pgio_header, task); | ||
429 | if (likely(!hdr->pnfs_error)) { | 345 | if (likely(!hdr->pnfs_error)) { |
430 | /* Marks for LAYOUTCOMMIT */ | 346 | struct pnfs_block_layout *bl = BLK_LSEG2EXT(hdr->lseg); |
431 | mark_extents_written(BLK_LSEG2EXT(hdr->lseg), | 347 | u64 start = hdr->args.offset & (loff_t)PAGE_CACHE_MASK; |
432 | hdr->args.offset, hdr->args.count); | 348 | u64 end = (hdr->args.offset + hdr->args.count + |
349 | PAGE_CACHE_SIZE - 1) & (loff_t)PAGE_CACHE_MASK; | ||
350 | |||
351 | ext_tree_mark_written(bl, start >> SECTOR_SHIFT, | ||
352 | (end - start) >> SECTOR_SHIFT); | ||
433 | } | 353 | } |
354 | |||
434 | pnfs_ld_write_done(hdr); | 355 | pnfs_ld_write_done(hdr); |
435 | } | 356 | } |
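The start/end computation above widens the committed byte range to whole pages before converting to sectors, matching the "we always write out the whole page" policy in the new bl_write_pagelist: for example, offset 5000 with count 3000 becomes start 4096 and end 8192, i.e. sectors 8 through 16. A sketch of the same rounding, with the page and sector constants assumed:

#include <stdint.h>

#define PAGE_SIZE 4096ull		/* assumed PAGE_CACHE_SIZE */
#define PAGE_MASK (~(PAGE_SIZE - 1))
#define SECTOR_SHIFT 9

/* Round a byte range out to page boundaries and return it in 512-byte
 * sectors, as bl_write_cleanup() does before ext_tree_mark_written(). */
static void committed_range(uint64_t offset, uint64_t count,
			    uint64_t *start_sect, uint64_t *nsects)
{
	uint64_t start = offset & PAGE_MASK;			/* round down */
	uint64_t end = (offset + count + PAGE_SIZE - 1) & PAGE_MASK; /* round up */

	*start_sect = start >> SECTOR_SHIFT;
	*nsects = (end - start) >> SECTOR_SHIFT;
}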
436 | 357 | ||
437 | /* Called when the last of the bios associated with a bl_write_pagelist call finishes */ | 358 | /* Called when the last of the bios associated with a bl_write_pagelist call finishes */ |
438 | static void bl_end_par_io_write(void *data, int num_se) | 359 | static void bl_end_par_io_write(void *data) |
439 | { | 360 | { |
440 | struct nfs_pgio_header *hdr = data; | 361 | struct nfs_pgio_header *hdr = data; |
441 | 362 | ||
442 | if (unlikely(hdr->pnfs_error)) { | ||
443 | bl_free_short_extents(&BLK_LSEG2EXT(hdr->lseg)->bl_inval, | ||
444 | num_se); | ||
445 | } | ||
446 | |||
447 | hdr->task.tk_status = hdr->pnfs_error; | 363 | hdr->task.tk_status = hdr->pnfs_error; |
448 | hdr->verf.committed = NFS_FILE_SYNC; | 364 | hdr->verf.committed = NFS_FILE_SYNC; |
449 | INIT_WORK(&hdr->task.u.tk_work, bl_write_cleanup); | 365 | INIT_WORK(&hdr->task.u.tk_work, bl_write_cleanup); |
450 | schedule_work(&hdr->task.u.tk_work); | 366 | schedule_work(&hdr->task.u.tk_work); |
451 | } | 367 | } |
452 | 368 | ||
453 | /* FIXME STUB - mark intersection of layout and page as bad, so it is | ||
454 | * not used again. | ||
455 | */ | ||
456 | static void mark_bad_read(void) | ||
457 | { | ||
458 | return; | ||
459 | } | ||
460 | |||
461 | /* | ||
462 | * map_block: map a requested I/O block (isect) into an offset in the LVM | ||
463 | * block_device | ||
464 | */ | ||
465 | static void | ||
466 | map_block(struct buffer_head *bh, sector_t isect, struct pnfs_block_extent *be) | ||
467 | { | ||
468 | dprintk("%s enter be=%p\n", __func__, be); | ||
469 | |||
470 | set_buffer_mapped(bh); | ||
471 | bh->b_bdev = be->be_mdev; | ||
472 | bh->b_blocknr = (isect - be->be_f_offset + be->be_v_offset) >> | ||
473 | (be->be_mdev->bd_inode->i_blkbits - SECTOR_SHIFT); | ||
474 | |||
475 | dprintk("%s isect %llu, bh->b_blocknr %ld, using bsize %Zd\n", | ||
476 | __func__, (unsigned long long)isect, (long)bh->b_blocknr, | ||
477 | bh->b_size); | ||
478 | return; | ||
479 | } | ||
480 | |||
481 | static void | ||
482 | bl_read_single_end_io(struct bio *bio, int error) | ||
483 | { | ||
484 | struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; | ||
485 | struct page *page = bvec->bv_page; | ||
486 | |||
487 | /* Only one page in bvec */ | ||
488 | unlock_page(page); | ||
489 | } | ||
490 | |||
491 | static int | ||
492 | bl_do_readpage_sync(struct page *page, struct pnfs_block_extent *be, | ||
493 | unsigned int offset, unsigned int len) | ||
494 | { | ||
495 | struct bio *bio; | ||
496 | struct page *shadow_page; | ||
497 | sector_t isect; | ||
498 | char *kaddr, *kshadow_addr; | ||
499 | int ret = 0; | ||
500 | |||
501 | dprintk("%s: offset %u len %u\n", __func__, offset, len); | ||
502 | |||
503 | shadow_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); | ||
504 | if (shadow_page == NULL) | ||
505 | return -ENOMEM; | ||
506 | |||
507 | bio = bio_alloc(GFP_NOIO, 1); | ||
508 | if (bio == NULL) | ||
509 | return -ENOMEM; | ||
510 | |||
511 | isect = (page->index << PAGE_CACHE_SECTOR_SHIFT) + | ||
512 | (offset / SECTOR_SIZE); | ||
513 | |||
514 | bio->bi_iter.bi_sector = isect - be->be_f_offset + be->be_v_offset; | ||
515 | bio->bi_bdev = be->be_mdev; | ||
516 | bio->bi_end_io = bl_read_single_end_io; | ||
517 | |||
518 | lock_page(shadow_page); | ||
519 | if (bio_add_page(bio, shadow_page, | ||
520 | SECTOR_SIZE, round_down(offset, SECTOR_SIZE)) == 0) { | ||
521 | unlock_page(shadow_page); | ||
522 | bio_put(bio); | ||
523 | return -EIO; | ||
524 | } | ||
525 | |||
526 | submit_bio(READ, bio); | ||
527 | wait_on_page_locked(shadow_page); | ||
528 | if (unlikely(!test_bit(BIO_UPTODATE, &bio->bi_flags))) { | ||
529 | ret = -EIO; | ||
530 | } else { | ||
531 | kaddr = kmap_atomic(page); | ||
532 | kshadow_addr = kmap_atomic(shadow_page); | ||
533 | memcpy(kaddr + offset, kshadow_addr + offset, len); | ||
534 | kunmap_atomic(kshadow_addr); | ||
535 | kunmap_atomic(kaddr); | ||
536 | } | ||
537 | __free_page(shadow_page); | ||
538 | bio_put(bio); | ||
539 | |||
540 | return ret; | ||
541 | } | ||
542 | |||
543 | static int | ||
544 | bl_read_partial_page_sync(struct page *page, struct pnfs_block_extent *be, | ||
545 | unsigned int dirty_offset, unsigned int dirty_len, | ||
546 | bool full_page) | ||
547 | { | ||
548 | int ret = 0; | ||
549 | unsigned int start, end; | ||
550 | |||
551 | if (full_page) { | ||
552 | start = 0; | ||
553 | end = PAGE_CACHE_SIZE; | ||
554 | } else { | ||
555 | start = round_down(dirty_offset, SECTOR_SIZE); | ||
556 | end = round_up(dirty_offset + dirty_len, SECTOR_SIZE); | ||
557 | } | ||
558 | |||
559 | dprintk("%s: offset %u len %d\n", __func__, dirty_offset, dirty_len); | ||
560 | if (!be) { | ||
561 | zero_user_segments(page, start, dirty_offset, | ||
562 | dirty_offset + dirty_len, end); | ||
563 | if (start == 0 && end == PAGE_CACHE_SIZE && | ||
564 | trylock_page(page)) { | ||
565 | SetPageUptodate(page); | ||
566 | unlock_page(page); | ||
567 | } | ||
568 | return ret; | ||
569 | } | ||
570 | |||
571 | if (start != dirty_offset) | ||
572 | ret = bl_do_readpage_sync(page, be, start, dirty_offset - start); | ||
573 | |||
574 | if (!ret && (dirty_offset + dirty_len < end)) | ||
575 | ret = bl_do_readpage_sync(page, be, dirty_offset + dirty_len, | ||
576 | end - dirty_offset - dirty_len); | ||
577 | |||
578 | return ret; | ||
579 | } | ||
580 | |||
581 | /* Given an unmapped page, zero it or read it in for COW; the page is | ||
582 | * locked by the caller. | ||
583 | */ | ||
584 | static int | ||
585 | init_page_for_write(struct page *page, struct pnfs_block_extent *cow_read) | ||
586 | { | ||
587 | struct buffer_head *bh = NULL; | ||
588 | int ret = 0; | ||
589 | sector_t isect; | ||
590 | |||
591 | dprintk("%s enter, %p\n", __func__, page); | ||
592 | BUG_ON(PageUptodate(page)); | ||
593 | if (!cow_read) { | ||
594 | zero_user_segment(page, 0, PAGE_SIZE); | ||
595 | SetPageUptodate(page); | ||
596 | goto cleanup; | ||
597 | } | ||
598 | |||
599 | bh = alloc_page_buffers(page, PAGE_CACHE_SIZE, 0); | ||
600 | if (!bh) { | ||
601 | ret = -ENOMEM; | ||
602 | goto cleanup; | ||
603 | } | ||
604 | |||
605 | isect = (sector_t) page->index << PAGE_CACHE_SECTOR_SHIFT; | ||
606 | map_block(bh, isect, cow_read); | ||
607 | if (!bh_uptodate_or_lock(bh)) | ||
608 | ret = bh_submit_read(bh); | ||
609 | if (ret) | ||
610 | goto cleanup; | ||
611 | SetPageUptodate(page); | ||
612 | |||
613 | cleanup: | ||
614 | if (bh) | ||
615 | free_buffer_head(bh); | ||
616 | if (ret) { | ||
617 | /* Need to mark the layout with a bad read; we should now | ||
618 | * just use nfs4 for reads and writes. | ||
619 | */ | ||
620 | mark_bad_read(); | ||
621 | } | ||
622 | return ret; | ||
623 | } | ||
624 | |||
625 | /* Find or create a zeroing page marked as being under writeback. | ||
626 | * Return ERR_PTR on error, NULL to skip this page, and the page itself | ||
627 | * to indicate it should be written out. | ||
628 | */ | ||
629 | static struct page * | ||
630 | bl_find_get_zeroing_page(struct inode *inode, pgoff_t index, | ||
631 | struct pnfs_block_extent *cow_read) | ||
632 | { | ||
633 | struct page *page; | ||
634 | int locked = 0; | ||
635 | page = find_get_page(inode->i_mapping, index); | ||
636 | if (page) | ||
637 | goto check_page; | ||
638 | |||
639 | page = find_or_create_page(inode->i_mapping, index, GFP_NOFS); | ||
640 | if (unlikely(!page)) { | ||
641 | dprintk("%s oom\n", __func__); | ||
642 | return ERR_PTR(-ENOMEM); | ||
643 | } | ||
644 | locked = 1; | ||
645 | |||
646 | check_page: | ||
647 | /* PageDirty: someone else will write this out | ||
648 | * PageWriteback: someone else is writing this out | ||
649 | * PageUptodate: it was read before | ||
650 | */ | ||
651 | if (PageDirty(page) || PageWriteback(page)) { | ||
652 | print_page(page); | ||
653 | if (locked) | ||
654 | unlock_page(page); | ||
655 | page_cache_release(page); | ||
656 | return NULL; | ||
657 | } | ||
658 | |||
659 | if (!locked) { | ||
660 | lock_page(page); | ||
661 | locked = 1; | ||
662 | goto check_page; | ||
663 | } | ||
664 | if (!PageUptodate(page)) { | ||
665 | /* New page, read it in or zero it */ | ||
666 | init_page_for_write(page, cow_read); | ||
667 | } | ||
668 | set_page_writeback(page); | ||
669 | unlock_page(page); | ||
670 | |||
671 | return page; | ||
672 | } | ||
673 | |||
674 | static enum pnfs_try_status | 369 | static enum pnfs_try_status |
675 | bl_write_pagelist(struct nfs_pgio_header *header, int sync) | 370 | bl_write_pagelist(struct nfs_pgio_header *header, int sync) |
676 | { | 371 | { |
677 | int i, ret, npg_zero, pg_index, last = 0; | 372 | struct pnfs_block_layout *bl = BLK_LSEG2EXT(header->lseg); |
373 | struct pnfs_block_dev_map map = { .start = NFS4_MAX_UINT64 }; | ||
678 | struct bio *bio = NULL; | 374 | struct bio *bio = NULL; |
679 | struct pnfs_block_extent *be = NULL, *cow_read = NULL; | 375 | struct pnfs_block_extent be; |
680 | sector_t isect, last_isect = 0, extent_length = 0; | 376 | sector_t isect, extent_length = 0; |
681 | struct parallel_io *par = NULL; | 377 | struct parallel_io *par = NULL; |
682 | loff_t offset = header->args.offset; | 378 | loff_t offset = header->args.offset; |
683 | size_t count = header->args.count; | 379 | size_t count = header->args.count; |
684 | unsigned int pg_offset, pg_len, saved_len; | ||
685 | struct page **pages = header->args.pages; | 380 | struct page **pages = header->args.pages; |
686 | struct page *page; | 381 | int pg_index = header->args.pgbase >> PAGE_CACHE_SHIFT; |
687 | pgoff_t index; | 382 | unsigned int pg_len; |
688 | u64 temp; | 383 | struct blk_plug plug; |
689 | int npg_per_block = | 384 | int i; |
690 | NFS_SERVER(header->inode)->pnfs_blksize >> PAGE_CACHE_SHIFT; | ||
691 | 385 | ||
692 | dprintk("%s enter, %Zu@%lld\n", __func__, count, offset); | 386 | dprintk("%s enter, %Zu@%lld\n", __func__, count, offset); |
693 | 387 | ||
694 | if (header->dreq != NULL && | ||
695 | (!IS_ALIGNED(offset, NFS_SERVER(header->inode)->pnfs_blksize) || | ||
696 | !IS_ALIGNED(count, NFS_SERVER(header->inode)->pnfs_blksize))) { | ||
697 | dprintk("pnfsblock nonblock aligned DIO writes. Resend MDS\n"); | ||
698 | goto out_mds; | ||
699 | } | ||
700 | /* At this point, header->page_array is a (sequential) list of nfs_pages. | 388 |
701 | * We want to write each, and if there is an error set pnfs_error | 389 |
702 | * so the write is redone using NFS. | 390 |
703 | */ | 391 | */ |
704 | par = alloc_parallel(header); | 392 | par = alloc_parallel(header); |
705 | if (!par) | 393 | if (!par) |
706 | goto out_mds; | 394 | return PNFS_NOT_ATTEMPTED; |
707 | par->pnfs_callback = bl_end_par_io_write; | 395 | par->pnfs_callback = bl_end_par_io_write; |
708 | /* At this point, have to be more careful with error handling */ | ||
709 | 396 | ||
710 | isect = (sector_t) ((offset & (long)PAGE_CACHE_MASK) >> SECTOR_SHIFT); | 397 | blk_start_plug(&plug); |
711 | be = bl_find_get_extent(BLK_LSEG2EXT(header->lseg), isect, &cow_read); | ||
712 | if (!be || !is_writable(be, isect)) { | ||
713 | dprintk("%s no matching extents!\n", __func__); | ||
714 | goto out_mds; | ||
715 | } | ||
716 | 398 | ||
717 | /* First page inside INVALID extent */ | 399 | /* we always write out the whole page */ |
718 | if (be->be_state == PNFS_BLOCK_INVALID_DATA) { | 400 | offset = offset & (loff_t)PAGE_CACHE_MASK; |
719 | if (likely(!bl_push_one_short_extent(be->be_inval))) | 401 | isect = offset >> SECTOR_SHIFT; |
720 | par->bse_count++; | ||
721 | else | ||
722 | goto out_mds; | ||
723 | temp = offset >> PAGE_CACHE_SHIFT; | ||
724 | npg_zero = do_div(temp, npg_per_block); | ||
725 | isect = (sector_t) (((offset - npg_zero * PAGE_CACHE_SIZE) & | ||
726 | (long)PAGE_CACHE_MASK) >> SECTOR_SHIFT); | ||
727 | extent_length = be->be_length - (isect - be->be_f_offset); | ||
728 | |||
729 | fill_invalid_ext: | ||
730 | dprintk("%s need to zero %d pages\n", __func__, npg_zero); | ||
731 | for (;npg_zero > 0; npg_zero--) { | ||
732 | if (bl_is_sector_init(be->be_inval, isect)) { | ||
733 | dprintk("isect %llu already init\n", | ||
734 | (unsigned long long)isect); | ||
735 | goto next_page; | ||
736 | } | ||
737 | /* page ref released in bl_end_io_write_zero */ | ||
738 | index = isect >> PAGE_CACHE_SECTOR_SHIFT; | ||
739 | dprintk("%s zero %dth page: index %lu isect %llu\n", | ||
740 | __func__, npg_zero, index, | ||
741 | (unsigned long long)isect); | ||
742 | page = bl_find_get_zeroing_page(header->inode, index, | ||
743 | cow_read); | ||
744 | if (unlikely(IS_ERR(page))) { | ||
745 | header->pnfs_error = PTR_ERR(page); | ||
746 | goto out; | ||
747 | } else if (page == NULL) | ||
748 | goto next_page; | ||
749 | |||
750 | ret = bl_mark_sectors_init(be->be_inval, isect, | ||
751 | PAGE_CACHE_SECTORS); | ||
752 | if (unlikely(ret)) { | ||
753 | dprintk("%s bl_mark_sectors_init fail %d\n", | ||
754 | __func__, ret); | ||
755 | end_page_writeback(page); | ||
756 | page_cache_release(page); | ||
757 | header->pnfs_error = ret; | ||
758 | goto out; | ||
759 | } | ||
760 | if (likely(!bl_push_one_short_extent(be->be_inval))) | ||
761 | par->bse_count++; | ||
762 | else { | ||
763 | end_page_writeback(page); | ||
764 | page_cache_release(page); | ||
765 | header->pnfs_error = -ENOMEM; | ||
766 | goto out; | ||
767 | } | ||
768 | /* FIXME: This should be done in bi_end_io */ | ||
769 | mark_extents_written(BLK_LSEG2EXT(header->lseg), | ||
770 | page->index << PAGE_CACHE_SHIFT, | ||
771 | PAGE_CACHE_SIZE); | ||
772 | |||
773 | bio = bl_add_page_to_bio(bio, npg_zero, WRITE, | ||
774 | isect, page, be, | ||
775 | bl_end_io_write_zero, par); | ||
776 | if (IS_ERR(bio)) { | ||
777 | header->pnfs_error = PTR_ERR(bio); | ||
778 | bio = NULL; | ||
779 | goto out; | ||
780 | } | ||
781 | next_page: | ||
782 | isect += PAGE_CACHE_SECTORS; | ||
783 | extent_length -= PAGE_CACHE_SECTORS; | ||
784 | } | ||
785 | if (last) | ||
786 | goto write_done; | ||
787 | } | ||
788 | bio = bl_submit_bio(WRITE, bio); | ||
789 | 402 | ||
790 | /* Middle pages */ | ||
791 | pg_index = header->args.pgbase >> PAGE_CACHE_SHIFT; | ||
792 | for (i = pg_index; i < header->page_array.npages; i++) { | 403 | for (i = pg_index; i < header->page_array.npages; i++) { |
793 | if (!extent_length) { | 404 | if (extent_length <= 0) { |
794 | /* We've used up the previous extent */ | 405 | /* We've used up the previous extent */ |
795 | bl_put_extent(be); | ||
796 | bl_put_extent(cow_read); | ||
797 | bio = bl_submit_bio(WRITE, bio); | 406 | bio = bl_submit_bio(WRITE, bio); |
798 | /* Get the next one */ | 407 | /* Get the next one */ |
799 | be = bl_find_get_extent(BLK_LSEG2EXT(header->lseg), | 408 | if (!ext_tree_lookup(bl, isect, &be, true)) { |
800 | isect, &cow_read); | ||
801 | if (!be || !is_writable(be, isect)) { | ||
802 | header->pnfs_error = -EINVAL; | 409 | header->pnfs_error = -EINVAL; |
803 | goto out; | 410 | goto out; |
804 | } | 411 | } |
805 | if (be->be_state == PNFS_BLOCK_INVALID_DATA) { | ||
806 | if (likely(!bl_push_one_short_extent( | ||
807 | be->be_inval))) | ||
808 | par->bse_count++; | ||
809 | else { | ||
810 | header->pnfs_error = -ENOMEM; | ||
811 | goto out; | ||
812 | } | ||
813 | } | ||
814 | extent_length = be->be_length - | ||
815 | (isect - be->be_f_offset); | ||
816 | } | ||
817 | |||
818 | dprintk("%s offset %lld count %Zu\n", __func__, offset, count); | ||
819 | pg_offset = offset & ~PAGE_CACHE_MASK; | ||
820 | if (pg_offset + count > PAGE_CACHE_SIZE) | ||
821 | pg_len = PAGE_CACHE_SIZE - pg_offset; | ||
822 | else | ||
823 | pg_len = count; | ||
824 | |||
825 | saved_len = pg_len; | ||
826 | if (be->be_state == PNFS_BLOCK_INVALID_DATA && | ||
827 | !bl_is_sector_init(be->be_inval, isect)) { | ||
828 | ret = bl_read_partial_page_sync(pages[i], cow_read, | ||
829 | pg_offset, pg_len, true); | ||
830 | if (ret) { | ||
831 | dprintk("%s bl_read_partial_page_sync fail %d\n", | ||
832 | __func__, ret); | ||
833 | header->pnfs_error = ret; | ||
834 | goto out; | ||
835 | } | ||
836 | |||
837 | ret = bl_mark_sectors_init(be->be_inval, isect, | ||
838 | PAGE_CACHE_SECTORS); | ||
839 | if (unlikely(ret)) { | ||
840 | dprintk("%s bl_mark_sectors_init fail %d\n", | ||
841 | __func__, ret); | ||
842 | header->pnfs_error = ret; | ||
843 | goto out; | ||
844 | } | ||
845 | 412 | ||
846 | /* Expand to full page write */ | 413 | extent_length = be.be_length - (isect - be.be_f_offset); |
847 | pg_offset = 0; | ||
848 | pg_len = PAGE_CACHE_SIZE; | ||
849 | } else if ((pg_offset & (SECTOR_SIZE - 1)) || | ||
850 | (pg_len & (SECTOR_SIZE - 1))){ | ||
851 | /* Nasty case: we have to do synchronous full-sector | ||
852 | * read-modify-write cycles. | ||
853 | */ | ||
854 | unsigned int saved_offset = pg_offset; | ||
855 | ret = bl_read_partial_page_sync(pages[i], be, pg_offset, | ||
856 | pg_len, false); | ||
857 | pg_offset = round_down(pg_offset, SECTOR_SIZE); | ||
858 | pg_len = round_up(saved_offset + pg_len, SECTOR_SIZE) | ||
859 | - pg_offset; | ||
860 | } | 414 | } |
861 | 415 | ||
862 | 416 | pg_len = PAGE_CACHE_SIZE; | |
863 | bio = do_add_page_to_bio(bio, header->page_array.npages - i, | 417 | bio = do_add_page_to_bio(bio, header->page_array.npages - i, |
864 | WRITE, | 418 | WRITE, isect, pages[i], &map, &be, |
865 | isect, pages[i], be, | ||
866 | bl_end_io_write, par, | 419 | bl_end_io_write, par, |
867 | pg_offset, pg_len); | 420 | 0, &pg_len); |
868 | if (IS_ERR(bio)) { | 421 | if (IS_ERR(bio)) { |
869 | header->pnfs_error = PTR_ERR(bio); | 422 | header->pnfs_error = PTR_ERR(bio); |
870 | bio = NULL; | 423 | bio = NULL; |
871 | goto out; | 424 | goto out; |
872 | } | 425 | } |
873 | offset += saved_len; | ||
874 | count -= saved_len; | ||
875 | isect += PAGE_CACHE_SECTORS; | ||
876 | last_isect = isect; | ||
877 | extent_length -= PAGE_CACHE_SECTORS; | ||
878 | } | ||
879 | 426 | ||
880 | /* Last page inside INVALID extent */ | 427 | offset += pg_len; |
881 | if (be->be_state == PNFS_BLOCK_INVALID_DATA) { | 428 | count -= pg_len; |
882 | bio = bl_submit_bio(WRITE, bio); | 429 | isect += (pg_len >> SECTOR_SHIFT); |
883 | temp = last_isect >> PAGE_CACHE_SECTOR_SHIFT; | 430 | extent_length -= (pg_len >> SECTOR_SHIFT); |
884 | npg_zero = npg_per_block - do_div(temp, npg_per_block); | ||
885 | if (npg_zero < npg_per_block) { | ||
886 | last = 1; | ||
887 | goto fill_invalid_ext; | ||
888 | } | ||
889 | } | 431 | } |
890 | 432 | ||
891 | write_done: | ||
892 | header->res.count = header->args.count; | 433 | header->res.count = header->args.count; |
893 | out: | 434 | out: |
894 | bl_put_extent(be); | ||
895 | bl_put_extent(cow_read); | ||
896 | bl_submit_bio(WRITE, bio); | 435 | bl_submit_bio(WRITE, bio); |
436 | blk_finish_plug(&plug); | ||
897 | put_parallel(par); | 437 | put_parallel(par); |
898 | return PNFS_ATTEMPTED; | 438 | return PNFS_ATTEMPTED; |
899 | out_mds: | ||
900 | bl_put_extent(be); | ||
901 | bl_put_extent(cow_read); | ||
902 | kfree(par); | ||
903 | return PNFS_NOT_ATTEMPTED; | ||
904 | } | ||
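Because PNFS_READ_WHOLE_PAGE and the is_aligned_req() rules further down guarantee page-granular writes, the new bl_write_pagelist can round the start offset down to a page boundary and emit one full page per iteration; all of the zeroing and read-modify-write machinery deleted above becomes unnecessary. A sketch of the per-iteration accounting, with the constants assumed as in the kernel:

#include <stdint.h>

#define PAGE_SIZE 4096ull		/* assumed PAGE_CACHE_SIZE */
#define PAGE_MASK (~(PAGE_SIZE - 1))
#define SECTOR_SHIFT 9

/* Whole-page write accounting from the new bl_write_pagelist: mask the
 * start down to a page boundary, then consume one page per iteration.
 * Returns the sector just past the written range. */
static uint64_t write_accounting(uint64_t offset, unsigned int npages)
{
	uint64_t isect;
	unsigned int i;

	offset &= PAGE_MASK;		/* we always write the whole page */
	isect = offset >> SECTOR_SHIFT;

	for (i = 0; i < npages; i++) {
		uint64_t pg_len = PAGE_SIZE;	/* do_add_page_to_bio() may
						 * shrink this to fit the
						 * device mapping */
		offset += pg_len;
		isect += pg_len >> SECTOR_SHIFT;
	}
	return isect;
}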
905 | |||
906 | /* FIXME - range ignored */ | ||
907 | static void | ||
908 | release_extents(struct pnfs_block_layout *bl, struct pnfs_layout_range *range) | ||
909 | { | ||
910 | int i; | ||
911 | struct pnfs_block_extent *be; | ||
912 | |||
913 | spin_lock(&bl->bl_ext_lock); | ||
914 | for (i = 0; i < EXTENT_LISTS; i++) { | ||
915 | while (!list_empty(&bl->bl_extents[i])) { | ||
916 | be = list_first_entry(&bl->bl_extents[i], | ||
917 | struct pnfs_block_extent, | ||
918 | be_node); | ||
919 | list_del(&be->be_node); | ||
920 | bl_put_extent(be); | ||
921 | } | ||
922 | } | ||
923 | spin_unlock(&bl->bl_ext_lock); | ||
924 | } | ||
925 | |||
926 | static void | ||
927 | release_inval_marks(struct pnfs_inval_markings *marks) | ||
928 | { | ||
929 | struct pnfs_inval_tracking *pos, *temp; | ||
930 | struct pnfs_block_short_extent *se, *stemp; | ||
931 | |||
932 | list_for_each_entry_safe(pos, temp, &marks->im_tree.mtt_stub, it_link) { | ||
933 | list_del(&pos->it_link); | ||
934 | kfree(pos); | ||
935 | } | ||
936 | |||
937 | list_for_each_entry_safe(se, stemp, &marks->im_extents, bse_node) { | ||
938 | list_del(&se->bse_node); | ||
939 | kfree(se); | ||
940 | } | ||
941 | return; | ||
942 | } | 439 | } |
943 | 440 | ||
944 | static void bl_free_layout_hdr(struct pnfs_layout_hdr *lo) | 441 | static void bl_free_layout_hdr(struct pnfs_layout_hdr *lo) |
945 | { | 442 | { |
946 | struct pnfs_block_layout *bl = BLK_LO2EXT(lo); | 443 | struct pnfs_block_layout *bl = BLK_LO2EXT(lo); |
444 | int err; | ||
947 | 445 | ||
948 | dprintk("%s enter\n", __func__); | 446 | dprintk("%s enter\n", __func__); |
949 | release_extents(bl, NULL); | 447 | |
950 | release_inval_marks(&bl->bl_inval); | 448 | err = ext_tree_remove(bl, true, 0, LLONG_MAX); |
449 | WARN_ON(err); | ||
450 | |||
951 | kfree(bl); | 451 | kfree(bl); |
952 | } | 452 | } |
953 | 453 | ||
@@ -960,14 +460,11 @@ static struct pnfs_layout_hdr *bl_alloc_layout_hdr(struct inode *inode, | |||
960 | bl = kzalloc(sizeof(*bl), gfp_flags); | 460 | bl = kzalloc(sizeof(*bl), gfp_flags); |
961 | if (!bl) | 461 | if (!bl) |
962 | return NULL; | 462 | return NULL; |
463 | |||
464 | bl->bl_ext_rw = RB_ROOT; | ||
465 | bl->bl_ext_ro = RB_ROOT; | ||
963 | spin_lock_init(&bl->bl_ext_lock); | 466 | spin_lock_init(&bl->bl_ext_lock); |
964 | INIT_LIST_HEAD(&bl->bl_extents[0]); | 467 | |
965 | INIT_LIST_HEAD(&bl->bl_extents[1]); | ||
966 | INIT_LIST_HEAD(&bl->bl_commit); | ||
967 | INIT_LIST_HEAD(&bl->bl_committing); | ||
968 | bl->bl_count = 0; | ||
969 | bl->bl_blocksize = NFS_SERVER(inode)->pnfs_blksize >> SECTOR_SHIFT; | ||
970 | BL_INIT_INVAL_MARKS(&bl->bl_inval, bl->bl_blocksize); | ||
971 | return &bl->bl_layout; | 468 | return &bl->bl_layout; |
972 | } | 469 | } |
973 | 470 | ||
@@ -977,215 +474,318 @@ static void bl_free_lseg(struct pnfs_layout_segment *lseg) | |||
977 | kfree(lseg); | 474 | kfree(lseg); |
978 | } | 475 | } |
979 | 476 | ||
980 | /* We pretty much ignore lseg, and store all data layout wide, so we | 477 | /* Tracks info needed to ensure extents in layout obey constraints of spec */ |
981 | * can correctly merge. | 478 | struct layout_verification { |
982 | */ | 479 | u32 mode; /* R or RW */ |
983 | static struct pnfs_layout_segment *bl_alloc_lseg(struct pnfs_layout_hdr *lo, | 480 | u64 start; /* Expected start of next non-COW extent */ |
984 | struct nfs4_layoutget_res *lgr, | 481 | u64 inval; /* Start of INVAL coverage */ |
985 | gfp_t gfp_flags) | 482 | u64 cowread; /* End of COW read coverage */ |
986 | { | 483 | }; |
987 | struct pnfs_layout_segment *lseg; | ||
988 | int status; | ||
989 | 484 | ||
990 | dprintk("%s enter\n", __func__); | 485 | /* Verify the extent meets the layout requirements of the pnfs-block draft, |
991 | lseg = kzalloc(sizeof(*lseg), gfp_flags); | 486 | * section 2.3.1. |
992 | if (!lseg) | 487 | */ |
993 | return ERR_PTR(-ENOMEM); | 488 | static int verify_extent(struct pnfs_block_extent *be, |
994 | status = nfs4_blk_process_layoutget(lo, lgr, gfp_flags); | 489 | struct layout_verification *lv) |
995 | if (status) { | 490 | { |
996 | /* We don't want to call the full-blown bl_free_lseg, | 491 | if (lv->mode == IOMODE_READ) { |
997 | * since on error extents were not touched. | 492 | if (be->be_state == PNFS_BLOCK_READWRITE_DATA || |
998 | */ | 493 | be->be_state == PNFS_BLOCK_INVALID_DATA) |
999 | kfree(lseg); | 494 | return -EIO; |
1000 | return ERR_PTR(status); | 495 | if (be->be_f_offset != lv->start) |
496 | return -EIO; | ||
497 | lv->start += be->be_length; | ||
498 | return 0; | ||
1001 | } | 499 | } |
1002 | return lseg; | 500 | /* lv->mode == IOMODE_RW */ |
501 | if (be->be_state == PNFS_BLOCK_READWRITE_DATA) { | ||
502 | if (be->be_f_offset != lv->start) | ||
503 | return -EIO; | ||
504 | if (lv->cowread > lv->start) | ||
505 | return -EIO; | ||
506 | lv->start += be->be_length; | ||
507 | lv->inval = lv->start; | ||
508 | return 0; | ||
509 | } else if (be->be_state == PNFS_BLOCK_INVALID_DATA) { | ||
510 | if (be->be_f_offset != lv->start) | ||
511 | return -EIO; | ||
512 | lv->start += be->be_length; | ||
513 | return 0; | ||
514 | } else if (be->be_state == PNFS_BLOCK_READ_DATA) { | ||
515 | if (be->be_f_offset > lv->start) | ||
516 | return -EIO; | ||
517 | if (be->be_f_offset < lv->inval) | ||
518 | return -EIO; | ||
519 | if (be->be_f_offset < lv->cowread) | ||
520 | return -EIO; | ||
521 | /* It looks like you might want to min this with lv->start, | ||
522 | * but you really don't. | ||
523 | */ | ||
524 | lv->inval = lv->inval + be->be_length; | ||
525 | lv->cowread = be->be_f_offset + be->be_length; | ||
526 | return 0; | ||
527 | } else | ||
528 | return -EIO; | ||
1003 | } | 529 | } |
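Taken together, these checks admit exactly one shape of RW layout: READWRITE and INVALID extents must tile the requested range in order and without gaps, while READ extents may only trail behind to supply the copy-on-write source for INVALID ranges. A hypothetical sequence (lengths in 512-byte sectors; the state values are assumed to mirror RFC 5663's pnfs_block_extent_state4) that verify_extent() would accept, with the lv fields tracked in the comments:

#include <stdint.h>

/* Assumed to mirror RFC 5663 / enum pnfs_block_extent_state. */
enum { RW_DATA = 0, READ_DATA = 1, INVALID_DATA = 2, NONE_DATA = 3 };

struct extent_desc { uint64_t f_offset, length; int state; };

/* An RW layout covering sectors 0..4096: the INVALID extent is backed
 * by a READ extent over the same range (its COW source), then a plain
 * READWRITE extent follows.
 *
 *   initial state:  start=0     inval=0     cowread=0
 *   after entry 0:  start=2048  inval=0     cowread=0
 *   after entry 1:  start=2048  inval=2048  cowread=2048
 *   after entry 2:  start=4096  inval=4096  cowread=2048
 *
 * The final checks in bl_alloc_lseg() also pass: start matches the end
 * of the layout range, and cowread does not extend past start. */
static const struct extent_desc example_rw_layout[] = {
	{ .f_offset = 0,    .length = 2048, .state = INVALID_DATA },
	{ .f_offset = 0,    .length = 2048, .state = READ_DATA    },
	{ .f_offset = 2048, .length = 2048, .state = RW_DATA      },
};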
1004 | 530 | ||
1005 | static void | 531 | static int decode_sector_number(__be32 **rp, sector_t *sp) |
1006 | bl_encode_layoutcommit(struct pnfs_layout_hdr *lo, struct xdr_stream *xdr, | ||
1007 | const struct nfs4_layoutcommit_args *arg) | ||
1008 | { | 532 | { |
1009 | dprintk("%s enter\n", __func__); | 533 | uint64_t s; |
1010 | encode_pnfs_block_layoutupdate(BLK_LO2EXT(lo), xdr, arg); | 534 | |
535 | *rp = xdr_decode_hyper(*rp, &s); | ||
536 | if (s & 0x1ff) { | ||
537 | printk(KERN_WARNING "NFS: %s: sector not aligned\n", __func__); | ||
538 | return -1; | ||
539 | } | ||
540 | *sp = s >> SECTOR_SHIFT; | ||
541 | return 0; | ||
1011 | } | 542 | } |
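decode_sector_number() enforces the draft's rule that extent offsets and lengths are byte values in whole 512-byte sectors: the low nine bits must be clear before the value is converted to a sector count. For instance, 0x1200 (4608 bytes) decodes to sector 9, while 0x1201 is rejected. A standalone sketch, with SECTOR_SHIFT assumed to be 9:

#include <stdint.h>

#define SECTOR_SHIFT 9	/* assumed: 512-byte sectors */

/* Userspace mirror of decode_sector_number(): accept a wire value in
 * bytes only if it is 512-byte aligned. */
static int bytes_to_sector(uint64_t s, uint64_t *sp)
{
	if (s & 0x1ff)		/* not a whole number of sectors */
		return -1;
	*sp = s >> SECTOR_SHIFT;
	return 0;
}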
1012 | 543 | ||
1013 | static void | 544 | static int |
1014 | bl_cleanup_layoutcommit(struct nfs4_layoutcommit_data *lcdata) | 545 | bl_alloc_extent(struct xdr_stream *xdr, struct pnfs_layout_hdr *lo, |
546 | struct layout_verification *lv, struct list_head *extents, | ||
547 | gfp_t gfp_mask) | ||
1015 | { | 548 | { |
1016 | struct pnfs_layout_hdr *lo = NFS_I(lcdata->args.inode)->layout; | 549 | struct pnfs_block_extent *be; |
550 | struct nfs4_deviceid id; | ||
551 | int error; | ||
552 | __be32 *p; | ||
1017 | 553 | ||
1018 | dprintk("%s enter\n", __func__); | 554 | p = xdr_inline_decode(xdr, 28 + NFS4_DEVICEID4_SIZE); |
1019 | clean_pnfs_block_layoutupdate(BLK_LO2EXT(lo), &lcdata->args, lcdata->res.status); | 555 | if (!p) |
1020 | } | 556 | return -EIO; |
1021 | 557 | ||
1022 | static void free_blk_mountid(struct block_mount_id *mid) | 558 | be = kzalloc(sizeof(*be), GFP_NOFS); |
1023 | { | 559 | if (!be) |
1024 | if (mid) { | 560 | return -ENOMEM; |
1025 | struct pnfs_block_dev *dev, *tmp; | ||
1026 | 561 | ||
1027 | /* No need to take bm_lock as we are last user freeing bm_devlist */ | 562 | memcpy(&id, p, NFS4_DEVICEID4_SIZE); |
1028 | list_for_each_entry_safe(dev, tmp, &mid->bm_devlist, bm_node) { | 563 | p += XDR_QUADLEN(NFS4_DEVICEID4_SIZE); |
1029 | list_del(&dev->bm_node); | 564 | |
1030 | bl_free_block_dev(dev); | 565 | error = -EIO; |
1031 | } | 566 | be->be_device = nfs4_find_get_deviceid(NFS_SERVER(lo->plh_inode), &id, |
1032 | kfree(mid); | 567 | lo->plh_lc_cred, gfp_mask); |
568 | if (!be->be_device) | ||
569 | goto out_free_be; | ||
570 | |||
571 | /* | ||
572 | * The next three values are read in as bytes, but stored in the | ||
573 | * extent structure in 512-byte granularity. | ||
574 | */ | ||
575 | if (decode_sector_number(&p, &be->be_f_offset) < 0) | ||
576 | goto out_put_deviceid; | ||
577 | if (decode_sector_number(&p, &be->be_length) < 0) | ||
578 | goto out_put_deviceid; | ||
579 | if (decode_sector_number(&p, &be->be_v_offset) < 0) | ||
580 | goto out_put_deviceid; | ||
581 | be->be_state = be32_to_cpup(p++); | ||
582 | |||
583 | error = verify_extent(be, lv); | ||
584 | if (error) { | ||
585 | dprintk("%s: extent verification failed\n", __func__); | ||
586 | goto out_put_deviceid; | ||
1033 | } | 587 | } |
588 | |||
589 | list_add_tail(&be->be_list, extents); | ||
590 | return 0; | ||
591 | |||
592 | out_put_deviceid: | ||
593 | nfs4_put_deviceid_node(be->be_device); | ||
594 | out_free_be: | ||
595 | kfree(be); | ||
596 | return error; | ||
1034 | } | 597 | } |
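The single xdr_inline_decode(xdr, 28 + NFS4_DEVICEID4_SIZE) above pulls in one fixed-size extent: the device ID followed by 28 bytes of offsets and state. A sketch of the wire image it walks, with NFS4_DEVICEID4_SIZE assumed to be 16 per RFC 5661 (the packed struct is only an illustration; the code reads the big-endian XDR fields through helpers):

#include <stdint.h>

#define NFS4_DEVICEID4_SIZE 16	/* assumed */

/* One pnfs-block extent on the wire: 16 + 28 = 44 bytes. The three
 * byte offsets must be 512-aligned (see decode_sector_number above). */
struct pnfs_block_extent_wire {
	uint8_t	 deviceid[NFS4_DEVICEID4_SIZE];
	uint64_t f_offset;	/* offset of the extent in the file */
	uint64_t length;	/* length of the extent, in bytes */
	uint64_t v_offset;	/* offset of the extent on the volume */
	uint32_t state;		/* READWRITE / READ / INVALID / NONE */
} __attribute__((packed));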
1035 | 598 | ||
1036 | /* This is mostly copied from the filelayout_get_device_info function. | 599 | static struct pnfs_layout_segment * |
1037 | * It seems much of this should be at the generic pnfs level. | 600 | bl_alloc_lseg(struct pnfs_layout_hdr *lo, struct nfs4_layoutget_res *lgr, |
1038 | */ | 601 | gfp_t gfp_mask) |
1039 | static struct pnfs_block_dev * | ||
1040 | nfs4_blk_get_deviceinfo(struct nfs_server *server, const struct nfs_fh *fh, | ||
1041 | struct nfs4_deviceid *d_id) | ||
1042 | { | 602 | { |
1043 | struct pnfs_device *dev; | 603 | struct layout_verification lv = { |
1044 | struct pnfs_block_dev *rv; | 604 | .mode = lgr->range.iomode, |
1045 | u32 max_resp_sz; | 605 | .start = lgr->range.offset >> SECTOR_SHIFT, |
1046 | int max_pages; | 606 | .inval = lgr->range.offset >> SECTOR_SHIFT, |
1047 | struct page **pages = NULL; | 607 | .cowread = lgr->range.offset >> SECTOR_SHIFT, |
1048 | int i, rc; | 608 | }; |
609 | struct pnfs_block_layout *bl = BLK_LO2EXT(lo); | ||
610 | struct pnfs_layout_segment *lseg; | ||
611 | struct xdr_buf buf; | ||
612 | struct xdr_stream xdr; | ||
613 | struct page *scratch; | ||
614 | int status, i; | ||
615 | uint32_t count; | ||
616 | __be32 *p; | ||
617 | LIST_HEAD(extents); | ||
618 | |||
619 | dprintk("---> %s\n", __func__); | ||
620 | |||
621 | lseg = kzalloc(sizeof(*lseg), gfp_mask); | ||
622 | if (!lseg) | ||
623 | return ERR_PTR(-ENOMEM); | ||
624 | |||
625 | status = -ENOMEM; | ||
626 | scratch = alloc_page(gfp_mask); | ||
627 | if (!scratch) | ||
628 | goto out; | ||
629 | |||
630 | xdr_init_decode_pages(&xdr, &buf, | ||
631 | lgr->layoutp->pages, lgr->layoutp->len); | ||
632 | xdr_set_scratch_buffer(&xdr, page_address(scratch), PAGE_SIZE); | ||
633 | |||
634 | status = -EIO; | ||
635 | p = xdr_inline_decode(&xdr, 4); | ||
636 | if (unlikely(!p)) | ||
637 | goto out_free_scratch; | ||
638 | |||
639 | count = be32_to_cpup(p++); | ||
640 | dprintk("%s: number of extents %d\n", __func__, count); | ||
1049 | 641 | ||
1050 | /* | 642 | /* |
1051 | * Use the session max response size as the basis for setting | 643 | * Decode individual extents, putting them in temporary staging area |
1052 | * GETDEVICEINFO's maxcount | 644 | * until whole layout is decoded to make error recovery easier. |
1053 | */ | 645 | */ |
1054 | max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz; | 646 | for (i = 0; i < count; i++) { |
1055 | max_pages = nfs_page_array_len(0, max_resp_sz); | 647 | status = bl_alloc_extent(&xdr, lo, &lv, &extents, gfp_mask); |
1056 | dprintk("%s max_resp_sz %u max_pages %d\n", | 648 | if (status) |
1057 | __func__, max_resp_sz, max_pages); | 649 | goto process_extents; |
1058 | |||
1059 | dev = kmalloc(sizeof(*dev), GFP_NOFS); | ||
1060 | if (!dev) { | ||
1061 | dprintk("%s kmalloc failed\n", __func__); | ||
1062 | return ERR_PTR(-ENOMEM); | ||
1063 | } | 650 | } |
1064 | 651 | ||
1065 | pages = kcalloc(max_pages, sizeof(struct page *), GFP_NOFS); | 652 | if (lgr->range.offset + lgr->range.length != |
1066 | if (pages == NULL) { | 653 | lv.start << SECTOR_SHIFT) { |
1067 | kfree(dev); | 654 | dprintk("%s Final length mismatch\n", __func__); |
1068 | return ERR_PTR(-ENOMEM); | 655 | status = -EIO; |
656 | goto process_extents; | ||
1069 | } | 657 | } |
1070 | for (i = 0; i < max_pages; i++) { | 658 | |
1071 | pages[i] = alloc_page(GFP_NOFS); | 659 | if (lv.start < lv.cowread) { |
1072 | if (!pages[i]) { | 660 | dprintk("%s Final uncovered COW extent\n", __func__); |
1073 | rv = ERR_PTR(-ENOMEM); | 661 | status = -EIO; |
1074 | goto out_free; | ||
1075 | } | ||
1076 | } | 662 | } |
1077 | 663 | ||
1078 | memcpy(&dev->dev_id, d_id, sizeof(*d_id)); | 664 | process_extents: |
1079 | dev->layout_type = LAYOUT_BLOCK_VOLUME; | 665 | while (!list_empty(&extents)) { |
1080 | dev->pages = pages; | 666 | struct pnfs_block_extent *be = |
1081 | dev->pgbase = 0; | 667 | list_first_entry(&extents, struct pnfs_block_extent, |
1082 | dev->pglen = PAGE_SIZE * max_pages; | 668 | be_list); |
1083 | dev->mincount = 0; | 669 | list_del(&be->be_list); |
1084 | dev->maxcount = max_resp_sz - nfs41_maxgetdevinfo_overhead; | 670 | |
1085 | 671 | if (!status) | |
1086 | dprintk("%s: dev_id: %s\n", __func__, dev->dev_id.data); | 672 | status = ext_tree_insert(bl, be); |
1087 | rc = nfs4_proc_getdeviceinfo(server, dev, NULL); | 673 | |
1088 | dprintk("%s getdevice info returns %d\n", __func__, rc); | 674 | if (status) { |
1089 | if (rc) { | 675 | nfs4_put_deviceid_node(be->be_device); |
1090 | rv = ERR_PTR(rc); | 676 | kfree(be); |
1091 | goto out_free; | 677 | } |
1092 | } | 678 | } |
1093 | 679 | ||
1094 | rv = nfs4_blk_decode_device(server, dev); | 680 | out_free_scratch: |
1095 | out_free: | 681 | __free_page(scratch); |
1096 | for (i = 0; i < max_pages; i++) | 682 | out: |
1097 | __free_page(pages[i]); | 683 | dprintk("%s returns %d\n", __func__, status); |
1098 | kfree(pages); | 684 | if (status) { |
1099 | kfree(dev); | 685 | kfree(lseg); |
1100 | return rv; | 686 | return ERR_PTR(status); |
687 | } | ||
688 | return lseg; | ||
1101 | } | 689 | } |
1102 | 690 | ||
1103 | static int | 691 | static void |
1104 | bl_set_layoutdriver(struct nfs_server *server, const struct nfs_fh *fh) | 692 | bl_return_range(struct pnfs_layout_hdr *lo, |
693 | struct pnfs_layout_range *range) | ||
1105 | { | 694 | { |
1106 | struct block_mount_id *b_mt_id = NULL; | 695 | struct pnfs_block_layout *bl = BLK_LO2EXT(lo); |
1107 | struct pnfs_devicelist *dlist = NULL; | 696 | sector_t offset = range->offset >> SECTOR_SHIFT, end; |
1108 | struct pnfs_block_dev *bdev; | ||
1109 | LIST_HEAD(block_disklist); | ||
1110 | int status, i; | ||
1111 | |||
1112 | dprintk("%s enter\n", __func__); | ||
1113 | 697 | ||
1114 | if (server->pnfs_blksize == 0) { | 698 | if (range->offset % 8) { |
1115 | dprintk("%s Server did not return blksize\n", __func__); | 699 | dprintk("%s: offset %lld not block size aligned\n", |
1116 | return -EINVAL; | 700 | __func__, range->offset); |
1117 | } | 701 | return; |
1118 | b_mt_id = kzalloc(sizeof(struct block_mount_id), GFP_NOFS); | ||
1119 | if (!b_mt_id) { | ||
1120 | status = -ENOMEM; | ||
1121 | goto out_error; | ||
1122 | } | ||
1123 | /* Initialize nfs4 block layout mount id */ | ||
1124 | spin_lock_init(&b_mt_id->bm_lock); | ||
1125 | INIT_LIST_HEAD(&b_mt_id->bm_devlist); | ||
1126 | |||
1127 | dlist = kmalloc(sizeof(struct pnfs_devicelist), GFP_NOFS); | ||
1128 | if (!dlist) { | ||
1129 | status = -ENOMEM; | ||
1130 | goto out_error; | ||
1131 | } | 702 | } |
1132 | dlist->eof = 0; | 703 | |
1133 | while (!dlist->eof) { | 704 | if (range->length != NFS4_MAX_UINT64) { |
1134 | status = nfs4_proc_getdevicelist(server, fh, dlist); | 705 | if (range->length % 8) { |
1135 | if (status) | 706 | dprintk("%s: length %lld not block size aligned\n", |
1136 | goto out_error; | 707 | __func__, range->length); |
1137 | dprintk("%s GETDEVICELIST numdevs=%i, eof=%i\n", | 708 | return; |
1138 | __func__, dlist->num_devs, dlist->eof); | ||
1139 | for (i = 0; i < dlist->num_devs; i++) { | ||
1140 | bdev = nfs4_blk_get_deviceinfo(server, fh, | ||
1141 | &dlist->dev_id[i]); | ||
1142 | if (IS_ERR(bdev)) { | ||
1143 | status = PTR_ERR(bdev); | ||
1144 | goto out_error; | ||
1145 | } | ||
1146 | spin_lock(&b_mt_id->bm_lock); | ||
1147 | list_add(&bdev->bm_node, &b_mt_id->bm_devlist); | ||
1148 | spin_unlock(&b_mt_id->bm_lock); | ||
1149 | } | 709 | } |
1150 | } | ||
1151 | dprintk("%s SUCCESS\n", __func__); | ||
1152 | server->pnfs_ld_data = b_mt_id; | ||
1153 | 710 | ||
1154 | out_return: | 711 | end = offset + (range->length >> SECTOR_SHIFT); |
1155 | kfree(dlist); | 712 | } else { |
1156 | return status; | 713 | end = round_down(NFS4_MAX_UINT64, PAGE_SIZE); |
714 | } | ||
1157 | 715 | ||
1158 | out_error: | 716 | ext_tree_remove(bl, range->iomode & IOMODE_RW, offset, end); |
1159 | free_blk_mountid(b_mt_id); | ||
1160 | goto out_return; | ||
1161 | } | 717 | } |
1162 | 718 | ||
1163 | static int | 719 | static int |
1164 | bl_clear_layoutdriver(struct nfs_server *server) | 720 | bl_prepare_layoutcommit(struct nfs4_layoutcommit_args *arg) |
721 | { | ||
722 | return ext_tree_prepare_commit(arg); | ||
723 | } | ||
724 | |||
725 | static void | ||
726 | bl_cleanup_layoutcommit(struct nfs4_layoutcommit_data *lcdata) | ||
1165 | { | 727 | { |
1166 | struct block_mount_id *b_mt_id = server->pnfs_ld_data; | 728 | ext_tree_mark_committed(&lcdata->args, lcdata->res.status); |
729 | } | ||
1167 | 730 | ||
731 | static int | ||
732 | bl_set_layoutdriver(struct nfs_server *server, const struct nfs_fh *fh) | ||
733 | { | ||
1168 | dprintk("%s enter\n", __func__); | 734 | dprintk("%s enter\n", __func__); |
1169 | free_blk_mountid(b_mt_id); | 735 | |
1170 | dprintk("%s RETURNS\n", __func__); | 736 | if (server->pnfs_blksize == 0) { |
737 | dprintk("%s Server did not return blksize\n", __func__); | ||
738 | return -EINVAL; | ||
739 | } | ||
740 | if (server->pnfs_blksize > PAGE_SIZE) { | ||
741 | printk(KERN_ERR "%s: pNFS blksize %d not supported.\n", | ||
742 | __func__, server->pnfs_blksize); | ||
743 | return -EINVAL; | ||
744 | } | ||
745 | |||
1171 | return 0; | 746 | return 0; |
1172 | } | 747 | } |
1173 | 748 | ||
1174 | static bool | 749 | static bool |
1175 | is_aligned_req(struct nfs_page *req, unsigned int alignment) | 750 | is_aligned_req(struct nfs_pageio_descriptor *pgio, |
751 | struct nfs_page *req, unsigned int alignment) | ||
1176 | { | 752 | { |
1177 | return IS_ALIGNED(req->wb_offset, alignment) && | 753 | /* |
1178 | IS_ALIGNED(req->wb_bytes, alignment); | 754 | * Always accept buffered writes, higher layers take care of the |
755 | * right alignment. | ||
756 | */ | ||
757 | if (pgio->pg_dreq == NULL) | ||
758 | return true; | ||
759 | |||
760 | if (!IS_ALIGNED(req->wb_offset, alignment)) | ||
761 | return false; | ||
762 | |||
763 | if (IS_ALIGNED(req->wb_bytes, alignment)) | ||
764 | return true; | ||
765 | |||
766 | if (req_offset(req) + req->wb_bytes == i_size_read(pgio->pg_inode)) { | ||
767 | /* | ||
768 | * If the write goes up to the inode size, just write | ||
769 | * the full page. Data past the inode size is | ||
770 | * guaranteed to be zeroed by the higher level client | ||
771 | * code, and this behaviour is mandated by RFC 5663 | ||
772 | * section 2.3.2. | ||
773 | */ | ||
774 | return true; | ||
775 | } | ||
776 | |||
777 | return false; | ||
1179 | } | 778 | } |
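The rules above reduce to a small decision table: buffered I/O is always accepted, direct I/O needs an aligned start, and an unaligned length is tolerated only when the request runs exactly up to the inode size, where RFC 5663 section 2.3.2 guarantees the tail is zero. A userspace sketch of the same decision, with the pgio/req fields flattened into plain parameters (hypothetical names):

#include <stdbool.h>
#include <stdint.h>

/* Mirror of is_aligned_req(): 'direct' stands in for pgio->pg_dreq,
 * 'offset'/'bytes' for the request, 'i_size' for the inode size. */
static bool accept_req(bool direct, uint64_t offset, uint64_t bytes,
		       uint64_t i_size, uint64_t alignment)
{
	if (!direct)			/* buffered: upper layers align */
		return true;
	if (offset % alignment)
		return false;
	if (bytes % alignment == 0)
		return true;
	/* unaligned tail is fine iff the write ends at the inode size */
	return offset + bytes == i_size;
}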
1180 | 779 | ||
1181 | static void | 780 | static void |
1182 | bl_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) | 781 | bl_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) |
1183 | { | 782 | { |
1184 | if (pgio->pg_dreq != NULL && | 783 | if (!is_aligned_req(pgio, req, SECTOR_SIZE)) { |
1185 | !is_aligned_req(req, SECTOR_SIZE)) | ||
1186 | nfs_pageio_reset_read_mds(pgio); | 784 | nfs_pageio_reset_read_mds(pgio); |
1187 | else | 785 | return; |
1188 | pnfs_generic_pg_init_read(pgio, req); | 786 | } |
787 | |||
788 | pnfs_generic_pg_init_read(pgio, req); | ||
1189 | } | 789 | } |
1190 | 790 | ||
1191 | /* | 791 | /* |
@@ -1196,10 +796,8 @@ static size_t | |||
1196 | bl_pg_test_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, | 796 | bl_pg_test_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, |
1197 | struct nfs_page *req) | 797 | struct nfs_page *req) |
1198 | { | 798 | { |
1199 | if (pgio->pg_dreq != NULL && | 799 | if (!is_aligned_req(pgio, req, SECTOR_SIZE)) |
1200 | !is_aligned_req(req, SECTOR_SIZE)) | ||
1201 | return 0; | 800 | return 0; |
1202 | |||
1203 | return pnfs_generic_pg_test(pgio, prev, req); | 801 | return pnfs_generic_pg_test(pgio, prev, req); |
1204 | } | 802 | } |
1205 | 803 | ||
@@ -1229,19 +827,20 @@ static u64 pnfs_num_cont_bytes(struct inode *inode, pgoff_t idx) | |||
1229 | static void | 827 | static void |
1230 | bl_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) | 828 | bl_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) |
1231 | { | 829 | { |
1232 | if (pgio->pg_dreq != NULL && | 830 | u64 wb_size; |
1233 | !is_aligned_req(req, PAGE_CACHE_SIZE)) { | 831 | |
832 | if (!is_aligned_req(pgio, req, PAGE_SIZE)) { | ||
1234 | nfs_pageio_reset_write_mds(pgio); | 833 | nfs_pageio_reset_write_mds(pgio); |
1235 | } else { | 834 | return; |
1236 | u64 wb_size; | ||
1237 | if (pgio->pg_dreq == NULL) | ||
1238 | wb_size = pnfs_num_cont_bytes(pgio->pg_inode, | ||
1239 | req->wb_index); | ||
1240 | else | ||
1241 | wb_size = nfs_dreq_bytes_left(pgio->pg_dreq); | ||
1242 | |||
1243 | pnfs_generic_pg_init_write(pgio, req, wb_size); | ||
1244 | } | 835 | } |
836 | |||
837 | if (pgio->pg_dreq == NULL) | ||
838 | wb_size = pnfs_num_cont_bytes(pgio->pg_inode, | ||
839 | req->wb_index); | ||
840 | else | ||
841 | wb_size = nfs_dreq_bytes_left(pgio->pg_dreq); | ||
842 | |||
843 | pnfs_generic_pg_init_write(pgio, req, wb_size); | ||
1245 | } | 844 | } |
1246 | 845 | ||
1247 | /* | 846 | /* |
@@ -1252,10 +851,8 @@ static size_t | |||
1252 | bl_pg_test_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, | 851 | bl_pg_test_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, |
1253 | struct nfs_page *req) | 852 | struct nfs_page *req) |
1254 | { | 853 | { |
1255 | if (pgio->pg_dreq != NULL && | 854 | if (!is_aligned_req(pgio, req, PAGE_SIZE)) |
1256 | !is_aligned_req(req, PAGE_CACHE_SIZE)) | ||
1257 | return 0; | 855 | return 0; |
1258 | |||
1259 | return pnfs_generic_pg_test(pgio, prev, req); | 856 | return pnfs_generic_pg_test(pgio, prev, req); |
1260 | } | 857 | } |
1261 | 858 | ||
@@ -1275,146 +872,24 @@ static struct pnfs_layoutdriver_type blocklayout_type = { | |||
1275 | .id = LAYOUT_BLOCK_VOLUME, | 872 | .id = LAYOUT_BLOCK_VOLUME, |
1276 | .name = "LAYOUT_BLOCK_VOLUME", | 873 | .name = "LAYOUT_BLOCK_VOLUME", |
1277 | .owner = THIS_MODULE, | 874 | .owner = THIS_MODULE, |
875 | .flags = PNFS_LAYOUTRET_ON_SETATTR | | ||
876 | PNFS_READ_WHOLE_PAGE, | ||
1278 | .read_pagelist = bl_read_pagelist, | 877 | .read_pagelist = bl_read_pagelist, |
1279 | .write_pagelist = bl_write_pagelist, | 878 | .write_pagelist = bl_write_pagelist, |
1280 | .alloc_layout_hdr = bl_alloc_layout_hdr, | 879 | .alloc_layout_hdr = bl_alloc_layout_hdr, |
1281 | .free_layout_hdr = bl_free_layout_hdr, | 880 | .free_layout_hdr = bl_free_layout_hdr, |
1282 | .alloc_lseg = bl_alloc_lseg, | 881 | .alloc_lseg = bl_alloc_lseg, |
1283 | .free_lseg = bl_free_lseg, | 882 | .free_lseg = bl_free_lseg, |
1284 | .encode_layoutcommit = bl_encode_layoutcommit, | 883 | .return_range = bl_return_range, |
884 | .prepare_layoutcommit = bl_prepare_layoutcommit, | ||
1285 | .cleanup_layoutcommit = bl_cleanup_layoutcommit, | 885 | .cleanup_layoutcommit = bl_cleanup_layoutcommit, |
1286 | .set_layoutdriver = bl_set_layoutdriver, | 886 | .set_layoutdriver = bl_set_layoutdriver, |
1287 | .clear_layoutdriver = bl_clear_layoutdriver, | 887 | .alloc_deviceid_node = bl_alloc_deviceid_node, |
888 | .free_deviceid_node = bl_free_deviceid_node, | ||
1288 | .pg_read_ops = &bl_pg_read_ops, | 889 | .pg_read_ops = &bl_pg_read_ops, |
1289 | .pg_write_ops = &bl_pg_write_ops, | 890 | .pg_write_ops = &bl_pg_write_ops, |
1290 | }; | 891 | }; |
1291 | 892 | ||
1292 | static const struct rpc_pipe_ops bl_upcall_ops = { | ||
1293 | .upcall = rpc_pipe_generic_upcall, | ||
1294 | .downcall = bl_pipe_downcall, | ||
1295 | .destroy_msg = bl_pipe_destroy_msg, | ||
1296 | }; | ||
1297 | |||
1298 | static struct dentry *nfs4blocklayout_register_sb(struct super_block *sb, | ||
1299 | struct rpc_pipe *pipe) | ||
1300 | { | ||
1301 | struct dentry *dir, *dentry; | ||
1302 | |||
1303 | dir = rpc_d_lookup_sb(sb, NFS_PIPE_DIRNAME); | ||
1304 | if (dir == NULL) | ||
1305 | return ERR_PTR(-ENOENT); | ||
1306 | dentry = rpc_mkpipe_dentry(dir, "blocklayout", NULL, pipe); | ||
1307 | dput(dir); | ||
1308 | return dentry; | ||
1309 | } | ||
1310 | |||
1311 | static void nfs4blocklayout_unregister_sb(struct super_block *sb, | ||
1312 | struct rpc_pipe *pipe) | ||
1313 | { | ||
1314 | if (pipe->dentry) | ||
1315 | rpc_unlink(pipe->dentry); | ||
1316 | } | ||
1317 | |||
1318 | static int rpc_pipefs_event(struct notifier_block *nb, unsigned long event, | ||
1319 | void *ptr) | ||
1320 | { | ||
1321 | struct super_block *sb = ptr; | ||
1322 | struct net *net = sb->s_fs_info; | ||
1323 | struct nfs_net *nn = net_generic(net, nfs_net_id); | ||
1324 | struct dentry *dentry; | ||
1325 | int ret = 0; | ||
1326 | |||
1327 | if (!try_module_get(THIS_MODULE)) | ||
1328 | return 0; | ||
1329 | |||
1330 | if (nn->bl_device_pipe == NULL) { | ||
1331 | module_put(THIS_MODULE); | ||
1332 | return 0; | ||
1333 | } | ||
1334 | |||
1335 | switch (event) { | ||
1336 | case RPC_PIPEFS_MOUNT: | ||
1337 | dentry = nfs4blocklayout_register_sb(sb, nn->bl_device_pipe); | ||
1338 | if (IS_ERR(dentry)) { | ||
1339 | ret = PTR_ERR(dentry); | ||
1340 | break; | ||
1341 | } | ||
1342 | nn->bl_device_pipe->dentry = dentry; | ||
1343 | break; | ||
1344 | case RPC_PIPEFS_UMOUNT: | ||
1345 | if (nn->bl_device_pipe->dentry) | ||
1346 | nfs4blocklayout_unregister_sb(sb, nn->bl_device_pipe); | ||
1347 | break; | ||
1348 | default: | ||
1349 | ret = -ENOTSUPP; | ||
1350 | break; | ||
1351 | } | ||
1352 | module_put(THIS_MODULE); | ||
1353 | return ret; | ||
1354 | } | ||
1355 | |||
1356 | static struct notifier_block nfs4blocklayout_block = { | ||
1357 | .notifier_call = rpc_pipefs_event, | ||
1358 | }; | ||
1359 | |||
1360 | static struct dentry *nfs4blocklayout_register_net(struct net *net, | ||
1361 | struct rpc_pipe *pipe) | ||
1362 | { | ||
1363 | struct super_block *pipefs_sb; | ||
1364 | struct dentry *dentry; | ||
1365 | |||
1366 | pipefs_sb = rpc_get_sb_net(net); | ||
1367 | if (!pipefs_sb) | ||
1368 | return NULL; | ||
1369 | dentry = nfs4blocklayout_register_sb(pipefs_sb, pipe); | ||
1370 | rpc_put_sb_net(net); | ||
1371 | return dentry; | ||
1372 | } | ||
1373 | |||
1374 | static void nfs4blocklayout_unregister_net(struct net *net, | ||
1375 | struct rpc_pipe *pipe) | ||
1376 | { | ||
1377 | struct super_block *pipefs_sb; | ||
1378 | |||
1379 | pipefs_sb = rpc_get_sb_net(net); | ||
1380 | if (pipefs_sb) { | ||
1381 | nfs4blocklayout_unregister_sb(pipefs_sb, pipe); | ||
1382 | rpc_put_sb_net(net); | ||
1383 | } | ||
1384 | } | ||
1385 | |||
1386 | static int nfs4blocklayout_net_init(struct net *net) | ||
1387 | { | ||
1388 | struct nfs_net *nn = net_generic(net, nfs_net_id); | ||
1389 | struct dentry *dentry; | ||
1390 | |||
1391 | init_waitqueue_head(&nn->bl_wq); | ||
1392 | nn->bl_device_pipe = rpc_mkpipe_data(&bl_upcall_ops, 0); | ||
1393 | if (IS_ERR(nn->bl_device_pipe)) | ||
1394 | return PTR_ERR(nn->bl_device_pipe); | ||
1395 | dentry = nfs4blocklayout_register_net(net, nn->bl_device_pipe); | ||
1396 | if (IS_ERR(dentry)) { | ||
1397 | rpc_destroy_pipe_data(nn->bl_device_pipe); | ||
1398 | return PTR_ERR(dentry); | ||
1399 | } | ||
1400 | nn->bl_device_pipe->dentry = dentry; | ||
1401 | return 0; | ||
1402 | } | ||
1403 | |||
1404 | static void nfs4blocklayout_net_exit(struct net *net) | ||
1405 | { | ||
1406 | struct nfs_net *nn = net_generic(net, nfs_net_id); | ||
1407 | |||
1408 | nfs4blocklayout_unregister_net(net, nn->bl_device_pipe); | ||
1409 | rpc_destroy_pipe_data(nn->bl_device_pipe); | ||
1410 | nn->bl_device_pipe = NULL; | ||
1411 | } | ||
1412 | |||
1413 | static struct pernet_operations nfs4blocklayout_net_ops = { | ||
1414 | .init = nfs4blocklayout_net_init, | ||
1415 | .exit = nfs4blocklayout_net_exit, | ||
1416 | }; | ||
1417 | |||
1418 | static int __init nfs4blocklayout_init(void) | 893 | static int __init nfs4blocklayout_init(void) |
1419 | { | 894 | { |
1420 | int ret; | 895 | int ret; |
@@ -1424,20 +899,14 @@ static int __init nfs4blocklayout_init(void) | |||
1424 | ret = pnfs_register_layoutdriver(&blocklayout_type); | 899 | ret = pnfs_register_layoutdriver(&blocklayout_type); |
1425 | if (ret) | 900 | if (ret) |
1426 | goto out; | 901 | goto out; |
1427 | 902 | ret = bl_init_pipefs(); | |
1428 | ret = rpc_pipefs_notifier_register(&nfs4blocklayout_block); | ||
1429 | if (ret) | 903 | if (ret) |
1430 | goto out_remove; | 904 | goto out_unregister; |
1431 | ret = register_pernet_subsys(&nfs4blocklayout_net_ops); | 905 | return 0; |
1432 | if (ret) | ||
1433 | goto out_notifier; | ||
1434 | out: | ||
1435 | return ret; | ||
1436 | 906 | ||
1437 | out_notifier: | 907 | out_unregister: |
1438 | rpc_pipefs_notifier_unregister(&nfs4blocklayout_block); | ||
1439 | out_remove: | ||
1440 | pnfs_unregister_layoutdriver(&blocklayout_type); | 908 | pnfs_unregister_layoutdriver(&blocklayout_type); |
909 | out: | ||
1441 | return ret; | 910 | return ret; |
1442 | } | 911 | } |
1443 | 912 | ||
@@ -1446,8 +915,7 @@ static void __exit nfs4blocklayout_exit(void) | |||
1446 | dprintk("%s: NFSv4 Block Layout Driver Unregistering...\n", | 915 | dprintk("%s: NFSv4 Block Layout Driver Unregistering...\n", |
1447 | __func__); | 916 | __func__); |
1448 | 917 | ||
1449 | rpc_pipefs_notifier_unregister(&nfs4blocklayout_block); | 918 | bl_cleanup_pipefs(); |
1450 | unregister_pernet_subsys(&nfs4blocklayout_net_ops); | ||
1451 | pnfs_unregister_layoutdriver(&blocklayout_type); | 919 | pnfs_unregister_layoutdriver(&blocklayout_type); |
1452 | } | 920 | } |
1453 | 921 | ||
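The reworked init path above registers the layout driver first, then the pipefs helpers, and unwinds in reverse order on failure. A self-contained sketch of the same goto-unwind idiom, with hypothetical register_a/register_b standing in for the two registrations:

    #include <stdio.h>

    /* Hypothetical subsystems standing in for the layout driver and
     * the pipefs helpers; register_b simulates a failure. */
    static int register_a(void)    { return 0; }
    static void unregister_a(void) { }
    static int register_b(void)    { return -1; }

    static int demo_init(void)
    {
        int ret;

        ret = register_a();
        if (ret)
            goto out;
        ret = register_b();
        if (ret)
            goto out_unregister;    /* unwind in reverse order */
        return 0;

    out_unregister:
        unregister_a();
    out:
        return ret;
    }

    int main(void)
    {
        printf("demo_init: %d\n", demo_init());
        return 0;
    }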
diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h index 9838fb020473..92dca9e90d8d 100644 --- a/fs/nfs/blocklayout/blocklayout.h +++ b/fs/nfs/blocklayout/blocklayout.h | |||
@@ -44,105 +44,112 @@ | |||
44 | #define PAGE_CACHE_SECTOR_SHIFT (PAGE_CACHE_SHIFT - SECTOR_SHIFT) | 44 | #define PAGE_CACHE_SECTOR_SHIFT (PAGE_CACHE_SHIFT - SECTOR_SHIFT) |
45 | #define SECTOR_SIZE (1 << SECTOR_SHIFT) | 45 | #define SECTOR_SIZE (1 << SECTOR_SHIFT) |
46 | 46 | ||
47 | struct block_mount_id { | 47 | struct pnfs_block_dev; |
48 | spinlock_t bm_lock; /* protects list */ | ||
49 | struct list_head bm_devlist; /* holds pnfs_block_dev */ | ||
50 | }; | ||
51 | 48 | ||
52 | struct pnfs_block_dev { | 49 | enum pnfs_block_volume_type { |
53 | struct list_head bm_node; | 50 | PNFS_BLOCK_VOLUME_SIMPLE = 0, |
54 | struct nfs4_deviceid bm_mdevid; /* associated devid */ | 51 | PNFS_BLOCK_VOLUME_SLICE = 1, |
55 | struct block_device *bm_mdev; /* meta device itself */ | 52 | PNFS_BLOCK_VOLUME_CONCAT = 2, |
56 | struct net *net; | 53 | PNFS_BLOCK_VOLUME_STRIPE = 3, |
57 | }; | 54 | }; |
58 | 55 | ||
59 | enum exstate4 { | 56 | #define PNFS_BLOCK_MAX_UUIDS 4 |
60 | PNFS_BLOCK_READWRITE_DATA = 0, | 57 | #define PNFS_BLOCK_MAX_DEVICES 64 |
61 | PNFS_BLOCK_READ_DATA = 1, | 58 | |
62 | PNFS_BLOCK_INVALID_DATA = 2, /* mapped, but data is invalid */ | 59 | /* |
63 | PNFS_BLOCK_NONE_DATA = 3 /* unmapped, it's a hole */ | 60 | * Random upper cap for the uuid length to avoid unbounded allocation. |
61 | * Not actually limited by the protocol. | ||
62 | */ | ||
63 | #define PNFS_BLOCK_UUID_LEN 128 | ||
64 | |||
65 | |||
66 | struct pnfs_block_volume { | ||
67 | enum pnfs_block_volume_type type; | ||
68 | union { | ||
69 | struct { | ||
70 | int len; | ||
71 | int nr_sigs; | ||
72 | struct { | ||
73 | u64 offset; | ||
74 | u32 sig_len; | ||
75 | u8 sig[PNFS_BLOCK_UUID_LEN]; | ||
76 | } sigs[PNFS_BLOCK_MAX_UUIDS]; | ||
77 | } simple; | ||
78 | struct { | ||
79 | u64 start; | ||
80 | u64 len; | ||
81 | u32 volume; | ||
82 | } slice; | ||
83 | struct { | ||
84 | u32 volumes_count; | ||
85 | u32 volumes[PNFS_BLOCK_MAX_DEVICES]; | ||
86 | } concat; | ||
87 | struct { | ||
88 | u64 chunk_size; | ||
89 | u32 volumes_count; | ||
90 | u32 volumes[PNFS_BLOCK_MAX_DEVICES]; | ||
91 | } stripe; | ||
92 | }; | ||
64 | }; | 93 | }; |
65 | 94 | ||
66 | #define MY_MAX_TAGS (15) /* tag bitnums used must be less than this */ | 95 | struct pnfs_block_dev_map { |
96 | sector_t start; | ||
97 | sector_t len; | ||
67 | 98 | ||
68 | struct my_tree { | 99 | sector_t disk_offset; |
69 | sector_t mtt_step_size; /* Internal sector alignment */ | 100 | struct block_device *bdev; |
70 | struct list_head mtt_stub; /* Should be a radix tree */ | ||
71 | }; | 101 | }; |
72 | 102 | ||
73 | struct pnfs_inval_markings { | 103 | struct pnfs_block_dev { |
74 | spinlock_t im_lock; | 104 | struct nfs4_deviceid_node node; |
75 | struct my_tree im_tree; /* Sectors that need LAYOUTCOMMIT */ | 105 | |
76 | sector_t im_block_size; /* Server blocksize in sectors */ | 106 | u64 start; |
77 | struct list_head im_extents; /* Short extents for INVAL->RW conversion */ | 107 | u64 len; |
108 | |||
109 | u32 nr_children; | ||
110 | struct pnfs_block_dev *children; | ||
111 | u64 chunk_size; | ||
112 | |||
113 | struct block_device *bdev; | ||
114 | u64 disk_offset; | ||
115 | |||
116 | bool (*map)(struct pnfs_block_dev *dev, u64 offset, | ||
117 | struct pnfs_block_dev_map *map); | ||
78 | }; | 118 | }; |
79 | 119 | ||
80 | struct pnfs_inval_tracking { | 120 | enum exstate4 { |
81 | struct list_head it_link; | 121 | PNFS_BLOCK_READWRITE_DATA = 0, |
82 | int it_sector; | 122 | PNFS_BLOCK_READ_DATA = 1, |
83 | int it_tags; | 123 | PNFS_BLOCK_INVALID_DATA = 2, /* mapped, but data is invalid */ |
124 | PNFS_BLOCK_NONE_DATA = 3 /* unmapped, it's a hole */ | ||
84 | }; | 125 | }; |
85 | 126 | ||
86 | /* sector_t fields are all in 512-byte sectors */ | 127 | /* sector_t fields are all in 512-byte sectors */ |
87 | struct pnfs_block_extent { | 128 | struct pnfs_block_extent { |
88 | struct kref be_refcnt; | 129 | union { |
89 | struct list_head be_node; /* link into lseg list */ | 130 | struct rb_node be_node; |
90 | struct nfs4_deviceid be_devid; /* FIXME: could use device cache instead */ | 131 | struct list_head be_list; |
91 | struct block_device *be_mdev; | 132 | }; |
133 | struct nfs4_deviceid_node *be_device; | ||
92 | sector_t be_f_offset; /* the starting offset in the file */ | 134 | sector_t be_f_offset; /* the starting offset in the file */ |
93 | sector_t be_length; /* the size of the extent */ | 135 | sector_t be_length; /* the size of the extent */ |
94 | sector_t be_v_offset; /* the starting offset in the volume */ | 136 | sector_t be_v_offset; /* the starting offset in the volume */ |
95 | enum exstate4 be_state; /* the state of this extent */ | 137 | enum exstate4 be_state; /* the state of this extent */ |
96 | struct pnfs_inval_markings *be_inval; /* tracks INVAL->RW transition */ | 138 | #define EXTENT_WRITTEN 1 |
139 | #define EXTENT_COMMITTING 2 | ||
140 | unsigned int be_tag; | ||
97 | }; | 141 | }; |
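Note the linkage union above: an extent appears to live either in a layout's rb-tree (be_node) or on a temporary list (be_list), never both, so the two links can share storage. A small sketch, assuming mutually exclusive use (which the union implies); tree_link/list_link are simplified stand-ins for rb_node/list_head:

    #include <stdio.h>

    struct tree_link { void *l, *r, *parent; };  /* while indexed in the tree */
    struct list_link { void *next, *prev; };     /* while staged on a list */

    struct demo_extent {
        union {
            struct tree_link tree;
            struct list_link list;
        };
        unsigned int tag;    /* e.g. WRITTEN -> COMMITTING */
    };

    int main(void)
    {
        /* The union costs only the larger member, not the sum. */
        printf("tree=%zu list=%zu union member area=%zu\n",
               sizeof(struct tree_link), sizeof(struct list_link),
               sizeof(struct demo_extent) - sizeof(unsigned int));
        return 0;
    }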
98 | 142 | ||
99 | /* Shortened extent used by LAYOUTCOMMIT */ | 143 | /* on the wire size of the extent */ |
100 | struct pnfs_block_short_extent { | 144 | #define BL_EXTENT_SIZE (7 * sizeof(__be32) + NFS4_DEVICEID4_SIZE) |
101 | struct list_head bse_node; | ||
102 | struct nfs4_deviceid bse_devid; | ||
103 | struct block_device *bse_mdev; | ||
104 | sector_t bse_f_offset; /* the starting offset in the file */ | ||
105 | sector_t bse_length; /* the size of the extent */ | ||
106 | }; | ||
107 | |||
108 | static inline void | ||
109 | BL_INIT_INVAL_MARKS(struct pnfs_inval_markings *marks, sector_t blocksize) | ||
110 | { | ||
111 | spin_lock_init(&marks->im_lock); | ||
112 | INIT_LIST_HEAD(&marks->im_tree.mtt_stub); | ||
113 | INIT_LIST_HEAD(&marks->im_extents); | ||
114 | marks->im_block_size = blocksize; | ||
115 | marks->im_tree.mtt_step_size = min((sector_t)PAGE_CACHE_SECTORS, | ||
116 | blocksize); | ||
117 | } | ||
118 | |||
119 | enum extentclass4 { | ||
120 | RW_EXTENT = 0, /* READWRITE and INVAL */ | ||
121 | RO_EXTENT = 1, /* READ and NONE */ | ||
122 | EXTENT_LISTS = 2, | ||
123 | }; | ||
124 | |||
125 | static inline int bl_choose_list(enum exstate4 state) | ||
126 | { | ||
127 | if (state == PNFS_BLOCK_READ_DATA || state == PNFS_BLOCK_NONE_DATA) | ||
128 | return RO_EXTENT; | ||
129 | else | ||
130 | return RW_EXTENT; | ||
131 | } | ||
132 | 145 | ||
133 | struct pnfs_block_layout { | 146 | struct pnfs_block_layout { |
134 | struct pnfs_layout_hdr bl_layout; | 147 | struct pnfs_layout_hdr bl_layout; |
135 | struct pnfs_inval_markings bl_inval; /* tracks INVAL->RW transition */ | 148 | struct rb_root bl_ext_rw; |
149 | struct rb_root bl_ext_ro; | ||
136 | spinlock_t bl_ext_lock; /* Protects list manipulation */ | 150 | spinlock_t bl_ext_lock; /* Protects list manipulation */ |
137 | struct list_head bl_extents[EXTENT_LISTS]; /* R and RW extents */ | ||
138 | struct list_head bl_commit; /* Needs layout commit */ | ||
139 | struct list_head bl_committing; /* Layout committing */ | ||
140 | unsigned int bl_count; /* entries in bl_commit */ | ||
141 | sector_t bl_blocksize; /* Server blocksize in sectors */ | ||
142 | }; | 151 | }; |
143 | 152 | ||
144 | #define BLK_ID(lo) ((struct block_mount_id *)(NFS_SERVER(lo->plh_inode)->pnfs_ld_data)) | ||
145 | |||
146 | static inline struct pnfs_block_layout * | 153 | static inline struct pnfs_block_layout * |
147 | BLK_LO2EXT(struct pnfs_layout_hdr *lo) | 154 | BLK_LO2EXT(struct pnfs_layout_hdr *lo) |
148 | { | 155 | { |
@@ -171,41 +178,27 @@ struct bl_msg_hdr { | |||
171 | #define BL_DEVICE_REQUEST_PROC 0x1 /* User level process succeeds */ | 178 | #define BL_DEVICE_REQUEST_PROC 0x1 /* User level process succeeds */ |
172 | #define BL_DEVICE_REQUEST_ERR 0x2 /* User level process fails */ | 179 | #define BL_DEVICE_REQUEST_ERR 0x2 /* User level process fails */ |
173 | 180 | ||
174 | /* blocklayoutdev.c */ | 181 | /* dev.c */ |
175 | ssize_t bl_pipe_downcall(struct file *, const char __user *, size_t); | 182 | struct nfs4_deviceid_node *bl_alloc_deviceid_node(struct nfs_server *server, |
176 | void bl_pipe_destroy_msg(struct rpc_pipe_msg *); | 183 | struct pnfs_device *pdev, gfp_t gfp_mask); |
177 | void nfs4_blkdev_put(struct block_device *bdev); | 184 | void bl_free_deviceid_node(struct nfs4_deviceid_node *d); |
178 | struct pnfs_block_dev *nfs4_blk_decode_device(struct nfs_server *server, | 185 | |
179 | struct pnfs_device *dev); | 186 | /* extent_tree.c */ |
180 | int nfs4_blk_process_layoutget(struct pnfs_layout_hdr *lo, | 187 | int ext_tree_insert(struct pnfs_block_layout *bl, |
181 | struct nfs4_layoutget_res *lgr, gfp_t gfp_flags); | 188 | struct pnfs_block_extent *new); |
182 | 189 | int ext_tree_remove(struct pnfs_block_layout *bl, bool rw, sector_t start, | |
183 | /* blocklayoutdm.c */ | 190 | sector_t end); |
184 | void bl_free_block_dev(struct pnfs_block_dev *bdev); | 191 | int ext_tree_mark_written(struct pnfs_block_layout *bl, sector_t start, |
185 | 192 | sector_t len); | |
186 | /* extents.c */ | 193 | bool ext_tree_lookup(struct pnfs_block_layout *bl, sector_t isect, |
187 | struct pnfs_block_extent * | 194 | struct pnfs_block_extent *ret, bool rw); |
188 | bl_find_get_extent(struct pnfs_block_layout *bl, sector_t isect, | 195 | int ext_tree_prepare_commit(struct nfs4_layoutcommit_args *arg); |
189 | struct pnfs_block_extent **cow_read); | 196 | void ext_tree_mark_committed(struct nfs4_layoutcommit_args *arg, int status); |
190 | int bl_mark_sectors_init(struct pnfs_inval_markings *marks, | 197 | |
191 | sector_t offset, sector_t length); | 198 | /* rpc_pipefs.c */ |
192 | void bl_put_extent(struct pnfs_block_extent *be); | 199 | dev_t bl_resolve_deviceid(struct nfs_server *server, |
193 | struct pnfs_block_extent *bl_alloc_extent(void); | 200 | struct pnfs_block_volume *b, gfp_t gfp_mask); |
194 | int bl_is_sector_init(struct pnfs_inval_markings *marks, sector_t isect); | 201 | int __init bl_init_pipefs(void); |
195 | int encode_pnfs_block_layoutupdate(struct pnfs_block_layout *bl, | 202 | void __exit bl_cleanup_pipefs(void); |
196 | struct xdr_stream *xdr, | ||
197 | const struct nfs4_layoutcommit_args *arg); | ||
198 | void clean_pnfs_block_layoutupdate(struct pnfs_block_layout *bl, | ||
199 | const struct nfs4_layoutcommit_args *arg, | ||
200 | int status); | ||
201 | int bl_add_merge_extent(struct pnfs_block_layout *bl, | ||
202 | struct pnfs_block_extent *new); | ||
203 | int bl_mark_for_commit(struct pnfs_block_extent *be, | ||
204 | sector_t offset, sector_t length, | ||
205 | struct pnfs_block_short_extent *new); | ||
206 | int bl_push_one_short_extent(struct pnfs_inval_markings *marks); | ||
207 | struct pnfs_block_short_extent * | ||
208 | bl_pop_one_short_extent(struct pnfs_inval_markings *marks); | ||
209 | void bl_free_short_extents(struct pnfs_inval_markings *marks, int num_to_free); | ||
210 | 203 | ||
211 | #endif /* FS_NFS_NFS4BLOCKLAYOUT_H */ | 204 | #endif /* FS_NFS_NFS4BLOCKLAYOUT_H */ |
diff --git a/fs/nfs/blocklayout/blocklayoutdev.c b/fs/nfs/blocklayout/blocklayoutdev.c deleted file mode 100644 index 04303b5c9361..000000000000 --- a/fs/nfs/blocklayout/blocklayoutdev.c +++ /dev/null | |||
@@ -1,384 +0,0 @@ | |||
1 | /* | ||
2 | * linux/fs/nfs/blocklayout/blocklayoutdev.c | ||
3 | * | ||
4 | * Device operations for the pnfs nfs4 file layout driver. | ||
5 | * | ||
6 | * Copyright (c) 2006 The Regents of the University of Michigan. | ||
7 | * All rights reserved. | ||
8 | * | ||
9 | * Andy Adamson <andros@citi.umich.edu> | ||
10 | * Fred Isaman <iisaman@umich.edu> | ||
11 | * | ||
12 | * permission is granted to use, copy, create derivative works and | ||
13 | * redistribute this software and such derivative works for any purpose, | ||
14 | * so long as the name of the university of michigan is not used in | ||
15 | * any advertising or publicity pertaining to the use or distribution | ||
16 | * of this software without specific, written prior authorization. if | ||
17 | * the above copyright notice or any other identification of the | ||
18 | * university of michigan is included in any copy of any portion of | ||
19 | * this software, then the disclaimer below must also be included. | ||
20 | * | ||
21 | * this software is provided as is, without representation from the | ||
22 | * university of michigan as to its fitness for any purpose, and without | ||
23 | * warranty by the university of michigan of any kind, either express | ||
24 | * or implied, including without limitation the implied warranties of | ||
25 | * merchantability and fitness for a particular purpose. the regents | ||
26 | * of the university of michigan shall not be liable for any damages, | ||
27 | * including special, indirect, incidental, or consequential damages, | ||
28 | * with respect to any claim arising out or in connection with the use | ||
29 | * of the software, even if it has been or is hereafter advised of the | ||
30 | * possibility of such damages. | ||
31 | */ | ||
32 | #include <linux/module.h> | ||
33 | #include <linux/buffer_head.h> /* __bread */ | ||
34 | |||
35 | #include <linux/genhd.h> | ||
36 | #include <linux/blkdev.h> | ||
37 | #include <linux/hash.h> | ||
38 | |||
39 | #include "blocklayout.h" | ||
40 | |||
41 | #define NFSDBG_FACILITY NFSDBG_PNFS_LD | ||
42 | |||
43 | static int decode_sector_number(__be32 **rp, sector_t *sp) | ||
44 | { | ||
45 | uint64_t s; | ||
46 | |||
47 | *rp = xdr_decode_hyper(*rp, &s); | ||
48 | if (s & 0x1ff) { | ||
49 | printk(KERN_WARNING "NFS: %s: sector not aligned\n", __func__); | ||
50 | return -1; | ||
51 | } | ||
52 | *sp = s >> SECTOR_SHIFT; | ||
53 | return 0; | ||
54 | } | ||
55 | |||
56 | /* | ||
57 | * Release the block device | ||
58 | */ | ||
59 | void nfs4_blkdev_put(struct block_device *bdev) | ||
60 | { | ||
61 | dprintk("%s for device %d:%d\n", __func__, MAJOR(bdev->bd_dev), | ||
62 | MINOR(bdev->bd_dev)); | ||
63 | blkdev_put(bdev, FMODE_READ); | ||
64 | } | ||
65 | |||
66 | ssize_t bl_pipe_downcall(struct file *filp, const char __user *src, | ||
67 | size_t mlen) | ||
68 | { | ||
69 | struct nfs_net *nn = net_generic(filp->f_dentry->d_sb->s_fs_info, | ||
70 | nfs_net_id); | ||
71 | |||
72 | if (mlen != sizeof (struct bl_dev_msg)) | ||
73 | return -EINVAL; | ||
74 | |||
75 | if (copy_from_user(&nn->bl_mount_reply, src, mlen) != 0) | ||
76 | return -EFAULT; | ||
77 | |||
78 | wake_up(&nn->bl_wq); | ||
79 | |||
80 | return mlen; | ||
81 | } | ||
82 | |||
83 | void bl_pipe_destroy_msg(struct rpc_pipe_msg *msg) | ||
84 | { | ||
85 | struct bl_pipe_msg *bl_pipe_msg = container_of(msg, struct bl_pipe_msg, msg); | ||
86 | |||
87 | if (msg->errno >= 0) | ||
88 | return; | ||
89 | wake_up(bl_pipe_msg->bl_wq); | ||
90 | } | ||
91 | |||
92 | /* | ||
93 | * Decodes pnfs_block_deviceaddr4 which is XDR encoded in dev->dev_addr_buf. | ||
94 | */ | ||
95 | struct pnfs_block_dev * | ||
96 | nfs4_blk_decode_device(struct nfs_server *server, | ||
97 | struct pnfs_device *dev) | ||
98 | { | ||
99 | struct pnfs_block_dev *rv; | ||
100 | struct block_device *bd = NULL; | ||
101 | struct bl_pipe_msg bl_pipe_msg; | ||
102 | struct rpc_pipe_msg *msg = &bl_pipe_msg.msg; | ||
103 | struct bl_msg_hdr bl_msg = { | ||
104 | .type = BL_DEVICE_MOUNT, | ||
105 | .totallen = dev->mincount, | ||
106 | }; | ||
107 | uint8_t *dataptr; | ||
108 | DECLARE_WAITQUEUE(wq, current); | ||
109 | int offset, len, i, rc; | ||
110 | struct net *net = server->nfs_client->cl_net; | ||
111 | struct nfs_net *nn = net_generic(net, nfs_net_id); | ||
112 | struct bl_dev_msg *reply = &nn->bl_mount_reply; | ||
113 | |||
114 | dprintk("%s CREATING PIPEFS MESSAGE\n", __func__); | ||
115 | dprintk("%s: deviceid: %s, mincount: %d\n", __func__, dev->dev_id.data, | ||
116 | dev->mincount); | ||
117 | |||
118 | bl_pipe_msg.bl_wq = &nn->bl_wq; | ||
119 | memset(msg, 0, sizeof(*msg)); | ||
120 | msg->data = kzalloc(sizeof(bl_msg) + dev->mincount, GFP_NOFS); | ||
121 | if (!msg->data) { | ||
122 | rv = ERR_PTR(-ENOMEM); | ||
123 | goto out; | ||
124 | } | ||
125 | |||
126 | memcpy(msg->data, &bl_msg, sizeof(bl_msg)); | ||
127 | dataptr = (uint8_t *) msg->data; | ||
128 | len = dev->mincount; | ||
129 | offset = sizeof(bl_msg); | ||
130 | for (i = 0; len > 0; i++) { | ||
131 | memcpy(&dataptr[offset], page_address(dev->pages[i]), | ||
132 | len < PAGE_CACHE_SIZE ? len : PAGE_CACHE_SIZE); | ||
133 | len -= PAGE_CACHE_SIZE; | ||
134 | offset += PAGE_CACHE_SIZE; | ||
135 | } | ||
136 | msg->len = sizeof(bl_msg) + dev->mincount; | ||
137 | |||
138 | dprintk("%s CALLING USERSPACE DAEMON\n", __func__); | ||
139 | add_wait_queue(&nn->bl_wq, &wq); | ||
140 | rc = rpc_queue_upcall(nn->bl_device_pipe, msg); | ||
141 | if (rc < 0) { | ||
142 | remove_wait_queue(&nn->bl_wq, &wq); | ||
143 | rv = ERR_PTR(rc); | ||
144 | goto out; | ||
145 | } | ||
146 | |||
147 | set_current_state(TASK_UNINTERRUPTIBLE); | ||
148 | schedule(); | ||
149 | __set_current_state(TASK_RUNNING); | ||
150 | remove_wait_queue(&nn->bl_wq, &wq); | ||
151 | |||
152 | if (reply->status != BL_DEVICE_REQUEST_PROC) { | ||
153 | dprintk("%s failed to open device: %d\n", | ||
154 | __func__, reply->status); | ||
155 | rv = ERR_PTR(-EINVAL); | ||
156 | goto out; | ||
157 | } | ||
158 | |||
159 | bd = blkdev_get_by_dev(MKDEV(reply->major, reply->minor), | ||
160 | FMODE_READ, NULL); | ||
161 | if (IS_ERR(bd)) { | ||
162 | dprintk("%s failed to open device : %ld\n", __func__, | ||
163 | PTR_ERR(bd)); | ||
164 | rv = ERR_CAST(bd); | ||
165 | goto out; | ||
166 | } | ||
167 | |||
168 | rv = kzalloc(sizeof(*rv), GFP_NOFS); | ||
169 | if (!rv) { | ||
170 | rv = ERR_PTR(-ENOMEM); | ||
171 | goto out; | ||
172 | } | ||
173 | |||
174 | rv->bm_mdev = bd; | ||
175 | memcpy(&rv->bm_mdevid, &dev->dev_id, sizeof(struct nfs4_deviceid)); | ||
176 | rv->net = net; | ||
177 | dprintk("%s Created device %s with bd_block_size %u\n", | ||
178 | __func__, | ||
179 | bd->bd_disk->disk_name, | ||
180 | bd->bd_block_size); | ||
181 | |||
182 | out: | ||
183 | kfree(msg->data); | ||
184 | return rv; | ||
185 | } | ||
186 | |||
187 | /* Map deviceid returned by the server to constructed block_device */ | ||
188 | static struct block_device *translate_devid(struct pnfs_layout_hdr *lo, | ||
189 | struct nfs4_deviceid *id) | ||
190 | { | ||
191 | struct block_device *rv = NULL; | ||
192 | struct block_mount_id *mid; | ||
193 | struct pnfs_block_dev *dev; | ||
194 | |||
195 | dprintk("%s enter, lo=%p, id=%p\n", __func__, lo, id); | ||
196 | mid = BLK_ID(lo); | ||
197 | spin_lock(&mid->bm_lock); | ||
198 | list_for_each_entry(dev, &mid->bm_devlist, bm_node) { | ||
199 | if (memcmp(id->data, dev->bm_mdevid.data, | ||
200 | NFS4_DEVICEID4_SIZE) == 0) { | ||
201 | rv = dev->bm_mdev; | ||
202 | goto out; | ||
203 | } | ||
204 | } | ||
205 | out: | ||
206 | spin_unlock(&mid->bm_lock); | ||
207 | dprintk("%s returning %p\n", __func__, rv); | ||
208 | return rv; | ||
209 | } | ||
210 | |||
211 | /* Tracks info needed to ensure extents in layout obey constraints of spec */ | ||
212 | struct layout_verification { | ||
213 | u32 mode; /* R or RW */ | ||
214 | u64 start; /* Expected start of next non-COW extent */ | ||
215 | u64 inval; /* Start of INVAL coverage */ | ||
216 | u64 cowread; /* End of COW read coverage */ | ||
217 | }; | ||
218 | |||
219 | /* Verify the extent meets the layout requirements of the pnfs-block draft, | ||
220 | * section 2.3.1. | ||
221 | */ | ||
222 | static int verify_extent(struct pnfs_block_extent *be, | ||
223 | struct layout_verification *lv) | ||
224 | { | ||
225 | if (lv->mode == IOMODE_READ) { | ||
226 | if (be->be_state == PNFS_BLOCK_READWRITE_DATA || | ||
227 | be->be_state == PNFS_BLOCK_INVALID_DATA) | ||
228 | return -EIO; | ||
229 | if (be->be_f_offset != lv->start) | ||
230 | return -EIO; | ||
231 | lv->start += be->be_length; | ||
232 | return 0; | ||
233 | } | ||
234 | /* lv->mode == IOMODE_RW */ | ||
235 | if (be->be_state == PNFS_BLOCK_READWRITE_DATA) { | ||
236 | if (be->be_f_offset != lv->start) | ||
237 | return -EIO; | ||
238 | if (lv->cowread > lv->start) | ||
239 | return -EIO; | ||
240 | lv->start += be->be_length; | ||
241 | lv->inval = lv->start; | ||
242 | return 0; | ||
243 | } else if (be->be_state == PNFS_BLOCK_INVALID_DATA) { | ||
244 | if (be->be_f_offset != lv->start) | ||
245 | return -EIO; | ||
246 | lv->start += be->be_length; | ||
247 | return 0; | ||
248 | } else if (be->be_state == PNFS_BLOCK_READ_DATA) { | ||
249 | if (be->be_f_offset > lv->start) | ||
250 | return -EIO; | ||
251 | if (be->be_f_offset < lv->inval) | ||
252 | return -EIO; | ||
253 | if (be->be_f_offset < lv->cowread) | ||
254 | return -EIO; | ||
255 | /* It looks like you might want to min this with lv->start, | ||
256 | * but you really don't. | ||
257 | */ | ||
258 | lv->inval = lv->inval + be->be_length; | ||
259 | lv->cowread = be->be_f_offset + be->be_length; | ||
260 | return 0; | ||
261 | } else | ||
262 | return -EIO; | ||
263 | } | ||
264 | |||
265 | /* XDR decode pnfs_block_layout4 structure */ | ||
266 | int | ||
267 | nfs4_blk_process_layoutget(struct pnfs_layout_hdr *lo, | ||
268 | struct nfs4_layoutget_res *lgr, gfp_t gfp_flags) | ||
269 | { | ||
270 | struct pnfs_block_layout *bl = BLK_LO2EXT(lo); | ||
271 | int i, status = -EIO; | ||
272 | uint32_t count; | ||
273 | struct pnfs_block_extent *be = NULL, *save; | ||
274 | struct xdr_stream stream; | ||
275 | struct xdr_buf buf; | ||
276 | struct page *scratch; | ||
277 | __be32 *p; | ||
278 | struct layout_verification lv = { | ||
279 | .mode = lgr->range.iomode, | ||
280 | .start = lgr->range.offset >> SECTOR_SHIFT, | ||
281 | .inval = lgr->range.offset >> SECTOR_SHIFT, | ||
282 | .cowread = lgr->range.offset >> SECTOR_SHIFT, | ||
283 | }; | ||
284 | LIST_HEAD(extents); | ||
285 | |||
286 | dprintk("---> %s\n", __func__); | ||
287 | |||
288 | scratch = alloc_page(gfp_flags); | ||
289 | if (!scratch) | ||
290 | return -ENOMEM; | ||
291 | |||
292 | xdr_init_decode_pages(&stream, &buf, lgr->layoutp->pages, lgr->layoutp->len); | ||
293 | xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE); | ||
294 | |||
295 | p = xdr_inline_decode(&stream, 4); | ||
296 | if (unlikely(!p)) | ||
297 | goto out_err; | ||
298 | |||
299 | count = be32_to_cpup(p++); | ||
300 | |||
301 | dprintk("%s enter, number of extents %i\n", __func__, count); | ||
302 | p = xdr_inline_decode(&stream, (28 + NFS4_DEVICEID4_SIZE) * count); | ||
303 | if (unlikely(!p)) | ||
304 | goto out_err; | ||
305 | |||
306 | /* Decode individual extents, putting them in temporary | ||
307 | * staging area until whole layout is decoded to make error | ||
308 | * recovery easier. | ||
309 | */ | ||
310 | for (i = 0; i < count; i++) { | ||
311 | be = bl_alloc_extent(); | ||
312 | if (!be) { | ||
313 | status = -ENOMEM; | ||
314 | goto out_err; | ||
315 | } | ||
316 | memcpy(&be->be_devid, p, NFS4_DEVICEID4_SIZE); | ||
317 | p += XDR_QUADLEN(NFS4_DEVICEID4_SIZE); | ||
318 | be->be_mdev = translate_devid(lo, &be->be_devid); | ||
319 | if (!be->be_mdev) | ||
320 | goto out_err; | ||
321 | |||
322 | /* The next three values are read in as bytes, | ||
323 | * but stored as 512-byte sector lengths | ||
324 | */ | ||
325 | if (decode_sector_number(&p, &be->be_f_offset) < 0) | ||
326 | goto out_err; | ||
327 | if (decode_sector_number(&p, &be->be_length) < 0) | ||
328 | goto out_err; | ||
329 | if (decode_sector_number(&p, &be->be_v_offset) < 0) | ||
330 | goto out_err; | ||
331 | be->be_state = be32_to_cpup(p++); | ||
332 | if (be->be_state == PNFS_BLOCK_INVALID_DATA) | ||
333 | be->be_inval = &bl->bl_inval; | ||
334 | if (verify_extent(be, &lv)) { | ||
335 | dprintk("%s verify failed\n", __func__); | ||
336 | goto out_err; | ||
337 | } | ||
338 | list_add_tail(&be->be_node, &extents); | ||
339 | } | ||
340 | if (lgr->range.offset + lgr->range.length != | ||
341 | lv.start << SECTOR_SHIFT) { | ||
342 | dprintk("%s Final length mismatch\n", __func__); | ||
343 | be = NULL; | ||
344 | goto out_err; | ||
345 | } | ||
346 | if (lv.start < lv.cowread) { | ||
347 | dprintk("%s Final uncovered COW extent\n", __func__); | ||
348 | be = NULL; | ||
349 | goto out_err; | ||
350 | } | ||
351 | /* Extents decoded properly, now try to merge them in to | ||
352 | * existing layout extents. | ||
353 | */ | ||
354 | spin_lock(&bl->bl_ext_lock); | ||
355 | list_for_each_entry_safe(be, save, &extents, be_node) { | ||
356 | list_del(&be->be_node); | ||
357 | status = bl_add_merge_extent(bl, be); | ||
358 | if (status) { | ||
359 | spin_unlock(&bl->bl_ext_lock); | ||
360 | /* This is a fairly catastrophic error, as the | ||
361 | * entire layout extent lists are now corrupted. | ||
362 | * We should have some way to distinguish this. | ||
363 | */ | ||
364 | be = NULL; | ||
365 | goto out_err; | ||
366 | } | ||
367 | } | ||
368 | spin_unlock(&bl->bl_ext_lock); | ||
369 | status = 0; | ||
370 | out: | ||
371 | __free_page(scratch); | ||
372 | dprintk("%s returns %i\n", __func__, status); | ||
373 | return status; | ||
374 | |||
375 | out_err: | ||
376 | bl_put_extent(be); | ||
377 | while (!list_empty(&extents)) { | ||
378 | be = list_first_entry(&extents, struct pnfs_block_extent, | ||
379 | be_node); | ||
380 | list_del(&be->be_node); | ||
381 | bl_put_extent(be); | ||
382 | } | ||
383 | goto out; | ||
384 | } | ||
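The removed decode_sector_number() converted byte values off the wire into 512-byte sector counts, rejecting anything not sector-aligned. The same contract in a self-contained sketch (bytes_to_sector is a hypothetical name):

    #include <stdint.h>
    #include <stdio.h>

    #define SECTOR_SHIFT 9    /* 512-byte sectors, as in the driver */

    /* Byte values must be 512-byte aligned before being stored as
     * sector counts; unaligned input is an error. */
    static int bytes_to_sector(uint64_t bytes, uint64_t *sector)
    {
        if (bytes & 0x1ff)    /* any of the low 9 bits set -> unaligned */
            return -1;
        *sector = bytes >> SECTOR_SHIFT;
        return 0;
    }

    int main(void)
    {
        uint64_t s = 0;
        int ret = bytes_to_sector(4096, &s);

        printf("4096 -> ret=%d sector=%llu\n", ret, (unsigned long long)s);
        printf("4100 -> ret=%d (unaligned)\n", bytes_to_sector(4100, &s));
        return 0;
    }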
diff --git a/fs/nfs/blocklayout/blocklayoutdm.c b/fs/nfs/blocklayout/blocklayoutdm.c deleted file mode 100644 index 8999cfddd866..000000000000 --- a/fs/nfs/blocklayout/blocklayoutdm.c +++ /dev/null | |||
@@ -1,108 +0,0 @@ | |||
1 | /* | ||
2 | * linux/fs/nfs/blocklayout/blocklayoutdm.c | ||
3 | * | ||
4 | * Module for the NFSv4.1 pNFS block layout driver. | ||
5 | * | ||
6 | * Copyright (c) 2007 The Regents of the University of Michigan. | ||
7 | * All rights reserved. | ||
8 | * | ||
9 | * Fred Isaman <iisaman@umich.edu> | ||
10 | * Andy Adamson <andros@citi.umich.edu> | ||
11 | * | ||
12 | * permission is granted to use, copy, create derivative works and | ||
13 | * redistribute this software and such derivative works for any purpose, | ||
14 | * so long as the name of the university of michigan is not used in | ||
15 | * any advertising or publicity pertaining to the use or distribution | ||
16 | * of this software without specific, written prior authorization. if | ||
17 | * the above copyright notice or any other identification of the | ||
18 | * university of michigan is included in any copy of any portion of | ||
19 | * this software, then the disclaimer below must also be included. | ||
20 | * | ||
21 | * this software is provided as is, without representation from the | ||
22 | * university of michigan as to its fitness for any purpose, and without | ||
23 | * warranty by the university of michigan of any kind, either express | ||
24 | * or implied, including without limitation the implied warranties of | ||
25 | * merchantability and fitness for a particular purpose. the regents | ||
26 | * of the university of michigan shall not be liable for any damages, | ||
27 | * including special, indirect, incidental, or consequential damages, | ||
28 | * with respect to any claim arising out or in connection with the use | ||
29 | * of the software, even if it has been or is hereafter advised of the | ||
30 | * possibility of such damages. | ||
31 | */ | ||
32 | |||
33 | #include <linux/genhd.h> /* gendisk - used in a dprintk*/ | ||
34 | #include <linux/sched.h> | ||
35 | #include <linux/hash.h> | ||
36 | |||
37 | #include "blocklayout.h" | ||
38 | |||
39 | #define NFSDBG_FACILITY NFSDBG_PNFS_LD | ||
40 | |||
41 | static void dev_remove(struct net *net, dev_t dev) | ||
42 | { | ||
43 | struct bl_pipe_msg bl_pipe_msg; | ||
44 | struct rpc_pipe_msg *msg = &bl_pipe_msg.msg; | ||
45 | struct bl_dev_msg bl_umount_request; | ||
46 | struct bl_msg_hdr bl_msg = { | ||
47 | .type = BL_DEVICE_UMOUNT, | ||
48 | .totallen = sizeof(bl_umount_request), | ||
49 | }; | ||
50 | uint8_t *dataptr; | ||
51 | DECLARE_WAITQUEUE(wq, current); | ||
52 | struct nfs_net *nn = net_generic(net, nfs_net_id); | ||
53 | |||
54 | dprintk("Entering %s\n", __func__); | ||
55 | |||
56 | bl_pipe_msg.bl_wq = &nn->bl_wq; | ||
57 | memset(msg, 0, sizeof(*msg)); | ||
58 | msg->len = sizeof(bl_msg) + bl_msg.totallen; | ||
59 | msg->data = kzalloc(msg->len, GFP_NOFS); | ||
60 | if (!msg->data) | ||
61 | goto out; | ||
62 | |||
63 | memset(&bl_umount_request, 0, sizeof(bl_umount_request)); | ||
64 | bl_umount_request.major = MAJOR(dev); | ||
65 | bl_umount_request.minor = MINOR(dev); | ||
66 | |||
67 | memcpy(msg->data, &bl_msg, sizeof(bl_msg)); | ||
68 | dataptr = (uint8_t *) msg->data; | ||
69 | memcpy(&dataptr[sizeof(bl_msg)], &bl_umount_request, sizeof(bl_umount_request)); | ||
70 | |||
71 | add_wait_queue(&nn->bl_wq, &wq); | ||
72 | if (rpc_queue_upcall(nn->bl_device_pipe, msg) < 0) { | ||
73 | remove_wait_queue(&nn->bl_wq, &wq); | ||
74 | goto out; | ||
75 | } | ||
76 | |||
77 | set_current_state(TASK_UNINTERRUPTIBLE); | ||
78 | schedule(); | ||
79 | __set_current_state(TASK_RUNNING); | ||
80 | remove_wait_queue(&nn->bl_wq, &wq); | ||
81 | |||
82 | out: | ||
83 | kfree(msg->data); | ||
84 | } | ||
85 | |||
86 | /* | ||
87 | * Release meta device | ||
88 | */ | ||
89 | static void nfs4_blk_metadev_release(struct pnfs_block_dev *bdev) | ||
90 | { | ||
91 | dprintk("%s Releasing\n", __func__); | ||
92 | nfs4_blkdev_put(bdev->bm_mdev); | ||
93 | dev_remove(bdev->net, bdev->bm_mdev->bd_dev); | ||
94 | } | ||
95 | |||
96 | void bl_free_block_dev(struct pnfs_block_dev *bdev) | ||
97 | { | ||
98 | if (bdev) { | ||
99 | if (bdev->bm_mdev) { | ||
100 | dprintk("%s Removing DM device: %d:%d\n", | ||
101 | __func__, | ||
102 | MAJOR(bdev->bm_mdev->bd_dev), | ||
103 | MINOR(bdev->bm_mdev->bd_dev)); | ||
104 | nfs4_blk_metadev_release(bdev); | ||
105 | } | ||
106 | kfree(bdev); | ||
107 | } | ||
108 | } | ||
diff --git a/fs/nfs/blocklayout/dev.c b/fs/nfs/blocklayout/dev.c new file mode 100644 index 000000000000..5aed4f98df41 --- /dev/null +++ b/fs/nfs/blocklayout/dev.c | |||
@@ -0,0 +1,363 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2014 Christoph Hellwig. | ||
3 | */ | ||
4 | #include <linux/sunrpc/svc.h> | ||
5 | #include <linux/blkdev.h> | ||
6 | #include <linux/nfs4.h> | ||
7 | #include <linux/nfs_fs.h> | ||
8 | #include <linux/nfs_xdr.h> | ||
9 | |||
10 | #include "blocklayout.h" | ||
11 | |||
12 | #define NFSDBG_FACILITY NFSDBG_PNFS_LD | ||
13 | |||
14 | static void | ||
15 | bl_free_device(struct pnfs_block_dev *dev) | ||
16 | { | ||
17 | if (dev->nr_children) { | ||
18 | int i; | ||
19 | |||
20 | for (i = 0; i < dev->nr_children; i++) | ||
21 | bl_free_device(&dev->children[i]); | ||
22 | kfree(dev->children); | ||
23 | } else { | ||
24 | if (dev->bdev) | ||
25 | blkdev_put(dev->bdev, FMODE_READ); | ||
26 | } | ||
27 | } | ||
28 | |||
29 | void | ||
30 | bl_free_deviceid_node(struct nfs4_deviceid_node *d) | ||
31 | { | ||
32 | struct pnfs_block_dev *dev = | ||
33 | container_of(d, struct pnfs_block_dev, node); | ||
34 | |||
35 | bl_free_device(dev); | ||
36 | kfree(dev); | ||
37 | } | ||
38 | |||
39 | static int | ||
40 | nfs4_block_decode_volume(struct xdr_stream *xdr, struct pnfs_block_volume *b) | ||
41 | { | ||
42 | __be32 *p; | ||
43 | int i; | ||
44 | |||
45 | p = xdr_inline_decode(xdr, 4); | ||
46 | if (!p) | ||
47 | return -EIO; | ||
48 | b->type = be32_to_cpup(p++); | ||
49 | |||
50 | switch (b->type) { | ||
51 | case PNFS_BLOCK_VOLUME_SIMPLE: | ||
52 | p = xdr_inline_decode(xdr, 4); | ||
53 | if (!p) | ||
54 | return -EIO; | ||
55 | b->simple.nr_sigs = be32_to_cpup(p++); | ||
56 | if (!b->simple.nr_sigs) { | ||
57 | dprintk("no signature\n"); | ||
58 | return -EIO; | ||
59 | } | ||
60 | |||
61 | b->simple.len = 4 + 4; | ||
62 | for (i = 0; i < b->simple.nr_sigs; i++) { | ||
63 | p = xdr_inline_decode(xdr, 8 + 4); | ||
64 | if (!p) | ||
65 | return -EIO; | ||
66 | p = xdr_decode_hyper(p, &b->simple.sigs[i].offset); | ||
67 | b->simple.sigs[i].sig_len = be32_to_cpup(p++); | ||
68 | |||
69 | p = xdr_inline_decode(xdr, b->simple.sigs[i].sig_len); | ||
70 | if (!p) | ||
71 | return -EIO; | ||
72 | memcpy(&b->simple.sigs[i].sig, p, | ||
73 | b->simple.sigs[i].sig_len); | ||
74 | |||
75 | b->simple.len += 8 + 4 + b->simple.sigs[i].sig_len; | ||
76 | } | ||
77 | break; | ||
78 | case PNFS_BLOCK_VOLUME_SLICE: | ||
79 | p = xdr_inline_decode(xdr, 8 + 8 + 4); | ||
80 | if (!p) | ||
81 | return -EIO; | ||
82 | p = xdr_decode_hyper(p, &b->slice.start); | ||
83 | p = xdr_decode_hyper(p, &b->slice.len); | ||
84 | b->slice.volume = be32_to_cpup(p++); | ||
85 | break; | ||
86 | case PNFS_BLOCK_VOLUME_CONCAT: | ||
87 | p = xdr_inline_decode(xdr, 4); | ||
88 | if (!p) | ||
89 | return -EIO; | ||
90 | b->concat.volumes_count = be32_to_cpup(p++); | ||
91 | |||
92 | p = xdr_inline_decode(xdr, b->concat.volumes_count * 4); | ||
93 | if (!p) | ||
94 | return -EIO; | ||
95 | for (i = 0; i < b->concat.volumes_count; i++) | ||
96 | b->concat.volumes[i] = be32_to_cpup(p++); | ||
97 | break; | ||
98 | case PNFS_BLOCK_VOLUME_STRIPE: | ||
99 | p = xdr_inline_decode(xdr, 8 + 4); | ||
100 | if (!p) | ||
101 | return -EIO; | ||
102 | p = xdr_decode_hyper(p, &b->stripe.chunk_size); | ||
103 | b->stripe.volumes_count = be32_to_cpup(p++); | ||
104 | |||
105 | p = xdr_inline_decode(xdr, b->stripe.volumes_count * 4); | ||
106 | if (!p) | ||
107 | return -EIO; | ||
108 | for (i = 0; i < b->stripe.volumes_count; i++) | ||
109 | b->stripe.volumes[i] = be32_to_cpup(p++); | ||
110 | break; | ||
111 | default: | ||
112 | dprintk("unknown volume type!\n"); | ||
113 | return -EIO; | ||
114 | } | ||
115 | |||
116 | return 0; | ||
117 | } | ||
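nfs4_block_decode_volume() above follows the classic discriminated-decode pattern: read a type tag, then decode variant-specific fields into a tagged union. A minimal sketch of the pattern over a raw big-endian buffer (hypothetical two-variant protocol, not the real XDR stream):

    #include <stdint.h>
    #include <stdio.h>

    /* Read a big-endian u32 and advance the cursor. */
    static uint32_t get_be32(const uint8_t **p)
    {
        uint32_t v = ((uint32_t)(*p)[0] << 24) | ((*p)[1] << 16) |
                     ((*p)[2] << 8) | (*p)[3];
        *p += 4;
        return v;
    }

    struct volume {
        uint32_t type;
        union {
            struct { uint32_t id; } simple;          /* type 0 */
            struct { uint32_t start, len; } slice;   /* type 1 */
        };
    };

    static int decode_volume(const uint8_t **p, struct volume *v)
    {
        v->type = get_be32(p);
        switch (v->type) {
        case 0:
            v->simple.id = get_be32(p);
            return 0;
        case 1:
            v->slice.start = get_be32(p);
            v->slice.len = get_be32(p);
            return 0;
        default:
            return -1;    /* unknown volume type */
        }
    }

    int main(void)
    {
        const uint8_t buf[] = { 0,0,0,1, 0,0,0,8, 0,0,0,16 };
        const uint8_t *p = buf;
        struct volume v;

        if (decode_volume(&p, &v) == 0)
            printf("slice start=%u len=%u\n", v.slice.start, v.slice.len);
        return 0;
    }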
118 | |||
119 | static bool bl_map_simple(struct pnfs_block_dev *dev, u64 offset, | ||
120 | struct pnfs_block_dev_map *map) | ||
121 | { | ||
122 | map->start = dev->start; | ||
123 | map->len = dev->len; | ||
124 | map->disk_offset = dev->disk_offset; | ||
125 | map->bdev = dev->bdev; | ||
126 | return true; | ||
127 | } | ||
128 | |||
129 | static bool bl_map_concat(struct pnfs_block_dev *dev, u64 offset, | ||
130 | struct pnfs_block_dev_map *map) | ||
131 | { | ||
132 | int i; | ||
133 | |||
134 | for (i = 0; i < dev->nr_children; i++) { | ||
135 | struct pnfs_block_dev *child = &dev->children[i]; | ||
136 | |||
137 | if (child->start > offset || | ||
138 | child->start + child->len <= offset) | ||
139 | continue; | ||
140 | |||
141 | child->map(child, offset - child->start, map); | ||
142 | return true; | ||
143 | } | ||
144 | |||
145 | dprintk("%s: ran off loop!\n", __func__); | ||
146 | return false; | ||
147 | } | ||
148 | |||
149 | static bool bl_map_stripe(struct pnfs_block_dev *dev, u64 offset, | ||
150 | struct pnfs_block_dev_map *map) | ||
151 | { | ||
152 | struct pnfs_block_dev *child; | ||
153 | u64 chunk; | ||
154 | u32 chunk_idx; | ||
155 | u64 disk_offset; | ||
156 | |||
157 | chunk = div_u64(offset, dev->chunk_size); | ||
158 | div_u64_rem(chunk, dev->nr_children, &chunk_idx); | ||
159 | |||
160 | if (chunk_idx >= dev->nr_children) { | ||
161 | dprintk("%s: invalid chunk idx %d (%lld/%lld)\n", | ||
162 | __func__, chunk_idx, offset, dev->chunk_size); | ||
163 | /* error, should not happen */ | ||
164 | return false; | ||
165 | } | ||
166 | |||
167 | /* truncate offset to the beginning of the stripe */ | ||
168 | offset = chunk * dev->chunk_size; | ||
169 | |||
170 | /* disk offset of the stripe */ | ||
171 | disk_offset = div_u64(offset, dev->nr_children); | ||
172 | |||
173 | child = &dev->children[chunk_idx]; | ||
174 | child->map(child, disk_offset, map); | ||
175 | |||
176 | map->start += offset; | ||
177 | map->disk_offset += disk_offset; | ||
178 | map->len = dev->chunk_size; | ||
179 | return true; | ||
180 | } | ||
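bl_map_stripe() locates the stripe unit with a divide and a remainder, truncates the offset to the stripe start, and divides by the child count for the per-child disk offset. A worked example with illustrative numbers:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t chunk_size = 64;    /* sectors per stripe unit */
        uint32_t nr_children = 3;
        uint64_t offset = 500;       /* file offset, in sectors */

        uint64_t chunk = offset / chunk_size;        /* 7 */
        uint32_t chunk_idx = chunk % nr_children;    /* child 1 */
        uint64_t stripe_start = chunk * chunk_size;  /* 448 */
        uint64_t disk_offset = stripe_start / nr_children;  /* 149 */

        printf("chunk=%llu child=%u stripe_start=%llu disk_offset=%llu\n",
               (unsigned long long)chunk, chunk_idx,
               (unsigned long long)stripe_start,
               (unsigned long long)disk_offset);
        return 0;
    }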
181 | |||
182 | static int | ||
183 | bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d, | ||
184 | struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask); | ||
185 | |||
186 | |||
187 | static int | ||
188 | bl_parse_simple(struct nfs_server *server, struct pnfs_block_dev *d, | ||
189 | struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask) | ||
190 | { | ||
191 | struct pnfs_block_volume *v = &volumes[idx]; | ||
192 | dev_t dev; | ||
193 | |||
194 | dev = bl_resolve_deviceid(server, v, gfp_mask); | ||
195 | if (!dev) | ||
196 | return -EIO; | ||
197 | |||
198 | d->bdev = blkdev_get_by_dev(dev, FMODE_READ, NULL); | ||
199 | if (IS_ERR(d->bdev)) { | ||
200 | printk(KERN_WARNING "pNFS: failed to open device %d:%d (%ld)\n", | ||
201 | MAJOR(dev), MINOR(dev), PTR_ERR(d->bdev)); | ||
202 | return PTR_ERR(d->bdev); | ||
203 | } | ||
204 | |||
205 | |||
206 | d->len = i_size_read(d->bdev->bd_inode); | ||
207 | d->map = bl_map_simple; | ||
208 | |||
209 | printk(KERN_INFO "pNFS: using block device %s\n", | ||
210 | d->bdev->bd_disk->disk_name); | ||
211 | return 0; | ||
212 | } | ||
213 | |||
214 | static int | ||
215 | bl_parse_slice(struct nfs_server *server, struct pnfs_block_dev *d, | ||
216 | struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask) | ||
217 | { | ||
218 | struct pnfs_block_volume *v = &volumes[idx]; | ||
219 | int ret; | ||
220 | |||
221 | ret = bl_parse_deviceid(server, d, volumes, v->slice.volume, gfp_mask); | ||
222 | if (ret) | ||
223 | return ret; | ||
224 | |||
225 | d->disk_offset = v->slice.start; | ||
226 | d->len = v->slice.len; | ||
227 | return 0; | ||
228 | } | ||
229 | |||
230 | static int | ||
231 | bl_parse_concat(struct nfs_server *server, struct pnfs_block_dev *d, | ||
232 | struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask) | ||
233 | { | ||
234 | struct pnfs_block_volume *v = &volumes[idx]; | ||
235 | u64 len = 0; | ||
236 | int ret, i; | ||
237 | |||
238 | d->children = kcalloc(v->concat.volumes_count, | ||
239 | sizeof(struct pnfs_block_dev), GFP_KERNEL); | ||
240 | if (!d->children) | ||
241 | return -ENOMEM; | ||
242 | |||
243 | for (i = 0; i < v->concat.volumes_count; i++) { | ||
244 | ret = bl_parse_deviceid(server, &d->children[i], | ||
245 | volumes, v->concat.volumes[i], gfp_mask); | ||
246 | if (ret) | ||
247 | return ret; | ||
248 | |||
249 | d->nr_children++; | ||
250 | d->children[i].start += len; | ||
251 | len += d->children[i].len; | ||
252 | } | ||
253 | |||
254 | d->len = len; | ||
255 | d->map = bl_map_concat; | ||
256 | return 0; | ||
257 | } | ||
258 | |||
259 | static int | ||
260 | bl_parse_stripe(struct nfs_server *server, struct pnfs_block_dev *d, | ||
261 | struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask) | ||
262 | { | ||
263 | struct pnfs_block_volume *v = &volumes[idx]; | ||
264 | u64 len = 0; | ||
265 | int ret, i; | ||
266 | |||
267 | d->children = kcalloc(v->stripe.volumes_count, | ||
268 | sizeof(struct pnfs_block_dev), GFP_KERNEL); | ||
269 | if (!d->children) | ||
270 | return -ENOMEM; | ||
271 | |||
272 | for (i = 0; i < v->stripe.volumes_count; i++) { | ||
273 | ret = bl_parse_deviceid(server, &d->children[i], | ||
274 | volumes, v->stripe.volumes[i], gfp_mask); | ||
275 | if (ret) | ||
276 | return ret; | ||
277 | |||
278 | d->nr_children++; | ||
279 | len += d->children[i].len; | ||
280 | } | ||
281 | |||
282 | d->len = len; | ||
283 | d->chunk_size = v->stripe.chunk_size; | ||
284 | d->map = bl_map_stripe; | ||
285 | return 0; | ||
286 | } | ||
287 | |||
288 | static int | ||
289 | bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d, | ||
290 | struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask) | ||
291 | { | ||
292 | switch (volumes[idx].type) { | ||
293 | case PNFS_BLOCK_VOLUME_SIMPLE: | ||
294 | return bl_parse_simple(server, d, volumes, idx, gfp_mask); | ||
295 | case PNFS_BLOCK_VOLUME_SLICE: | ||
296 | return bl_parse_slice(server, d, volumes, idx, gfp_mask); | ||
297 | case PNFS_BLOCK_VOLUME_CONCAT: | ||
298 | return bl_parse_concat(server, d, volumes, idx, gfp_mask); | ||
299 | case PNFS_BLOCK_VOLUME_STRIPE: | ||
300 | return bl_parse_stripe(server, d, volumes, idx, gfp_mask); | ||
301 | default: | ||
302 | dprintk("unsupported volume type: %d\n", volumes[idx].type); | ||
303 | return -EIO; | ||
304 | } | ||
305 | } | ||
306 | |||
307 | struct nfs4_deviceid_node * | ||
308 | bl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, | ||
309 | gfp_t gfp_mask) | ||
310 | { | ||
311 | struct nfs4_deviceid_node *node = NULL; | ||
312 | struct pnfs_block_volume *volumes; | ||
313 | struct pnfs_block_dev *top; | ||
314 | struct xdr_stream xdr; | ||
315 | struct xdr_buf buf; | ||
316 | struct page *scratch; | ||
317 | int nr_volumes, ret, i; | ||
318 | __be32 *p; | ||
319 | |||
320 | scratch = alloc_page(gfp_mask); | ||
321 | if (!scratch) | ||
322 | goto out; | ||
323 | |||
324 | xdr_init_decode_pages(&xdr, &buf, pdev->pages, pdev->pglen); | ||
325 | xdr_set_scratch_buffer(&xdr, page_address(scratch), PAGE_SIZE); | ||
326 | |||
327 | p = xdr_inline_decode(&xdr, sizeof(__be32)); | ||
328 | if (!p) | ||
329 | goto out_free_scratch; | ||
330 | nr_volumes = be32_to_cpup(p++); | ||
331 | |||
332 | volumes = kcalloc(nr_volumes, sizeof(struct pnfs_block_volume), | ||
333 | gfp_mask); | ||
334 | if (!volumes) | ||
335 | goto out_free_scratch; | ||
336 | |||
337 | for (i = 0; i < nr_volumes; i++) { | ||
338 | ret = nfs4_block_decode_volume(&xdr, &volumes[i]); | ||
339 | if (ret < 0) | ||
340 | goto out_free_volumes; | ||
341 | } | ||
342 | |||
343 | top = kzalloc(sizeof(*top), gfp_mask); | ||
344 | if (!top) | ||
345 | goto out_free_volumes; | ||
346 | |||
347 | ret = bl_parse_deviceid(server, top, volumes, nr_volumes - 1, gfp_mask); | ||
348 | if (ret) { | ||
349 | bl_free_device(top); | ||
350 | kfree(top); | ||
351 | goto out_free_volumes; | ||
352 | } | ||
353 | |||
354 | node = &top->node; | ||
355 | nfs4_init_deviceid_node(node, server, &pdev->dev_id); | ||
356 | |||
357 | out_free_volumes: | ||
358 | kfree(volumes); | ||
359 | out_free_scratch: | ||
360 | __free_page(scratch); | ||
361 | out: | ||
362 | return node; | ||
363 | } | ||
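bl_alloc_deviceid_node() decodes a flat volume array in which composite entries reference earlier entries by index and the last entry is the root, so bl_parse_deviceid() can recurse without cycles. A self-contained sketch of that bottom-up resolution (hypothetical struct vol):

    #include <stdio.h>

    /* Flat volume array: composite entries point at earlier indices,
     * and the last entry is the root, as in bl_alloc_deviceid_node(). */
    struct vol {
        int is_concat;    /* 0 = leaf with fixed len, 1 = concat of two */
        long len;         /* leaf length */
        int child[2];     /* indices of children when concat */
    };

    static long resolve(const struct vol *v, int idx)
    {
        if (!v[idx].is_concat)
            return v[idx].len;
        /* children always have smaller indices, so this terminates */
        return resolve(v, v[idx].child[0]) + resolve(v, v[idx].child[1]);
    }

    int main(void)
    {
        const struct vol vols[] = {
            { 0, 100, {0, 0} },    /* 0: leaf */
            { 0, 200, {0, 0} },    /* 1: leaf */
            { 1, 0,   {0, 1} },    /* 2: root = concat(0, 1) */
        };
        int n = 3;

        printf("total len = %ld\n", resolve(vols, n - 1));
        return 0;
    }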
diff --git a/fs/nfs/blocklayout/extent_tree.c b/fs/nfs/blocklayout/extent_tree.c new file mode 100644 index 000000000000..31d0b5e53dfd --- /dev/null +++ b/fs/nfs/blocklayout/extent_tree.c | |||
@@ -0,0 +1,602 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2014 Christoph Hellwig. | ||
3 | */ | ||
4 | |||
5 | #include <linux/vmalloc.h> | ||
6 | |||
7 | #include "blocklayout.h" | ||
8 | |||
9 | #define NFSDBG_FACILITY NFSDBG_PNFS_LD | ||
10 | |||
11 | static inline struct pnfs_block_extent * | ||
12 | ext_node(struct rb_node *node) | ||
13 | { | ||
14 | return rb_entry(node, struct pnfs_block_extent, be_node); | ||
15 | } | ||
16 | |||
17 | static struct pnfs_block_extent * | ||
18 | ext_tree_first(struct rb_root *root) | ||
19 | { | ||
20 | struct rb_node *node = rb_first(root); | ||
21 | return node ? ext_node(node) : NULL; | ||
22 | } | ||
23 | |||
24 | static struct pnfs_block_extent * | ||
25 | ext_tree_prev(struct pnfs_block_extent *be) | ||
26 | { | ||
27 | struct rb_node *node = rb_prev(&be->be_node); | ||
28 | return node ? ext_node(node) : NULL; | ||
29 | } | ||
30 | |||
31 | static struct pnfs_block_extent * | ||
32 | ext_tree_next(struct pnfs_block_extent *be) | ||
33 | { | ||
34 | struct rb_node *node = rb_next(&be->be_node); | ||
35 | return node ? ext_node(node) : NULL; | ||
36 | } | ||
37 | |||
38 | static inline sector_t | ||
39 | ext_f_end(struct pnfs_block_extent *be) | ||
40 | { | ||
41 | return be->be_f_offset + be->be_length; | ||
42 | } | ||
43 | |||
44 | static struct pnfs_block_extent * | ||
45 | __ext_tree_search(struct rb_root *root, sector_t start) | ||
46 | { | ||
47 | struct rb_node *node = root->rb_node; | ||
48 | struct pnfs_block_extent *be = NULL; | ||
49 | |||
50 | while (node) { | ||
51 | be = ext_node(node); | ||
52 | if (start < be->be_f_offset) | ||
53 | node = node->rb_left; | ||
54 | else if (start >= ext_f_end(be)) | ||
55 | node = node->rb_right; | ||
56 | else | ||
57 | return be; | ||
58 | } | ||
59 | |||
60 | if (be) { | ||
61 | if (start < be->be_f_offset) | ||
62 | return be; | ||
63 | |||
64 | if (start >= ext_f_end(be)) | ||
65 | return ext_tree_next(be); | ||
66 | } | ||
67 | |||
68 | return NULL; | ||
69 | } | ||
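__ext_tree_search() returns the extent covering start if one exists, otherwise the first extent beyond it: in effect a lower-bound query on extent end. The same contract over a sorted array, as a sketch:

    #include <stdio.h>

    struct ext { long start, end; };    /* half-open [start, end) */

    /* First extent with end > key, or -1: the same contract as
     * __ext_tree_search(), on a sorted array instead of an rb-tree. */
    static int first_ending_after(const struct ext *e, int n, long key)
    {
        int lo = 0, hi = n;    /* binary search for the lower bound */

        while (lo < hi) {
            int mid = (lo + hi) / 2;
            if (e[mid].end <= key)
                lo = mid + 1;
            else
                hi = mid;
        }
        return lo < n ? lo : -1;
    }

    int main(void)
    {
        const struct ext e[] = { {0, 10}, {20, 30}, {40, 50} };

        printf("%d\n", first_ending_after(e, 3, 5));    /* 0: covers 5 */
        printf("%d\n", first_ending_after(e, 3, 15));   /* 1: next after gap */
        printf("%d\n", first_ending_after(e, 3, 60));   /* -1: past the end */
        return 0;
    }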
70 | |||
71 | static bool | ||
72 | ext_can_merge(struct pnfs_block_extent *be1, struct pnfs_block_extent *be2) | ||
73 | { | ||
74 | if (be1->be_state != be2->be_state) | ||
75 | return false; | ||
76 | if (be1->be_device != be2->be_device) | ||
77 | return false; | ||
78 | |||
79 | if (be1->be_f_offset + be1->be_length != be2->be_f_offset) | ||
80 | return false; | ||
81 | |||
82 | if (be1->be_state != PNFS_BLOCK_NONE_DATA && | ||
83 | (be1->be_v_offset + be1->be_length != be2->be_v_offset)) | ||
84 | return false; | ||
85 | |||
86 | if (be1->be_state == PNFS_BLOCK_INVALID_DATA && | ||
87 | be1->be_tag != be2->be_tag) | ||
88 | return false; | ||
89 | |||
90 | return true; | ||
91 | } | ||
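ext_can_merge() allows a merge only when state and device match (and, for invalid data, the commit tag), the extents are contiguous in file space, and non-hole extents are also contiguous in volume space. A simplified check of the contiguity rules (sketch only: the device comparison is omitted and the tag is compared unconditionally for brevity):

    #include <stdbool.h>
    #include <stdio.h>

    struct e { long f_off, len, v_off; int state, tag; };

    /* Simplified merge predicate: a must end exactly where b begins,
     * both in file space and (for non-hole states) in volume space. */
    static bool can_merge(const struct e *a, const struct e *b)
    {
        if (a->state != b->state || a->tag != b->tag)
            return false;
        if (a->f_off + a->len != b->f_off)
            return false;
        if (a->state != 3 /* NONE_DATA: a hole, no volume mapping */ &&
            a->v_off + a->len != b->v_off)
            return false;
        return true;
    }

    int main(void)
    {
        struct e a = { 0, 8, 100, 0, 0 };
        struct e b = { 8, 4, 108, 0, 0 };    /* contiguous both ways */
        struct e c = { 8, 4, 200, 0, 0 };    /* file-contiguous only */

        printf("a+b: %d, a+c: %d\n", can_merge(&a, &b), can_merge(&a, &c));
        return 0;
    }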
92 | |||
93 | static struct pnfs_block_extent * | ||
94 | ext_try_to_merge_left(struct rb_root *root, struct pnfs_block_extent *be) | ||
95 | { | ||
96 | struct pnfs_block_extent *left = ext_tree_prev(be); | ||
97 | |||
98 | if (left && ext_can_merge(left, be)) { | ||
99 | left->be_length += be->be_length; | ||
100 | rb_erase(&be->be_node, root); | ||
101 | nfs4_put_deviceid_node(be->be_device); | ||
102 | kfree(be); | ||
103 | return left; | ||
104 | } | ||
105 | |||
106 | return be; | ||
107 | } | ||
108 | |||
109 | static struct pnfs_block_extent * | ||
110 | ext_try_to_merge_right(struct rb_root *root, struct pnfs_block_extent *be) | ||
111 | { | ||
112 | struct pnfs_block_extent *right = ext_tree_next(be); | ||
113 | |||
114 | if (right && ext_can_merge(be, right)) { | ||
115 | be->be_length += right->be_length; | ||
116 | rb_erase(&right->be_node, root); | ||
117 | nfs4_put_deviceid_node(right->be_device); | ||
118 | kfree(right); | ||
119 | } | ||
120 | |||
121 | return be; | ||
122 | } | ||
123 | |||
124 | static void | ||
125 | __ext_tree_insert(struct rb_root *root, | ||
126 | struct pnfs_block_extent *new, bool merge_ok) | ||
127 | { | ||
128 | struct rb_node **p = &root->rb_node, *parent = NULL; | ||
129 | struct pnfs_block_extent *be; | ||
130 | |||
131 | while (*p) { | ||
132 | parent = *p; | ||
133 | be = ext_node(parent); | ||
134 | |||
135 | if (new->be_f_offset < be->be_f_offset) { | ||
136 | if (merge_ok && ext_can_merge(new, be)) { | ||
137 | be->be_f_offset = new->be_f_offset; | ||
138 | if (be->be_state != PNFS_BLOCK_NONE_DATA) | ||
139 | be->be_v_offset = new->be_v_offset; | ||
140 | be->be_length += new->be_length; | ||
141 | be = ext_try_to_merge_left(root, be); | ||
142 | goto free_new; | ||
143 | } | ||
144 | p = &(*p)->rb_left; | ||
145 | } else if (new->be_f_offset >= ext_f_end(be)) { | ||
146 | if (merge_ok && ext_can_merge(be, new)) { | ||
147 | be->be_length += new->be_length; | ||
148 | be = ext_try_to_merge_right(root, be); | ||
149 | goto free_new; | ||
150 | } | ||
151 | p = &(*p)->rb_right; | ||
152 | } else { | ||
153 | BUG(); | ||
154 | } | ||
155 | } | ||
156 | |||
157 | rb_link_node(&new->be_node, parent, p); | ||
158 | rb_insert_color(&new->be_node, root); | ||
159 | return; | ||
160 | free_new: | ||
161 | nfs4_put_deviceid_node(new->be_device); | ||
162 | kfree(new); | ||
163 | } | ||
164 | |||
165 | static int | ||
166 | __ext_tree_remove(struct rb_root *root, sector_t start, sector_t end) | ||
167 | { | ||
168 | struct pnfs_block_extent *be; | ||
169 | sector_t len1 = 0, len2 = 0; | ||
170 | sector_t orig_v_offset; | ||
171 | sector_t orig_len; | ||
172 | |||
173 | be = __ext_tree_search(root, start); | ||
174 | if (!be) | ||
175 | return 0; | ||
176 | if (be->be_f_offset >= end) | ||
177 | return 0; | ||
178 | |||
179 | orig_v_offset = be->be_v_offset; | ||
180 | orig_len = be->be_length; | ||
181 | |||
182 | if (start > be->be_f_offset) | ||
183 | len1 = start - be->be_f_offset; | ||
184 | if (ext_f_end(be) > end) | ||
185 | len2 = ext_f_end(be) - end; | ||
186 | |||
187 | if (len2 > 0) { | ||
188 | if (len1 > 0) { | ||
189 | struct pnfs_block_extent *new; | ||
190 | |||
191 | new = kzalloc(sizeof(*new), GFP_ATOMIC); | ||
192 | if (!new) | ||
193 | return -ENOMEM; | ||
194 | |||
195 | be->be_length = len1; | ||
196 | |||
197 | new->be_f_offset = end; | ||
198 | if (be->be_state != PNFS_BLOCK_NONE_DATA) { | ||
199 | new->be_v_offset = | ||
200 | orig_v_offset + orig_len - len2; | ||
201 | } | ||
202 | new->be_length = len2; | ||
203 | new->be_state = be->be_state; | ||
204 | new->be_tag = be->be_tag; | ||
205 | new->be_device = nfs4_get_deviceid(be->be_device); | ||
206 | |||
207 | __ext_tree_insert(root, new, true); | ||
208 | } else { | ||
209 | be->be_f_offset = end; | ||
210 | if (be->be_state != PNFS_BLOCK_NONE_DATA) { | ||
211 | be->be_v_offset = | ||
212 | orig_v_offset + orig_len - len2; | ||
213 | } | ||
214 | be->be_length = len2; | ||
215 | } | ||
216 | } else { | ||
217 | if (len1 > 0) { | ||
218 | be->be_length = len1; | ||
219 | be = ext_tree_next(be); | ||
220 | } | ||
221 | |||
222 | while (be && ext_f_end(be) <= end) { | ||
223 | struct pnfs_block_extent *next = ext_tree_next(be); | ||
224 | |||
225 | rb_erase(&be->be_node, root); | ||
226 | nfs4_put_deviceid_node(be->be_device); | ||
227 | kfree(be); | ||
228 | be = next; | ||
229 | } | ||
230 | |||
231 | if (be && be->be_f_offset < end) { | ||
232 | len1 = ext_f_end(be) - end; | ||
233 | be->be_f_offset = end; | ||
234 | if (be->be_state != PNFS_BLOCK_NONE_DATA) | ||
235 | be->be_v_offset += be->be_length - len1; | ||
236 | be->be_length = len1; | ||
237 | } | ||
238 | } | ||
239 | |||
240 | return 0; | ||
241 | } | ||
242 | |||
243 | int | ||
244 | ext_tree_insert(struct pnfs_block_layout *bl, struct pnfs_block_extent *new) | ||
245 | { | ||
246 | struct pnfs_block_extent *be; | ||
247 | struct rb_root *root; | ||
248 | int err = 0; | ||
249 | |||
250 | switch (new->be_state) { | ||
251 | case PNFS_BLOCK_READWRITE_DATA: | ||
252 | case PNFS_BLOCK_INVALID_DATA: | ||
253 | root = &bl->bl_ext_rw; | ||
254 | break; | ||
255 | case PNFS_BLOCK_READ_DATA: | ||
256 | case PNFS_BLOCK_NONE_DATA: | ||
257 | root = &bl->bl_ext_ro; | ||
258 | break; | ||
259 | default: | ||
260 | dprintk("invalid extent type\n"); | ||
261 | return -EINVAL; | ||
262 | } | ||
263 | |||
264 | spin_lock(&bl->bl_ext_lock); | ||
265 | retry: | ||
266 | be = __ext_tree_search(root, new->be_f_offset); | ||
267 | if (!be || be->be_f_offset >= ext_f_end(new)) { | ||
268 | __ext_tree_insert(root, new, true); | ||
269 | } else if (new->be_f_offset >= be->be_f_offset) { | ||
270 | if (ext_f_end(new) <= ext_f_end(be)) { | ||
271 | nfs4_put_deviceid_node(new->be_device); | ||
272 | kfree(new); | ||
273 | } else { | ||
274 | sector_t new_len = ext_f_end(new) - ext_f_end(be); | ||
275 | sector_t diff = new->be_length - new_len; | ||
276 | |||
277 | new->be_f_offset += diff; | ||
278 | new->be_v_offset += diff; | ||
279 | new->be_length = new_len; | ||
280 | goto retry; | ||
281 | } | ||
282 | } else if (ext_f_end(new) <= ext_f_end(be)) { | ||
283 | new->be_length = be->be_f_offset - new->be_f_offset; | ||
284 | __ext_tree_insert(root, new, true); | ||
285 | } else { | ||
286 | struct pnfs_block_extent *split; | ||
287 | sector_t new_len = ext_f_end(new) - ext_f_end(be); | ||
288 | sector_t diff = new->be_length - new_len; | ||
289 | |||
290 | split = kmemdup(new, sizeof(*new), GFP_ATOMIC); | ||
291 | if (!split) { | ||
192 | err = -ENOMEM; | ||
293 | goto out; | ||
294 | } | ||
295 | |||
296 | split->be_length = be->be_f_offset - split->be_f_offset; | ||
297 | split->be_device = nfs4_get_deviceid(new->be_device); | ||
298 | __ext_tree_insert(root, split, true); | ||
299 | |||
300 | new->be_f_offset += diff; | ||
301 | new->be_v_offset += diff; | ||
302 | new->be_length = new_len; | ||
303 | goto retry; | ||
304 | } | ||
305 | out: | ||
306 | spin_unlock(&bl->bl_ext_lock); | ||
307 | return err; | ||
308 | } | ||
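The retry path above clips the already-covered front off the new extent and
reinserts the remainder; the offset bookkeeping, sketched on the same
hypothetical struct:

	/* Drop everything before clip_to (== ext_f_end(be)) from e, keeping
	 * file and volume offsets in step; the caller then retries the insert. */
	static void clip_front(struct extent *e, uint64_t clip_to)
	{
		uint64_t new_len = e->f_off + e->len - clip_to;
		uint64_t diff = e->len - new_len;	/* == clip_to - e->f_off */

		e->f_off += diff;
		e->v_off += diff;
		e->len = new_len;
	}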
309 | |||
310 | static bool | ||
311 | __ext_tree_lookup(struct rb_root *root, sector_t isect, | ||
312 | struct pnfs_block_extent *ret) | ||
313 | { | ||
314 | struct rb_node *node; | ||
315 | struct pnfs_block_extent *be; | ||
316 | |||
317 | node = root->rb_node; | ||
318 | while (node) { | ||
319 | be = ext_node(node); | ||
320 | if (isect < be->be_f_offset) | ||
321 | node = node->rb_left; | ||
322 | else if (isect >= ext_f_end(be)) | ||
323 | node = node->rb_right; | ||
324 | else { | ||
325 | *ret = *be; | ||
326 | return true; | ||
327 | } | ||
328 | } | ||
329 | |||
330 | return false; | ||
331 | } | ||
332 | |||
333 | bool | ||
334 | ext_tree_lookup(struct pnfs_block_layout *bl, sector_t isect, | ||
335 | struct pnfs_block_extent *ret, bool rw) | ||
336 | { | ||
337 | bool found = false; | ||
338 | |||
339 | spin_lock(&bl->bl_ext_lock); | ||
340 | if (!rw) | ||
341 | found = __ext_tree_lookup(&bl->bl_ext_ro, isect, ret); | ||
342 | if (!found) | ||
343 | found = __ext_tree_lookup(&bl->bl_ext_rw, isect, ret); | ||
344 | spin_unlock(&bl->bl_ext_lock); | ||
345 | |||
346 | return found; | ||
347 | } | ||
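A hypothetical caller, to show the fallback order: for reads (rw == false) the
RO tree of READ/NONE extents is consulted before the RW tree, and the result
is a copy, so it stays valid after bl_ext_lock is dropped:

	struct pnfs_block_extent ext;

	if (!ext_tree_lookup(bl, isect, &ext, false))
		return -ENOENT;	/* no extent covers isect; caller decides */
	/* ... map to volume: ext.be_v_offset + (isect - ext.be_f_offset) ... */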
348 | |||
349 | int ext_tree_remove(struct pnfs_block_layout *bl, bool rw, | ||
350 | sector_t start, sector_t end) | ||
351 | { | ||
352 | int err, err2; | ||
353 | |||
354 | spin_lock(&bl->bl_ext_lock); | ||
355 | err = __ext_tree_remove(&bl->bl_ext_ro, start, end); | ||
356 | if (rw) { | ||
357 | err2 = __ext_tree_remove(&bl->bl_ext_rw, start, end); | ||
358 | if (!err) | ||
359 | err = err2; | ||
360 | } | ||
361 | spin_unlock(&bl->bl_ext_lock); | ||
362 | |||
363 | return err; | ||
364 | } | ||
365 | |||
366 | static int | ||
367 | ext_tree_split(struct rb_root *root, struct pnfs_block_extent *be, | ||
368 | sector_t split) | ||
369 | { | ||
370 | struct pnfs_block_extent *new; | ||
371 | sector_t orig_len = be->be_length; | ||
372 | |||
373 | new = kzalloc(sizeof(*new), GFP_ATOMIC); | ||
374 | if (!new) | ||
375 | return -ENOMEM; | ||
376 | |||
377 | be->be_length = split - be->be_f_offset; | ||
378 | |||
379 | new->be_f_offset = split; | ||
380 | if (be->be_state != PNFS_BLOCK_NONE_DATA) | ||
381 | new->be_v_offset = be->be_v_offset + be->be_length; | ||
382 | new->be_length = orig_len - be->be_length; | ||
383 | new->be_state = be->be_state; | ||
384 | new->be_tag = be->be_tag; | ||
385 | new->be_device = nfs4_get_deviceid(be->be_device); | ||
386 | |||
387 | __ext_tree_insert(root, new, false); | ||
388 | return 0; | ||
389 | } | ||
390 | |||
391 | int | ||
392 | ext_tree_mark_written(struct pnfs_block_layout *bl, sector_t start, | ||
393 | sector_t len) | ||
394 | { | ||
395 | struct rb_root *root = &bl->bl_ext_rw; | ||
396 | sector_t end = start + len; | ||
397 | struct pnfs_block_extent *be; | ||
398 | int err = 0; | ||
399 | |||
400 | spin_lock(&bl->bl_ext_lock); | ||
401 | /* | ||
402 | * First remove all COW extents or holes from the range being written to. | ||
403 | */ | ||
404 | err = __ext_tree_remove(&bl->bl_ext_ro, start, end); | ||
405 | if (err) | ||
406 | goto out; | ||
407 | |||
408 | /* | ||
409 | * Then mark all invalid extents in the range as written to. | ||
410 | */ | ||
411 | for (be = __ext_tree_search(root, start); be; be = ext_tree_next(be)) { | ||
412 | if (be->be_f_offset >= end) | ||
413 | break; | ||
414 | |||
415 | if (be->be_state != PNFS_BLOCK_INVALID_DATA || be->be_tag) | ||
416 | continue; | ||
417 | |||
418 | if (be->be_f_offset < start) { | ||
419 | struct pnfs_block_extent *left = ext_tree_prev(be); | ||
420 | |||
421 | if (left && ext_can_merge(left, be)) { | ||
422 | sector_t diff = start - be->be_f_offset; | ||
423 | |||
424 | left->be_length += diff; | ||
425 | |||
426 | be->be_f_offset += diff; | ||
427 | be->be_v_offset += diff; | ||
428 | be->be_length -= diff; | ||
429 | } else { | ||
430 | err = ext_tree_split(root, be, start); | ||
431 | if (err) | ||
432 | goto out; | ||
433 | } | ||
434 | } | ||
435 | |||
436 | if (ext_f_end(be) > end) { | ||
437 | struct pnfs_block_extent *right = ext_tree_next(be); | ||
438 | |||
439 | if (right && ext_can_merge(be, right)) { | ||
440 | sector_t diff = end - be->be_f_offset; | ||
441 | |||
442 | be->be_length -= diff; | ||
443 | |||
444 | right->be_f_offset -= diff; | ||
445 | right->be_v_offset -= diff; | ||
446 | right->be_length += diff; | ||
447 | } else { | ||
448 | err = ext_tree_split(root, be, end); | ||
449 | if (err) | ||
450 | goto out; | ||
451 | } | ||
452 | } | ||
453 | |||
454 | if (be->be_f_offset >= start && ext_f_end(be) <= end) { | ||
455 | be->be_tag = EXTENT_WRITTEN; | ||
456 | be = ext_try_to_merge_left(root, be); | ||
457 | be = ext_try_to_merge_right(root, be); | ||
458 | } | ||
459 | } | ||
460 | out: | ||
461 | spin_unlock(&bl->bl_ext_lock); | ||
462 | return err; | ||
463 | } | ||
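When an INVALID extent straddles one of the range boundaries it either donates
the overhang to a mergeable neighbour or gets split; the donation step for the
left boundary, sketched on the hypothetical struct from above:

	/* Grow `left` over [e->f_off, start) and shrink e to start at `start`,
	 * keeping the file-to-volume mapping aligned (the left-merge branch). */
	static void donate_head(struct extent *left, struct extent *e,
				uint64_t start)
	{
		uint64_t diff = start - e->f_off;

		left->len += diff;
		e->f_off += diff;
		e->v_off += diff;
		e->len -= diff;
	}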
464 | |||
465 | static void ext_tree_free_commitdata(struct nfs4_layoutcommit_args *arg, | ||
466 | size_t buffer_size) | ||
467 | { | ||
468 | if (arg->layoutupdate_pages != &arg->layoutupdate_page) { | ||
469 | int nr_pages = DIV_ROUND_UP(buffer_size, PAGE_SIZE), i; | ||
470 | |||
471 | for (i = 0; i < nr_pages; i++) | ||
472 | put_page(arg->layoutupdate_pages[i]); | ||
473 | kfree(arg->layoutupdate_pages); | ||
474 | } else { | ||
475 | put_page(arg->layoutupdate_page); | ||
476 | } | ||
477 | } | ||
478 | |||
479 | static int ext_tree_encode_commit(struct pnfs_block_layout *bl, __be32 *p, | ||
480 | size_t buffer_size, size_t *count) | ||
481 | { | ||
482 | struct pnfs_block_extent *be; | ||
483 | int ret = 0; | ||
484 | |||
485 | spin_lock(&bl->bl_ext_lock); | ||
486 | for (be = ext_tree_first(&bl->bl_ext_rw); be; be = ext_tree_next(be)) { | ||
487 | if (be->be_state != PNFS_BLOCK_INVALID_DATA || | ||
488 | be->be_tag != EXTENT_WRITTEN) | ||
489 | continue; | ||
490 | |||
491 | (*count)++; | ||
492 | if (*count * BL_EXTENT_SIZE > buffer_size) { | ||
493 | /* keep counting.. */ | ||
494 | ret = -ENOSPC; | ||
495 | continue; | ||
496 | } | ||
497 | |||
498 | p = xdr_encode_opaque_fixed(p, be->be_device->deviceid.data, | ||
499 | NFS4_DEVICEID4_SIZE); | ||
500 | p = xdr_encode_hyper(p, be->be_f_offset << SECTOR_SHIFT); | ||
501 | p = xdr_encode_hyper(p, be->be_length << SECTOR_SHIFT); | ||
502 | p = xdr_encode_hyper(p, 0LL); | ||
503 | *p++ = cpu_to_be32(PNFS_BLOCK_READWRITE_DATA); | ||
504 | |||
505 | be->be_tag = EXTENT_COMMITTING; | ||
506 | } | ||
507 | spin_unlock(&bl->bl_ext_lock); | ||
508 | |||
509 | return ret; | ||
510 | } | ||
511 | |||
512 | int | ||
513 | ext_tree_prepare_commit(struct nfs4_layoutcommit_args *arg) | ||
514 | { | ||
515 | struct pnfs_block_layout *bl = BLK_LO2EXT(NFS_I(arg->inode)->layout); | ||
516 | size_t count = 0, buffer_size = PAGE_SIZE; | ||
517 | __be32 *start_p; | ||
518 | int ret; | ||
519 | |||
520 | dprintk("%s enter\n", __func__); | ||
521 | |||
522 | arg->layoutupdate_page = alloc_page(GFP_NOFS); | ||
523 | if (!arg->layoutupdate_page) | ||
524 | return -ENOMEM; | ||
525 | start_p = page_address(arg->layoutupdate_page); | ||
526 | arg->layoutupdate_pages = &arg->layoutupdate_page; | ||
527 | |||
528 | retry: | ||
529 | ret = ext_tree_encode_commit(bl, start_p + 1, buffer_size, &count); | ||
530 | if (unlikely(ret)) { | ||
531 | ext_tree_free_commitdata(arg, buffer_size); | ||
532 | |||
533 | buffer_size = sizeof(__be32) + BL_EXTENT_SIZE * count; | ||
534 | count = 0; | ||
535 | |||
536 | arg->layoutupdate_pages = | ||
537 | kcalloc(DIV_ROUND_UP(buffer_size, PAGE_SIZE), | ||
538 | sizeof(struct page *), GFP_NOFS); | ||
539 | if (!arg->layoutupdate_pages) | ||
540 | return -ENOMEM; | ||
541 | |||
542 | start_p = __vmalloc(buffer_size, GFP_NOFS, PAGE_KERNEL); | ||
543 | if (!start_p) { | ||
544 | kfree(arg->layoutupdate_pages); | ||
545 | return -ENOMEM; | ||
546 | } | ||
547 | |||
548 | goto retry; | ||
549 | } | ||
550 | |||
551 | *start_p = cpu_to_be32(count); | ||
552 | arg->layoutupdate_len = sizeof(__be32) + BL_EXTENT_SIZE * count; | ||
553 | |||
554 | if (unlikely(arg->layoutupdate_pages != &arg->layoutupdate_page)) { | ||
555 | void *p = start_p; | ||
556 | int i = 0; | ||
557 | |||
558 | for (p = start_p; | ||
559 | p < (void *)start_p + arg->layoutupdate_len; | ||
560 | p += PAGE_SIZE) { | ||
561 | arg->layoutupdate_pages[i++] = vmalloc_to_page(p); | ||
562 | } | ||
563 | } | ||
564 | |||
565 | dprintk("%s found %zu ranges\n", __func__, count); | ||
566 | return 0; | ||
567 | } | ||
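The encode/retry dance is a generic count-size-retry pattern; a self-contained
userspace sketch with a stand-in encoder (ENTRY_SIZE and the entry count are
made-up values, not the kernel's):

	#include <stdlib.h>

	#define ENTRY_SIZE 44	/* stand-in for BL_EXTENT_SIZE */

	/* Pretend encoder: keeps counting even after the buffer overflows,
	 * just as ext_tree_encode_commit() does, so the caller learns the
	 * exact need. Returns -1 in place of -ENOSPC. */
	static int encode(void *buf, size_t bufsize, size_t *count)
	{
		*count = 200;	/* made-up number of written extents */
		return *count * ENTRY_SIZE > bufsize ? -1 : 0;
	}

	static void *encode_with_retry(size_t *len)
	{
		size_t count, size = 4096;	/* initial guess ~ PAGE_SIZE */
		void *buf;

		for (;;) {
			count = 0;
			buf = malloc(size);
			if (!buf || encode(buf, size, &count) == 0)
				break;
			free(buf);	/* too small: retry, sized exactly */
			size = ENTRY_SIZE * count;
		}
		*len = size;
		return buf;
	}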
568 | |||
569 | void | ||
570 | ext_tree_mark_committed(struct nfs4_layoutcommit_args *arg, int status) | ||
571 | { | ||
572 | struct pnfs_block_layout *bl = BLK_LO2EXT(NFS_I(arg->inode)->layout); | ||
573 | struct rb_root *root = &bl->bl_ext_rw; | ||
574 | struct pnfs_block_extent *be; | ||
575 | |||
576 | dprintk("%s status %d\n", __func__, status); | ||
577 | |||
578 | ext_tree_free_commitdata(arg, arg->layoutupdate_len); | ||
579 | |||
580 | spin_lock(&bl->bl_ext_lock); | ||
581 | for (be = ext_tree_first(root); be; be = ext_tree_next(be)) { | ||
582 | if (be->be_state != PNFS_BLOCK_INVALID_DATA || | ||
583 | be->be_tag != EXTENT_COMMITTING) | ||
584 | continue; | ||
585 | |||
586 | if (status) { | ||
587 | /* | ||
588 | * Mark as written and try again. | ||
589 | * | ||
590 | * XXX: some real error handling here wouldn't hurt.. | ||
591 | */ | ||
592 | be->be_tag = EXTENT_WRITTEN; | ||
593 | } else { | ||
594 | be->be_state = PNFS_BLOCK_READWRITE_DATA; | ||
595 | be->be_tag = 0; | ||
596 | } | ||
597 | |||
598 | be = ext_try_to_merge_left(root, be); | ||
599 | be = ext_try_to_merge_right(root, be); | ||
600 | } | ||
601 | spin_unlock(&bl->bl_ext_lock); | ||
602 | } | ||
diff --git a/fs/nfs/blocklayout/extents.c b/fs/nfs/blocklayout/extents.c deleted file mode 100644 index 4d0161442565..000000000000 --- a/fs/nfs/blocklayout/extents.c +++ /dev/null | |||
@@ -1,908 +0,0 @@ | |||
1 | /* | ||
2 | * linux/fs/nfs/blocklayout/extents.c | ||
3 | * | ||
4 | * Module for the NFSv4.1 pNFS block layout driver. | ||
5 | * | ||
6 | * Copyright (c) 2006 The Regents of the University of Michigan. | ||
7 | * All rights reserved. | ||
8 | * | ||
9 | * Andy Adamson <andros@citi.umich.edu> | ||
10 | * Fred Isaman <iisaman@umich.edu> | ||
11 | * | ||
12 | * permission is granted to use, copy, create derivative works and | ||
13 | * redistribute this software and such derivative works for any purpose, | ||
14 | * so long as the name of the university of michigan is not used in | ||
15 | * any advertising or publicity pertaining to the use or distribution | ||
16 | * of this software without specific, written prior authorization. if | ||
17 | * the above copyright notice or any other identification of the | ||
18 | * university of michigan is included in any copy of any portion of | ||
19 | * this software, then the disclaimer below must also be included. | ||
20 | * | ||
21 | * this software is provided as is, without representation from the | ||
22 | * university of michigan as to its fitness for any purpose, and without | ||
23 | * warranty by the university of michigan of any kind, either express | ||
24 | * or implied, including without limitation the implied warranties of | ||
25 | * merchantability and fitness for a particular purpose. the regents | ||
26 | * of the university of michigan shall not be liable for any damages, | ||
27 | * including special, indirect, incidental, or consequential damages, | ||
28 | * with respect to any claim arising out or in connection with the use | ||
29 | * of the software, even if it has been or is hereafter advised of the | ||
30 | * possibility of such damages. | ||
31 | */ | ||
32 | |||
33 | #include "blocklayout.h" | ||
34 | #define NFSDBG_FACILITY NFSDBG_PNFS_LD | ||
35 | |||
36 | /* Bit numbers */ | ||
37 | #define EXTENT_INITIALIZED 0 | ||
38 | #define EXTENT_WRITTEN 1 | ||
39 | #define EXTENT_IN_COMMIT 2 | ||
40 | #define INTERNAL_EXISTS MY_MAX_TAGS | ||
41 | #define INTERNAL_MASK ((1 << INTERNAL_EXISTS) - 1) | ||
42 | |||
43 | /* Returns largest t<=s s.t. t%base==0 */ | ||
44 | static inline sector_t normalize(sector_t s, int base) | ||
45 | { | ||
46 | sector_t tmp = s; /* Since do_div modifies its argument */ | ||
47 | return s - sector_div(tmp, base); | ||
48 | } | ||
49 | |||
50 | static inline sector_t normalize_up(sector_t s, int base) | ||
51 | { | ||
52 | return normalize(s + base - 1, base); | ||
53 | } | ||
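Worked examples of the two rounding helpers (illustrative numbers only):

	/* normalize(7005, 8)    == 7000  largest multiple of 8 <= 7005  */
	/* normalize_up(7005, 8) == 7008  smallest multiple of 8 >= 7005 */
	/* normalize_up(7008, 8) == 7008  aligned values are unchanged   */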
54 | |||
55 | /* Complete stub using a list while determining the API wanted */ | ||
56 | |||
57 | /* Returns tags, or negative */ | ||
58 | static int32_t _find_entry(struct my_tree *tree, u64 s) | ||
59 | { | ||
60 | struct pnfs_inval_tracking *pos; | ||
61 | |||
62 | dprintk("%s(%llu) enter\n", __func__, s); | ||
63 | list_for_each_entry_reverse(pos, &tree->mtt_stub, it_link) { | ||
64 | if (pos->it_sector > s) | ||
65 | continue; | ||
66 | else if (pos->it_sector == s) | ||
67 | return pos->it_tags & INTERNAL_MASK; | ||
68 | else | ||
69 | break; | ||
70 | } | ||
71 | return -ENOENT; | ||
72 | } | ||
73 | |||
74 | static inline | ||
75 | int _has_tag(struct my_tree *tree, u64 s, int32_t tag) | ||
76 | { | ||
77 | int32_t tags; | ||
78 | |||
79 | dprintk("%s(%llu, %i) enter\n", __func__, s, tag); | ||
80 | s = normalize(s, tree->mtt_step_size); | ||
81 | tags = _find_entry(tree, s); | ||
82 | if ((tags < 0) || !(tags & (1 << tag))) | ||
83 | return 0; | ||
84 | else | ||
85 | return 1; | ||
86 | } | ||
87 | |||
88 | /* Creates entry with tag, or if entry already exists, unions tag to it. | ||
89 | * If storage is not NULL, newly created entry will use it. | ||
90 | * Returns number of entries added, or negative on error. | ||
91 | */ | ||
92 | static int _add_entry(struct my_tree *tree, u64 s, int32_t tag, | ||
93 | struct pnfs_inval_tracking *storage) | ||
94 | { | ||
95 | int found = 0; | ||
96 | struct pnfs_inval_tracking *pos; | ||
97 | |||
98 | dprintk("%s(%llu, %i, %p) enter\n", __func__, s, tag, storage); | ||
99 | list_for_each_entry_reverse(pos, &tree->mtt_stub, it_link) { | ||
100 | if (pos->it_sector > s) | ||
101 | continue; | ||
102 | else if (pos->it_sector == s) { | ||
103 | found = 1; | ||
104 | break; | ||
105 | } else | ||
106 | break; | ||
107 | } | ||
108 | if (found) { | ||
109 | pos->it_tags |= (1 << tag); | ||
110 | return 0; | ||
111 | } else { | ||
112 | struct pnfs_inval_tracking *new; | ||
113 | new = storage; | ||
114 | new->it_sector = s; | ||
115 | new->it_tags = (1 << tag); | ||
116 | list_add(&new->it_link, &pos->it_link); | ||
117 | return 1; | ||
118 | } | ||
119 | } | ||
120 | |||
121 | /* XXXX Really want option to not create */ | ||
122 | /* Over range, unions tag with existing entries, else creates entry with tag */ | ||
123 | static int _set_range(struct my_tree *tree, int32_t tag, u64 s, u64 length) | ||
124 | { | ||
125 | u64 i; | ||
126 | |||
127 | dprintk("%s(%i, %llu, %llu) enter\n", __func__, tag, s, length); | ||
128 | for (i = normalize(s, tree->mtt_step_size); i < s + length; | ||
129 | i += tree->mtt_step_size) | ||
130 | if (_add_entry(tree, i, tag, NULL)) | ||
131 | return -ENOMEM; | ||
132 | return 0; | ||
133 | } | ||
134 | |||
135 | /* Ensure that future operations on given range of tree will not malloc */ | ||
136 | static int _preload_range(struct pnfs_inval_markings *marks, | ||
137 | u64 offset, u64 length) | ||
138 | { | ||
139 | u64 start, end, s; | ||
140 | int count, i, used = 0, status = -ENOMEM; | ||
141 | struct pnfs_inval_tracking **storage; | ||
142 | struct my_tree *tree = &marks->im_tree; | ||
143 | |||
144 | dprintk("%s(%llu, %llu) enter\n", __func__, offset, length); | ||
145 | start = normalize(offset, tree->mtt_step_size); | ||
146 | end = normalize_up(offset + length, tree->mtt_step_size); | ||
147 | count = (int)(end - start) / (int)tree->mtt_step_size; | ||
148 | |||
149 | /* Pre-malloc what memory we might need */ | ||
150 | storage = kcalloc(count, sizeof(*storage), GFP_NOFS); | ||
151 | if (!storage) | ||
152 | return -ENOMEM; | ||
153 | for (i = 0; i < count; i++) { | ||
154 | storage[i] = kmalloc(sizeof(struct pnfs_inval_tracking), | ||
155 | GFP_NOFS); | ||
156 | if (!storage[i]) | ||
157 | goto out_cleanup; | ||
158 | } | ||
159 | |||
160 | spin_lock_bh(&marks->im_lock); | ||
161 | for (s = start; s < end; s += tree->mtt_step_size) | ||
162 | used += _add_entry(tree, s, INTERNAL_EXISTS, storage[used]); | ||
163 | spin_unlock_bh(&marks->im_lock); | ||
164 | |||
165 | status = 0; | ||
166 | |||
167 | out_cleanup: | ||
168 | for (i = used; i < count; i++) { | ||
169 | if (!storage[i]) | ||
170 | break; | ||
171 | kfree(storage[i]); | ||
172 | } | ||
173 | kfree(storage); | ||
174 | return status; | ||
175 | } | ||
176 | |||
177 | /* We are relying on page lock to serialize this */ | ||
178 | int bl_is_sector_init(struct pnfs_inval_markings *marks, sector_t isect) | ||
179 | { | ||
180 | int rv; | ||
181 | |||
182 | spin_lock_bh(&marks->im_lock); | ||
183 | rv = _has_tag(&marks->im_tree, isect, EXTENT_INITIALIZED); | ||
184 | spin_unlock_bh(&marks->im_lock); | ||
185 | return rv; | ||
186 | } | ||
187 | |||
188 | /* Assume start, end already sector aligned */ | ||
189 | static int | ||
190 | _range_has_tag(struct my_tree *tree, u64 start, u64 end, int32_t tag) | ||
191 | { | ||
192 | struct pnfs_inval_tracking *pos; | ||
193 | u64 expect = 0; | ||
194 | |||
195 | dprintk("%s(%llu, %llu, %i) enter\n", __func__, start, end, tag); | ||
196 | list_for_each_entry_reverse(pos, &tree->mtt_stub, it_link) { | ||
197 | if (pos->it_sector >= end) | ||
198 | continue; | ||
199 | if (!expect) { | ||
200 | if ((pos->it_sector == end - tree->mtt_step_size) && | ||
201 | (pos->it_tags & (1 << tag))) { | ||
202 | expect = pos->it_sector - tree->mtt_step_size; | ||
203 | if (pos->it_sector < tree->mtt_step_size || expect < start) | ||
204 | return 1; | ||
205 | continue; | ||
206 | } else { | ||
207 | return 0; | ||
208 | } | ||
209 | } | ||
210 | if (pos->it_sector != expect || !(pos->it_tags & (1 << tag))) | ||
211 | return 0; | ||
212 | expect -= tree->mtt_step_size; | ||
213 | if (expect < start) | ||
214 | return 1; | ||
215 | } | ||
216 | return 0; | ||
217 | } | ||
218 | |||
219 | static int is_range_written(struct pnfs_inval_markings *marks, | ||
220 | sector_t start, sector_t end) | ||
221 | { | ||
222 | int rv; | ||
223 | |||
224 | spin_lock_bh(&marks->im_lock); | ||
225 | rv = _range_has_tag(&marks->im_tree, start, end, EXTENT_WRITTEN); | ||
226 | spin_unlock_bh(&marks->im_lock); | ||
227 | return rv; | ||
228 | } | ||
229 | |||
230 | /* Marks sectors in [offset, offset + length) as having been initialized. | ||
231 | * All lengths are step-aligned, where step is min(pagesize, blocksize). | ||
232 | * Currently assumes offset is page-aligned | ||
233 | */ | ||
234 | int bl_mark_sectors_init(struct pnfs_inval_markings *marks, | ||
235 | sector_t offset, sector_t length) | ||
236 | { | ||
237 | sector_t start, end; | ||
238 | |||
239 | dprintk("%s(offset=%llu,len=%llu) enter\n", | ||
240 | __func__, (u64)offset, (u64)length); | ||
241 | |||
242 | start = normalize(offset, marks->im_block_size); | ||
243 | end = normalize_up(offset + length, marks->im_block_size); | ||
244 | if (_preload_range(marks, start, end - start)) | ||
245 | goto outerr; | ||
246 | |||
247 | spin_lock_bh(&marks->im_lock); | ||
248 | if (_set_range(&marks->im_tree, EXTENT_INITIALIZED, offset, length)) | ||
249 | goto out_unlock; | ||
250 | spin_unlock_bh(&marks->im_lock); | ||
251 | |||
252 | return 0; | ||
253 | |||
254 | out_unlock: | ||
255 | spin_unlock_bh(&marks->im_lock); | ||
256 | outerr: | ||
257 | return -ENOMEM; | ||
258 | } | ||
259 | |||
260 | /* Marks sectors in [offset, offset + length) as having been written to disk. | ||
261 | * All lengths should be block aligned. | ||
262 | */ | ||
263 | static int mark_written_sectors(struct pnfs_inval_markings *marks, | ||
264 | sector_t offset, sector_t length) | ||
265 | { | ||
266 | int status; | ||
267 | |||
268 | dprintk("%s(offset=%llu,len=%llu) enter\n", __func__, | ||
269 | (u64)offset, (u64)length); | ||
270 | spin_lock_bh(&marks->im_lock); | ||
271 | status = _set_range(&marks->im_tree, EXTENT_WRITTEN, offset, length); | ||
272 | spin_unlock_bh(&marks->im_lock); | ||
273 | return status; | ||
274 | } | ||
275 | |||
276 | static void print_short_extent(struct pnfs_block_short_extent *be) | ||
277 | { | ||
278 | dprintk("PRINT SHORT EXTENT extent %p\n", be); | ||
279 | if (be) { | ||
280 | dprintk(" be_f_offset %llu\n", (u64)be->bse_f_offset); | ||
281 | dprintk(" be_length %llu\n", (u64)be->bse_length); | ||
282 | } | ||
283 | } | ||
284 | |||
285 | static void print_clist(struct list_head *list, unsigned int count) | ||
286 | { | ||
287 | struct pnfs_block_short_extent *be; | ||
288 | unsigned int i = 0; | ||
289 | |||
290 | ifdebug(FACILITY) { | ||
291 | printk(KERN_DEBUG "****************\n"); | ||
292 | printk(KERN_DEBUG "Extent list looks like:\n"); | ||
293 | list_for_each_entry(be, list, bse_node) { | ||
294 | i++; | ||
295 | print_short_extent(be); | ||
296 | } | ||
297 | if (i != count) | ||
298 | printk(KERN_DEBUG "\n\nExpected %u entries\n\n\n", count); | ||
299 | printk(KERN_DEBUG "****************\n"); | ||
300 | } | ||
301 | } | ||
302 | |||
303 | /* Note: In theory, we should do more checking that devids match between | ||
304 | * old and new, but if they don't, the lists are too corrupt to salvage anyway. | ||
305 | */ | ||
306 | /* Note this is very similar to bl_add_merge_extent */ | ||
307 | static void add_to_commitlist(struct pnfs_block_layout *bl, | ||
308 | struct pnfs_block_short_extent *new) | ||
309 | { | ||
310 | struct list_head *clist = &bl->bl_commit; | ||
311 | struct pnfs_block_short_extent *old, *save; | ||
312 | sector_t end = new->bse_f_offset + new->bse_length; | ||
313 | |||
314 | dprintk("%s enter\n", __func__); | ||
315 | print_short_extent(new); | ||
316 | print_clist(clist, bl->bl_count); | ||
317 | bl->bl_count++; | ||
318 | /* Scan for proper place to insert, extending new to the left | ||
319 | * as much as possible. | ||
320 | */ | ||
321 | list_for_each_entry_safe(old, save, clist, bse_node) { | ||
322 | if (new->bse_f_offset < old->bse_f_offset) | ||
323 | break; | ||
324 | if (end <= old->bse_f_offset + old->bse_length) { | ||
325 | /* Range is already in list */ | ||
326 | bl->bl_count--; | ||
327 | kfree(new); | ||
328 | return; | ||
329 | } else if (new->bse_f_offset <= | ||
330 | old->bse_f_offset + old->bse_length) { | ||
331 | /* new overlaps or abuts existing be */ | ||
332 | if (new->bse_mdev == old->bse_mdev) { | ||
333 | /* extend new to fully replace old */ | ||
334 | new->bse_length += new->bse_f_offset - | ||
335 | old->bse_f_offset; | ||
336 | new->bse_f_offset = old->bse_f_offset; | ||
337 | list_del(&old->bse_node); | ||
338 | bl->bl_count--; | ||
339 | kfree(old); | ||
340 | } | ||
341 | } | ||
342 | } | ||
343 | /* Note that if we never hit the above break, old will not point to a | ||
344 | * valid extent. However, in that case &old->bse_node==list. | ||
345 | */ | ||
346 | list_add_tail(&new->bse_node, &old->bse_node); | ||
347 | /* Scan forward for overlaps. If we find any, extend new and | ||
348 | * remove the overlapped extent. | ||
349 | */ | ||
350 | old = list_prepare_entry(new, clist, bse_node); | ||
351 | list_for_each_entry_safe_continue(old, save, clist, bse_node) { | ||
352 | if (end < old->bse_f_offset) | ||
353 | break; | ||
354 | /* new overlaps or abuts old */ | ||
355 | if (new->bse_mdev == old->bse_mdev) { | ||
356 | if (end < old->bse_f_offset + old->bse_length) { | ||
357 | /* extend new to fully cover old */ | ||
358 | end = old->bse_f_offset + old->bse_length; | ||
359 | new->bse_length = end - new->bse_f_offset; | ||
360 | } | ||
361 | list_del(&old->bse_node); | ||
362 | bl->bl_count--; | ||
363 | kfree(old); | ||
364 | } | ||
365 | } | ||
366 | dprintk("%s: after merging\n", __func__); | ||
367 | print_clist(clist, bl->bl_count); | ||
368 | } | ||
369 | |||
370 | /* Note the range described by offset, length is guaranteed to be contained | ||
371 | * within be. | ||
372 | * new will be freed, either by this function or add_to_commitlist if they | ||
373 | * decide not to use it, or after LAYOUTCOMMIT uses it in the commitlist. | ||
374 | */ | ||
375 | int bl_mark_for_commit(struct pnfs_block_extent *be, | ||
376 | sector_t offset, sector_t length, | ||
377 | struct pnfs_block_short_extent *new) | ||
378 | { | ||
379 | sector_t new_end, end = offset + length; | ||
380 | struct pnfs_block_layout *bl = container_of(be->be_inval, | ||
381 | struct pnfs_block_layout, | ||
382 | bl_inval); | ||
383 | |||
384 | mark_written_sectors(be->be_inval, offset, length); | ||
385 | /* We want to add the range to commit list, but it must be | ||
386 | * block-normalized, and verified that the normalized range has | ||
387 | * been entirely written to disk. | ||
388 | */ | ||
389 | new->bse_f_offset = offset; | ||
390 | offset = normalize(offset, bl->bl_blocksize); | ||
391 | if (offset < new->bse_f_offset) { | ||
392 | if (is_range_written(be->be_inval, offset, new->bse_f_offset)) | ||
393 | new->bse_f_offset = offset; | ||
394 | else | ||
395 | new->bse_f_offset = offset + bl->bl_blocksize; | ||
396 | } | ||
397 | new_end = normalize_up(end, bl->bl_blocksize); | ||
398 | if (end < new_end) { | ||
399 | if (is_range_written(be->be_inval, end, new_end)) | ||
400 | end = new_end; | ||
401 | else | ||
402 | end = new_end - bl->bl_blocksize; | ||
403 | } | ||
404 | if (end <= new->bse_f_offset) { | ||
405 | kfree(new); | ||
406 | return 0; | ||
407 | } | ||
408 | new->bse_length = end - new->bse_f_offset; | ||
409 | new->bse_devid = be->be_devid; | ||
410 | new->bse_mdev = be->be_mdev; | ||
411 | |||
412 | spin_lock(&bl->bl_ext_lock); | ||
413 | add_to_commitlist(bl, new); | ||
414 | spin_unlock(&bl->bl_ext_lock); | ||
415 | return 0; | ||
416 | } | ||
417 | |||
418 | static void print_bl_extent(struct pnfs_block_extent *be) | ||
419 | { | ||
420 | dprintk("PRINT EXTENT extent %p\n", be); | ||
421 | if (be) { | ||
422 | dprintk(" be_f_offset %llu\n", (u64)be->be_f_offset); | ||
423 | dprintk(" be_length %llu\n", (u64)be->be_length); | ||
424 | dprintk(" be_v_offset %llu\n", (u64)be->be_v_offset); | ||
425 | dprintk(" be_state %d\n", be->be_state); | ||
426 | } | ||
427 | } | ||
428 | |||
429 | static void | ||
430 | destroy_extent(struct kref *kref) | ||
431 | { | ||
432 | struct pnfs_block_extent *be; | ||
433 | |||
434 | be = container_of(kref, struct pnfs_block_extent, be_refcnt); | ||
435 | dprintk("%s be=%p\n", __func__, be); | ||
436 | kfree(be); | ||
437 | } | ||
438 | |||
439 | void | ||
440 | bl_put_extent(struct pnfs_block_extent *be) | ||
441 | { | ||
442 | if (be) { | ||
443 | dprintk("%s enter %p (%i)\n", __func__, be, | ||
444 | atomic_read(&be->be_refcnt.refcount)); | ||
445 | kref_put(&be->be_refcnt, destroy_extent); | ||
446 | } | ||
447 | } | ||
448 | |||
449 | struct pnfs_block_extent *bl_alloc_extent(void) | ||
450 | { | ||
451 | struct pnfs_block_extent *be; | ||
452 | |||
453 | be = kmalloc(sizeof(struct pnfs_block_extent), GFP_NOFS); | ||
454 | if (!be) | ||
455 | return NULL; | ||
456 | INIT_LIST_HEAD(&be->be_node); | ||
457 | kref_init(&be->be_refcnt); | ||
458 | be->be_inval = NULL; | ||
459 | return be; | ||
460 | } | ||
461 | |||
462 | static void print_elist(struct list_head *list) | ||
463 | { | ||
464 | struct pnfs_block_extent *be; | ||
465 | dprintk("****************\n"); | ||
466 | dprintk("Extent list looks like:\n"); | ||
467 | list_for_each_entry(be, list, be_node) { | ||
468 | print_bl_extent(be); | ||
469 | } | ||
470 | dprintk("****************\n"); | ||
471 | } | ||
472 | |||
473 | static inline int | ||
474 | extents_consistent(struct pnfs_block_extent *old, struct pnfs_block_extent *new) | ||
475 | { | ||
476 | /* Note this assumes new->be_f_offset >= old->be_f_offset */ | ||
477 | return (new->be_state == old->be_state) && | ||
478 | ((new->be_state == PNFS_BLOCK_NONE_DATA) || | ||
479 | ((new->be_v_offset - old->be_v_offset == | ||
480 | new->be_f_offset - old->be_f_offset) && | ||
481 | new->be_mdev == old->be_mdev)); | ||
482 | } | ||
483 | |||
484 | /* Adds new to appropriate list in bl, modifying new and removing existing | ||
485 | * extents as appropriate to deal with overlaps. | ||
486 | * | ||
487 | * See bl_find_get_extent for list constraints. | ||
488 | * | ||
489 | * Refcount on new is already set. If we end up not using it, or we | ||
490 | * error out, we need to put the reference. | ||
491 | * | ||
492 | * bl->bl_ext_lock is held by caller. | ||
493 | */ | ||
494 | int | ||
495 | bl_add_merge_extent(struct pnfs_block_layout *bl, | ||
496 | struct pnfs_block_extent *new) | ||
497 | { | ||
498 | struct pnfs_block_extent *be, *tmp; | ||
499 | sector_t end = new->be_f_offset + new->be_length; | ||
500 | struct list_head *list; | ||
501 | |||
502 | dprintk("%s enter with be=%p\n", __func__, new); | ||
503 | print_bl_extent(new); | ||
504 | list = &bl->bl_extents[bl_choose_list(new->be_state)]; | ||
505 | print_elist(list); | ||
506 | |||
507 | /* Scan for proper place to insert, extending new to the left | ||
508 | * as much as possible. | ||
509 | */ | ||
510 | list_for_each_entry_safe_reverse(be, tmp, list, be_node) { | ||
511 | if (new->be_f_offset >= be->be_f_offset + be->be_length) | ||
512 | break; | ||
513 | if (new->be_f_offset >= be->be_f_offset) { | ||
514 | if (end <= be->be_f_offset + be->be_length) { | ||
515 | /* new is a subset of existing be */ | ||
516 | if (extents_consistent(be, new)) { | ||
517 | dprintk("%s: new is subset, ignoring\n", | ||
518 | __func__); | ||
519 | bl_put_extent(new); | ||
520 | return 0; | ||
521 | } else { | ||
522 | goto out_err; | ||
523 | } | ||
524 | } else { | ||
525 | /* |<-- be -->| | ||
526 | * |<-- new -->| */ | ||
527 | if (extents_consistent(be, new)) { | ||
528 | /* extend new to fully replace be */ | ||
529 | new->be_length += new->be_f_offset - | ||
530 | be->be_f_offset; | ||
531 | new->be_f_offset = be->be_f_offset; | ||
532 | new->be_v_offset = be->be_v_offset; | ||
533 | dprintk("%s: removing %p\n", __func__, be); | ||
534 | list_del(&be->be_node); | ||
535 | bl_put_extent(be); | ||
536 | } else { | ||
537 | goto out_err; | ||
538 | } | ||
539 | } | ||
540 | } else if (end >= be->be_f_offset + be->be_length) { | ||
541 | /* new extent overlaps existing be */ | ||
542 | if (extents_consistent(be, new)) { | ||
543 | /* extend new to fully replace be */ | ||
544 | dprintk("%s: removing %p\n", __func__, be); | ||
545 | list_del(&be->be_node); | ||
546 | bl_put_extent(be); | ||
547 | } else { | ||
548 | goto out_err; | ||
549 | } | ||
550 | } else if (end > be->be_f_offset) { | ||
551 | /* |<-- be -->| | ||
552 | *|<-- new -->| */ | ||
553 | if (extents_consistent(new, be)) { | ||
554 | /* extend new to fully replace be */ | ||
555 | new->be_length += be->be_f_offset + be->be_length - | ||
556 | new->be_f_offset - new->be_length; | ||
557 | dprintk("%s: removing %p\n", __func__, be); | ||
558 | list_del(&be->be_node); | ||
559 | bl_put_extent(be); | ||
560 | } else { | ||
561 | goto out_err; | ||
562 | } | ||
563 | } | ||
564 | } | ||
565 | /* Note that if we never hit the above break, be will not point to a | ||
566 | * valid extent. However, in that case &be->be_node==list. | ||
567 | */ | ||
568 | list_add(&new->be_node, &be->be_node); | ||
569 | dprintk("%s: inserting new\n", __func__); | ||
570 | print_elist(list); | ||
571 | /* FIXME - The per-list consistency checks have all been done, | ||
572 | * should now check cross-list consistency. | ||
573 | */ | ||
574 | return 0; | ||
575 | |||
576 | out_err: | ||
577 | bl_put_extent(new); | ||
578 | return -EIO; | ||
579 | } | ||
580 | |||
581 | /* Returns extent, or NULL. If a second READ extent exists, it is returned | ||
582 | * in cow_read, if given. | ||
583 | * | ||
584 | * The extents are kept in two separate ordered lists, one for READ and NONE, | ||
585 | * one for READWRITE and INVALID. Within each list, we assume: | ||
586 | * 1. Extents are ordered by file offset. | ||
587 | * 2. For any given isect, there is at most one extent that matches. | ||
588 | */ | ||
589 | struct pnfs_block_extent * | ||
590 | bl_find_get_extent(struct pnfs_block_layout *bl, sector_t isect, | ||
591 | struct pnfs_block_extent **cow_read) | ||
592 | { | ||
593 | struct pnfs_block_extent *be, *cow, *ret; | ||
594 | int i; | ||
595 | |||
596 | dprintk("%s enter with isect %llu\n", __func__, (u64)isect); | ||
597 | cow = ret = NULL; | ||
598 | spin_lock(&bl->bl_ext_lock); | ||
599 | for (i = 0; i < EXTENT_LISTS; i++) { | ||
600 | list_for_each_entry_reverse(be, &bl->bl_extents[i], be_node) { | ||
601 | if (isect >= be->be_f_offset + be->be_length) | ||
602 | break; | ||
603 | if (isect >= be->be_f_offset) { | ||
604 | /* We have found an extent */ | ||
605 | dprintk("%s Get %p (%i)\n", __func__, be, | ||
606 | atomic_read(&be->be_refcnt.refcount)); | ||
607 | kref_get(&be->be_refcnt); | ||
608 | if (!ret) | ||
609 | ret = be; | ||
610 | else if (be->be_state != PNFS_BLOCK_READ_DATA) | ||
611 | bl_put_extent(be); | ||
612 | else | ||
613 | cow = be; | ||
614 | break; | ||
615 | } | ||
616 | } | ||
617 | if (ret && | ||
618 | (!cow_read || ret->be_state != PNFS_BLOCK_INVALID_DATA)) | ||
619 | break; | ||
620 | } | ||
621 | spin_unlock(&bl->bl_ext_lock); | ||
622 | if (cow_read) | ||
623 | *cow_read = cow; | ||
624 | print_bl_extent(ret); | ||
625 | return ret; | ||
626 | } | ||
627 | |||
628 | /* Similar to bl_find_get_extent, but called with lock held, and ignores cow */ | ||
629 | static struct pnfs_block_extent * | ||
630 | bl_find_get_extent_locked(struct pnfs_block_layout *bl, sector_t isect) | ||
631 | { | ||
632 | struct pnfs_block_extent *be, *ret = NULL; | ||
633 | int i; | ||
634 | |||
635 | dprintk("%s enter with isect %llu\n", __func__, (u64)isect); | ||
636 | for (i = 0; i < EXTENT_LISTS; i++) { | ||
637 | if (ret) | ||
638 | break; | ||
639 | list_for_each_entry_reverse(be, &bl->bl_extents[i], be_node) { | ||
640 | if (isect >= be->be_f_offset + be->be_length) | ||
641 | break; | ||
642 | if (isect >= be->be_f_offset) { | ||
643 | /* We have found an extent */ | ||
644 | dprintk("%s Get %p (%i)\n", __func__, be, | ||
645 | atomic_read(&be->be_refcnt.refcount)); | ||
646 | kref_get(&be->be_refcnt); | ||
647 | ret = be; | ||
648 | break; | ||
649 | } | ||
650 | } | ||
651 | } | ||
652 | print_bl_extent(ret); | ||
653 | return ret; | ||
654 | } | ||
655 | |||
656 | int | ||
657 | encode_pnfs_block_layoutupdate(struct pnfs_block_layout *bl, | ||
658 | struct xdr_stream *xdr, | ||
659 | const struct nfs4_layoutcommit_args *arg) | ||
660 | { | ||
661 | struct pnfs_block_short_extent *lce, *save; | ||
662 | unsigned int count = 0; | ||
663 | __be32 *p, *xdr_start; | ||
664 | |||
665 | dprintk("%s enter\n", __func__); | ||
666 | /* BUG - creation of bl_commit is buggy - need to wait for | ||
667 | * entire block to be marked WRITTEN before it can be added. | ||
668 | */ | ||
669 | spin_lock(&bl->bl_ext_lock); | ||
670 | /* Want to adjust for possible truncate */ | ||
671 | /* We now want to adjust argument range */ | ||
672 | |||
673 | /* XDR encode the ranges found */ | ||
674 | xdr_start = xdr_reserve_space(xdr, 8); | ||
675 | if (!xdr_start) | ||
676 | goto out; | ||
677 | list_for_each_entry_safe(lce, save, &bl->bl_commit, bse_node) { | ||
678 | p = xdr_reserve_space(xdr, 7 * 4 + sizeof(lce->bse_devid.data)); | ||
679 | if (!p) | ||
680 | break; | ||
681 | p = xdr_encode_opaque_fixed(p, lce->bse_devid.data, NFS4_DEVICEID4_SIZE); | ||
682 | p = xdr_encode_hyper(p, lce->bse_f_offset << SECTOR_SHIFT); | ||
683 | p = xdr_encode_hyper(p, lce->bse_length << SECTOR_SHIFT); | ||
684 | p = xdr_encode_hyper(p, 0LL); | ||
685 | *p++ = cpu_to_be32(PNFS_BLOCK_READWRITE_DATA); | ||
686 | list_move_tail(&lce->bse_node, &bl->bl_committing); | ||
687 | bl->bl_count--; | ||
688 | count++; | ||
689 | } | ||
690 | xdr_start[0] = cpu_to_be32((xdr->p - xdr_start - 1) * 4); | ||
691 | xdr_start[1] = cpu_to_be32(count); | ||
692 | out: | ||
693 | spin_unlock(&bl->bl_ext_lock); | ||
694 | dprintk("%s found %i ranges\n", __func__, count); | ||
695 | return 0; | ||
696 | } | ||
697 | |||
698 | /* Helper function to set_to_rw that initializes a new extent */ | ||
699 | static void | ||
700 | _prep_new_extent(struct pnfs_block_extent *new, | ||
701 | struct pnfs_block_extent *orig, | ||
702 | sector_t offset, sector_t length, int state) | ||
703 | { | ||
704 | kref_init(&new->be_refcnt); | ||
705 | /* don't need to INIT_LIST_HEAD(&new->be_node) */ | ||
706 | memcpy(&new->be_devid, &orig->be_devid, sizeof(struct nfs4_deviceid)); | ||
707 | new->be_mdev = orig->be_mdev; | ||
708 | new->be_f_offset = offset; | ||
709 | new->be_length = length; | ||
710 | new->be_v_offset = orig->be_v_offset - orig->be_f_offset + offset; | ||
711 | new->be_state = state; | ||
712 | new->be_inval = orig->be_inval; | ||
713 | } | ||
714 | |||
715 | /* Tries to merge be with extent in front of it in list. | ||
716 | * Frees storage if not used. | ||
717 | */ | ||
718 | static struct pnfs_block_extent * | ||
719 | _front_merge(struct pnfs_block_extent *be, struct list_head *head, | ||
720 | struct pnfs_block_extent *storage) | ||
721 | { | ||
722 | struct pnfs_block_extent *prev; | ||
723 | |||
724 | if (!storage) | ||
725 | goto no_merge; | ||
726 | if (&be->be_node == head || be->be_node.prev == head) | ||
727 | goto no_merge; | ||
728 | prev = list_entry(be->be_node.prev, struct pnfs_block_extent, be_node); | ||
729 | if ((prev->be_f_offset + prev->be_length != be->be_f_offset) || | ||
730 | !extents_consistent(prev, be)) | ||
731 | goto no_merge; | ||
732 | _prep_new_extent(storage, prev, prev->be_f_offset, | ||
733 | prev->be_length + be->be_length, prev->be_state); | ||
734 | list_replace(&prev->be_node, &storage->be_node); | ||
735 | bl_put_extent(prev); | ||
736 | list_del(&be->be_node); | ||
737 | bl_put_extent(be); | ||
738 | return storage; | ||
739 | |||
740 | no_merge: | ||
741 | kfree(storage); | ||
742 | return be; | ||
743 | } | ||
744 | |||
745 | static u64 | ||
746 | set_to_rw(struct pnfs_block_layout *bl, u64 offset, u64 length) | ||
747 | { | ||
748 | u64 rv = offset + length; | ||
749 | struct pnfs_block_extent *be, *e1, *e2, *e3, *new, *old; | ||
750 | struct pnfs_block_extent *children[3]; | ||
751 | struct pnfs_block_extent *merge1 = NULL, *merge2 = NULL; | ||
752 | int i = 0, j; | ||
753 | |||
754 | dprintk("%s(%llu, %llu)\n", __func__, offset, length); | ||
755 | /* Create storage for up to three new extents e1, e2, e3 */ | ||
756 | e1 = kmalloc(sizeof(*e1), GFP_ATOMIC); | ||
757 | e2 = kmalloc(sizeof(*e2), GFP_ATOMIC); | ||
758 | e3 = kmalloc(sizeof(*e3), GFP_ATOMIC); | ||
759 | /* BUG - we are ignoring any failure */ | ||
760 | if (!e1 || !e2 || !e3) | ||
761 | goto out_nosplit; | ||
762 | |||
763 | spin_lock(&bl->bl_ext_lock); | ||
764 | be = bl_find_get_extent_locked(bl, offset); | ||
765 | rv = be->be_f_offset + be->be_length; | ||
766 | if (be->be_state != PNFS_BLOCK_INVALID_DATA) { | ||
767 | spin_unlock(&bl->bl_ext_lock); | ||
768 | goto out_nosplit; | ||
769 | } | ||
770 | /* Add e* to children, bumping e*'s krefs */ | ||
771 | if (be->be_f_offset != offset) { | ||
772 | _prep_new_extent(e1, be, be->be_f_offset, | ||
773 | offset - be->be_f_offset, | ||
774 | PNFS_BLOCK_INVALID_DATA); | ||
775 | children[i++] = e1; | ||
776 | print_bl_extent(e1); | ||
777 | } else | ||
778 | merge1 = e1; | ||
779 | _prep_new_extent(e2, be, offset, | ||
780 | min(length, be->be_f_offset + be->be_length - offset), | ||
781 | PNFS_BLOCK_READWRITE_DATA); | ||
782 | children[i++] = e2; | ||
783 | print_bl_extent(e2); | ||
784 | if (offset + length < be->be_f_offset + be->be_length) { | ||
785 | _prep_new_extent(e3, be, e2->be_f_offset + e2->be_length, | ||
786 | be->be_f_offset + be->be_length - | ||
787 | offset - length, | ||
788 | PNFS_BLOCK_INVALID_DATA); | ||
789 | children[i++] = e3; | ||
790 | print_bl_extent(e3); | ||
791 | } else | ||
792 | merge2 = e3; | ||
793 | |||
794 | /* Remove be from list, and insert the e* */ | ||
795 | /* We don't get refs on e*, since this list is the base reference | ||
796 | * set when init'ed. | ||
797 | */ | ||
798 | if (i < 3) | ||
799 | children[i] = NULL; | ||
800 | new = children[0]; | ||
801 | list_replace(&be->be_node, &new->be_node); | ||
802 | bl_put_extent(be); | ||
803 | new = _front_merge(new, &bl->bl_extents[RW_EXTENT], merge1); | ||
804 | for (j = 1; j < i; j++) { | ||
805 | old = new; | ||
806 | new = children[j]; | ||
807 | list_add(&new->be_node, &old->be_node); | ||
808 | } | ||
809 | if (merge2) { | ||
810 | /* This is a HACK, should just create a _back_merge function */ | ||
811 | new = list_entry(new->be_node.next, | ||
812 | struct pnfs_block_extent, be_node); | ||
813 | new = _front_merge(new, &bl->bl_extents[RW_EXTENT], merge2); | ||
814 | } | ||
815 | spin_unlock(&bl->bl_ext_lock); | ||
816 | |||
817 | /* Since we removed the base reference above, be is now scheduled for | ||
818 | * destruction. | ||
819 | */ | ||
820 | bl_put_extent(be); | ||
821 | dprintk("%s returns %llu after split\n", __func__, rv); | ||
822 | return rv; | ||
823 | |||
824 | out_nosplit: | ||
825 | kfree(e1); | ||
826 | kfree(e2); | ||
827 | kfree(e3); | ||
828 | dprintk("%s returns %llu without splitting\n", __func__, rv); | ||
829 | return rv; | ||
830 | } | ||
831 | |||
832 | void | ||
833 | clean_pnfs_block_layoutupdate(struct pnfs_block_layout *bl, | ||
834 | const struct nfs4_layoutcommit_args *arg, | ||
835 | int status) | ||
836 | { | ||
837 | struct pnfs_block_short_extent *lce, *save; | ||
838 | |||
839 | dprintk("%s status %d\n", __func__, status); | ||
840 | list_for_each_entry_safe(lce, save, &bl->bl_committing, bse_node) { | ||
841 | if (likely(!status)) { | ||
842 | u64 offset = lce->bse_f_offset; | ||
843 | u64 end = offset + lce->bse_length; | ||
844 | |||
845 | do { | ||
846 | offset = set_to_rw(bl, offset, end - offset); | ||
847 | } while (offset < end); | ||
848 | list_del(&lce->bse_node); | ||
849 | |||
850 | kfree(lce); | ||
851 | } else { | ||
852 | list_del(&lce->bse_node); | ||
853 | spin_lock(&bl->bl_ext_lock); | ||
854 | add_to_commitlist(bl, lce); | ||
855 | spin_unlock(&bl->bl_ext_lock); | ||
856 | } | ||
857 | } | ||
858 | } | ||
859 | |||
860 | int bl_push_one_short_extent(struct pnfs_inval_markings *marks) | ||
861 | { | ||
862 | struct pnfs_block_short_extent *new; | ||
863 | |||
864 | new = kmalloc(sizeof(*new), GFP_NOFS); | ||
865 | if (unlikely(!new)) | ||
866 | return -ENOMEM; | ||
867 | |||
868 | spin_lock_bh(&marks->im_lock); | ||
869 | list_add(&new->bse_node, &marks->im_extents); | ||
870 | spin_unlock_bh(&marks->im_lock); | ||
871 | |||
872 | return 0; | ||
873 | } | ||
874 | |||
875 | struct pnfs_block_short_extent * | ||
876 | bl_pop_one_short_extent(struct pnfs_inval_markings *marks) | ||
877 | { | ||
878 | struct pnfs_block_short_extent *rv = NULL; | ||
879 | |||
880 | spin_lock_bh(&marks->im_lock); | ||
881 | if (!list_empty(&marks->im_extents)) { | ||
882 | rv = list_entry((&marks->im_extents)->next, | ||
883 | struct pnfs_block_short_extent, bse_node); | ||
884 | list_del_init(&rv->bse_node); | ||
885 | } | ||
886 | spin_unlock_bh(&marks->im_lock); | ||
887 | |||
888 | return rv; | ||
889 | } | ||
890 | |||
891 | void bl_free_short_extents(struct pnfs_inval_markings *marks, int num_to_free) | ||
892 | { | ||
893 | struct pnfs_block_short_extent *se = NULL, *tmp; | ||
894 | |||
895 | if (num_to_free <= 0) | ||
896 | return; | ||
897 | |||
898 | spin_lock(&marks->im_lock); | ||
899 | list_for_each_entry_safe(se, tmp, &marks->im_extents, bse_node) { | ||
900 | list_del(&se->bse_node); | ||
901 | kfree(se); | ||
902 | if (--num_to_free == 0) | ||
903 | break; | ||
904 | } | ||
905 | spin_unlock(&marks->im_lock); | ||
906 | |||
907 | BUG_ON(num_to_free > 0); | ||
908 | } | ||
diff --git a/fs/nfs/blocklayout/rpc_pipefs.c b/fs/nfs/blocklayout/rpc_pipefs.c new file mode 100644 index 000000000000..8d04bda2bd2e --- /dev/null +++ b/fs/nfs/blocklayout/rpc_pipefs.c | |||
@@ -0,0 +1,285 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2006,2007 The Regents of the University of Michigan. | ||
3 | * All rights reserved. | ||
4 | * | ||
5 | * Andy Adamson <andros@citi.umich.edu> | ||
6 | * Fred Isaman <iisaman@umich.edu> | ||
7 | * | ||
8 | * permission is granted to use, copy, create derivative works and | ||
9 | * redistribute this software and such derivative works for any purpose, | ||
10 | * so long as the name of the university of michigan is not used in | ||
11 | * any advertising or publicity pertaining to the use or distribution | ||
12 | * of this software without specific, written prior authorization. if | ||
13 | * the above copyright notice or any other identification of the | ||
14 | * university of michigan is included in any copy of any portion of | ||
15 | * this software, then the disclaimer below must also be included. | ||
16 | * | ||
17 | * this software is provided as is, without representation from the | ||
18 | * university of michigan as to its fitness for any purpose, and without | ||
19 | * warranty by the university of michigan of any kind, either express | ||
20 | * or implied, including without limitation the implied warranties of | ||
21 | * merchantability and fitness for a particular purpose. the regents | ||
22 | * of the university of michigan shall not be liable for any damages, | ||
23 | * including special, indirect, incidental, or consequential damages, | ||
24 | * with respect to any claim arising out or in connection with the use | ||
25 | * of the software, even if it has been or is hereafter advised of the | ||
26 | * possibility of such damages. | ||
27 | */ | ||
28 | |||
29 | #include <linux/module.h> | ||
30 | #include <linux/genhd.h> | ||
31 | #include <linux/blkdev.h> | ||
32 | |||
33 | #include "blocklayout.h" | ||
34 | |||
35 | #define NFSDBG_FACILITY NFSDBG_PNFS_LD | ||
36 | |||
37 | static void | ||
38 | nfs4_encode_simple(__be32 *p, struct pnfs_block_volume *b) | ||
39 | { | ||
40 | int i; | ||
41 | |||
42 | *p++ = cpu_to_be32(1); | ||
43 | *p++ = cpu_to_be32(b->type); | ||
44 | *p++ = cpu_to_be32(b->simple.nr_sigs); | ||
45 | for (i = 0; i < b->simple.nr_sigs; i++) { | ||
46 | p = xdr_encode_hyper(p, b->simple.sigs[i].offset); | ||
47 | p = xdr_encode_opaque(p, b->simple.sigs[i].sig, | ||
48 | b->simple.sigs[i].sig_len); | ||
49 | } | ||
50 | } | ||
51 | |||
52 | dev_t | ||
53 | bl_resolve_deviceid(struct nfs_server *server, struct pnfs_block_volume *b, | ||
54 | gfp_t gfp_mask) | ||
55 | { | ||
56 | struct net *net = server->nfs_client->cl_net; | ||
57 | struct nfs_net *nn = net_generic(net, nfs_net_id); | ||
58 | struct bl_dev_msg *reply = &nn->bl_mount_reply; | ||
59 | struct bl_pipe_msg bl_pipe_msg; | ||
60 | struct rpc_pipe_msg *msg = &bl_pipe_msg.msg; | ||
61 | struct bl_msg_hdr *bl_msg; | ||
62 | DECLARE_WAITQUEUE(wq, current); | ||
63 | dev_t dev = 0; | ||
64 | int rc; | ||
65 | |||
66 | dprintk("%s CREATING PIPEFS MESSAGE\n", __func__); | ||
67 | |||
68 | bl_pipe_msg.bl_wq = &nn->bl_wq; | ||
69 | |||
70 | b->simple.len += 4; /* single volume */ | ||
71 | if (b->simple.len > PAGE_SIZE) | ||
72 | return -EIO; | ||
73 | |||
74 | memset(msg, 0, sizeof(*msg)); | ||
75 | msg->len = sizeof(*bl_msg) + b->simple.len; | ||
76 | msg->data = kzalloc(msg->len, gfp_mask); | ||
77 | if (!msg->data) | ||
78 | goto out; | ||
79 | |||
80 | bl_msg = msg->data; | ||
81 | bl_msg->type = BL_DEVICE_MOUNT; | ||
82 | bl_msg->totallen = b->simple.len; | ||
83 | nfs4_encode_simple(msg->data + sizeof(*bl_msg), b); | ||
84 | |||
85 | dprintk("%s CALLING USERSPACE DAEMON\n", __func__); | ||
86 | add_wait_queue(&nn->bl_wq, &wq); | ||
87 | rc = rpc_queue_upcall(nn->bl_device_pipe, msg); | ||
88 | if (rc < 0) { | ||
89 | remove_wait_queue(&nn->bl_wq, &wq); | ||
90 | goto out; | ||
91 | } | ||
92 | |||
93 | set_current_state(TASK_UNINTERRUPTIBLE); | ||
94 | schedule(); | ||
95 | __set_current_state(TASK_RUNNING); | ||
96 | remove_wait_queue(&nn->bl_wq, &wq); | ||
97 | |||
98 | if (reply->status != BL_DEVICE_REQUEST_PROC) { | ||
99 | printk(KERN_WARNING "%s failed to decode device: %d\n", | ||
100 | __func__, reply->status); | ||
101 | goto out; | ||
102 | } | ||
103 | |||
104 | dev = MKDEV(reply->major, reply->minor); | ||
105 | out: | ||
106 | kfree(msg->data); | ||
107 | return dev; | ||
108 | } | ||
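In outline, the upcall above is the usual sleeping-caller handshake (a
paraphrase of the code above, not additional driver logic):

	/*
	 * add_wait_queue(&nn->bl_wq, &wq);	register before queueing, so
	 * rpc_queue_upcall(pipe, msg);		a fast reply cannot be missed;
	 * set_current_state(TASK_UNINTERRUPTIBLE);
	 * schedule();				sleep until bl_pipe_downcall()
	 *					copies the reply into
	 *					nn->bl_mount_reply and wakes bl_wq.
	 */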
109 | |||
110 | static ssize_t bl_pipe_downcall(struct file *filp, const char __user *src, | ||
111 | size_t mlen) | ||
112 | { | ||
113 | struct nfs_net *nn = net_generic(filp->f_dentry->d_sb->s_fs_info, | ||
114 | nfs_net_id); | ||
115 | |||
116 | if (mlen != sizeof (struct bl_dev_msg)) | ||
117 | return -EINVAL; | ||
118 | |||
119 | if (copy_from_user(&nn->bl_mount_reply, src, mlen) != 0) | ||
120 | return -EFAULT; | ||
121 | |||
122 | wake_up(&nn->bl_wq); | ||
123 | |||
124 | return mlen; | ||
125 | } | ||
126 | |||
127 | static void bl_pipe_destroy_msg(struct rpc_pipe_msg *msg) | ||
128 | { | ||
129 | struct bl_pipe_msg *bl_pipe_msg = | ||
130 | container_of(msg, struct bl_pipe_msg, msg); | ||
131 | |||
132 | if (msg->errno >= 0) | ||
133 | return; | ||
134 | wake_up(bl_pipe_msg->bl_wq); | ||
135 | } | ||
136 | |||
137 | static const struct rpc_pipe_ops bl_upcall_ops = { | ||
138 | .upcall = rpc_pipe_generic_upcall, | ||
139 | .downcall = bl_pipe_downcall, | ||
140 | .destroy_msg = bl_pipe_destroy_msg, | ||
141 | }; | ||
142 | |||
143 | static struct dentry *nfs4blocklayout_register_sb(struct super_block *sb, | ||
144 | struct rpc_pipe *pipe) | ||
145 | { | ||
146 | struct dentry *dir, *dentry; | ||
147 | |||
148 | dir = rpc_d_lookup_sb(sb, NFS_PIPE_DIRNAME); | ||
149 | if (dir == NULL) | ||
150 | return ERR_PTR(-ENOENT); | ||
151 | dentry = rpc_mkpipe_dentry(dir, "blocklayout", NULL, pipe); | ||
152 | dput(dir); | ||
153 | return dentry; | ||
154 | } | ||
155 | |||
156 | static void nfs4blocklayout_unregister_sb(struct super_block *sb, | ||
157 | struct rpc_pipe *pipe) | ||
158 | { | ||
159 | if (pipe->dentry) | ||
160 | rpc_unlink(pipe->dentry); | ||
161 | } | ||
162 | |||
163 | static int rpc_pipefs_event(struct notifier_block *nb, unsigned long event, | ||
164 | void *ptr) | ||
165 | { | ||
166 | struct super_block *sb = ptr; | ||
167 | struct net *net = sb->s_fs_info; | ||
168 | struct nfs_net *nn = net_generic(net, nfs_net_id); | ||
169 | struct dentry *dentry; | ||
170 | int ret = 0; | ||
171 | |||
172 | if (!try_module_get(THIS_MODULE)) | ||
173 | return 0; | ||
174 | |||
175 | if (nn->bl_device_pipe == NULL) { | ||
176 | module_put(THIS_MODULE); | ||
177 | return 0; | ||
178 | } | ||
179 | |||
180 | switch (event) { | ||
181 | case RPC_PIPEFS_MOUNT: | ||
182 | dentry = nfs4blocklayout_register_sb(sb, nn->bl_device_pipe); | ||
183 | if (IS_ERR(dentry)) { | ||
184 | ret = PTR_ERR(dentry); | ||
185 | break; | ||
186 | } | ||
187 | nn->bl_device_pipe->dentry = dentry; | ||
188 | break; | ||
189 | case RPC_PIPEFS_UMOUNT: | ||
190 | if (nn->bl_device_pipe->dentry) | ||
191 | nfs4blocklayout_unregister_sb(sb, nn->bl_device_pipe); | ||
192 | break; | ||
193 | default: | ||
194 | ret = -ENOTSUPP; | ||
195 | break; | ||
196 | } | ||
197 | module_put(THIS_MODULE); | ||
198 | return ret; | ||
199 | } | ||
200 | |||
201 | static struct notifier_block nfs4blocklayout_block = { | ||
202 | .notifier_call = rpc_pipefs_event, | ||
203 | }; | ||
204 | |||
205 | static struct dentry *nfs4blocklayout_register_net(struct net *net, | ||
206 | struct rpc_pipe *pipe) | ||
207 | { | ||
208 | struct super_block *pipefs_sb; | ||
209 | struct dentry *dentry; | ||
210 | |||
211 | pipefs_sb = rpc_get_sb_net(net); | ||
212 | if (!pipefs_sb) | ||
213 | return NULL; | ||
214 | dentry = nfs4blocklayout_register_sb(pipefs_sb, pipe); | ||
215 | rpc_put_sb_net(net); | ||
216 | return dentry; | ||
217 | } | ||
218 | |||
219 | static void nfs4blocklayout_unregister_net(struct net *net, | ||
220 | struct rpc_pipe *pipe) | ||
221 | { | ||
222 | struct super_block *pipefs_sb; | ||
223 | |||
224 | pipefs_sb = rpc_get_sb_net(net); | ||
225 | if (pipefs_sb) { | ||
226 | nfs4blocklayout_unregister_sb(pipefs_sb, pipe); | ||
227 | rpc_put_sb_net(net); | ||
228 | } | ||
229 | } | ||
230 | |||
231 | static int nfs4blocklayout_net_init(struct net *net) | ||
232 | { | ||
233 | struct nfs_net *nn = net_generic(net, nfs_net_id); | ||
234 | struct dentry *dentry; | ||
235 | |||
236 | init_waitqueue_head(&nn->bl_wq); | ||
237 | nn->bl_device_pipe = rpc_mkpipe_data(&bl_upcall_ops, 0); | ||
238 | if (IS_ERR(nn->bl_device_pipe)) | ||
239 | return PTR_ERR(nn->bl_device_pipe); | ||
240 | dentry = nfs4blocklayout_register_net(net, nn->bl_device_pipe); | ||
241 | if (IS_ERR(dentry)) { | ||
242 | rpc_destroy_pipe_data(nn->bl_device_pipe); | ||
243 | return PTR_ERR(dentry); | ||
244 | } | ||
245 | nn->bl_device_pipe->dentry = dentry; | ||
246 | return 0; | ||
247 | } | ||
248 | |||
249 | static void nfs4blocklayout_net_exit(struct net *net) | ||
250 | { | ||
251 | struct nfs_net *nn = net_generic(net, nfs_net_id); | ||
252 | |||
253 | nfs4blocklayout_unregister_net(net, nn->bl_device_pipe); | ||
254 | rpc_destroy_pipe_data(nn->bl_device_pipe); | ||
255 | nn->bl_device_pipe = NULL; | ||
256 | } | ||
257 | |||
258 | static struct pernet_operations nfs4blocklayout_net_ops = { | ||
259 | .init = nfs4blocklayout_net_init, | ||
260 | .exit = nfs4blocklayout_net_exit, | ||
261 | }; | ||
262 | |||
263 | int __init bl_init_pipefs(void) | ||
264 | { | ||
265 | int ret; | ||
266 | |||
267 | ret = rpc_pipefs_notifier_register(&nfs4blocklayout_block); | ||
268 | if (ret) | ||
269 | goto out; | ||
270 | ret = register_pernet_subsys(&nfs4blocklayout_net_ops); | ||
271 | if (ret) | ||
272 | goto out_unregister_notifier; | ||
273 | return 0; | ||
274 | |||
275 | out_unregister_notifier: | ||
276 | rpc_pipefs_notifier_unregister(&nfs4blocklayout_block); | ||
277 | out: | ||
278 | return ret; | ||
279 | } | ||
280 | |||
281 | void __exit bl_cleanup_pipefs(void) | ||
282 | { | ||
283 | rpc_pipefs_notifier_unregister(&nfs4blocklayout_block); | ||
284 | unregister_pernet_subsys(&nfs4blocklayout_net_ops); | ||
285 | } | ||
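For context, a hypothetical wiring of these hooks into module init/exit; the
real call sites live in blocklayout.c, which is outside this hunk:

	static int __init nfs4blocklayout_init(void)
	{
		int ret = bl_init_pipefs();

		if (ret)
			return ret;
		/* ... register the pNFS block layoutdriver itself ... */
		return 0;
	}

	static void __exit nfs4blocklayout_exit(void)
	{
		/* ... unregister the layoutdriver ... */
		bl_cleanup_pipefs();
	}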
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 41db5258e7a7..73466b934090 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c | |||
@@ -171,14 +171,26 @@ static u32 initiate_file_draining(struct nfs_client *clp, | |||
171 | goto out; | 171 | goto out; |
172 | 172 | ||
173 | ino = lo->plh_inode; | 173 | ino = lo->plh_inode; |
174 | |||
175 | spin_lock(&ino->i_lock); | ||
176 | pnfs_set_layout_stateid(lo, &args->cbl_stateid, true); | ||
177 | spin_unlock(&ino->i_lock); | ||
178 | |||
179 | pnfs_layoutcommit_inode(ino, false); | ||
180 | |||
174 | spin_lock(&ino->i_lock); | 181 | spin_lock(&ino->i_lock); |
175 | if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) || | 182 | if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) || |
176 | pnfs_mark_matching_lsegs_invalid(lo, &free_me_list, | 183 | pnfs_mark_matching_lsegs_invalid(lo, &free_me_list, |
177 | &args->cbl_range)) | 184 | &args->cbl_range)) { |
178 | rv = NFS4ERR_DELAY; | 185 | rv = NFS4ERR_DELAY; |
179 | else | 186 | goto unlock; |
180 | rv = NFS4ERR_NOMATCHING_LAYOUT; | 187 | } |
181 | pnfs_set_layout_stateid(lo, &args->cbl_stateid, true); | 188 | |
189 | if (NFS_SERVER(ino)->pnfs_curr_ld->return_range) { | ||
190 | NFS_SERVER(ino)->pnfs_curr_ld->return_range(lo, | ||
191 | &args->cbl_range); | ||
192 | } | ||
193 | unlock: | ||
182 | spin_unlock(&ino->i_lock); | 194 | spin_unlock(&ino->i_lock); |
183 | pnfs_free_lseg_list(&free_me_list); | 195 | pnfs_free_lseg_list(&free_me_list); |
184 | pnfs_put_layout_hdr(lo); | 196 | pnfs_put_layout_hdr(lo); |
@@ -277,9 +289,6 @@ __be32 nfs4_callback_devicenotify(struct cb_devicenotifyargs *args, | |||
277 | } | 289 | } |
278 | 290 | ||
279 | found: | 291 | found: |
280 | if (dev->cbd_notify_type == NOTIFY_DEVICEID4_CHANGE) | ||
281 | dprintk("%s: NOTIFY_DEVICEID4_CHANGE not supported, " | ||
282 | "deleting instead\n", __func__); | ||
283 | nfs4_delete_deviceid(server->pnfs_curr_ld, clp, &dev->cbd_dev_id); | 292 | nfs4_delete_deviceid(server->pnfs_curr_ld, clp, &dev->cbd_dev_id); |
284 | } | 293 | } |
285 | 294 | ||
diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 1c5ff6d58385..f9f4845db989 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c | |||
@@ -1252,6 +1252,7 @@ static int nfs_server_list_open(struct inode *inode, struct file *file) | |||
1252 | * set up the iterator to start reading from the server list and return the first item | 1252 | * set up the iterator to start reading from the server list and return the first item |
1253 | */ | 1253 | */ |
1254 | static void *nfs_server_list_start(struct seq_file *m, loff_t *_pos) | 1254 | static void *nfs_server_list_start(struct seq_file *m, loff_t *_pos) |
1255 | __acquires(&nn->nfs_client_lock) | ||
1255 | { | 1256 | { |
1256 | struct nfs_net *nn = net_generic(seq_file_net(m), nfs_net_id); | 1257 | struct nfs_net *nn = net_generic(seq_file_net(m), nfs_net_id); |
1257 | 1258 | ||
@@ -1274,6 +1275,7 @@ static void *nfs_server_list_next(struct seq_file *p, void *v, loff_t *pos) | |||
1274 | * clean up after reading from the transports list | 1275 | * clean up after reading from the transports list |
1275 | */ | 1276 | */ |
1276 | static void nfs_server_list_stop(struct seq_file *p, void *v) | 1277 | static void nfs_server_list_stop(struct seq_file *p, void *v) |
1278 | __releases(&nn->nfs_client_lock) | ||
1277 | { | 1279 | { |
1278 | struct nfs_net *nn = net_generic(seq_file_net(p), nfs_net_id); | 1280 | struct nfs_net *nn = net_generic(seq_file_net(p), nfs_net_id); |
1279 | 1281 | ||
@@ -1318,7 +1320,7 @@ static int nfs_server_list_show(struct seq_file *m, void *v) | |||
1318 | */ | 1320 | */ |
1319 | static int nfs_volume_list_open(struct inode *inode, struct file *file) | 1321 | static int nfs_volume_list_open(struct inode *inode, struct file *file) |
1320 | { | 1322 | { |
1321 | return seq_open_net(inode, file, &nfs_server_list_ops, | 1323 | return seq_open_net(inode, file, &nfs_volume_list_ops, |
1322 | sizeof(struct seq_net_private)); | 1324 | sizeof(struct seq_net_private)); |
1323 | } | 1325 | } |
1324 | 1326 | ||
@@ -1326,6 +1328,7 @@ static int nfs_volume_list_open(struct inode *inode, struct file *file) | |||
1326 | * set up the iterator to start reading from the volume list and return the first item | 1328 | * set up the iterator to start reading from the volume list and return the first item |
1327 | */ | 1329 | */ |
1328 | static void *nfs_volume_list_start(struct seq_file *m, loff_t *_pos) | 1330 | static void *nfs_volume_list_start(struct seq_file *m, loff_t *_pos) |
1331 | __acquires(&nn->nfs_client_lock) | ||
1329 | { | 1332 | { |
1330 | struct nfs_net *nn = net_generic(seq_file_net(m), nfs_net_id); | 1333 | struct nfs_net *nn = net_generic(seq_file_net(m), nfs_net_id); |
1331 | 1334 | ||
@@ -1348,6 +1351,7 @@ static void *nfs_volume_list_next(struct seq_file *p, void *v, loff_t *pos) | |||
1348 | * clean up after reading from the transports list | 1351 | * clean up after reading from the transports list |
1349 | */ | 1352 | */ |
1350 | static void nfs_volume_list_stop(struct seq_file *p, void *v) | 1353 | static void nfs_volume_list_stop(struct seq_file *p, void *v) |
1354 | __releases(&nn->nfs_client_lock) | ||
1351 | { | 1355 | { |
1352 | struct nfs_net *nn = net_generic(seq_file_net(p), nfs_net_id); | 1356 | struct nfs_net *nn = net_generic(seq_file_net(p), nfs_net_id); |
1353 | 1357 | ||
@@ -1412,24 +1416,18 @@ int nfs_fs_proc_net_init(struct net *net) | |||
1412 | p = proc_create("volumes", S_IFREG|S_IRUGO, | 1416 | p = proc_create("volumes", S_IFREG|S_IRUGO, |
1413 | nn->proc_nfsfs, &nfs_volume_list_fops); | 1417 | nn->proc_nfsfs, &nfs_volume_list_fops); |
1414 | if (!p) | 1418 | if (!p) |
1415 | goto error_2; | 1419 | goto error_1; |
1416 | return 0; | 1420 | return 0; |
1417 | 1421 | ||
1418 | error_2: | ||
1419 | remove_proc_entry("servers", nn->proc_nfsfs); | ||
1420 | error_1: | 1422 | error_1: |
1421 | remove_proc_entry("fs/nfsfs", NULL); | 1423 | remove_proc_subtree("nfsfs", net->proc_net); |
1422 | error_0: | 1424 | error_0: |
1423 | return -ENOMEM; | 1425 | return -ENOMEM; |
1424 | } | 1426 | } |
1425 | 1427 | ||
1426 | void nfs_fs_proc_net_exit(struct net *net) | 1428 | void nfs_fs_proc_net_exit(struct net *net) |
1427 | { | 1429 | { |
1428 | struct nfs_net *nn = net_generic(net, nfs_net_id); | 1430 | remove_proc_subtree("nfsfs", net->proc_net); |
1429 | |||
1430 | remove_proc_entry("volumes", nn->proc_nfsfs); | ||
1431 | remove_proc_entry("servers", nn->proc_nfsfs); | ||
1432 | remove_proc_entry("fs/nfsfs", NULL); | ||
1433 | } | 1431 | } |
1434 | 1432 | ||
1435 | /* | 1433 | /* |
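The nfs_fs_proc_net_init()/_exit() hunks above replace per-entry removal (the old error path had an extra error_2 label and removed the wrong "fs/nfsfs" path) with a single remove_proc_subtree() call. A toy illustration of why one recursive teardown simplifies error handling (struct dir is a stand-in, not the procfs API):

#include <stdio.h>
#include <stdlib.h>

/* Toy stand-in for a proc directory: a parent owning its entries. */
struct entry { char name[16]; struct entry *next; };
struct dir { struct entry *head; };

static int dir_add(struct dir *d, const char *name)
{
	struct entry *e = malloc(sizeof(*e));

	if (!e)
		return -1;
	snprintf(e->name, sizeof(e->name), "%s", name);
	e->next = d->head;
	d->head = e;
	return 0;
}

/* Removing the subtree frees every child in one call, so error
 * paths need not track which entries were already created. */
static void dir_remove_subtree(struct dir *d)
{
	while (d->head) {
		struct entry *e = d->head;

		d->head = e->next;
		free(e);
	}
}

int main(void)
{
	struct dir d = { 0 };

	if (dir_add(&d, "servers") || dir_add(&d, "volumes")) {
		dir_remove_subtree(&d);	/* single unwind step */
		return 1;
	}
	dir_remove_subtree(&d);
	return 0;
}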
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 65ef6e00deee..dda4b8667c02 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c | |||
@@ -178,7 +178,6 @@ static int nfs_direct_set_or_cmp_hdr_verf(struct nfs_direct_req *dreq, | |||
178 | return memcmp(verfp, &hdr->verf, sizeof(struct nfs_writeverf)); | 178 | return memcmp(verfp, &hdr->verf, sizeof(struct nfs_writeverf)); |
179 | } | 179 | } |
180 | 180 | ||
181 | #if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) | ||
182 | /* | 181 | /* |
183 | * nfs_direct_cmp_commit_data_verf - compare verifier for commit data | 182 | * nfs_direct_cmp_commit_data_verf - compare verifier for commit data |
184 | * @dreq - direct request possibly spanning multiple servers | 183 | * @dreq - direct request possibly spanning multiple servers |
@@ -197,7 +196,6 @@ static int nfs_direct_cmp_commit_data_verf(struct nfs_direct_req *dreq, | |||
197 | WARN_ON_ONCE(verfp->committed < 0); | 196 | WARN_ON_ONCE(verfp->committed < 0); |
198 | return memcmp(verfp, &data->verf, sizeof(struct nfs_writeverf)); | 197 | return memcmp(verfp, &data->verf, sizeof(struct nfs_writeverf)); |
199 | } | 198 | } |
200 | #endif | ||
201 | 199 | ||
202 | /** | 200 | /** |
203 | * nfs_direct_IO - NFS address space operation for direct I/O | 201 | * nfs_direct_IO - NFS address space operation for direct I/O |
@@ -576,7 +574,6 @@ out: | |||
576 | return result; | 574 | return result; |
577 | } | 575 | } |
578 | 576 | ||
579 | #if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) | ||
580 | static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) | 577 | static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) |
581 | { | 578 | { |
582 | struct nfs_pageio_descriptor desc; | 579 | struct nfs_pageio_descriptor desc; |
@@ -700,17 +697,6 @@ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode | |||
700 | schedule_work(&dreq->work); /* Calls nfs_direct_write_schedule_work */ | 697 | schedule_work(&dreq->work); /* Calls nfs_direct_write_schedule_work */ |
701 | } | 698 | } |
702 | 699 | ||
703 | #else | ||
704 | static void nfs_direct_write_schedule_work(struct work_struct *work) | ||
705 | { | ||
706 | } | ||
707 | |||
708 | static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode) | ||
709 | { | ||
710 | nfs_direct_complete(dreq, true); | ||
711 | } | ||
712 | #endif | ||
713 | |||
714 | static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) | 700 | static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) |
715 | { | 701 | { |
716 | struct nfs_direct_req *dreq = hdr->dreq; | 702 | struct nfs_direct_req *dreq = hdr->dreq; |
diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 8c4048ecdad1..4ea92ce0537f 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c | |||
@@ -36,6 +36,7 @@ | |||
36 | #include "internal.h" | 36 | #include "internal.h" |
37 | #include "iostat.h" | 37 | #include "iostat.h" |
38 | #include "fscache.h" | 38 | #include "fscache.h" |
39 | #include "pnfs.h" | ||
39 | 40 | ||
40 | #include "nfstrace.h" | 41 | #include "nfstrace.h" |
41 | 42 | ||
@@ -327,6 +328,12 @@ static int nfs_want_read_modify_write(struct file *file, struct page *page, | |||
327 | unsigned int offset = pos & (PAGE_CACHE_SIZE - 1); | 328 | unsigned int offset = pos & (PAGE_CACHE_SIZE - 1); |
328 | unsigned int end = offset + len; | 329 | unsigned int end = offset + len; |
329 | 330 | ||
331 | if (pnfs_ld_read_whole_page(file->f_mapping->host)) { | ||
332 | if (!PageUptodate(page)) | ||
333 | return 1; | ||
334 | return 0; | ||
335 | } | ||
336 | |||
330 | if ((file->f_mode & FMODE_READ) && /* open for read? */ | 337 | if ((file->f_mode & FMODE_READ) && /* open for read? */ |
331 | !PageUptodate(page) && /* Uptodate? */ | 338 | !PageUptodate(page) && /* Uptodate? */ |
332 | !PagePrivate(page) && /* i/o request already? */ | 339 | !PagePrivate(page) && /* i/o request already? */ |
@@ -468,17 +475,26 @@ static int nfs_release_page(struct page *page, gfp_t gfp) | |||
468 | 475 | ||
469 | dfprintk(PAGECACHE, "NFS: release_page(%p)\n", page); | 476 | dfprintk(PAGECACHE, "NFS: release_page(%p)\n", page); |
470 | 477 | ||
471 | /* Only do I/O if gfp is a superset of GFP_KERNEL, and we're not | 478 | /* Always try to initiate a 'commit' if relevant, but only |
472 | * doing this memory reclaim for a fs-related allocation. | 479 | * wait for it if __GFP_WAIT is set. Even then, only wait 1 |
480 | * second and only if the 'bdi' is not congested. | ||
481 | * Waiting indefinitely can cause deadlocks when the NFS | ||
482 | * server is on this machine, when a new TCP connection is | ||
483 | * needed and in other rare cases. There is no particular | ||
484 | * need to wait extensively here. A short wait has the | ||
485 | * benefit that someone else can worry about the freezer. | ||
473 | */ | 486 | */ |
474 | if (mapping && (gfp & GFP_KERNEL) == GFP_KERNEL && | 487 | if (mapping) { |
475 | !(current->flags & PF_FSTRANS)) { | 488 | struct nfs_server *nfss = NFS_SERVER(mapping->host); |
476 | int how = FLUSH_SYNC; | 489 | nfs_commit_inode(mapping->host, 0); |
477 | 490 | if ((gfp & __GFP_WAIT) && | |
478 | /* Don't let kswapd deadlock waiting for OOM RPC calls */ | 491 | !bdi_write_congested(&nfss->backing_dev_info)) { |
479 | if (current_is_kswapd()) | 492 | wait_on_page_bit_killable_timeout(page, PG_private, |
480 | how = 0; | 493 | HZ); |
481 | nfs_commit_inode(mapping->host, how); | 494 | if (PagePrivate(page)) |
495 | set_bdi_congested(&nfss->backing_dev_info, | ||
496 | BLK_RW_ASYNC); | ||
497 | } | ||
482 | } | 498 | } |
483 | /* If PagePrivate() is set, then the page is not freeable */ | 499 | /* If PagePrivate() is set, then the page is not freeable */ |
484 | if (PagePrivate(page)) | 500 | if (PagePrivate(page)) |
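The nfs_release_page() hunk above replaces the conditional FLUSH_SYNC commit with a bounded policy: always kick an asynchronous commit, wait at most one second (only when __GFP_WAIT allows and the bdi is uncongested), and flag congestion instead of blocking reclaim indefinitely. A rough userspace model of that policy (nanosleep crudely stands in for the one-second page-bit wait, and the flags are plain booleans):

#include <stdbool.h>
#include <stdio.h>
#include <time.h>

static bool page_private = true;	/* I/O still outstanding */
static bool bdi_congested;		/* back-off hint for later callers */

static void start_async_commit(void) { /* never blocks */ }

static bool try_release(bool may_wait)
{
	start_async_commit();
	if (may_wait && !bdi_congested) {
		struct timespec t = { .tv_sec = 1, .tv_nsec = 0 };

		nanosleep(&t, NULL);	/* models the 1s bounded wait */
		if (page_private)
			bdi_congested = true;	/* don't wait next time */
	}
	return !page_private;	/* freeable only when I/O is idle */
}

int main(void)
{
	printf("released: %d, congested: %d\n",
	       try_release(true), bdi_congested);
	return 0;
}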
@@ -539,13 +555,25 @@ static int nfs_launder_page(struct page *page) | |||
539 | static int nfs_swap_activate(struct swap_info_struct *sis, struct file *file, | 555 | static int nfs_swap_activate(struct swap_info_struct *sis, struct file *file, |
540 | sector_t *span) | 556 | sector_t *span) |
541 | { | 557 | { |
558 | int ret; | ||
559 | struct rpc_clnt *clnt = NFS_CLIENT(file->f_mapping->host); | ||
560 | |||
542 | *span = sis->pages; | 561 | *span = sis->pages; |
543 | return xs_swapper(NFS_CLIENT(file->f_mapping->host)->cl_xprt, 1); | 562 | |
563 | rcu_read_lock(); | ||
564 | ret = xs_swapper(rcu_dereference(clnt->cl_xprt), 1); | ||
565 | rcu_read_unlock(); | ||
566 | |||
567 | return ret; | ||
544 | } | 568 | } |
545 | 569 | ||
546 | static void nfs_swap_deactivate(struct file *file) | 570 | static void nfs_swap_deactivate(struct file *file) |
547 | { | 571 | { |
548 | xs_swapper(NFS_CLIENT(file->f_mapping->host)->cl_xprt, 0); | 572 | struct rpc_clnt *clnt = NFS_CLIENT(file->f_mapping->host); |
573 | |||
574 | rcu_read_lock(); | ||
575 | xs_swapper(rcu_dereference(clnt->cl_xprt), 0); | ||
576 | rcu_read_unlock(); | ||
549 | } | 577 | } |
550 | #endif | 578 | #endif |
551 | 579 | ||
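The swap activate/deactivate hunks above stop dereferencing clnt->cl_xprt directly: the transport pointer is RCU-managed and may be swapped at runtime, so readers take rcu_read_lock() and load it via rcu_dereference(). A C11 sketch using an atomic pointer to model the read side (userspace has no RCU; the atomic load only stands in for the dependency-ordered dereference):

#include <stdatomic.h>
#include <stdio.h>

struct xprt { int swap_enabled; };

/* Models rcu_dereference(clnt->cl_xprt): always load the current
 * pointer inside the critical section, never cache it across calls. */
static _Atomic(struct xprt *) cl_xprt;

static void set_swapper(int enable)
{
	struct xprt *x = atomic_load(&cl_xprt);

	if (x)
		x->swap_enabled = enable;
}

int main(void)
{
	static struct xprt x0;

	atomic_store(&cl_xprt, &x0);
	set_swapper(1);
	printf("swap enabled: %d\n", x0.swap_enabled);
	return 0;
}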
diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index 1359c4a27393..abc5056999d6 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c | |||
@@ -265,7 +265,7 @@ filelayout_set_layoutcommit(struct nfs_pgio_header *hdr) | |||
265 | { | 265 | { |
266 | 266 | ||
267 | if (FILELAYOUT_LSEG(hdr->lseg)->commit_through_mds || | 267 | if (FILELAYOUT_LSEG(hdr->lseg)->commit_through_mds || |
268 | hdr->res.verf->committed == NFS_FILE_SYNC) | 268 | hdr->res.verf->committed != NFS_DATA_SYNC) |
269 | return; | 269 | return; |
270 | 270 | ||
271 | pnfs_set_layoutcommit(hdr); | 271 | pnfs_set_layoutcommit(hdr); |
@@ -403,6 +403,9 @@ static int filelayout_commit_done_cb(struct rpc_task *task, | |||
403 | return -EAGAIN; | 403 | return -EAGAIN; |
404 | } | 404 | } |
405 | 405 | ||
406 | if (data->verf.committed == NFS_UNSTABLE) | ||
407 | pnfs_commit_set_layoutcommit(data); | ||
408 | |||
406 | return 0; | 409 | return 0; |
407 | } | 410 | } |
408 | 411 | ||
@@ -646,18 +649,15 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo, | |||
646 | } | 649 | } |
647 | 650 | ||
648 | /* find and reference the deviceid */ | 651 | /* find and reference the deviceid */ |
649 | d = nfs4_find_get_deviceid(NFS_SERVER(lo->plh_inode)->pnfs_curr_ld, | 652 | d = nfs4_find_get_deviceid(NFS_SERVER(lo->plh_inode), id, |
650 | NFS_SERVER(lo->plh_inode)->nfs_client, id); | 653 | lo->plh_lc_cred, gfp_flags); |
651 | if (d == NULL) { | 654 | if (d == NULL) |
652 | dsaddr = filelayout_get_device_info(lo->plh_inode, id, | 655 | goto out; |
653 | lo->plh_lc_cred, gfp_flags); | 656 | |
654 | if (dsaddr == NULL) | 657 | dsaddr = container_of(d, struct nfs4_file_layout_dsaddr, id_node); |
655 | goto out; | ||
656 | } else | ||
657 | dsaddr = container_of(d, struct nfs4_file_layout_dsaddr, id_node); | ||
658 | /* Found deviceid is unavailable */ | 658 | /* Found deviceid is unavailable */ |
659 | if (filelayout_test_devid_unavailable(&dsaddr->id_node)) | 659 | if (filelayout_test_devid_unavailable(&dsaddr->id_node)) |
660 | goto out_put; | 660 | goto out_put; |
661 | 661 | ||
662 | fl->dsaddr = dsaddr; | 662 | fl->dsaddr = dsaddr; |
663 | 663 | ||
@@ -1269,11 +1269,12 @@ filelayout_search_commit_reqs(struct nfs_commit_info *cinfo, struct page *page) | |||
1269 | static void filelayout_retry_commit(struct nfs_commit_info *cinfo, int idx) | 1269 | static void filelayout_retry_commit(struct nfs_commit_info *cinfo, int idx) |
1270 | { | 1270 | { |
1271 | struct pnfs_ds_commit_info *fl_cinfo = cinfo->ds; | 1271 | struct pnfs_ds_commit_info *fl_cinfo = cinfo->ds; |
1272 | struct pnfs_commit_bucket *bucket = fl_cinfo->buckets; | 1272 | struct pnfs_commit_bucket *bucket; |
1273 | struct pnfs_layout_segment *freeme; | 1273 | struct pnfs_layout_segment *freeme; |
1274 | int i; | 1274 | int i; |
1275 | 1275 | ||
1276 | for (i = idx; i < fl_cinfo->nbuckets; i++, bucket++) { | 1276 | for (i = idx; i < fl_cinfo->nbuckets; i++) { |
1277 | bucket = &fl_cinfo->buckets[i]; | ||
1277 | if (list_empty(&bucket->committing)) | 1278 | if (list_empty(&bucket->committing)) |
1278 | continue; | 1279 | continue; |
1279 | nfs_retry_commit(&bucket->committing, bucket->clseg, cinfo); | 1280 | nfs_retry_commit(&bucket->committing, bucket->clseg, cinfo); |
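The filelayout_retry_commit() hunk above fixes an off-by-base bug: the bucket cursor was initialized to &buckets[0] while the loop index started at idx, so pointer and index walked different slots. Recomputing the pointer from the index each pass keeps them in step; a self-contained demonstration:

#include <stdio.h>

struct bucket { int committing; };

int main(void)
{
	struct bucket buckets[4];
	struct bucket *bucket;
	int idx = 2, i;

	/* Buggy form: cursor starts at slot 0 although i starts at idx. */
	bucket = buckets;
	for (i = idx; i < 4; i++, bucket++)
		printf("buggy visits slot %ld for index %d\n",
		       (long)(bucket - buckets), i);

	/* Fixed form: derive the pointer from the index each pass. */
	for (i = idx; i < 4; i++) {
		bucket = &buckets[i];
		printf("fixed visits slot %ld for index %d\n",
		       (long)(bucket - buckets), i);
	}
	return 0;
}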
@@ -1367,6 +1368,17 @@ out: | |||
1367 | cinfo->ds->ncommitting = 0; | 1368 | cinfo->ds->ncommitting = 0; |
1368 | return PNFS_ATTEMPTED; | 1369 | return PNFS_ATTEMPTED; |
1369 | } | 1370 | } |
1371 | static struct nfs4_deviceid_node * | ||
1372 | filelayout_alloc_deviceid_node(struct nfs_server *server, | ||
1373 | struct pnfs_device *pdev, gfp_t gfp_flags) | ||
1374 | { | ||
1375 | struct nfs4_file_layout_dsaddr *dsaddr; | ||
1376 | |||
1377 | dsaddr = nfs4_fl_alloc_deviceid_node(server, pdev, gfp_flags); | ||
1378 | if (!dsaddr) | ||
1379 | return NULL; | ||
1380 | return &dsaddr->id_node; | ||
1381 | } | ||
1370 | 1382 | ||
1371 | static void | 1383 | static void |
1372 | filelayout_free_deveiceid_node(struct nfs4_deviceid_node *d) | 1384 | filelayout_free_deveiceid_node(struct nfs4_deviceid_node *d) |
@@ -1419,6 +1431,7 @@ static struct pnfs_layoutdriver_type filelayout_type = { | |||
1419 | .commit_pagelist = filelayout_commit_pagelist, | 1431 | .commit_pagelist = filelayout_commit_pagelist, |
1420 | .read_pagelist = filelayout_read_pagelist, | 1432 | .read_pagelist = filelayout_read_pagelist, |
1421 | .write_pagelist = filelayout_write_pagelist, | 1433 | .write_pagelist = filelayout_write_pagelist, |
1434 | .alloc_deviceid_node = filelayout_alloc_deviceid_node, | ||
1422 | .free_deviceid_node = filelayout_free_deveiceid_node, | 1435 | .free_deviceid_node = filelayout_free_deveiceid_node, |
1423 | }; | 1436 | }; |
1424 | 1437 | ||
diff --git a/fs/nfs/filelayout/filelayout.h b/fs/nfs/filelayout/filelayout.h index ffbddf2219ea..7c9f800c49d7 100644 --- a/fs/nfs/filelayout/filelayout.h +++ b/fs/nfs/filelayout/filelayout.h | |||
@@ -147,10 +147,11 @@ u32 nfs4_fl_calc_j_index(struct pnfs_layout_segment *lseg, loff_t offset); | |||
147 | u32 nfs4_fl_calc_ds_index(struct pnfs_layout_segment *lseg, u32 j); | 147 | u32 nfs4_fl_calc_ds_index(struct pnfs_layout_segment *lseg, u32 j); |
148 | struct nfs4_pnfs_ds *nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, | 148 | struct nfs4_pnfs_ds *nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, |
149 | u32 ds_idx); | 149 | u32 ds_idx); |
150 | |||
151 | extern struct nfs4_file_layout_dsaddr * | ||
152 | nfs4_fl_alloc_deviceid_node(struct nfs_server *server, | ||
153 | struct pnfs_device *pdev, gfp_t gfp_flags); | ||
150 | extern void nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr); | 154 | extern void nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr); |
151 | extern void nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr); | 155 | extern void nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr); |
152 | struct nfs4_file_layout_dsaddr * | ||
153 | filelayout_get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, | ||
154 | struct rpc_cred *cred, gfp_t gfp_flags); | ||
155 | 156 | ||
156 | #endif /* FS_NFS_NFS4FILELAYOUT_H */ | 157 | #endif /* FS_NFS_NFS4FILELAYOUT_H */ |
diff --git a/fs/nfs/filelayout/filelayoutdev.c b/fs/nfs/filelayout/filelayoutdev.c index 8540516f4d71..9bb806a76d99 100644 --- a/fs/nfs/filelayout/filelayoutdev.c +++ b/fs/nfs/filelayout/filelayoutdev.c | |||
@@ -484,8 +484,9 @@ out_err: | |||
484 | } | 484 | } |
485 | 485 | ||
486 | /* Decode opaque device data and return the result */ | 486 | /* Decode opaque device data and return the result */ |
487 | static struct nfs4_file_layout_dsaddr* | 487 | struct nfs4_file_layout_dsaddr * |
488 | decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags) | 488 | nfs4_fl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, |
489 | gfp_t gfp_flags) | ||
489 | { | 490 | { |
490 | int i; | 491 | int i; |
491 | u32 cnt, num; | 492 | u32 cnt, num; |
@@ -570,10 +571,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags) | |||
570 | dsaddr->stripe_indices = stripe_indices; | 571 | dsaddr->stripe_indices = stripe_indices; |
571 | stripe_indices = NULL; | 572 | stripe_indices = NULL; |
572 | dsaddr->ds_num = num; | 573 | dsaddr->ds_num = num; |
573 | nfs4_init_deviceid_node(&dsaddr->id_node, | 574 | nfs4_init_deviceid_node(&dsaddr->id_node, server, &pdev->dev_id); |
574 | NFS_SERVER(ino)->pnfs_curr_ld, | ||
575 | NFS_SERVER(ino)->nfs_client, | ||
576 | &pdev->dev_id); | ||
577 | 575 | ||
578 | INIT_LIST_HEAD(&dsaddrs); | 576 | INIT_LIST_HEAD(&dsaddrs); |
579 | 577 | ||
@@ -587,7 +585,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags) | |||
587 | 585 | ||
588 | mp_count = be32_to_cpup(p); /* multipath count */ | 586 | mp_count = be32_to_cpup(p); /* multipath count */ |
589 | for (j = 0; j < mp_count; j++) { | 587 | for (j = 0; j < mp_count; j++) { |
590 | da = decode_ds_addr(NFS_SERVER(ino)->nfs_client->cl_net, | 588 | da = decode_ds_addr(server->nfs_client->cl_net, |
591 | &stream, gfp_flags); | 589 | &stream, gfp_flags); |
592 | if (da) | 590 | if (da) |
593 | list_add_tail(&da->da_node, &dsaddrs); | 591 | list_add_tail(&da->da_node, &dsaddrs); |
@@ -637,102 +635,6 @@ out_err: | |||
637 | return NULL; | 635 | return NULL; |
638 | } | 636 | } |
639 | 637 | ||
640 | /* | ||
641 | * Decode the opaque device specified in 'dev' and add it to the cache of | ||
642 | * available devices. | ||
643 | */ | ||
644 | static struct nfs4_file_layout_dsaddr * | ||
645 | decode_and_add_device(struct inode *inode, struct pnfs_device *dev, gfp_t gfp_flags) | ||
646 | { | ||
647 | struct nfs4_deviceid_node *d; | ||
648 | struct nfs4_file_layout_dsaddr *n, *new; | ||
649 | |||
650 | new = decode_device(inode, dev, gfp_flags); | ||
651 | if (!new) { | ||
652 | printk(KERN_WARNING "NFS: %s: Could not decode or add device\n", | ||
653 | __func__); | ||
654 | return NULL; | ||
655 | } | ||
656 | |||
657 | d = nfs4_insert_deviceid_node(&new->id_node); | ||
658 | n = container_of(d, struct nfs4_file_layout_dsaddr, id_node); | ||
659 | if (n != new) { | ||
660 | nfs4_fl_free_deviceid(new); | ||
661 | return n; | ||
662 | } | ||
663 | |||
664 | return new; | ||
665 | } | ||
666 | |||
667 | /* | ||
668 | * Retrieve the information for dev_id, add it to the list | ||
669 | * of available devices, and return it. | ||
670 | */ | ||
671 | struct nfs4_file_layout_dsaddr * | ||
672 | filelayout_get_device_info(struct inode *inode, | ||
673 | struct nfs4_deviceid *dev_id, | ||
674 | struct rpc_cred *cred, | ||
675 | gfp_t gfp_flags) | ||
676 | { | ||
677 | struct pnfs_device *pdev = NULL; | ||
678 | u32 max_resp_sz; | ||
679 | int max_pages; | ||
680 | struct page **pages = NULL; | ||
681 | struct nfs4_file_layout_dsaddr *dsaddr = NULL; | ||
682 | int rc, i; | ||
683 | struct nfs_server *server = NFS_SERVER(inode); | ||
684 | |||
685 | /* | ||
686 | * Use the session max response size as the basis for setting | ||
687 | * GETDEVICEINFO's maxcount | ||
688 | */ | ||
689 | max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz; | ||
690 | max_pages = nfs_page_array_len(0, max_resp_sz); | ||
691 | dprintk("%s inode %p max_resp_sz %u max_pages %d\n", | ||
692 | __func__, inode, max_resp_sz, max_pages); | ||
693 | |||
694 | pdev = kzalloc(sizeof(struct pnfs_device), gfp_flags); | ||
695 | if (pdev == NULL) | ||
696 | return NULL; | ||
697 | |||
698 | pages = kcalloc(max_pages, sizeof(struct page *), gfp_flags); | ||
699 | if (pages == NULL) { | ||
700 | kfree(pdev); | ||
701 | return NULL; | ||
702 | } | ||
703 | for (i = 0; i < max_pages; i++) { | ||
704 | pages[i] = alloc_page(gfp_flags); | ||
705 | if (!pages[i]) | ||
706 | goto out_free; | ||
707 | } | ||
708 | |||
709 | memcpy(&pdev->dev_id, dev_id, sizeof(*dev_id)); | ||
710 | pdev->layout_type = LAYOUT_NFSV4_1_FILES; | ||
711 | pdev->pages = pages; | ||
712 | pdev->pgbase = 0; | ||
713 | pdev->pglen = max_resp_sz; | ||
714 | pdev->mincount = 0; | ||
715 | pdev->maxcount = max_resp_sz - nfs41_maxgetdevinfo_overhead; | ||
716 | |||
717 | rc = nfs4_proc_getdeviceinfo(server, pdev, cred); | ||
718 | dprintk("%s getdevice info returns %d\n", __func__, rc); | ||
719 | if (rc) | ||
720 | goto out_free; | ||
721 | |||
722 | /* | ||
723 | * Found new device, need to decode it and then add it to the | ||
724 | * list of known devices for this mountpoint. | ||
725 | */ | ||
726 | dsaddr = decode_and_add_device(inode, pdev, gfp_flags); | ||
727 | out_free: | ||
728 | for (i = 0; i < max_pages; i++) | ||
729 | __free_page(pages[i]); | ||
730 | kfree(pages); | ||
731 | kfree(pdev); | ||
732 | dprintk("<-- %s dsaddr %p\n", __func__, dsaddr); | ||
733 | return dsaddr; | ||
734 | } | ||
735 | |||
736 | void | 638 | void |
737 | nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr) | 639 | nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr) |
738 | { | 640 | { |
diff --git a/fs/nfs/fscache-index.c b/fs/nfs/fscache-index.c index 7cf2c4699b08..777b055063f6 100644 --- a/fs/nfs/fscache-index.c +++ b/fs/nfs/fscache-index.c | |||
@@ -74,11 +74,10 @@ static uint16_t nfs_server_get_key(const void *cookie_netfs_data, | |||
74 | struct nfs_server_key *key = buffer; | 74 | struct nfs_server_key *key = buffer; |
75 | uint16_t len = sizeof(struct nfs_server_key); | 75 | uint16_t len = sizeof(struct nfs_server_key); |
76 | 76 | ||
77 | memset(key, 0, len); | ||
77 | key->nfsversion = clp->rpc_ops->version; | 78 | key->nfsversion = clp->rpc_ops->version; |
78 | key->family = clp->cl_addr.ss_family; | 79 | key->family = clp->cl_addr.ss_family; |
79 | 80 | ||
80 | memset(key, 0, len); | ||
81 | |||
82 | switch (clp->cl_addr.ss_family) { | 81 | switch (clp->cl_addr.ss_family) { |
83 | case AF_INET: | 82 | case AF_INET: |
84 | key->port = sin->sin_port; | 83 | key->port = sin->sin_port; |
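The fscache-index.c hunk above is a pure ordering fix: the old code filled in nfsversion and family and only then memset() the whole key to zero, wiping both fields. Clearing first and assigning second is the correct order, as this small reproduction shows:

#include <stdio.h>
#include <string.h>

struct key { int version; int family; };

int main(void)
{
	struct key k;

	/* Buggy order: assign first, then wipe the whole struct. */
	k.version = 4;
	k.family = 2;
	memset(&k, 0, sizeof(k));
	printf("buggy: version=%d\n", k.version);	/* prints 0 */

	/* Fixed order: clear the buffer, then fill it in. */
	memset(&k, 0, sizeof(k));
	k.version = 4;
	k.family = 2;
	printf("fixed: version=%d\n", k.version);	/* prints 4 */
	return 0;
}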
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 577a36f0a510..141c9f4a40de 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c | |||
@@ -505,7 +505,9 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr) | |||
505 | attr->ia_valid &= ~ATTR_MODE; | 505 | attr->ia_valid &= ~ATTR_MODE; |
506 | 506 | ||
507 | if (attr->ia_valid & ATTR_SIZE) { | 507 | if (attr->ia_valid & ATTR_SIZE) { |
508 | if (!S_ISREG(inode->i_mode) || attr->ia_size == i_size_read(inode)) | 508 | BUG_ON(!S_ISREG(inode->i_mode)); |
509 | |||
510 | if (attr->ia_size == i_size_read(inode)) | ||
509 | attr->ia_valid &= ~ATTR_SIZE; | 511 | attr->ia_valid &= ~ATTR_SIZE; |
510 | } | 512 | } |
511 | 513 | ||
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 94d922ebb5ac..efaa31c70fbe 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h | |||
@@ -218,13 +218,6 @@ static inline void nfs_fs_proc_exit(void) | |||
218 | int nfs_sockaddr_match_ipaddr(const struct sockaddr *, const struct sockaddr *); | 218 | int nfs_sockaddr_match_ipaddr(const struct sockaddr *, const struct sockaddr *); |
219 | #endif | 219 | #endif |
220 | 220 | ||
221 | /* nfs3client.c */ | ||
222 | #if IS_ENABLED(CONFIG_NFS_V3) | ||
223 | struct nfs_server *nfs3_create_server(struct nfs_mount_info *, struct nfs_subversion *); | ||
224 | struct nfs_server *nfs3_clone_server(struct nfs_server *, struct nfs_fh *, | ||
225 | struct nfs_fattr *, rpc_authflavor_t); | ||
226 | #endif | ||
227 | |||
228 | /* callback_xdr.c */ | 221 | /* callback_xdr.c */ |
229 | extern struct svc_version nfs4_callback_version1; | 222 | extern struct svc_version nfs4_callback_version1; |
230 | extern struct svc_version nfs4_callback_version4; | 223 | extern struct svc_version nfs4_callback_version4; |
diff --git a/fs/nfs/nfs3_fs.h b/fs/nfs/nfs3_fs.h new file mode 100644 index 000000000000..333ae4068506 --- /dev/null +++ b/fs/nfs/nfs3_fs.h | |||
@@ -0,0 +1,34 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2014 Anna Schumaker. | ||
3 | * | ||
4 | * NFSv3-specific filesystem definitions and declarations | ||
5 | */ | ||
6 | #ifndef __LINUX_FS_NFS_NFS3_FS_H | ||
7 | #define __LINUX_FS_NFS_NFS3_FS_H | ||
8 | |||
9 | /* | ||
10 | * nfs3acl.c | ||
11 | */ | ||
12 | #ifdef CONFIG_NFS_V3_ACL | ||
13 | extern struct posix_acl *nfs3_get_acl(struct inode *inode, int type); | ||
14 | extern int nfs3_set_acl(struct inode *inode, struct posix_acl *acl, int type); | ||
15 | extern int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl, | ||
16 | struct posix_acl *dfacl); | ||
17 | extern ssize_t nfs3_listxattr(struct dentry *, char *, size_t); | ||
18 | extern const struct xattr_handler *nfs3_xattr_handlers[]; | ||
19 | #else | ||
20 | static inline int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl, | ||
21 | struct posix_acl *dfacl) | ||
22 | { | ||
23 | return 0; | ||
24 | } | ||
25 | #define nfs3_listxattr NULL | ||
26 | #endif /* CONFIG_NFS_V3_ACL */ | ||
27 | |||
28 | /* nfs3client.c */ | ||
29 | struct nfs_server *nfs3_create_server(struct nfs_mount_info *, struct nfs_subversion *); | ||
30 | struct nfs_server *nfs3_clone_server(struct nfs_server *, struct nfs_fh *, | ||
31 | struct nfs_fattr *, rpc_authflavor_t); | ||
32 | |||
33 | |||
34 | #endif /* __LINUX_FS_NFS_NFS3_FS_H */ | ||
diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c index d0fec260132a..658e586ca438 100644 --- a/fs/nfs/nfs3acl.c +++ b/fs/nfs/nfs3acl.c | |||
@@ -7,6 +7,7 @@ | |||
7 | #include <linux/nfsacl.h> | 7 | #include <linux/nfsacl.h> |
8 | 8 | ||
9 | #include "internal.h" | 9 | #include "internal.h" |
10 | #include "nfs3_fs.h" | ||
10 | 11 | ||
11 | #define NFSDBG_FACILITY NFSDBG_PROC | 12 | #define NFSDBG_FACILITY NFSDBG_PROC |
12 | 13 | ||
@@ -129,7 +130,10 @@ static int __nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl, | |||
129 | .rpc_argp = &args, | 130 | .rpc_argp = &args, |
130 | .rpc_resp = &fattr, | 131 | .rpc_resp = &fattr, |
131 | }; | 132 | }; |
132 | int status; | 133 | int status = 0; |
134 | |||
135 | if (acl == NULL && (!S_ISDIR(inode->i_mode) || dfacl == NULL)) | ||
136 | goto out; | ||
133 | 137 | ||
134 | status = -EOPNOTSUPP; | 138 | status = -EOPNOTSUPP; |
135 | if (!nfs_server_capable(inode, NFS_CAP_ACLS)) | 139 | if (!nfs_server_capable(inode, NFS_CAP_ACLS)) |
diff --git a/fs/nfs/nfs3client.c b/fs/nfs/nfs3client.c index b3fc65ef39ca..8c1b437c5403 100644 --- a/fs/nfs/nfs3client.c +++ b/fs/nfs/nfs3client.c | |||
@@ -1,6 +1,7 @@ | |||
1 | #include <linux/nfs_fs.h> | 1 | #include <linux/nfs_fs.h> |
2 | #include <linux/nfs_mount.h> | 2 | #include <linux/nfs_mount.h> |
3 | #include "internal.h" | 3 | #include "internal.h" |
4 | #include "nfs3_fs.h" | ||
4 | 5 | ||
5 | #ifdef CONFIG_NFS_V3_ACL | 6 | #ifdef CONFIG_NFS_V3_ACL |
6 | static struct rpc_stat nfsacl_rpcstat = { &nfsacl_program }; | 7 | static struct rpc_stat nfsacl_rpcstat = { &nfsacl_program }; |
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 809670eba52a..524f9f837408 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c | |||
@@ -22,6 +22,7 @@ | |||
22 | 22 | ||
23 | #include "iostat.h" | 23 | #include "iostat.h" |
24 | #include "internal.h" | 24 | #include "internal.h" |
25 | #include "nfs3_fs.h" | ||
25 | 26 | ||
26 | #define NFSDBG_FACILITY NFSDBG_PROC | 27 | #define NFSDBG_FACILITY NFSDBG_PROC |
27 | 28 | ||
diff --git a/fs/nfs/nfs3super.c b/fs/nfs/nfs3super.c index d6a98949af19..6af29c2da352 100644 --- a/fs/nfs/nfs3super.c +++ b/fs/nfs/nfs3super.c | |||
@@ -4,6 +4,7 @@ | |||
4 | #include <linux/module.h> | 4 | #include <linux/module.h> |
5 | #include <linux/nfs_fs.h> | 5 | #include <linux/nfs_fs.h> |
6 | #include "internal.h" | 6 | #include "internal.h" |
7 | #include "nfs3_fs.h" | ||
7 | #include "nfs.h" | 8 | #include "nfs.h" |
8 | 9 | ||
9 | static struct nfs_subversion nfs_v3 = { | 10 | static struct nfs_subversion nfs_v3 = { |
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 92193eddb41d..a8b855ab4e22 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h | |||
@@ -130,16 +130,15 @@ enum { | |||
130 | */ | 130 | */ |
131 | 131 | ||
132 | struct nfs4_lock_state { | 132 | struct nfs4_lock_state { |
133 | struct list_head ls_locks; /* Other lock stateids */ | 133 | struct list_head ls_locks; /* Other lock stateids */ |
134 | struct nfs4_state * ls_state; /* Pointer to open state */ | 134 | struct nfs4_state * ls_state; /* Pointer to open state */ |
135 | #define NFS_LOCK_INITIALIZED 0 | 135 | #define NFS_LOCK_INITIALIZED 0 |
136 | #define NFS_LOCK_LOST 1 | 136 | #define NFS_LOCK_LOST 1 |
137 | unsigned long ls_flags; | 137 | unsigned long ls_flags; |
138 | struct nfs_seqid_counter ls_seqid; | 138 | struct nfs_seqid_counter ls_seqid; |
139 | nfs4_stateid ls_stateid; | 139 | nfs4_stateid ls_stateid; |
140 | atomic_t ls_count; | 140 | atomic_t ls_count; |
141 | fl_owner_t ls_owner; | 141 | fl_owner_t ls_owner; |
142 | struct work_struct ls_release; | ||
143 | }; | 142 | }; |
144 | 143 | ||
145 | /* bits for nfs4_state->flags */ | 144 | /* bits for nfs4_state->flags */ |
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 53e435a95260..ffdb28d86cf8 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c | |||
@@ -482,6 +482,16 @@ int nfs40_walk_client_list(struct nfs_client *new, | |||
482 | 482 | ||
483 | spin_lock(&nn->nfs_client_lock); | 483 | spin_lock(&nn->nfs_client_lock); |
484 | list_for_each_entry(pos, &nn->nfs_client_list, cl_share_link) { | 484 | list_for_each_entry(pos, &nn->nfs_client_list, cl_share_link) { |
485 | |||
486 | if (pos->rpc_ops != new->rpc_ops) | ||
487 | continue; | ||
488 | |||
489 | if (pos->cl_proto != new->cl_proto) | ||
490 | continue; | ||
491 | |||
492 | if (pos->cl_minorversion != new->cl_minorversion) | ||
493 | continue; | ||
494 | |||
485 | /* If "pos" isn't marked ready, we can't trust the | 495 | /* If "pos" isn't marked ready, we can't trust the |
486 | * remaining fields in "pos" */ | 496 | * remaining fields in "pos" */ |
487 | if (pos->cl_cons_state > NFS_CS_READY) { | 497 | if (pos->cl_cons_state > NFS_CS_READY) { |
@@ -501,15 +511,6 @@ int nfs40_walk_client_list(struct nfs_client *new, | |||
501 | if (pos->cl_cons_state != NFS_CS_READY) | 511 | if (pos->cl_cons_state != NFS_CS_READY) |
502 | continue; | 512 | continue; |
503 | 513 | ||
504 | if (pos->rpc_ops != new->rpc_ops) | ||
505 | continue; | ||
506 | |||
507 | if (pos->cl_proto != new->cl_proto) | ||
508 | continue; | ||
509 | |||
510 | if (pos->cl_minorversion != new->cl_minorversion) | ||
511 | continue; | ||
512 | |||
513 | if (pos->cl_clientid != new->cl_clientid) | 514 | if (pos->cl_clientid != new->cl_clientid) |
514 | continue; | 515 | continue; |
515 | 516 | ||
@@ -622,6 +623,16 @@ int nfs41_walk_client_list(struct nfs_client *new, | |||
622 | 623 | ||
623 | spin_lock(&nn->nfs_client_lock); | 624 | spin_lock(&nn->nfs_client_lock); |
624 | list_for_each_entry(pos, &nn->nfs_client_list, cl_share_link) { | 625 | list_for_each_entry(pos, &nn->nfs_client_list, cl_share_link) { |
626 | |||
627 | if (pos->rpc_ops != new->rpc_ops) | ||
628 | continue; | ||
629 | |||
630 | if (pos->cl_proto != new->cl_proto) | ||
631 | continue; | ||
632 | |||
633 | if (pos->cl_minorversion != new->cl_minorversion) | ||
634 | continue; | ||
635 | |||
625 | /* If "pos" isn't marked ready, we can't trust the | 636 | /* If "pos" isn't marked ready, we can't trust the |
626 | * remaining fields in "pos", especially the client | 637 | * remaining fields in "pos", especially the client |
627 | * ID and serverowner fields. Wait for CREATE_SESSION | 638 | * ID and serverowner fields. Wait for CREATE_SESSION |
@@ -647,15 +658,6 @@ int nfs41_walk_client_list(struct nfs_client *new, | |||
647 | if (pos->cl_cons_state != NFS_CS_READY) | 658 | if (pos->cl_cons_state != NFS_CS_READY) |
648 | continue; | 659 | continue; |
649 | 660 | ||
650 | if (pos->rpc_ops != new->rpc_ops) | ||
651 | continue; | ||
652 | |||
653 | if (pos->cl_proto != new->cl_proto) | ||
654 | continue; | ||
655 | |||
656 | if (pos->cl_minorversion != new->cl_minorversion) | ||
657 | continue; | ||
658 | |||
659 | if (!nfs4_match_clientids(pos, new)) | 661 | if (!nfs4_match_clientids(pos, new)) |
660 | continue; | 662 | continue; |
661 | 663 | ||
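Both walk_client_list hunks above hoist the invariant comparisons (rpc_ops, cl_proto, cl_minorversion) ahead of the "wait for NFS_CS_READY" logic, so the caller never sleeps on a candidate it could never match. The shape of the fix, as a small sketch (struct client and wait_until_ready are stand-ins):

#include <stdbool.h>
#include <stdio.h>

struct client { int proto; int minor; bool ready; };

/* Stand-in for the expensive part: blocking until initialization
 * of the candidate finishes. */
static void wait_until_ready(struct client *c) { c->ready = true; }

static struct client *find_match(struct client *list, int n,
				 int proto, int minor)
{
	for (int i = 0; i < n; i++) {
		struct client *c = &list[i];

		/* Cheap, immutable comparisons first... */
		if (c->proto != proto || c->minor != minor)
			continue;
		/* ...so the expensive wait only runs on real candidates. */
		if (!c->ready)
			wait_until_ready(c);
		return c;
	}
	return NULL;
}

int main(void)
{
	struct client list[2] = { { 6, 0, false }, { 6, 1, false } };
	struct client *c = find_match(list, 2, 6, 1);

	printf("matched minor=%d\n", c ? c->minor : -1);
	return 0;
}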
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 75ae8d22f067..5aa55c132aa2 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c | |||
@@ -77,7 +77,7 @@ struct nfs4_opendata; | |||
77 | static int _nfs4_proc_open(struct nfs4_opendata *data); | 77 | static int _nfs4_proc_open(struct nfs4_opendata *data); |
78 | static int _nfs4_recover_proc_open(struct nfs4_opendata *data); | 78 | static int _nfs4_recover_proc_open(struct nfs4_opendata *data); |
79 | static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); | 79 | static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); |
80 | static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *); | 80 | static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *, long *); |
81 | static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr); | 81 | static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr); |
82 | static int nfs4_proc_getattr(struct nfs_server *, struct nfs_fh *, struct nfs_fattr *, struct nfs4_label *label); | 82 | static int nfs4_proc_getattr(struct nfs_server *, struct nfs_fh *, struct nfs_fattr *, struct nfs4_label *label); |
83 | static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr, struct nfs4_label *label); | 83 | static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr, struct nfs4_label *label); |
@@ -314,20 +314,30 @@ static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dent | |||
314 | kunmap_atomic(start); | 314 | kunmap_atomic(start); |
315 | } | 315 | } |
316 | 316 | ||
317 | static long nfs4_update_delay(long *timeout) | ||
318 | { | ||
319 | long ret; | ||
320 | if (!timeout) | ||
321 | return NFS4_POLL_RETRY_MAX; | ||
322 | if (*timeout <= 0) | ||
323 | *timeout = NFS4_POLL_RETRY_MIN; | ||
324 | if (*timeout > NFS4_POLL_RETRY_MAX) | ||
325 | *timeout = NFS4_POLL_RETRY_MAX; | ||
326 | ret = *timeout; | ||
327 | *timeout <<= 1; | ||
328 | return ret; | ||
329 | } | ||
330 | |||
317 | static int nfs4_delay(struct rpc_clnt *clnt, long *timeout) | 331 | static int nfs4_delay(struct rpc_clnt *clnt, long *timeout) |
318 | { | 332 | { |
319 | int res = 0; | 333 | int res = 0; |
320 | 334 | ||
321 | might_sleep(); | 335 | might_sleep(); |
322 | 336 | ||
323 | if (*timeout <= 0) | 337 | freezable_schedule_timeout_killable_unsafe( |
324 | *timeout = NFS4_POLL_RETRY_MIN; | 338 | nfs4_update_delay(timeout)); |
325 | if (*timeout > NFS4_POLL_RETRY_MAX) | ||
326 | *timeout = NFS4_POLL_RETRY_MAX; | ||
327 | freezable_schedule_timeout_killable_unsafe(*timeout); | ||
328 | if (fatal_signal_pending(current)) | 339 | if (fatal_signal_pending(current)) |
329 | res = -ERESTARTSYS; | 340 | res = -ERESTARTSYS; |
330 | *timeout <<= 1; | ||
331 | return res; | 341 | return res; |
332 | } | 342 | } |
333 | 343 | ||
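The hunk above factors the clamped exponential backoff out of nfs4_delay() into nfs4_update_delay(), so the async error path can reuse it; a NULL timeout means there is no per-call state, in which case the maximum delay is used. A userspace copy of the helper, with illustrative constants in place of the jiffies-based NFS4_POLL_RETRY_MIN/MAX:

#include <stdio.h>

#define POLL_RETRY_MIN 1	/* stand-in values; the real ones are jiffies */
#define POLL_RETRY_MAX 16

/* Returns the delay to use now and doubles the stored timeout for
 * the next retry, clamped to [MIN, MAX]. */
static long update_delay(long *timeout)
{
	long ret;

	if (!timeout)
		return POLL_RETRY_MAX;
	if (*timeout <= 0)
		*timeout = POLL_RETRY_MIN;
	if (*timeout > POLL_RETRY_MAX)
		*timeout = POLL_RETRY_MAX;
	ret = *timeout;
	*timeout <<= 1;
	return ret;
}

int main(void)
{
	long t = 0;

	for (int i = 0; i < 6; i++)
		printf("retry %d sleeps %ld\n", i, update_delay(&t));
	return 0;
}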
@@ -1307,15 +1317,13 @@ static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata) | |||
1307 | int ret = -EAGAIN; | 1317 | int ret = -EAGAIN; |
1308 | 1318 | ||
1309 | for (;;) { | 1319 | for (;;) { |
1320 | spin_lock(&state->owner->so_lock); | ||
1310 | if (can_open_cached(state, fmode, open_mode)) { | 1321 | if (can_open_cached(state, fmode, open_mode)) { |
1311 | spin_lock(&state->owner->so_lock); | 1322 | update_open_stateflags(state, fmode); |
1312 | if (can_open_cached(state, fmode, open_mode)) { | ||
1313 | update_open_stateflags(state, fmode); | ||
1314 | spin_unlock(&state->owner->so_lock); | ||
1315 | goto out_return_state; | ||
1316 | } | ||
1317 | spin_unlock(&state->owner->so_lock); | 1323 | spin_unlock(&state->owner->so_lock); |
1324 | goto out_return_state; | ||
1318 | } | 1325 | } |
1326 | spin_unlock(&state->owner->so_lock); | ||
1319 | rcu_read_lock(); | 1327 | rcu_read_lock(); |
1320 | delegation = rcu_dereference(nfsi->delegation); | 1328 | delegation = rcu_dereference(nfsi->delegation); |
1321 | if (!can_open_delegated(delegation, fmode)) { | 1329 | if (!can_open_delegated(delegation, fmode)) { |
@@ -2226,9 +2234,13 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata, | |||
2226 | ret = _nfs4_proc_open(opendata); | 2234 | ret = _nfs4_proc_open(opendata); |
2227 | if (ret != 0) { | 2235 | if (ret != 0) { |
2228 | if (ret == -ENOENT) { | 2236 | if (ret == -ENOENT) { |
2229 | d_drop(opendata->dentry); | 2237 | dentry = opendata->dentry; |
2230 | d_add(opendata->dentry, NULL); | 2238 | if (dentry->d_inode) |
2231 | nfs_set_verifier(opendata->dentry, | 2239 | d_delete(dentry); |
2240 | else if (d_unhashed(dentry)) | ||
2241 | d_add(dentry, NULL); | ||
2242 | |||
2243 | nfs_set_verifier(dentry, | ||
2232 | nfs_save_change_attribute(opendata->dir->d_inode)); | 2244 | nfs_save_change_attribute(opendata->dir->d_inode)); |
2233 | } | 2245 | } |
2234 | goto out; | 2246 | goto out; |
@@ -2560,6 +2572,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data) | |||
2560 | struct nfs4_closedata *calldata = data; | 2572 | struct nfs4_closedata *calldata = data; |
2561 | struct nfs4_state *state = calldata->state; | 2573 | struct nfs4_state *state = calldata->state; |
2562 | struct nfs_server *server = NFS_SERVER(calldata->inode); | 2574 | struct nfs_server *server = NFS_SERVER(calldata->inode); |
2575 | nfs4_stateid *res_stateid = NULL; | ||
2563 | 2576 | ||
2564 | dprintk("%s: begin!\n", __func__); | 2577 | dprintk("%s: begin!\n", __func__); |
2565 | if (!nfs4_sequence_done(task, &calldata->res.seq_res)) | 2578 | if (!nfs4_sequence_done(task, &calldata->res.seq_res)) |
@@ -2570,12 +2583,12 @@ static void nfs4_close_done(struct rpc_task *task, void *data) | |||
2570 | */ | 2583 | */ |
2571 | switch (task->tk_status) { | 2584 | switch (task->tk_status) { |
2572 | case 0: | 2585 | case 0: |
2573 | if (calldata->roc) | 2586 | res_stateid = &calldata->res.stateid; |
2587 | if (calldata->arg.fmode == 0 && calldata->roc) | ||
2574 | pnfs_roc_set_barrier(state->inode, | 2588 | pnfs_roc_set_barrier(state->inode, |
2575 | calldata->roc_barrier); | 2589 | calldata->roc_barrier); |
2576 | nfs_clear_open_stateid(state, &calldata->res.stateid, 0); | ||
2577 | renew_lease(server, calldata->timestamp); | 2590 | renew_lease(server, calldata->timestamp); |
2578 | goto out_release; | 2591 | break; |
2579 | case -NFS4ERR_ADMIN_REVOKED: | 2592 | case -NFS4ERR_ADMIN_REVOKED: |
2580 | case -NFS4ERR_STALE_STATEID: | 2593 | case -NFS4ERR_STALE_STATEID: |
2581 | case -NFS4ERR_OLD_STATEID: | 2594 | case -NFS4ERR_OLD_STATEID: |
@@ -2584,12 +2597,12 @@ static void nfs4_close_done(struct rpc_task *task, void *data) | |||
2584 | if (calldata->arg.fmode == 0) | 2597 | if (calldata->arg.fmode == 0) |
2585 | break; | 2598 | break; |
2586 | default: | 2599 | default: |
2587 | if (nfs4_async_handle_error(task, server, state) == -EAGAIN) { | 2600 | if (nfs4_async_handle_error(task, server, state, NULL) == -EAGAIN) { |
2588 | rpc_restart_call_prepare(task); | 2601 | rpc_restart_call_prepare(task); |
2589 | goto out_release; | 2602 | goto out_release; |
2590 | } | 2603 | } |
2591 | } | 2604 | } |
2592 | nfs_clear_open_stateid(state, NULL, calldata->arg.fmode); | 2605 | nfs_clear_open_stateid(state, res_stateid, calldata->arg.fmode); |
2593 | out_release: | 2606 | out_release: |
2594 | nfs_release_seqid(calldata->arg.seqid); | 2607 | nfs_release_seqid(calldata->arg.seqid); |
2595 | nfs_refresh_inode(calldata->inode, calldata->res.fattr); | 2608 | nfs_refresh_inode(calldata->inode, calldata->res.fattr); |
@@ -2601,6 +2614,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) | |||
2601 | struct nfs4_closedata *calldata = data; | 2614 | struct nfs4_closedata *calldata = data; |
2602 | struct nfs4_state *state = calldata->state; | 2615 | struct nfs4_state *state = calldata->state; |
2603 | struct inode *inode = calldata->inode; | 2616 | struct inode *inode = calldata->inode; |
2617 | bool is_rdonly, is_wronly, is_rdwr; | ||
2604 | int call_close = 0; | 2618 | int call_close = 0; |
2605 | 2619 | ||
2606 | dprintk("%s: begin!\n", __func__); | 2620 | dprintk("%s: begin!\n", __func__); |
@@ -2608,21 +2622,27 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) | |||
2608 | goto out_wait; | 2622 | goto out_wait; |
2609 | 2623 | ||
2610 | task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE]; | 2624 | task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE]; |
2611 | calldata->arg.fmode = FMODE_READ|FMODE_WRITE; | ||
2612 | spin_lock(&state->owner->so_lock); | 2625 | spin_lock(&state->owner->so_lock); |
2626 | is_rdwr = test_bit(NFS_O_RDWR_STATE, &state->flags); | ||
2627 | is_rdonly = test_bit(NFS_O_RDONLY_STATE, &state->flags); | ||
2628 | is_wronly = test_bit(NFS_O_WRONLY_STATE, &state->flags); | ||
2613 | /* Calculate the change in open mode */ | 2629 | /* Calculate the change in open mode */ |
2630 | calldata->arg.fmode = 0; | ||
2614 | if (state->n_rdwr == 0) { | 2631 | if (state->n_rdwr == 0) { |
2615 | if (state->n_rdonly == 0) { | 2632 | if (state->n_rdonly == 0) |
2616 | call_close |= test_bit(NFS_O_RDONLY_STATE, &state->flags); | 2633 | call_close |= is_rdonly; |
2617 | call_close |= test_bit(NFS_O_RDWR_STATE, &state->flags); | 2634 | else if (is_rdonly) |
2618 | calldata->arg.fmode &= ~FMODE_READ; | 2635 | calldata->arg.fmode |= FMODE_READ; |
2619 | } | 2636 | if (state->n_wronly == 0) |
2620 | if (state->n_wronly == 0) { | 2637 | call_close |= is_wronly; |
2621 | call_close |= test_bit(NFS_O_WRONLY_STATE, &state->flags); | 2638 | else if (is_wronly) |
2622 | call_close |= test_bit(NFS_O_RDWR_STATE, &state->flags); | 2639 | calldata->arg.fmode |= FMODE_WRITE; |
2623 | calldata->arg.fmode &= ~FMODE_WRITE; | 2640 | } else if (is_rdwr) |
2624 | } | 2641 | calldata->arg.fmode |= FMODE_READ|FMODE_WRITE; |
2625 | } | 2642 | |
2643 | if (calldata->arg.fmode == 0) | ||
2644 | call_close |= is_rdwr; | ||
2645 | |||
2626 | if (!nfs4_valid_open_stateid(state)) | 2646 | if (!nfs4_valid_open_stateid(state)) |
2627 | call_close = 0; | 2647 | call_close = 0; |
2628 | spin_unlock(&state->owner->so_lock); | 2648 | spin_unlock(&state->owner->so_lock); |
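The nfs4_close_prepare() hunk above rebuilds calldata->arg.fmode from the remaining open counters instead of starting from FMODE_READ|FMODE_WRITE and masking bits off; fmode == 0 now reliably means a full CLOSE rather than an OPEN_DOWNGRADE. A simplified sketch of the counter-to-fmode mapping (the call_close bookkeeping against the state flags is deliberately omitted):

#include <stdio.h>

#define FMODE_READ  1
#define FMODE_WRITE 2

/* Any nonzero counter keeps that mode open; an fmode of 0 means
 * nothing is left and the state can be fully closed. */
static int downgrade_fmode(int n_rdwr, int n_rdonly, int n_wronly)
{
	int fmode = 0;

	if (n_rdwr == 0) {
		if (n_rdonly)
			fmode |= FMODE_READ;
		if (n_wronly)
			fmode |= FMODE_WRITE;
	} else {
		fmode |= FMODE_READ | FMODE_WRITE;
	}
	return fmode;
}

int main(void)
{
	printf("full close: fmode=%d\n", downgrade_fmode(0, 0, 0));
	printf("keep read:  fmode=%d\n", downgrade_fmode(0, 2, 0));
	return 0;
}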
@@ -3205,7 +3225,9 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, | |||
3205 | struct nfs4_label *label = NULL; | 3225 | struct nfs4_label *label = NULL; |
3206 | int status; | 3226 | int status; |
3207 | 3227 | ||
3208 | if (pnfs_ld_layoutret_on_setattr(inode)) | 3228 | if (pnfs_ld_layoutret_on_setattr(inode) && |
3229 | sattr->ia_valid & ATTR_SIZE && | ||
3230 | sattr->ia_size < i_size_read(inode)) | ||
3209 | pnfs_commit_and_return_layout(inode); | 3231 | pnfs_commit_and_return_layout(inode); |
3210 | 3232 | ||
3211 | nfs_fattr_init(fattr); | 3233 | nfs_fattr_init(fattr); |
@@ -3564,7 +3586,8 @@ static int nfs4_proc_unlink_done(struct rpc_task *task, struct inode *dir) | |||
3564 | 3586 | ||
3565 | if (!nfs4_sequence_done(task, &res->seq_res)) | 3587 | if (!nfs4_sequence_done(task, &res->seq_res)) |
3566 | return 0; | 3588 | return 0; |
3567 | if (nfs4_async_handle_error(task, res->server, NULL) == -EAGAIN) | 3589 | if (nfs4_async_handle_error(task, res->server, NULL, |
3590 | &data->timeout) == -EAGAIN) | ||
3568 | return 0; | 3591 | return 0; |
3569 | update_changeattr(dir, &res->cinfo); | 3592 | update_changeattr(dir, &res->cinfo); |
3570 | return 1; | 3593 | return 1; |
@@ -3597,7 +3620,7 @@ static int nfs4_proc_rename_done(struct rpc_task *task, struct inode *old_dir, | |||
3597 | 3620 | ||
3598 | if (!nfs4_sequence_done(task, &res->seq_res)) | 3621 | if (!nfs4_sequence_done(task, &res->seq_res)) |
3599 | return 0; | 3622 | return 0; |
3600 | if (nfs4_async_handle_error(task, res->server, NULL) == -EAGAIN) | 3623 | if (nfs4_async_handle_error(task, res->server, NULL, &data->timeout) == -EAGAIN) |
3601 | return 0; | 3624 | return 0; |
3602 | 3625 | ||
3603 | update_changeattr(old_dir, &res->old_cinfo); | 3626 | update_changeattr(old_dir, &res->old_cinfo); |
@@ -4101,7 +4124,8 @@ static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_pgio_header *hdr) | |||
4101 | 4124 | ||
4102 | trace_nfs4_read(hdr, task->tk_status); | 4125 | trace_nfs4_read(hdr, task->tk_status); |
4103 | if (nfs4_async_handle_error(task, server, | 4126 | if (nfs4_async_handle_error(task, server, |
4104 | hdr->args.context->state) == -EAGAIN) { | 4127 | hdr->args.context->state, |
4128 | NULL) == -EAGAIN) { | ||
4105 | rpc_restart_call_prepare(task); | 4129 | rpc_restart_call_prepare(task); |
4106 | return -EAGAIN; | 4130 | return -EAGAIN; |
4107 | } | 4131 | } |
@@ -4169,10 +4193,11 @@ static int nfs4_write_done_cb(struct rpc_task *task, | |||
4169 | struct nfs_pgio_header *hdr) | 4193 | struct nfs_pgio_header *hdr) |
4170 | { | 4194 | { |
4171 | struct inode *inode = hdr->inode; | 4195 | struct inode *inode = hdr->inode; |
4172 | 4196 | ||
4173 | trace_nfs4_write(hdr, task->tk_status); | 4197 | trace_nfs4_write(hdr, task->tk_status); |
4174 | if (nfs4_async_handle_error(task, NFS_SERVER(inode), | 4198 | if (nfs4_async_handle_error(task, NFS_SERVER(inode), |
4175 | hdr->args.context->state) == -EAGAIN) { | 4199 | hdr->args.context->state, |
4200 | NULL) == -EAGAIN) { | ||
4176 | rpc_restart_call_prepare(task); | 4201 | rpc_restart_call_prepare(task); |
4177 | return -EAGAIN; | 4202 | return -EAGAIN; |
4178 | } | 4203 | } |
@@ -4252,7 +4277,8 @@ static int nfs4_commit_done_cb(struct rpc_task *task, struct nfs_commit_data *da | |||
4252 | struct inode *inode = data->inode; | 4277 | struct inode *inode = data->inode; |
4253 | 4278 | ||
4254 | trace_nfs4_commit(data, task->tk_status); | 4279 | trace_nfs4_commit(data, task->tk_status); |
4255 | if (nfs4_async_handle_error(task, NFS_SERVER(inode), NULL) == -EAGAIN) { | 4280 | if (nfs4_async_handle_error(task, NFS_SERVER(inode), |
4281 | NULL, NULL) == -EAGAIN) { | ||
4256 | rpc_restart_call_prepare(task); | 4282 | rpc_restart_call_prepare(task); |
4257 | return -EAGAIN; | 4283 | return -EAGAIN; |
4258 | } | 4284 | } |
@@ -4805,7 +4831,8 @@ out: | |||
4805 | 4831 | ||
4806 | 4832 | ||
4807 | static int | 4833 | static int |
4808 | nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, struct nfs4_state *state) | 4834 | nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, |
4835 | struct nfs4_state *state, long *timeout) | ||
4809 | { | 4836 | { |
4810 | struct nfs_client *clp = server->nfs_client; | 4837 | struct nfs_client *clp = server->nfs_client; |
4811 | 4838 | ||
@@ -4855,6 +4882,8 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, | |||
4855 | #endif /* CONFIG_NFS_V4_1 */ | 4882 | #endif /* CONFIG_NFS_V4_1 */ |
4856 | case -NFS4ERR_DELAY: | 4883 | case -NFS4ERR_DELAY: |
4857 | nfs_inc_server_stats(server, NFSIOS_DELAY); | 4884 | nfs_inc_server_stats(server, NFSIOS_DELAY); |
4885 | rpc_delay(task, nfs4_update_delay(timeout)); | ||
4886 | goto restart_call; | ||
4858 | case -NFS4ERR_GRACE: | 4887 | case -NFS4ERR_GRACE: |
4859 | rpc_delay(task, NFS4_POLL_RETRY_MAX); | 4888 | rpc_delay(task, NFS4_POLL_RETRY_MAX); |
4860 | case -NFS4ERR_RETRY_UNCACHED_REP: | 4889 | case -NFS4ERR_RETRY_UNCACHED_REP: |
@@ -5095,8 +5124,8 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata) | |||
5095 | pnfs_roc_set_barrier(data->inode, data->roc_barrier); | 5124 | pnfs_roc_set_barrier(data->inode, data->roc_barrier); |
5096 | break; | 5125 | break; |
5097 | default: | 5126 | default: |
5098 | if (nfs4_async_handle_error(task, data->res.server, NULL) == | 5127 | if (nfs4_async_handle_error(task, data->res.server, |
5099 | -EAGAIN) { | 5128 | NULL, NULL) == -EAGAIN) { |
5100 | rpc_restart_call_prepare(task); | 5129 | rpc_restart_call_prepare(task); |
5101 | return; | 5130 | return; |
5102 | } | 5131 | } |
@@ -5360,7 +5389,8 @@ static void nfs4_locku_done(struct rpc_task *task, void *data) | |||
5360 | case -NFS4ERR_EXPIRED: | 5389 | case -NFS4ERR_EXPIRED: |
5361 | break; | 5390 | break; |
5362 | default: | 5391 | default: |
5363 | if (nfs4_async_handle_error(task, calldata->server, NULL) == -EAGAIN) | 5392 | if (nfs4_async_handle_error(task, calldata->server, |
5393 | NULL, NULL) == -EAGAIN) | ||
5364 | rpc_restart_call_prepare(task); | 5394 | rpc_restart_call_prepare(task); |
5365 | } | 5395 | } |
5366 | nfs_release_seqid(calldata->arg.seqid); | 5396 | nfs_release_seqid(calldata->arg.seqid); |
@@ -5966,7 +5996,8 @@ static void nfs4_release_lockowner_done(struct rpc_task *task, void *calldata) | |||
5966 | break; | 5996 | break; |
5967 | case -NFS4ERR_LEASE_MOVED: | 5997 | case -NFS4ERR_LEASE_MOVED: |
5968 | case -NFS4ERR_DELAY: | 5998 | case -NFS4ERR_DELAY: |
5969 | if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) | 5999 | if (nfs4_async_handle_error(task, server, |
6000 | NULL, NULL) == -EAGAIN) | ||
5970 | rpc_restart_call_prepare(task); | 6001 | rpc_restart_call_prepare(task); |
5971 | } | 6002 | } |
5972 | } | 6003 | } |
@@ -7341,7 +7372,7 @@ static int nfs41_proc_async_sequence(struct nfs_client *clp, struct rpc_cred *cr | |||
7341 | int ret = 0; | 7372 | int ret = 0; |
7342 | 7373 | ||
7343 | if ((renew_flags & NFS4_RENEW_TIMEOUT) == 0) | 7374 | if ((renew_flags & NFS4_RENEW_TIMEOUT) == 0) |
7344 | return 0; | 7375 | return -EAGAIN; |
7345 | task = _nfs41_proc_sequence(clp, cred, false); | 7376 | task = _nfs41_proc_sequence(clp, cred, false); |
7346 | if (IS_ERR(task)) | 7377 | if (IS_ERR(task)) |
7347 | ret = PTR_ERR(task); | 7378 | ret = PTR_ERR(task); |
@@ -7571,14 +7602,19 @@ static void nfs4_layoutget_done(struct rpc_task *task, void *calldata) | |||
7571 | } else { | 7602 | } else { |
7572 | LIST_HEAD(head); | 7603 | LIST_HEAD(head); |
7573 | 7604 | ||
7605 | /* | ||
7606 | * Mark the bad layout state as invalid, then retry | ||
7607 | * with the current stateid. | ||
7608 | */ | ||
7574 | pnfs_mark_matching_lsegs_invalid(lo, &head, NULL); | 7609 | pnfs_mark_matching_lsegs_invalid(lo, &head, NULL); |
7575 | spin_unlock(&inode->i_lock); | 7610 | spin_unlock(&inode->i_lock); |
7576 | /* Mark the bad layout state as invalid, then | ||
7577 | * retry using the open stateid. */ | ||
7578 | pnfs_free_lseg_list(&head); | 7611 | pnfs_free_lseg_list(&head); |
7612 | |||
7613 | task->tk_status = 0; | ||
7614 | rpc_restart_call_prepare(task); | ||
7579 | } | 7615 | } |
7580 | } | 7616 | } |
7581 | if (nfs4_async_handle_error(task, server, state) == -EAGAIN) | 7617 | if (nfs4_async_handle_error(task, server, state, NULL) == -EAGAIN) |
7582 | rpc_restart_call_prepare(task); | 7618 | rpc_restart_call_prepare(task); |
7583 | out: | 7619 | out: |
7584 | dprintk("<-- %s\n", __func__); | 7620 | dprintk("<-- %s\n", __func__); |
@@ -7738,7 +7774,7 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata) | |||
7738 | case 0: | 7774 | case 0: |
7739 | break; | 7775 | break; |
7740 | case -NFS4ERR_DELAY: | 7776 | case -NFS4ERR_DELAY: |
7741 | if (nfs4_async_handle_error(task, server, NULL) != -EAGAIN) | 7777 | if (nfs4_async_handle_error(task, server, NULL, NULL) != -EAGAIN) |
7742 | break; | 7778 | break; |
7743 | rpc_restart_call_prepare(task); | 7779 | rpc_restart_call_prepare(task); |
7744 | return; | 7780 | return; |
@@ -7797,54 +7833,6 @@ int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp) | |||
7797 | return status; | 7833 | return status; |
7798 | } | 7834 | } |
7799 | 7835 | ||
7800 | /* | ||
7801 | * Retrieve the list of Data Server devices from the MDS. | ||
7802 | */ | ||
7803 | static int _nfs4_getdevicelist(struct nfs_server *server, | ||
7804 | const struct nfs_fh *fh, | ||
7805 | struct pnfs_devicelist *devlist) | ||
7806 | { | ||
7807 | struct nfs4_getdevicelist_args args = { | ||
7808 | .fh = fh, | ||
7809 | .layoutclass = server->pnfs_curr_ld->id, | ||
7810 | }; | ||
7811 | struct nfs4_getdevicelist_res res = { | ||
7812 | .devlist = devlist, | ||
7813 | }; | ||
7814 | struct rpc_message msg = { | ||
7815 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETDEVICELIST], | ||
7816 | .rpc_argp = &args, | ||
7817 | .rpc_resp = &res, | ||
7818 | }; | ||
7819 | int status; | ||
7820 | |||
7821 | dprintk("--> %s\n", __func__); | ||
7822 | status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, | ||
7823 | &res.seq_res, 0); | ||
7824 | dprintk("<-- %s status=%d\n", __func__, status); | ||
7825 | return status; | ||
7826 | } | ||
7827 | |||
7828 | int nfs4_proc_getdevicelist(struct nfs_server *server, | ||
7829 | const struct nfs_fh *fh, | ||
7830 | struct pnfs_devicelist *devlist) | ||
7831 | { | ||
7832 | struct nfs4_exception exception = { }; | ||
7833 | int err; | ||
7834 | |||
7835 | do { | ||
7836 | err = nfs4_handle_exception(server, | ||
7837 | _nfs4_getdevicelist(server, fh, devlist), | ||
7838 | &exception); | ||
7839 | } while (exception.retry); | ||
7840 | |||
7841 | dprintk("%s: err=%d, num_devs=%u\n", __func__, | ||
7842 | err, devlist->num_devs); | ||
7843 | |||
7844 | return err; | ||
7845 | } | ||
7846 | EXPORT_SYMBOL_GPL(nfs4_proc_getdevicelist); | ||
7847 | |||
7848 | static int | 7836 | static int |
7849 | _nfs4_proc_getdeviceinfo(struct nfs_server *server, | 7837 | _nfs4_proc_getdeviceinfo(struct nfs_server *server, |
7850 | struct pnfs_device *pdev, | 7838 | struct pnfs_device *pdev, |
@@ -7917,7 +7905,7 @@ nfs4_layoutcommit_done(struct rpc_task *task, void *calldata) | |||
7917 | case 0: | 7905 | case 0: |
7918 | break; | 7906 | break; |
7919 | default: | 7907 | default: |
7920 | if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) { | 7908 | if (nfs4_async_handle_error(task, server, NULL, NULL) == -EAGAIN) { |
7921 | rpc_restart_call_prepare(task); | 7909 | rpc_restart_call_prepare(task); |
7922 | return; | 7910 | return; |
7923 | } | 7911 | } |
@@ -8213,7 +8201,7 @@ static void nfs41_free_stateid_done(struct rpc_task *task, void *calldata) | |||
8213 | 8201 | ||
8214 | switch (task->tk_status) { | 8202 | switch (task->tk_status) { |
8215 | case -NFS4ERR_DELAY: | 8203 | case -NFS4ERR_DELAY: |
8216 | if (nfs4_async_handle_error(task, data->server, NULL) == -EAGAIN) | 8204 | if (nfs4_async_handle_error(task, data->server, NULL, NULL) == -EAGAIN) |
8217 | rpc_restart_call_prepare(task); | 8205 | rpc_restart_call_prepare(task); |
8218 | } | 8206 | } |
8219 | } | 8207 | } |
diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c index 1720d32ffa54..e1ba58c3d1ad 100644 --- a/fs/nfs/nfs4renewd.c +++ b/fs/nfs/nfs4renewd.c | |||
@@ -88,10 +88,18 @@ nfs4_renew_state(struct work_struct *work) | |||
88 | } | 88 | } |
89 | nfs_expire_all_delegations(clp); | 89 | nfs_expire_all_delegations(clp); |
90 | } else { | 90 | } else { |
91 | int ret; | ||
92 | |||
91 | /* Queue an asynchronous RENEW. */ | 93 | /* Queue an asynchronous RENEW. */ |
92 | ops->sched_state_renewal(clp, cred, renew_flags); | 94 | ret = ops->sched_state_renewal(clp, cred, renew_flags); |
93 | put_rpccred(cred); | 95 | put_rpccred(cred); |
94 | goto out_exp; | 96 | switch (ret) { |
97 | default: | ||
98 | goto out_exp; | ||
99 | case -EAGAIN: | ||
100 | case -ENOMEM: | ||
101 | break; | ||
102 | } | ||
95 | } | 103 | } |
96 | } else { | 104 | } else { |
97 | dprintk("%s: failed to call renewd. Reason: lease not expired \n", | 105 | dprintk("%s: failed to call renewd. Reason: lease not expired \n", |
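The nfs4renewd.c change above stops treating sched_state_renewal() as fire-and-forget: its return value is now inspected, and the transient failures -EAGAIN and -ENOMEM fall out of the switch so the renewal is retried rather than silently dropped. A minimal userspace sketch of that control flow, with stand-in names rather than the kernel API:

#include <errno.h>
#include <stdio.h>

/* Hypothetical async scheduler: returns 0 on success, -errno on failure. */
static int sched_renewal(int fail_with)
{
        return fail_with ? -fail_with : 0;
}

/* Mirrors the hunk's switch: transient errors fall through so the
 * caller can reschedule; anything else means "queued, we're done". */
static void renew_state(int fail_with)
{
        int ret = sched_renewal(fail_with);

        switch (ret) {
        default:
                printf("renewal queued (ret=%d)\n", ret);
                return;
        case -EAGAIN:
        case -ENOMEM:
                break;
        }
        printf("transient failure %d, rescheduling soon\n", ret);
}

int main(void)
{
        renew_state(0);
        renew_state(ENOMEM);
        return 0;
}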
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index a043f618cd5a..5194933ed419 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c | |||
@@ -799,18 +799,6 @@ __nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner) | |||
799 | return NULL; | 799 | return NULL; |
800 | } | 800 | } |
801 | 801 | ||
802 | static void | ||
803 | free_lock_state_work(struct work_struct *work) | ||
804 | { | ||
805 | struct nfs4_lock_state *lsp = container_of(work, | ||
806 | struct nfs4_lock_state, ls_release); | ||
807 | struct nfs4_state *state = lsp->ls_state; | ||
808 | struct nfs_server *server = state->owner->so_server; | ||
809 | struct nfs_client *clp = server->nfs_client; | ||
810 | |||
811 | clp->cl_mvops->free_lock_state(server, lsp); | ||
812 | } | ||
813 | |||
814 | /* | 802 | /* |
815 | * Return a compatible lock_state. If no initialized lock_state structure | 803 | * Return a compatible lock_state. If no initialized lock_state structure |
816 | * exists, return an uninitialized one. | 804 | * exists, return an uninitialized one. |
@@ -832,7 +820,6 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f | |||
832 | if (lsp->ls_seqid.owner_id < 0) | 820 | if (lsp->ls_seqid.owner_id < 0) |
833 | goto out_free; | 821 | goto out_free; |
834 | INIT_LIST_HEAD(&lsp->ls_locks); | 822 | INIT_LIST_HEAD(&lsp->ls_locks); |
835 | INIT_WORK(&lsp->ls_release, free_lock_state_work); | ||
836 | return lsp; | 823 | return lsp; |
837 | out_free: | 824 | out_free: |
838 | kfree(lsp); | 825 | kfree(lsp); |
@@ -896,12 +883,13 @@ void nfs4_put_lock_state(struct nfs4_lock_state *lsp) | |||
896 | if (list_empty(&state->lock_states)) | 883 | if (list_empty(&state->lock_states)) |
897 | clear_bit(LK_STATE_IN_USE, &state->flags); | 884 | clear_bit(LK_STATE_IN_USE, &state->flags); |
898 | spin_unlock(&state->state_lock); | 885 | spin_unlock(&state->state_lock); |
899 | if (test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags)) | 886 | server = state->owner->so_server; |
900 | queue_work(nfsiod_workqueue, &lsp->ls_release); | 887 | if (test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags)) { |
901 | else { | 888 | struct nfs_client *clp = server->nfs_client; |
902 | server = state->owner->so_server; | 889 | |
890 | clp->cl_mvops->free_lock_state(server, lsp); | ||
891 | } else | ||
903 | nfs4_free_lock_state(server, lsp); | 892 | nfs4_free_lock_state(server, lsp); |
904 | } | ||
905 | } | 893 | } |
906 | 894 | ||
907 | static void nfs4_fl_copy_lock(struct file_lock *dst, struct file_lock *src) | 895 | static void nfs4_fl_copy_lock(struct file_lock *dst, struct file_lock *src) |
@@ -1717,7 +1705,8 @@ restart: | |||
1717 | if (status < 0) { | 1705 | if (status < 0) { |
1718 | set_bit(ops->owner_flag_bit, &sp->so_flags); | 1706 | set_bit(ops->owner_flag_bit, &sp->so_flags); |
1719 | nfs4_put_state_owner(sp); | 1707 | nfs4_put_state_owner(sp); |
1720 | return nfs4_recovery_handle_error(clp, status); | 1708 | status = nfs4_recovery_handle_error(clp, status); |
1709 | return (status != 0) ? status : -EAGAIN; | ||
1721 | } | 1710 | } |
1722 | 1711 | ||
1723 | nfs4_put_state_owner(sp); | 1712 | nfs4_put_state_owner(sp); |
@@ -1726,7 +1715,7 @@ restart: | |||
1726 | spin_unlock(&clp->cl_lock); | 1715 | spin_unlock(&clp->cl_lock); |
1727 | } | 1716 | } |
1728 | rcu_read_unlock(); | 1717 | rcu_read_unlock(); |
1729 | return status; | 1718 | return 0; |
1730 | } | 1719 | } |
1731 | 1720 | ||
1732 | static int nfs4_check_lease(struct nfs_client *clp) | 1721 | static int nfs4_check_lease(struct nfs_client *clp) |
@@ -1773,7 +1762,6 @@ static int nfs4_handle_reclaim_lease_error(struct nfs_client *clp, int status) | |||
1773 | break; | 1762 | break; |
1774 | case -NFS4ERR_STALE_CLIENTID: | 1763 | case -NFS4ERR_STALE_CLIENTID: |
1775 | clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state); | 1764 | clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state); |
1776 | nfs4_state_clear_reclaim_reboot(clp); | ||
1777 | nfs4_state_start_reclaim_reboot(clp); | 1765 | nfs4_state_start_reclaim_reboot(clp); |
1778 | break; | 1766 | break; |
1779 | case -NFS4ERR_CLID_INUSE: | 1767 | case -NFS4ERR_CLID_INUSE: |
@@ -2357,6 +2345,7 @@ static void nfs4_state_manager(struct nfs_client *clp) | |||
2357 | status = nfs4_check_lease(clp); | 2345 | status = nfs4_check_lease(clp); |
2358 | if (status < 0) | 2346 | if (status < 0) |
2359 | goto out_error; | 2347 | goto out_error; |
2348 | continue; | ||
2360 | } | 2349 | } |
2361 | 2350 | ||
2362 | if (test_and_clear_bit(NFS4CLNT_MOVED, &clp->cl_state)) { | 2351 | if (test_and_clear_bit(NFS4CLNT_MOVED, &clp->cl_state)) { |
@@ -2378,14 +2367,11 @@ static void nfs4_state_manager(struct nfs_client *clp) | |||
2378 | section = "reclaim reboot"; | 2367 | section = "reclaim reboot"; |
2379 | status = nfs4_do_reclaim(clp, | 2368 | status = nfs4_do_reclaim(clp, |
2380 | clp->cl_mvops->reboot_recovery_ops); | 2369 | clp->cl_mvops->reboot_recovery_ops); |
2381 | if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) || | 2370 | if (status == -EAGAIN) |
2382 | test_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state)) | ||
2383 | continue; | ||
2384 | nfs4_state_end_reclaim_reboot(clp); | ||
2385 | if (test_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state)) | ||
2386 | continue; | 2371 | continue; |
2387 | if (status < 0) | 2372 | if (status < 0) |
2388 | goto out_error; | 2373 | goto out_error; |
2374 | nfs4_state_end_reclaim_reboot(clp); | ||
2389 | } | 2375 | } |
2390 | 2376 | ||
2391 | /* Now recover expired state... */ | 2377 | /* Now recover expired state... */ |
@@ -2393,9 +2379,7 @@ static void nfs4_state_manager(struct nfs_client *clp) | |||
2393 | section = "reclaim nograce"; | 2379 | section = "reclaim nograce"; |
2394 | status = nfs4_do_reclaim(clp, | 2380 | status = nfs4_do_reclaim(clp, |
2395 | clp->cl_mvops->nograce_recovery_ops); | 2381 | clp->cl_mvops->nograce_recovery_ops); |
2396 | if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) || | 2382 | if (status == -EAGAIN) |
2397 | test_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state) || | ||
2398 | test_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state)) | ||
2399 | continue; | 2383 | continue; |
2400 | if (status < 0) | 2384 | if (status < 0) |
2401 | goto out_error; | 2385 | goto out_error; |
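Across the nfs4state.c hunks, the ad-hoc tests of NFS4CLNT_LEASE_EXPIRED, NFS4CLNT_SESSION_RESET and NFS4CLNT_RECLAIM_* bits collapse into a single convention: nfs4_do_reclaim() returns -EAGAIN when the state manager should start its loop over, and 0 once a pass completes. A toy loop with the same shape (do_reclaim() here is a stand-in, not the kernel function):

#include <errno.h>
#include <stdio.h>

static int attempts;

/* Stand-in for nfs4_do_reclaim(): -EAGAIN means "state changed
 * underneath us, run the manager loop again from the top". */
static int do_reclaim(void)
{
        return attempts++ < 2 ? -EAGAIN : 0;
}

int main(void)
{
        for (;;) {
                int status = do_reclaim();

                if (status == -EAGAIN)
                        continue;       /* restart the loop */
                if (status < 0) {
                        fprintf(stderr, "recovery error %d\n", status);
                        return 1;
                }
                puts("reclaim complete");
                break;
        }
        return 0;
}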
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index e13b59d8d9aa..005d03c5d274 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c | |||
@@ -362,25 +362,19 @@ static int nfs4_stat_to_errno(int); | |||
362 | XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 5) | 362 | XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 5) |
363 | #define encode_reclaim_complete_maxsz (op_encode_hdr_maxsz + 4) | 363 | #define encode_reclaim_complete_maxsz (op_encode_hdr_maxsz + 4) |
364 | #define decode_reclaim_complete_maxsz (op_decode_hdr_maxsz + 4) | 364 | #define decode_reclaim_complete_maxsz (op_decode_hdr_maxsz + 4) |
365 | #define encode_getdevicelist_maxsz (op_encode_hdr_maxsz + 4 + \ | 365 | #define encode_getdeviceinfo_maxsz (op_encode_hdr_maxsz + \ |
366 | encode_verifier_maxsz) | 366 | XDR_QUADLEN(NFS4_DEVICEID4_SIZE) + \ |
367 | #define decode_getdevicelist_maxsz (op_decode_hdr_maxsz + \ | 367 | 1 /* layout type */ + \ |
368 | 2 /* nfs_cookie4 gdlr_cookie */ + \ | 368 | 1 /* maxcount */ + \ |
369 | decode_verifier_maxsz \ | 369 | 1 /* bitmap size */ + \ |
370 | /* verifier4 gdlr_verifier */ + \ | 370 | 1 /* notification bitmap length */ + \ |
371 | 1 /* gdlr_deviceid_list count */ + \ | 371 | 1 /* notification bitmap, word 0 */) |
372 | XDR_QUADLEN(NFS4_PNFS_GETDEVLIST_MAXNUM * \ | ||
373 | NFS4_DEVICEID4_SIZE) \ | ||
374 | /* gdlr_deviceid_list */ + \ | ||
375 | 1 /* bool gdlr_eof */) | ||
376 | #define encode_getdeviceinfo_maxsz (op_encode_hdr_maxsz + 4 + \ | ||
377 | XDR_QUADLEN(NFS4_DEVICEID4_SIZE)) | ||
378 | #define decode_getdeviceinfo_maxsz (op_decode_hdr_maxsz + \ | 372 | #define decode_getdeviceinfo_maxsz (op_decode_hdr_maxsz + \ |
379 | 1 /* layout type */ + \ | 373 | 1 /* layout type */ + \ |
380 | 1 /* opaque devaddr4 length */ + \ | 374 | 1 /* opaque devaddr4 length */ + \ |
381 | /* devaddr4 payload is read into page */ \ | 375 | /* devaddr4 payload is read into page */ \ |
382 | 1 /* notification bitmap length */ + \ | 376 | 1 /* notification bitmap length */ + \ |
383 | 1 /* notification bitmap */) | 377 | 1 /* notification bitmap, word 0 */) |
384 | #define encode_layoutget_maxsz (op_encode_hdr_maxsz + 10 + \ | 378 | #define encode_layoutget_maxsz (op_encode_hdr_maxsz + 10 + \ |
385 | encode_stateid_maxsz) | 379 | encode_stateid_maxsz) |
386 | #define decode_layoutget_maxsz (op_decode_hdr_maxsz + 8 + \ | 380 | #define decode_layoutget_maxsz (op_decode_hdr_maxsz + 8 + \ |
@@ -395,7 +389,10 @@ static int nfs4_stat_to_errno(int); | |||
395 | 2 /* last byte written */ + \ | 389 | 2 /* last byte written */ + \ |
396 | 1 /* nt_timechanged (false) */ + \ | 390 | 1 /* nt_timechanged (false) */ + \ |
397 | 1 /* layoutupdate4 layout type */ + \ | 391 | 1 /* layoutupdate4 layout type */ + \ |
398 | 1 /* NULL filelayout layoutupdate4 payload */) | 392 | 1 /* layoutupdate4 opaque len */)
393 | /* the actual content of layoutupdate4 should | ||
394 | be allocated by drivers and spliced in | ||
395 | using xdr_write_pages */ | ||
399 | #define decode_layoutcommit_maxsz (op_decode_hdr_maxsz + 3) | 396 | #define decode_layoutcommit_maxsz (op_decode_hdr_maxsz + 3) |
400 | #define encode_layoutreturn_maxsz (8 + op_encode_hdr_maxsz + \ | 397 | #define encode_layoutreturn_maxsz (8 + op_encode_hdr_maxsz + \ |
401 | encode_stateid_maxsz + \ | 398 | encode_stateid_maxsz + \ |
@@ -809,14 +806,6 @@ static int nfs4_stat_to_errno(int); | |||
809 | #define NFS4_dec_reclaim_complete_sz (compound_decode_hdr_maxsz + \ | 806 | #define NFS4_dec_reclaim_complete_sz (compound_decode_hdr_maxsz + \ |
810 | decode_sequence_maxsz + \ | 807 | decode_sequence_maxsz + \ |
811 | decode_reclaim_complete_maxsz) | 808 | decode_reclaim_complete_maxsz) |
812 | #define NFS4_enc_getdevicelist_sz (compound_encode_hdr_maxsz + \ | ||
813 | encode_sequence_maxsz + \ | ||
814 | encode_putfh_maxsz + \ | ||
815 | encode_getdevicelist_maxsz) | ||
816 | #define NFS4_dec_getdevicelist_sz (compound_decode_hdr_maxsz + \ | ||
817 | decode_sequence_maxsz + \ | ||
818 | decode_putfh_maxsz + \ | ||
819 | decode_getdevicelist_maxsz) | ||
820 | #define NFS4_enc_getdeviceinfo_sz (compound_encode_hdr_maxsz + \ | 809 | #define NFS4_enc_getdeviceinfo_sz (compound_encode_hdr_maxsz + \ |
821 | encode_sequence_maxsz +\ | 810 | encode_sequence_maxsz +\ |
822 | encode_getdeviceinfo_maxsz) | 811 | encode_getdeviceinfo_maxsz) |
@@ -1927,24 +1916,6 @@ static void encode_sequence(struct xdr_stream *xdr, | |||
1927 | 1916 | ||
1928 | #ifdef CONFIG_NFS_V4_1 | 1917 | #ifdef CONFIG_NFS_V4_1 |
1929 | static void | 1918 | static void |
1930 | encode_getdevicelist(struct xdr_stream *xdr, | ||
1931 | const struct nfs4_getdevicelist_args *args, | ||
1932 | struct compound_hdr *hdr) | ||
1933 | { | ||
1934 | __be32 *p; | ||
1935 | nfs4_verifier dummy = { | ||
1936 | .data = "dummmmmy", | ||
1937 | }; | ||
1938 | |||
1939 | encode_op_hdr(xdr, OP_GETDEVICELIST, decode_getdevicelist_maxsz, hdr); | ||
1940 | p = reserve_space(xdr, 16); | ||
1941 | *p++ = cpu_to_be32(args->layoutclass); | ||
1942 | *p++ = cpu_to_be32(NFS4_PNFS_GETDEVLIST_MAXNUM); | ||
1943 | xdr_encode_hyper(p, 0ULL); /* cookie */ | ||
1944 | encode_nfs4_verifier(xdr, &dummy); | ||
1945 | } | ||
1946 | |||
1947 | static void | ||
1948 | encode_getdeviceinfo(struct xdr_stream *xdr, | 1919 | encode_getdeviceinfo(struct xdr_stream *xdr, |
1949 | const struct nfs4_getdeviceinfo_args *args, | 1920 | const struct nfs4_getdeviceinfo_args *args, |
1950 | struct compound_hdr *hdr) | 1921 | struct compound_hdr *hdr) |
@@ -1952,12 +1923,15 @@ encode_getdeviceinfo(struct xdr_stream *xdr, | |||
1952 | __be32 *p; | 1923 | __be32 *p; |
1953 | 1924 | ||
1954 | encode_op_hdr(xdr, OP_GETDEVICEINFO, decode_getdeviceinfo_maxsz, hdr); | 1925 | encode_op_hdr(xdr, OP_GETDEVICEINFO, decode_getdeviceinfo_maxsz, hdr); |
1955 | p = reserve_space(xdr, 12 + NFS4_DEVICEID4_SIZE); | 1926 | p = reserve_space(xdr, NFS4_DEVICEID4_SIZE + 4 + 4); |
1956 | p = xdr_encode_opaque_fixed(p, args->pdev->dev_id.data, | 1927 | p = xdr_encode_opaque_fixed(p, args->pdev->dev_id.data, |
1957 | NFS4_DEVICEID4_SIZE); | 1928 | NFS4_DEVICEID4_SIZE); |
1958 | *p++ = cpu_to_be32(args->pdev->layout_type); | 1929 | *p++ = cpu_to_be32(args->pdev->layout_type); |
1959 | *p++ = cpu_to_be32(args->pdev->maxcount); /* gdia_maxcount */ | 1930 | *p++ = cpu_to_be32(args->pdev->maxcount); /* gdia_maxcount */ |
1960 | *p++ = cpu_to_be32(0); /* bitmap length 0 */ | 1931 | |
1932 | p = reserve_space(xdr, 4 + 4); | ||
1933 | *p++ = cpu_to_be32(1); /* bitmap length */ | ||
1934 | *p++ = cpu_to_be32(NOTIFY_DEVICEID4_CHANGE | NOTIFY_DEVICEID4_DELETE); | ||
1961 | } | 1935 | } |
1962 | 1936 | ||
1963 | static void | 1937 | static void |
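In encode_getdeviceinfo() the client now requests device-ID notifications: where it used to emit a zero-length bitmap it reserves two more words and encodes a one-word bitmap carrying NOTIFY_DEVICEID4_CHANGE | NOTIFY_DEVICEID4_DELETE. XDR represents such a bitmap as a word count followed by big-endian words; a self-contained sketch of that wire layout (bit values follow the NFSv4.1 notify_deviceid_type4 numbering, buffer handling is simplified):

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>

#define NOTIFY_DEVICEID4_CHANGE (1u << 1)
#define NOTIFY_DEVICEID4_DELETE (1u << 2)

/* Encode an XDR bitmap4: word count, then the words, all big-endian. */
static size_t encode_bitmap1(uint32_t *buf, uint32_t word0)
{
        buf[0] = htonl(1);      /* bitmap length: one word */
        buf[1] = htonl(word0);  /* notification bits */
        return 2 * sizeof(uint32_t);
}

int main(void)
{
        uint32_t buf[2];
        size_t n = encode_bitmap1(buf,
                NOTIFY_DEVICEID4_CHANGE | NOTIFY_DEVICEID4_DELETE);

        printf("encoded %zu bytes: %08x %08x\n", n,
               (unsigned)ntohl(buf[0]), (unsigned)ntohl(buf[1]));
        return 0;
}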
@@ -1990,7 +1964,7 @@ encode_layoutget(struct xdr_stream *xdr, | |||
1990 | static int | 1964 | static int |
1991 | encode_layoutcommit(struct xdr_stream *xdr, | 1965 | encode_layoutcommit(struct xdr_stream *xdr, |
1992 | struct inode *inode, | 1966 | struct inode *inode, |
1993 | const struct nfs4_layoutcommit_args *args, | 1967 | struct nfs4_layoutcommit_args *args, |
1994 | struct compound_hdr *hdr) | 1968 | struct compound_hdr *hdr) |
1995 | { | 1969 | { |
1996 | __be32 *p; | 1970 | __be32 *p; |
@@ -2011,11 +1985,16 @@ encode_layoutcommit(struct xdr_stream *xdr, | |||
2011 | *p++ = cpu_to_be32(0); /* Never send time_modify_changed */ | 1985 | *p++ = cpu_to_be32(0); /* Never send time_modify_changed */ |
2012 | *p++ = cpu_to_be32(NFS_SERVER(args->inode)->pnfs_curr_ld->id);/* type */ | 1986 | *p++ = cpu_to_be32(NFS_SERVER(args->inode)->pnfs_curr_ld->id);/* type */ |
2013 | 1987 | ||
2014 | if (NFS_SERVER(inode)->pnfs_curr_ld->encode_layoutcommit) | 1988 | if (NFS_SERVER(inode)->pnfs_curr_ld->encode_layoutcommit) { |
2015 | NFS_SERVER(inode)->pnfs_curr_ld->encode_layoutcommit( | 1989 | NFS_SERVER(inode)->pnfs_curr_ld->encode_layoutcommit( |
2016 | NFS_I(inode)->layout, xdr, args); | 1990 | NFS_I(inode)->layout, xdr, args); |
2017 | else | 1991 | } else { |
2018 | encode_uint32(xdr, 0); /* no layout-type payload */ | 1992 | encode_uint32(xdr, args->layoutupdate_len); |
1993 | if (args->layoutupdate_pages) { | ||
1994 | xdr_write_pages(xdr, args->layoutupdate_pages, 0, | ||
1995 | args->layoutupdate_len); | ||
1996 | } | ||
1997 | } | ||
2019 | 1998 | ||
2020 | return 0; | 1999 | return 0; |
2021 | } | 2000 | } |
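The encode_layoutcommit() hunk replaces the hard-coded empty layoutupdate4 with a length-prefixed opaque whose body the layout driver pre-builds in pages and splices in via xdr_write_pages(), as the revised maxsz comment above notes. A userspace sketch of emitting an XDR opaque<> with the required 4-byte padding (encode_opaque() is illustrative, not an XDR-library call):

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Emit an XDR opaque<>: 32-bit length, body, zero padding to 4 bytes. */
static size_t encode_opaque(uint8_t *out, const void *body, uint32_t len)
{
        uint32_t belen = htonl(len);
        size_t pad = (4 - (len & 3)) & 3;

        memcpy(out, &belen, 4);
        memcpy(out + 4, body, len);
        memset(out + 4 + len, 0, pad);
        return 4 + len + pad;
}

int main(void)
{
        uint8_t buf[64];
        const char payload[] = { 0x01, 0x02, 0x03, 0x04, 0x05 };
        size_t n = encode_opaque(buf, payload, sizeof(payload));

        printf("opaque of %zu bytes took %zu bytes on the wire\n",
               sizeof(payload), n);
        return 0;
}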
@@ -2893,24 +2872,6 @@ static void nfs4_xdr_enc_reclaim_complete(struct rpc_rqst *req, | |||
2893 | } | 2872 | } |
2894 | 2873 | ||
2895 | /* | 2874 | /* |
2896 | * Encode GETDEVICELIST request | ||
2897 | */ | ||
2898 | static void nfs4_xdr_enc_getdevicelist(struct rpc_rqst *req, | ||
2899 | struct xdr_stream *xdr, | ||
2900 | struct nfs4_getdevicelist_args *args) | ||
2901 | { | ||
2902 | struct compound_hdr hdr = { | ||
2903 | .minorversion = nfs4_xdr_minorversion(&args->seq_args), | ||
2904 | }; | ||
2905 | |||
2906 | encode_compound_hdr(xdr, req, &hdr); | ||
2907 | encode_sequence(xdr, &args->seq_args, &hdr); | ||
2908 | encode_putfh(xdr, args->fh, &hdr); | ||
2909 | encode_getdevicelist(xdr, args, &hdr); | ||
2910 | encode_nops(&hdr); | ||
2911 | } | ||
2912 | |||
2913 | /* | ||
2914 | * Encode GETDEVICEINFO request | 2875 | * Encode GETDEVICEINFO request |
2915 | */ | 2876 | */ |
2916 | static void nfs4_xdr_enc_getdeviceinfo(struct rpc_rqst *req, | 2877 | static void nfs4_xdr_enc_getdeviceinfo(struct rpc_rqst *req, |
@@ -5765,54 +5726,6 @@ out_overflow: | |||
5765 | } | 5726 | } |
5766 | 5727 | ||
5767 | #if defined(CONFIG_NFS_V4_1) | 5728 | #if defined(CONFIG_NFS_V4_1) |
5768 | /* | ||
5769 | * TODO: Need to handle case when EOF != true; | ||
5770 | */ | ||
5771 | static int decode_getdevicelist(struct xdr_stream *xdr, | ||
5772 | struct pnfs_devicelist *res) | ||
5773 | { | ||
5774 | __be32 *p; | ||
5775 | int status, i; | ||
5776 | nfs4_verifier verftemp; | ||
5777 | |||
5778 | status = decode_op_hdr(xdr, OP_GETDEVICELIST); | ||
5779 | if (status) | ||
5780 | return status; | ||
5781 | |||
5782 | p = xdr_inline_decode(xdr, 8 + 8 + 4); | ||
5783 | if (unlikely(!p)) | ||
5784 | goto out_overflow; | ||
5785 | |||
5786 | /* TODO: Skip cookie for now */ | ||
5787 | p += 2; | ||
5788 | |||
5789 | /* Read verifier */ | ||
5790 | p = xdr_decode_opaque_fixed(p, verftemp.data, NFS4_VERIFIER_SIZE); | ||
5791 | |||
5792 | res->num_devs = be32_to_cpup(p); | ||
5793 | |||
5794 | dprintk("%s: num_dev %d\n", __func__, res->num_devs); | ||
5795 | |||
5796 | if (res->num_devs > NFS4_PNFS_GETDEVLIST_MAXNUM) { | ||
5797 | printk(KERN_ERR "NFS: %s too many result dev_num %u\n", | ||
5798 | __func__, res->num_devs); | ||
5799 | return -EIO; | ||
5800 | } | ||
5801 | |||
5802 | p = xdr_inline_decode(xdr, | ||
5803 | res->num_devs * NFS4_DEVICEID4_SIZE + 4); | ||
5804 | if (unlikely(!p)) | ||
5805 | goto out_overflow; | ||
5806 | for (i = 0; i < res->num_devs; i++) | ||
5807 | p = xdr_decode_opaque_fixed(p, res->dev_id[i].data, | ||
5808 | NFS4_DEVICEID4_SIZE); | ||
5809 | res->eof = be32_to_cpup(p); | ||
5810 | return 0; | ||
5811 | out_overflow: | ||
5812 | print_overflow_msg(__func__, xdr); | ||
5813 | return -EIO; | ||
5814 | } | ||
5815 | |||
5816 | static int decode_getdeviceinfo(struct xdr_stream *xdr, | 5729 | static int decode_getdeviceinfo(struct xdr_stream *xdr, |
5817 | struct pnfs_device *pdev) | 5730 | struct pnfs_device *pdev) |
5818 | { | 5731 | { |
@@ -5862,9 +5775,16 @@ static int decode_getdeviceinfo(struct xdr_stream *xdr, | |||
5862 | p = xdr_inline_decode(xdr, 4 * len); | 5775 | p = xdr_inline_decode(xdr, 4 * len); |
5863 | if (unlikely(!p)) | 5776 | if (unlikely(!p)) |
5864 | goto out_overflow; | 5777 | goto out_overflow; |
5865 | for (i = 0; i < len; i++, p++) { | 5778 | |
5866 | if (be32_to_cpup(p)) { | 5779 | if (be32_to_cpup(p++) & |
5867 | dprintk("%s: notifications not supported\n", | 5780 | ~(NOTIFY_DEVICEID4_CHANGE | NOTIFY_DEVICEID4_DELETE)) { |
5781 | dprintk("%s: unsupported notification\n", | ||
5782 | __func__); | ||
5783 | } | ||
5784 | |||
5785 | for (i = 1; i < len; i++) { | ||
5786 | if (be32_to_cpup(p++)) { | ||
5787 | dprintk("%s: unsupported notification\n", | ||
5868 | __func__); | 5788 | __func__); |
5869 | return -EIO; | 5789 | return -EIO; |
5870 | } | 5790 | } |
@@ -7097,32 +7017,6 @@ static int nfs4_xdr_dec_reclaim_complete(struct rpc_rqst *rqstp, | |||
7097 | } | 7017 | } |
7098 | 7018 | ||
7099 | /* | 7019 | /* |
7100 | * Decode GETDEVICELIST response | ||
7101 | */ | ||
7102 | static int nfs4_xdr_dec_getdevicelist(struct rpc_rqst *rqstp, | ||
7103 | struct xdr_stream *xdr, | ||
7104 | struct nfs4_getdevicelist_res *res) | ||
7105 | { | ||
7106 | struct compound_hdr hdr; | ||
7107 | int status; | ||
7108 | |||
7109 | dprintk("encoding getdevicelist!\n"); | ||
7110 | |||
7111 | status = decode_compound_hdr(xdr, &hdr); | ||
7112 | if (status != 0) | ||
7113 | goto out; | ||
7114 | status = decode_sequence(xdr, &res->seq_res, rqstp); | ||
7115 | if (status != 0) | ||
7116 | goto out; | ||
7117 | status = decode_putfh(xdr); | ||
7118 | if (status != 0) | ||
7119 | goto out; | ||
7120 | status = decode_getdevicelist(xdr, res->devlist); | ||
7121 | out: | ||
7122 | return status; | ||
7123 | } | ||
7124 | |||
7125 | /* | ||
7126 | * Decode GETDEVINFO response | 7020 | * Decode GETDEVINFO response |
7127 | */ | 7021 | */ |
7128 | static int nfs4_xdr_dec_getdeviceinfo(struct rpc_rqst *rqstp, | 7022 | static int nfs4_xdr_dec_getdeviceinfo(struct rpc_rqst *rqstp, |
@@ -7490,7 +7384,6 @@ struct rpc_procinfo nfs4_procedures[] = { | |||
7490 | PROC(SECINFO_NO_NAME, enc_secinfo_no_name, dec_secinfo_no_name), | 7384 | PROC(SECINFO_NO_NAME, enc_secinfo_no_name, dec_secinfo_no_name), |
7491 | PROC(TEST_STATEID, enc_test_stateid, dec_test_stateid), | 7385 | PROC(TEST_STATEID, enc_test_stateid, dec_test_stateid), |
7492 | PROC(FREE_STATEID, enc_free_stateid, dec_free_stateid), | 7386 | PROC(FREE_STATEID, enc_free_stateid, dec_free_stateid), |
7493 | PROC(GETDEVICELIST, enc_getdevicelist, dec_getdevicelist), | ||
7494 | PROC(BIND_CONN_TO_SESSION, | 7387 | PROC(BIND_CONN_TO_SESSION, |
7495 | enc_bind_conn_to_session, dec_bind_conn_to_session), | 7388 | enc_bind_conn_to_session, dec_bind_conn_to_session), |
7496 | PROC(DESTROY_CLIENTID, enc_destroy_clientid, dec_destroy_clientid), | 7389 | PROC(DESTROY_CLIENTID, enc_destroy_clientid, dec_destroy_clientid), |
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index ae05278b3761..c6e4bda63000 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c | |||
@@ -60,52 +60,6 @@ objio_free_deviceid_node(struct nfs4_deviceid_node *d) | |||
60 | kfree(de); | 60 | kfree(de); |
61 | } | 61 | } |
62 | 62 | ||
63 | static struct objio_dev_ent *_dev_list_find(const struct nfs_server *nfss, | ||
64 | const struct nfs4_deviceid *d_id) | ||
65 | { | ||
66 | struct nfs4_deviceid_node *d; | ||
67 | struct objio_dev_ent *de; | ||
68 | |||
69 | d = nfs4_find_get_deviceid(nfss->pnfs_curr_ld, nfss->nfs_client, d_id); | ||
70 | if (!d) | ||
71 | return NULL; | ||
72 | |||
73 | de = container_of(d, struct objio_dev_ent, id_node); | ||
74 | return de; | ||
75 | } | ||
76 | |||
77 | static struct objio_dev_ent * | ||
78 | _dev_list_add(const struct nfs_server *nfss, | ||
79 | const struct nfs4_deviceid *d_id, struct osd_dev *od, | ||
80 | gfp_t gfp_flags) | ||
81 | { | ||
82 | struct nfs4_deviceid_node *d; | ||
83 | struct objio_dev_ent *de = kzalloc(sizeof(*de), gfp_flags); | ||
84 | struct objio_dev_ent *n; | ||
85 | |||
86 | if (!de) { | ||
87 | dprintk("%s: -ENOMEM od=%p\n", __func__, od); | ||
88 | return NULL; | ||
89 | } | ||
90 | |||
91 | dprintk("%s: Adding od=%p\n", __func__, od); | ||
92 | nfs4_init_deviceid_node(&de->id_node, | ||
93 | nfss->pnfs_curr_ld, | ||
94 | nfss->nfs_client, | ||
95 | d_id); | ||
96 | de->od.od = od; | ||
97 | |||
98 | d = nfs4_insert_deviceid_node(&de->id_node); | ||
99 | n = container_of(d, struct objio_dev_ent, id_node); | ||
100 | if (n != de) { | ||
101 | dprintk("%s: Race with other n->od=%p\n", __func__, n->od.od); | ||
102 | objio_free_deviceid_node(&de->id_node); | ||
103 | de = n; | ||
104 | } | ||
105 | |||
106 | return de; | ||
107 | } | ||
108 | |||
109 | struct objio_segment { | 63 | struct objio_segment { |
110 | struct pnfs_layout_segment lseg; | 64 | struct pnfs_layout_segment lseg; |
111 | 65 | ||
@@ -130,29 +84,24 @@ struct objio_state { | |||
130 | 84 | ||
131 | /* Send and wait for a get_device_info of devices in the layout, | 85 | /* Send and wait for a get_device_info of devices in the layout, |
132 | then look them up with the osd_initiator library */ | 86 | then look them up with the osd_initiator library */ |
133 | static int objio_devices_lookup(struct pnfs_layout_hdr *pnfslay, | 87 | struct nfs4_deviceid_node * |
134 | struct objio_segment *objio_seg, unsigned c, struct nfs4_deviceid *d_id, | 88 | objio_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, |
135 | gfp_t gfp_flags) | 89 | gfp_t gfp_flags) |
136 | { | 90 | { |
137 | struct pnfs_osd_deviceaddr *deviceaddr; | 91 | struct pnfs_osd_deviceaddr *deviceaddr; |
138 | struct objio_dev_ent *ode; | 92 | struct objio_dev_ent *ode = NULL; |
139 | struct osd_dev *od; | 93 | struct osd_dev *od; |
140 | struct osd_dev_info odi; | 94 | struct osd_dev_info odi; |
141 | bool retry_flag = true; | 95 | bool retry_flag = true; |
96 | __be32 *p; | ||
142 | int err; | 97 | int err; |
143 | 98 | ||
144 | ode = _dev_list_find(NFS_SERVER(pnfslay->plh_inode), d_id); | 99 | deviceaddr = kzalloc(sizeof(*deviceaddr), gfp_flags); |
145 | if (ode) { | 100 | if (!deviceaddr) |
146 | objio_seg->oc.ods[c] = &ode->od; /* must use container_of */ | 101 | return NULL; |
147 | return 0; | ||
148 | } | ||
149 | 102 | ||
150 | err = objlayout_get_deviceinfo(pnfslay, d_id, &deviceaddr, gfp_flags); | 103 | p = page_address(pdev->pages[0]); |
151 | if (unlikely(err)) { | 104 | pnfs_osd_xdr_decode_deviceaddr(deviceaddr, p); |
152 | dprintk("%s: objlayout_get_deviceinfo dev(%llx:%llx) =>%d\n", | ||
153 | __func__, _DEVID_LO(d_id), _DEVID_HI(d_id), err); | ||
154 | return err; | ||
155 | } | ||
156 | 105 | ||
157 | odi.systemid_len = deviceaddr->oda_systemid.len; | 106 | odi.systemid_len = deviceaddr->oda_systemid.len; |
158 | if (odi.systemid_len > sizeof(odi.systemid)) { | 107 | if (odi.systemid_len > sizeof(odi.systemid)) { |
@@ -188,14 +137,24 @@ retry_lookup: | |||
188 | goto out; | 137 | goto out; |
189 | } | 138 | } |
190 | 139 | ||
191 | ode = _dev_list_add(NFS_SERVER(pnfslay->plh_inode), d_id, od, | ||
192 | gfp_flags); | ||
193 | objio_seg->oc.ods[c] = &ode->od; /* must use container_of */ | ||
194 | dprintk("Adding new dev_id(%llx:%llx)\n", | 140 | dprintk("Adding new dev_id(%llx:%llx)\n", |
195 | _DEVID_LO(d_id), _DEVID_HI(d_id)); | 141 | _DEVID_LO(&pdev->dev_id), _DEVID_HI(&pdev->dev_id)); |
142 | |||
143 | ode = kzalloc(sizeof(*ode), gfp_flags); | ||
144 | if (!ode) { | ||
145 | dprintk("%s: -ENOMEM od=%p\n", __func__, od); | ||
146 | goto out; | ||
147 | } | ||
148 | |||
149 | nfs4_init_deviceid_node(&ode->id_node, server, &pdev->dev_id); | ||
150 | kfree(deviceaddr); | ||
151 | |||
152 | ode->od.od = od; | ||
153 | return &ode->id_node; | ||
154 | |||
196 | out: | 155 | out: |
197 | objlayout_put_deviceinfo(deviceaddr); | 156 | kfree(deviceaddr); |
198 | return err; | 157 | return NULL; |
199 | } | 158 | } |
200 | 159 | ||
201 | static void copy_single_comp(struct ore_components *oc, unsigned c, | 160 | static void copy_single_comp(struct ore_components *oc, unsigned c, |
@@ -254,6 +213,7 @@ int objio_alloc_lseg(struct pnfs_layout_segment **outp, | |||
254 | struct xdr_stream *xdr, | 213 | struct xdr_stream *xdr, |
255 | gfp_t gfp_flags) | 214 | gfp_t gfp_flags) |
256 | { | 215 | { |
216 | struct nfs_server *server = NFS_SERVER(pnfslay->plh_inode); | ||
257 | struct objio_segment *objio_seg; | 217 | struct objio_segment *objio_seg; |
258 | struct pnfs_osd_xdr_decode_layout_iter iter; | 218 | struct pnfs_osd_xdr_decode_layout_iter iter; |
259 | struct pnfs_osd_layout layout; | 219 | struct pnfs_osd_layout layout; |
@@ -283,13 +243,21 @@ int objio_alloc_lseg(struct pnfs_layout_segment **outp, | |||
283 | objio_seg->oc.first_dev = layout.olo_comps_index; | 243 | objio_seg->oc.first_dev = layout.olo_comps_index; |
284 | cur_comp = 0; | 244 | cur_comp = 0; |
285 | while (pnfs_osd_xdr_decode_layout_comp(&src_comp, &iter, xdr, &err)) { | 245 | while (pnfs_osd_xdr_decode_layout_comp(&src_comp, &iter, xdr, &err)) { |
246 | struct nfs4_deviceid_node *d; | ||
247 | struct objio_dev_ent *ode; | ||
248 | |||
286 | copy_single_comp(&objio_seg->oc, cur_comp, &src_comp); | 249 | copy_single_comp(&objio_seg->oc, cur_comp, &src_comp); |
287 | err = objio_devices_lookup(pnfslay, objio_seg, cur_comp, | 250 | |
288 | &src_comp.oc_object_id.oid_device_id, | 251 | d = nfs4_find_get_deviceid(server, |
289 | gfp_flags); | 252 | &src_comp.oc_object_id.oid_device_id, |
290 | if (err) | 253 | pnfslay->plh_lc_cred, gfp_flags); |
254 | if (!d) { | ||
255 | err = -ENXIO; | ||
291 | goto err; | 256 | goto err; |
292 | ++cur_comp; | 257 | } |
258 | |||
259 | ode = container_of(d, struct objio_dev_ent, id_node); | ||
260 | objio_seg->oc.ods[cur_comp++] = &ode->od; | ||
293 | } | 261 | } |
294 | /* pnfs_osd_xdr_decode_layout_comp returns false on error */ | 262 | /* pnfs_osd_xdr_decode_layout_comp returns false on error */ |
295 | if (unlikely(err)) | 263 | if (unlikely(err)) |
@@ -653,6 +621,7 @@ static struct pnfs_layoutdriver_type objlayout_type = { | |||
653 | .flags = PNFS_LAYOUTRET_ON_SETATTR | | 621 | .flags = PNFS_LAYOUTRET_ON_SETATTR | |
654 | PNFS_LAYOUTRET_ON_ERROR, | 622 | PNFS_LAYOUTRET_ON_ERROR, |
655 | 623 | ||
624 | .max_deviceinfo_size = PAGE_SIZE, | ||
656 | .owner = THIS_MODULE, | 625 | .owner = THIS_MODULE, |
657 | .alloc_layout_hdr = objlayout_alloc_layout_hdr, | 626 | .alloc_layout_hdr = objlayout_alloc_layout_hdr, |
658 | .free_layout_hdr = objlayout_free_layout_hdr, | 627 | .free_layout_hdr = objlayout_free_layout_hdr, |
diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c index 697a16d11fac..c89357c7a914 100644 --- a/fs/nfs/objlayout/objlayout.c +++ b/fs/nfs/objlayout/objlayout.c | |||
@@ -574,76 +574,6 @@ loop_done: | |||
574 | dprintk("%s: Return\n", __func__); | 574 | dprintk("%s: Return\n", __func__); |
575 | } | 575 | } |
576 | 576 | ||
577 | |||
578 | /* | ||
579 | * Get Device Info API for io engines | ||
580 | */ | ||
581 | struct objlayout_deviceinfo { | ||
582 | struct page *page; | ||
583 | struct pnfs_osd_deviceaddr da; /* This must be last */ | ||
584 | }; | ||
585 | |||
586 | /* Initialize and call nfs_getdeviceinfo, then decode and return a | ||
587 | * "struct pnfs_osd_deviceaddr *" Eventually objlayout_put_deviceinfo() | ||
588 | * should be called. | ||
589 | */ | ||
590 | int objlayout_get_deviceinfo(struct pnfs_layout_hdr *pnfslay, | ||
591 | struct nfs4_deviceid *d_id, struct pnfs_osd_deviceaddr **deviceaddr, | ||
592 | gfp_t gfp_flags) | ||
593 | { | ||
594 | struct objlayout_deviceinfo *odi; | ||
595 | struct pnfs_device pd; | ||
596 | struct page *page, **pages; | ||
597 | u32 *p; | ||
598 | int err; | ||
599 | |||
600 | page = alloc_page(gfp_flags); | ||
601 | if (!page) | ||
602 | return -ENOMEM; | ||
603 | |||
604 | pages = &page; | ||
605 | pd.pages = pages; | ||
606 | |||
607 | memcpy(&pd.dev_id, d_id, sizeof(*d_id)); | ||
608 | pd.layout_type = LAYOUT_OSD2_OBJECTS; | ||
609 | pd.pages = &page; | ||
610 | pd.pgbase = 0; | ||
611 | pd.pglen = PAGE_SIZE; | ||
612 | pd.mincount = 0; | ||
613 | pd.maxcount = PAGE_SIZE; | ||
614 | |||
615 | err = nfs4_proc_getdeviceinfo(NFS_SERVER(pnfslay->plh_inode), &pd, | ||
616 | pnfslay->plh_lc_cred); | ||
617 | dprintk("%s nfs_getdeviceinfo returned %d\n", __func__, err); | ||
618 | if (err) | ||
619 | goto err_out; | ||
620 | |||
621 | p = page_address(page); | ||
622 | odi = kzalloc(sizeof(*odi), gfp_flags); | ||
623 | if (!odi) { | ||
624 | err = -ENOMEM; | ||
625 | goto err_out; | ||
626 | } | ||
627 | pnfs_osd_xdr_decode_deviceaddr(&odi->da, p); | ||
628 | odi->page = page; | ||
629 | *deviceaddr = &odi->da; | ||
630 | return 0; | ||
631 | |||
632 | err_out: | ||
633 | __free_page(page); | ||
634 | return err; | ||
635 | } | ||
636 | |||
637 | void objlayout_put_deviceinfo(struct pnfs_osd_deviceaddr *deviceaddr) | ||
638 | { | ||
639 | struct objlayout_deviceinfo *odi = container_of(deviceaddr, | ||
640 | struct objlayout_deviceinfo, | ||
641 | da); | ||
642 | |||
643 | __free_page(odi->page); | ||
644 | kfree(odi); | ||
645 | } | ||
646 | |||
647 | enum { | 577 | enum { |
648 | OBJLAYOUT_MAX_URI_LEN = 256, OBJLAYOUT_MAX_OSDNAME_LEN = 64, | 578 | OBJLAYOUT_MAX_URI_LEN = 256, OBJLAYOUT_MAX_OSDNAME_LEN = 64, |
649 | OBJLAYOUT_MAX_SYSID_HEX_LEN = OSD_SYSTEMID_LEN * 2 + 1, | 579 | OBJLAYOUT_MAX_SYSID_HEX_LEN = OSD_SYSTEMID_LEN * 2 + 1, |
diff --git a/fs/nfs/objlayout/objlayout.h b/fs/nfs/objlayout/objlayout.h index fd13f1d2f136..3a0828d57339 100644 --- a/fs/nfs/objlayout/objlayout.h +++ b/fs/nfs/objlayout/objlayout.h | |||
@@ -149,11 +149,6 @@ extern void objlayout_read_done(struct objlayout_io_res *oir, | |||
149 | extern void objlayout_write_done(struct objlayout_io_res *oir, | 149 | extern void objlayout_write_done(struct objlayout_io_res *oir, |
150 | ssize_t status, bool sync); | 150 | ssize_t status, bool sync); |
151 | 151 | ||
152 | extern int objlayout_get_deviceinfo(struct pnfs_layout_hdr *pnfslay, | ||
153 | struct nfs4_deviceid *d_id, struct pnfs_osd_deviceaddr **deviceaddr, | ||
154 | gfp_t gfp_flags); | ||
155 | extern void objlayout_put_deviceinfo(struct pnfs_osd_deviceaddr *deviceaddr); | ||
156 | |||
157 | /* | 152 | /* |
158 | * exported generic objects function vectors | 153 | * exported generic objects function vectors |
159 | */ | 154 | */ |
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index ba491926df5f..94e16ec88312 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c | |||
@@ -116,7 +116,7 @@ __nfs_iocounter_wait(struct nfs_io_counter *c) | |||
116 | if (atomic_read(&c->io_count) == 0) | 116 | if (atomic_read(&c->io_count) == 0) |
117 | break; | 117 | break; |
118 | ret = nfs_wait_bit_killable(&q.key); | 118 | ret = nfs_wait_bit_killable(&q.key); |
119 | } while (atomic_read(&c->io_count) != 0); | 119 | } while (atomic_read(&c->io_count) != 0 && !ret); |
120 | finish_wait(wq, &q.wait); | 120 | finish_wait(wq, &q.wait); |
121 | return ret; | 121 | return ret; |
122 | } | 122 | } |
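The __nfs_iocounter_wait() fix above is small but real: without the added '&& !ret' the loop kept waiting even after nfs_wait_bit_killable() reported a fatal signal, so a killed task could sit in the wait loop indefinitely. The same bug pattern reduced to plain C, with wait_killable() standing in for the killable wait:

#include <stdio.h>

static int wakeups;

/* Stand-in for a killable wait: returns nonzero once "killed". */
static int wait_killable(void)
{
        return ++wakeups >= 3 ? -512 /* like -ERESTARTSYS */ : 0;
}

int main(void)
{
        int io_count = 1;       /* never drained in this demo */
        int ret = 0;

        do {
                if (io_count == 0)
                        break;
                ret = wait_killable();
        } while (io_count != 0 && !ret);        /* '&& !ret' ends the loop */

        printf("left wait loop with ret=%d after %d wakeups\n", ret, wakeups);
        return 0;
}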
@@ -139,26 +139,49 @@ nfs_iocounter_wait(struct nfs_io_counter *c) | |||
139 | /* | 139 | /* |
140 | * nfs_page_group_lock - lock the head of the page group | 140 | * nfs_page_group_lock - lock the head of the page group |
141 | * @req - request in group that is to be locked | 141 | * @req - request in group that is to be locked |
142 | * @nonblock - if true don't block waiting for lock | ||
142 | * | 143 | * |
143 | * this lock must be held if modifying the page group list | 144 | * this lock must be held if modifying the page group list |
144 | * | 145 | * |
145 | * returns result from wait_on_bit_lock: 0 on success, < 0 on error | 146 | * return 0 on success, < 0 on error: -EAGAIN if nonblocking or the |
147 | * result from wait_on_bit_lock | ||
148 | * | ||
149 | * NOTE: calling with nonblock=false should always have set the | ||
150 | * lock bit (see fs/buffer.c and other uses of wait_on_bit_lock | ||
151 | * with TASK_UNINTERRUPTIBLE), so there is no need to check the result. | ||
146 | */ | 152 | */ |
147 | int | 153 | int |
148 | nfs_page_group_lock(struct nfs_page *req, bool wait) | 154 | nfs_page_group_lock(struct nfs_page *req, bool nonblock) |
149 | { | 155 | { |
150 | struct nfs_page *head = req->wb_head; | 156 | struct nfs_page *head = req->wb_head; |
151 | int ret; | ||
152 | 157 | ||
153 | WARN_ON_ONCE(head != head->wb_head); | 158 | WARN_ON_ONCE(head != head->wb_head); |
154 | 159 | ||
155 | do { | 160 | if (!test_and_set_bit(PG_HEADLOCK, &head->wb_flags)) |
156 | ret = wait_on_bit_lock(&head->wb_flags, PG_HEADLOCK, | 161 | return 0; |
157 | TASK_UNINTERRUPTIBLE); | ||
158 | } while (wait && ret != 0); | ||
159 | 162 | ||
160 | WARN_ON_ONCE(ret > 0); | 163 | if (!nonblock) |
161 | return ret; | 164 | return wait_on_bit_lock(&head->wb_flags, PG_HEADLOCK, |
165 | TASK_UNINTERRUPTIBLE); | ||
166 | |||
167 | return -EAGAIN; | ||
168 | } | ||
169 | |||
170 | /* | ||
171 | * nfs_page_group_lock_wait - wait for the lock to clear, but don't grab it | ||
172 | * @req - a request in the group | ||
173 | * | ||
174 | * This is a blocking call to wait for the group lock to be cleared. | ||
175 | */ | ||
176 | void | ||
177 | nfs_page_group_lock_wait(struct nfs_page *req) | ||
178 | { | ||
179 | struct nfs_page *head = req->wb_head; | ||
180 | |||
181 | WARN_ON_ONCE(head != head->wb_head); | ||
182 | |||
183 | wait_on_bit(&head->wb_flags, PG_HEADLOCK, | ||
184 | TASK_UNINTERRUPTIBLE); | ||
162 | } | 185 | } |
163 | 186 | ||
164 | /* | 187 | /* |
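nfs_page_group_lock() now takes a fast path through test_and_set_bit(), sleeps in wait_on_bit_lock() only when blocking is allowed, and otherwise reports -EAGAIN; the new nfs_page_group_lock_wait() covers callers that only need to wait for the bit to clear. A sketch of the same trylock-then-wait shape using C11 atomics in place of the kernel bitops (the spin in the slow path stands in for sleeping):

#include <errno.h>
#include <stdatomic.h>
#include <stdio.h>

static atomic_flag headlock = ATOMIC_FLAG_INIT;

/* Returns 0 with the lock held, or -EAGAIN in nonblocking mode. */
static int group_lock(int nonblock)
{
        if (!atomic_flag_test_and_set(&headlock))
                return 0;               /* fast path: got the lock */
        if (nonblock)
                return -EAGAIN;         /* caller will retry later */
        while (atomic_flag_test_and_set(&headlock))
                ;                       /* spin here; the kernel sleeps */
        return 0;
}

static void group_unlock(void)
{
        atomic_flag_clear(&headlock);
}

int main(void)
{
        if (group_lock(1) == 0) {
                puts("uncontended trylock succeeded");
                printf("second, nonblocking attempt: %d\n", group_lock(1));
                group_unlock();
        }
        return 0;
}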
@@ -219,7 +242,7 @@ bool nfs_page_group_sync_on_bit(struct nfs_page *req, unsigned int bit) | |||
219 | { | 242 | { |
220 | bool ret; | 243 | bool ret; |
221 | 244 | ||
222 | nfs_page_group_lock(req, true); | 245 | nfs_page_group_lock(req, false); |
223 | ret = nfs_page_group_sync_on_bit_locked(req, bit); | 246 | ret = nfs_page_group_sync_on_bit_locked(req, bit); |
224 | nfs_page_group_unlock(req); | 247 | nfs_page_group_unlock(req); |
225 | 248 | ||
@@ -458,6 +481,14 @@ size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, | |||
458 | return 0; | 481 | return 0; |
459 | } | 482 | } |
460 | 483 | ||
484 | /* | ||
485 | * Limit the request size so that we can still allocate a page array | ||
486 | * for it without upsetting the slab allocator. | ||
487 | */ | ||
488 | if (((desc->pg_count + req->wb_bytes) >> PAGE_SHIFT) * | ||
489 | sizeof(struct page *) > PAGE_SIZE) | ||
490 | return 0; | ||
491 | |||
461 | return min(desc->pg_bsize - desc->pg_count, (size_t)req->wb_bytes); | 492 | return min(desc->pg_bsize - desc->pg_count, (size_t)req->wb_bytes); |
462 | } | 493 | } |
463 | EXPORT_SYMBOL_GPL(nfs_generic_pg_test); | 494 | EXPORT_SYMBOL_GPL(nfs_generic_pg_test); |
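The new pg_test clamp keeps the page-pointer array that nfs_generic_pgio() will allocate within a single page: with 4 KiB pages and 8-byte pointers that caps one coalesced I/O at 512 pages, i.e. 2 MiB. A worked check of the arithmetic, with PAGE_SHIFT hardcoded for illustration:

#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)

/* Would coalescing req_bytes into a descriptor already holding
 * pg_count bytes need a page-pointer array bigger than one page? */
static int array_too_big(unsigned long pg_count, unsigned long req_bytes)
{
        unsigned long npages = (pg_count + req_bytes) >> PAGE_SHIFT;

        return npages * sizeof(void *) > PAGE_SIZE;
}

int main(void)
{
        /* 512 pointers exactly fill the array page; 513 overflow it. */
        printf("512 pages: %s\n",
               array_too_big(511 * PAGE_SIZE, PAGE_SIZE) ? "reject" : "ok");
        printf("513 pages: %s\n",
               array_too_big(512 * PAGE_SIZE, PAGE_SIZE) ? "reject" : "ok");
        return 0;
}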
@@ -701,10 +732,11 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc, | |||
701 | struct nfs_pgio_header *hdr) | 732 | struct nfs_pgio_header *hdr) |
702 | { | 733 | { |
703 | struct nfs_page *req; | 734 | struct nfs_page *req; |
704 | struct page **pages; | 735 | struct page **pages, |
736 | *last_page; | ||
705 | struct list_head *head = &desc->pg_list; | 737 | struct list_head *head = &desc->pg_list; |
706 | struct nfs_commit_info cinfo; | 738 | struct nfs_commit_info cinfo; |
707 | unsigned int pagecount; | 739 | unsigned int pagecount, pageused; |
708 | 740 | ||
709 | pagecount = nfs_page_array_len(desc->pg_base, desc->pg_count); | 741 | pagecount = nfs_page_array_len(desc->pg_base, desc->pg_count); |
710 | if (!nfs_pgarray_set(&hdr->page_array, pagecount)) | 742 | if (!nfs_pgarray_set(&hdr->page_array, pagecount)) |
@@ -712,12 +744,23 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc, | |||
712 | 744 | ||
713 | nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq); | 745 | nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq); |
714 | pages = hdr->page_array.pagevec; | 746 | pages = hdr->page_array.pagevec; |
747 | last_page = NULL; | ||
748 | pageused = 0; | ||
715 | while (!list_empty(head)) { | 749 | while (!list_empty(head)) { |
716 | req = nfs_list_entry(head->next); | 750 | req = nfs_list_entry(head->next); |
717 | nfs_list_remove_request(req); | 751 | nfs_list_remove_request(req); |
718 | nfs_list_add_request(req, &hdr->pages); | 752 | nfs_list_add_request(req, &hdr->pages); |
719 | *pages++ = req->wb_page; | 753 | |
754 | if (WARN_ON_ONCE(pageused >= pagecount)) | ||
755 | return nfs_pgio_error(desc, hdr); | ||
756 | |||
757 | if (!last_page || last_page != req->wb_page) { | ||
758 | *pages++ = last_page = req->wb_page; | ||
759 | pageused++; | ||
760 | } | ||
720 | } | 761 | } |
762 | if (WARN_ON_ONCE(pageused != pagecount)) | ||
763 | return nfs_pgio_error(desc, hdr); | ||
721 | 764 | ||
722 | if ((desc->pg_ioflags & FLUSH_COND_STABLE) && | 765 | if ((desc->pg_ioflags & FLUSH_COND_STABLE) && |
723 | (desc->pg_moreio || nfs_reqs_to_commit(&cinfo))) | 766 | (desc->pg_moreio || nfs_reqs_to_commit(&cinfo))) |
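Because sub-page requests can now sit side by side on the list, consecutive entries may reference the same page; the rewritten loop therefore tracks last_page and stores each page pointer only once, and the two WARN_ON_ONCE checks catch both array overflow and a final miscount. The deduplication idea in isolation (dedup_pages() is illustrative):

#include <stdio.h>

/* Copy src into dst, collapsing runs of identical consecutive
 * pointers; returns how many slots were used. Mirrors the
 * last_page tracking in the hunk above. */
static int dedup_pages(void **dst, void *const *src, int n)
{
        void *last = NULL;
        int used = 0;

        for (int i = 0; i < n; i++) {
                if (!last || last != src[i]) {
                        dst[used++] = src[i];
                        last = src[i];
                }
        }
        return used;
}

int main(void)
{
        int a = 0, b = 0, c = 0;
        void *reqs[] = { &a, &a, &b, &b, &b, &c };      /* sub-page requests */
        void *pages[6];

        printf("6 requests -> %d distinct pages\n",
               dedup_pages(pages, reqs, 6));            /* prints 3 */
        return 0;
}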
@@ -788,6 +831,14 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev, | |||
788 | return false; | 831 | return false; |
789 | if (req_offset(req) != req_offset(prev) + prev->wb_bytes) | 832 | if (req_offset(req) != req_offset(prev) + prev->wb_bytes) |
790 | return false; | 833 | return false; |
834 | if (req->wb_page == prev->wb_page) { | ||
835 | if (req->wb_pgbase != prev->wb_pgbase + prev->wb_bytes) | ||
836 | return false; | ||
837 | } else { | ||
838 | if (req->wb_pgbase != 0 || | ||
839 | prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE) | ||
840 | return false; | ||
841 | } | ||
791 | } | 842 | } |
792 | size = pgio->pg_ops->pg_test(pgio, prev, req); | 843 | size = pgio->pg_ops->pg_test(pgio, prev, req); |
793 | WARN_ON_ONCE(size > req->wb_bytes); | 844 | WARN_ON_ONCE(size > req->wb_bytes); |
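The checks added to nfs_can_coalesce_requests() spell out when two byte ranges are contiguous: within one page the next range must start exactly where the previous ended, and across pages the previous must end at a page boundary while the next starts at offset 0. The rule as a standalone predicate, with PAGE_CACHE_SIZE fixed at 4096 for the example:

#include <stdbool.h>
#include <stdio.h>

#define PAGE_CACHE_SIZE 4096u

struct req {
        const void *page;       /* which page the bytes live in */
        unsigned pgbase;        /* offset of the bytes within the page */
        unsigned bytes;         /* length of the range */
};

/* Mirrors the page-contiguity rules added to nfs_can_coalesce_requests. */
static bool pages_contiguous(const struct req *prev, const struct req *req)
{
        if (req->page == prev->page)
                return req->pgbase == prev->pgbase + prev->bytes;
        return req->pgbase == 0 &&
               prev->pgbase + prev->bytes == PAGE_CACHE_SIZE;
}

int main(void)
{
        int p1, p2;
        struct req a = { &p1, 0, 512 };
        struct req b = { &p1, 512, 512 };               /* same page, adjacent */
        struct req c = { &p1, PAGE_CACHE_SIZE - 512, 512 };
        struct req d = { &p2, 0, 512 };                 /* next page, from 0 */

        printf("a+b: %d, c+d: %d, a+d: %d\n",
               pages_contiguous(&a, &b),
               pages_contiguous(&c, &d),
               pages_contiguous(&a, &d));               /* 1, 1, 0 */
        return 0;
}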
@@ -858,13 +909,8 @@ static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, | |||
858 | struct nfs_page *subreq; | 909 | struct nfs_page *subreq; |
859 | unsigned int bytes_left = 0; | 910 | unsigned int bytes_left = 0; |
860 | unsigned int offset, pgbase; | 911 | unsigned int offset, pgbase; |
861 | int ret; | ||
862 | 912 | ||
863 | ret = nfs_page_group_lock(req, false); | 913 | nfs_page_group_lock(req, false); |
864 | if (ret < 0) { | ||
865 | desc->pg_error = ret; | ||
866 | return 0; | ||
867 | } | ||
868 | 914 | ||
869 | subreq = req; | 915 | subreq = req; |
870 | bytes_left = subreq->wb_bytes; | 916 | bytes_left = subreq->wb_bytes; |
@@ -886,11 +932,7 @@ static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, | |||
886 | if (desc->pg_recoalesce) | 932 | if (desc->pg_recoalesce) |
887 | return 0; | 933 | return 0; |
888 | /* retry add_request for this subreq */ | 934 | /* retry add_request for this subreq */ |
889 | ret = nfs_page_group_lock(req, false); | 935 | nfs_page_group_lock(req, false); |
890 | if (ret < 0) { | ||
891 | desc->pg_error = ret; | ||
892 | return 0; | ||
893 | } | ||
894 | continue; | 936 | continue; |
895 | } | 937 | } |
896 | 938 | ||
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index a3851debf8a2..76de7f568119 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c | |||
@@ -594,6 +594,9 @@ pnfs_layout_free_bulk_destroy_list(struct list_head *layout_list, | |||
594 | dprintk("%s freeing layout for inode %lu\n", __func__, | 594 | dprintk("%s freeing layout for inode %lu\n", __func__, |
595 | lo->plh_inode->i_ino); | 595 | lo->plh_inode->i_ino); |
596 | inode = lo->plh_inode; | 596 | inode = lo->plh_inode; |
597 | |||
598 | pnfs_layoutcommit_inode(inode, false); | ||
599 | |||
597 | spin_lock(&inode->i_lock); | 600 | spin_lock(&inode->i_lock); |
598 | list_del_init(&lo->plh_bulk_destroy); | 601 | list_del_init(&lo->plh_bulk_destroy); |
599 | lo->plh_block_lgets++; /* permanently block new LAYOUTGETs */ | 602 | lo->plh_block_lgets++; /* permanently block new LAYOUTGETs */ |
@@ -682,17 +685,6 @@ static bool pnfs_seqid_is_newer(u32 s1, u32 s2) | |||
682 | return (s32)(s1 - s2) > 0; | 685 | return (s32)(s1 - s2) > 0; |
683 | } | 686 | } |
684 | 687 | ||
685 | static void | ||
686 | pnfs_verify_layout_stateid(struct pnfs_layout_hdr *lo, | ||
687 | const nfs4_stateid *new, | ||
688 | struct list_head *free_me_list) | ||
689 | { | ||
690 | if (nfs4_stateid_match_other(&lo->plh_stateid, new)) | ||
691 | return; | ||
692 | /* Layout is new! Kill existing layout segments */ | ||
693 | pnfs_mark_matching_lsegs_invalid(lo, free_me_list, NULL); | ||
694 | } | ||
695 | |||
696 | /* update lo->plh_stateid with new if is more recent */ | 688 | /* update lo->plh_stateid with new if is more recent */ |
697 | void | 689 | void |
698 | pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new, | 690 | pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new, |
@@ -749,7 +741,8 @@ pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo, | |||
749 | status = -EAGAIN; | 741 | status = -EAGAIN; |
750 | } else if (!nfs4_valid_open_stateid(open_state)) { | 742 | } else if (!nfs4_valid_open_stateid(open_state)) { |
751 | status = -EBADF; | 743 | status = -EBADF; |
752 | } else if (list_empty(&lo->plh_segs)) { | 744 | } else if (list_empty(&lo->plh_segs) || |
745 | test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags)) { | ||
753 | int seq; | 746 | int seq; |
754 | 747 | ||
755 | do { | 748 | do { |
@@ -864,6 +857,16 @@ _pnfs_return_layout(struct inode *ino) | |||
864 | empty = list_empty(&lo->plh_segs); | 857 | empty = list_empty(&lo->plh_segs); |
865 | pnfs_clear_layoutcommit(ino, &tmp_list); | 858 | pnfs_clear_layoutcommit(ino, &tmp_list); |
866 | pnfs_mark_matching_lsegs_invalid(lo, &tmp_list, NULL); | 859 | pnfs_mark_matching_lsegs_invalid(lo, &tmp_list, NULL); |
860 | |||
861 | if (NFS_SERVER(ino)->pnfs_curr_ld->return_range) { | ||
862 | struct pnfs_layout_range range = { | ||
863 | .iomode = IOMODE_ANY, | ||
864 | .offset = 0, | ||
865 | .length = NFS4_MAX_UINT64, | ||
866 | }; | ||
867 | NFS_SERVER(ino)->pnfs_curr_ld->return_range(lo, &range); | ||
868 | } | ||
869 | |||
867 | /* Don't send a LAYOUTRETURN if list was initially empty */ | 870 | /* Don't send a LAYOUTRETURN if list was initially empty */ |
868 | if (empty) { | 871 | if (empty) { |
869 | spin_unlock(&ino->i_lock); | 872 | spin_unlock(&ino->i_lock); |
@@ -871,6 +874,8 @@ _pnfs_return_layout(struct inode *ino) | |||
871 | dprintk("NFS: %s no layout segments to return\n", __func__); | 874 | dprintk("NFS: %s no layout segments to return\n", __func__); |
872 | goto out; | 875 | goto out; |
873 | } | 876 | } |
877 | |||
878 | set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags); | ||
874 | lo->plh_block_lgets++; | 879 | lo->plh_block_lgets++; |
875 | spin_unlock(&ino->i_lock); | 880 | spin_unlock(&ino->i_lock); |
876 | pnfs_free_lseg_list(&tmp_list); | 881 | pnfs_free_lseg_list(&tmp_list); |
@@ -1358,25 +1363,41 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) | |||
1358 | goto out; | 1363 | goto out; |
1359 | } | 1364 | } |
1360 | 1365 | ||
1366 | init_lseg(lo, lseg); | ||
1367 | lseg->pls_range = res->range; | ||
1368 | |||
1361 | spin_lock(&ino->i_lock); | 1369 | spin_lock(&ino->i_lock); |
1362 | if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) { | 1370 | if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) { |
1363 | dprintk("%s forget reply due to recall\n", __func__); | 1371 | dprintk("%s forget reply due to recall\n", __func__); |
1364 | goto out_forget_reply; | 1372 | goto out_forget_reply; |
1365 | } | 1373 | } |
1366 | 1374 | ||
1367 | if (pnfs_layoutgets_blocked(lo, 1) || | 1375 | if (pnfs_layoutgets_blocked(lo, 1)) { |
1368 | pnfs_layout_stateid_blocked(lo, &res->stateid)) { | ||
1369 | dprintk("%s forget reply due to state\n", __func__); | 1376 | dprintk("%s forget reply due to state\n", __func__); |
1370 | goto out_forget_reply; | 1377 | goto out_forget_reply; |
1371 | } | 1378 | } |
1372 | 1379 | ||
1373 | /* Check that the new stateid matches the old stateid */ | 1380 | if (nfs4_stateid_match_other(&lo->plh_stateid, &res->stateid)) { |
1374 | pnfs_verify_layout_stateid(lo, &res->stateid, &free_me); | 1381 | /* existing state ID, make sure the sequence number matches. */ |
1375 | /* Done processing layoutget. Set the layout stateid */ | 1382 | if (pnfs_layout_stateid_blocked(lo, &res->stateid)) { |
1376 | pnfs_set_layout_stateid(lo, &res->stateid, false); | 1383 | dprintk("%s forget reply due to sequence\n", __func__); |
1384 | goto out_forget_reply; | ||
1385 | } | ||
1386 | pnfs_set_layout_stateid(lo, &res->stateid, false); | ||
1387 | } else { | ||
1388 | /* | ||
1389 | * We got an entirely new state ID. Mark all segments for the | ||
1390 | * inode invalid, and don't bother validating the stateid | ||
1391 | * sequence number. | ||
1392 | */ | ||
1393 | pnfs_mark_matching_lsegs_invalid(lo, &free_me, NULL); | ||
1394 | |||
1395 | nfs4_stateid_copy(&lo->plh_stateid, &res->stateid); | ||
1396 | lo->plh_barrier = be32_to_cpu(res->stateid.seqid); | ||
1397 | } | ||
1398 | |||
1399 | clear_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags); | ||
1377 | 1400 | ||
1378 | init_lseg(lo, lseg); | ||
1379 | lseg->pls_range = res->range; | ||
1380 | pnfs_get_lseg(lseg); | 1401 | pnfs_get_lseg(lseg); |
1381 | pnfs_layout_insert_lseg(lo, lseg); | 1402 | pnfs_layout_insert_lseg(lo, lseg); |
1382 | 1403 | ||
@@ -1797,6 +1818,35 @@ pnfs_set_layoutcommit(struct nfs_pgio_header *hdr) | |||
1797 | } | 1818 | } |
1798 | EXPORT_SYMBOL_GPL(pnfs_set_layoutcommit); | 1819 | EXPORT_SYMBOL_GPL(pnfs_set_layoutcommit); |
1799 | 1820 | ||
1821 | void pnfs_commit_set_layoutcommit(struct nfs_commit_data *data) | ||
1822 | { | ||
1823 | struct inode *inode = data->inode; | ||
1824 | struct nfs_inode *nfsi = NFS_I(inode); | ||
1825 | bool mark_as_dirty = false; | ||
1826 | |||
1827 | spin_lock(&inode->i_lock); | ||
1828 | if (!test_and_set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) { | ||
1829 | mark_as_dirty = true; | ||
1830 | dprintk("%s: Set layoutcommit for inode %lu ", | ||
1831 | __func__, inode->i_ino); | ||
1832 | } | ||
1833 | if (!test_and_set_bit(NFS_LSEG_LAYOUTCOMMIT, &data->lseg->pls_flags)) { | ||
1834 | /* references matched in nfs4_layoutcommit_release */ | ||
1835 | pnfs_get_lseg(data->lseg); | ||
1836 | } | ||
1837 | if (data->lwb > nfsi->layout->plh_lwb) | ||
1838 | nfsi->layout->plh_lwb = data->lwb; | ||
1839 | spin_unlock(&inode->i_lock); | ||
1840 | dprintk("%s: lseg %p end_pos %llu\n", | ||
1841 | __func__, data->lseg, nfsi->layout->plh_lwb); | ||
1842 | |||
1843 | /* if pnfs_layoutcommit_inode() runs between inode locks, the next one | ||
1844 | * will be a noop because NFS_INO_LAYOUTCOMMIT will not be set */ | ||
1845 | if (mark_as_dirty) | ||
1846 | mark_inode_dirty_sync(inode); | ||
1847 | } | ||
1848 | EXPORT_SYMBOL_GPL(pnfs_commit_set_layoutcommit); | ||
1849 | |||
1800 | void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data) | 1850 | void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data) |
1801 | { | 1851 | { |
1802 | struct nfs_server *nfss = NFS_SERVER(data->args.inode); | 1852 | struct nfs_server *nfss = NFS_SERVER(data->args.inode); |
@@ -1817,6 +1867,7 @@ void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data) | |||
1817 | int | 1867 | int |
1818 | pnfs_layoutcommit_inode(struct inode *inode, bool sync) | 1868 | pnfs_layoutcommit_inode(struct inode *inode, bool sync) |
1819 | { | 1869 | { |
1870 | struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; | ||
1820 | struct nfs4_layoutcommit_data *data; | 1871 | struct nfs4_layoutcommit_data *data; |
1821 | struct nfs_inode *nfsi = NFS_I(inode); | 1872 | struct nfs_inode *nfsi = NFS_I(inode); |
1822 | loff_t end_pos; | 1873 | loff_t end_pos; |
@@ -1867,6 +1918,20 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync) | |||
1867 | data->args.lastbytewritten = end_pos - 1; | 1918 | data->args.lastbytewritten = end_pos - 1; |
1868 | data->res.server = NFS_SERVER(inode); | 1919 | data->res.server = NFS_SERVER(inode); |
1869 | 1920 | ||
1921 | if (ld->prepare_layoutcommit) { | ||
1922 | status = ld->prepare_layoutcommit(&data->args); | ||
1923 | if (status) { | ||
1924 | spin_lock(&inode->i_lock); | ||
1925 | if (end_pos < nfsi->layout->plh_lwb) | ||
1926 | nfsi->layout->plh_lwb = end_pos; | ||
1927 | spin_unlock(&inode->i_lock); | ||
1928 | put_rpccred(data->cred); | ||
1929 | set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags); | ||
1930 | goto clear_layoutcommitting; | ||
1931 | } | ||
1932 | } | ||
1933 | |||
1934 | |||
1870 | status = nfs4_proc_layoutcommit(data, sync); | 1935 | status = nfs4_proc_layoutcommit(data, sync); |
1871 | out: | 1936 | out: |
1872 | if (status) | 1937 | if (status) |
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index aca3dff5dae6..693ce42ec683 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h | |||
@@ -65,12 +65,15 @@ enum { | |||
65 | NFS_LAYOUT_BULK_RECALL, /* bulk recall affecting layout */ | 65 | NFS_LAYOUT_BULK_RECALL, /* bulk recall affecting layout */ |
66 | NFS_LAYOUT_ROC, /* some lseg had roc bit set */ | 66 | NFS_LAYOUT_ROC, /* some lseg had roc bit set */ |
67 | NFS_LAYOUT_RETURN, /* Return this layout ASAP */ | 67 | NFS_LAYOUT_RETURN, /* Return this layout ASAP */ |
68 | NFS_LAYOUT_INVALID_STID, /* layout stateid id is invalid */ | ||
68 | }; | 69 | }; |
69 | 70 | ||
70 | enum layoutdriver_policy_flags { | 71 | enum layoutdriver_policy_flags { |
71 | /* Should the pNFS client commit and return the layout upon a setattr */ | 72 | /* Should the pNFS client commit and return the layout upon truncate to |
73 | * a smaller size */ | ||
72 | PNFS_LAYOUTRET_ON_SETATTR = 1 << 0, | 74 | PNFS_LAYOUTRET_ON_SETATTR = 1 << 0, |
73 | PNFS_LAYOUTRET_ON_ERROR = 1 << 1, | 75 | PNFS_LAYOUTRET_ON_ERROR = 1 << 1, |
76 | PNFS_READ_WHOLE_PAGE = 1 << 2, | ||
74 | }; | 77 | }; |
75 | 78 | ||
76 | struct nfs4_deviceid_node; | 79 | struct nfs4_deviceid_node; |
@@ -82,6 +85,7 @@ struct pnfs_layoutdriver_type { | |||
82 | const char *name; | 85 | const char *name; |
83 | struct module *owner; | 86 | struct module *owner; |
84 | unsigned flags; | 87 | unsigned flags; |
88 | unsigned max_deviceinfo_size; | ||
85 | 89 | ||
86 | int (*set_layoutdriver) (struct nfs_server *, const struct nfs_fh *); | 90 | int (*set_layoutdriver) (struct nfs_server *, const struct nfs_fh *); |
87 | int (*clear_layoutdriver) (struct nfs_server *); | 91 | int (*clear_layoutdriver) (struct nfs_server *); |
@@ -92,6 +96,9 @@ struct pnfs_layoutdriver_type { | |||
92 | struct pnfs_layout_segment * (*alloc_lseg) (struct pnfs_layout_hdr *layoutid, struct nfs4_layoutget_res *lgr, gfp_t gfp_flags); | 96 | struct pnfs_layout_segment * (*alloc_lseg) (struct pnfs_layout_hdr *layoutid, struct nfs4_layoutget_res *lgr, gfp_t gfp_flags); |
93 | void (*free_lseg) (struct pnfs_layout_segment *lseg); | 97 | void (*free_lseg) (struct pnfs_layout_segment *lseg); |
94 | 98 | ||
99 | void (*return_range) (struct pnfs_layout_hdr *lo, | ||
100 | struct pnfs_layout_range *range); | ||
101 | |||
95 | /* test for nfs page cache coalescing */ | 102 | /* test for nfs page cache coalescing */ |
96 | const struct nfs_pageio_ops *pg_read_ops; | 103 | const struct nfs_pageio_ops *pg_read_ops; |
97 | const struct nfs_pageio_ops *pg_write_ops; | 104 | const struct nfs_pageio_ops *pg_write_ops; |
@@ -121,14 +128,17 @@ struct pnfs_layoutdriver_type { | |||
121 | enum pnfs_try_status (*write_pagelist)(struct nfs_pgio_header *, int); | 128 | enum pnfs_try_status (*write_pagelist)(struct nfs_pgio_header *, int); |
122 | 129 | ||
123 | void (*free_deviceid_node) (struct nfs4_deviceid_node *); | 130 | void (*free_deviceid_node) (struct nfs4_deviceid_node *); |
131 | struct nfs4_deviceid_node * (*alloc_deviceid_node) | ||
132 | (struct nfs_server *server, struct pnfs_device *pdev, | ||
133 | gfp_t gfp_flags); | ||
124 | 134 | ||
125 | void (*encode_layoutreturn) (struct pnfs_layout_hdr *layoutid, | 135 | void (*encode_layoutreturn) (struct pnfs_layout_hdr *layoutid, |
126 | struct xdr_stream *xdr, | 136 | struct xdr_stream *xdr, |
127 | const struct nfs4_layoutreturn_args *args); | 137 | const struct nfs4_layoutreturn_args *args); |
128 | 138 | ||
129 | void (*cleanup_layoutcommit) (struct nfs4_layoutcommit_data *data); | 139 | void (*cleanup_layoutcommit) (struct nfs4_layoutcommit_data *data); |
130 | 140 | int (*prepare_layoutcommit) (struct nfs4_layoutcommit_args *args); | |
131 | void (*encode_layoutcommit) (struct pnfs_layout_hdr *layoutid, | 141 | void (*encode_layoutcommit) (struct pnfs_layout_hdr *lo, |
132 | struct xdr_stream *xdr, | 142 | struct xdr_stream *xdr, |
133 | const struct nfs4_layoutcommit_args *args); | 143 | const struct nfs4_layoutcommit_args *args); |
134 | }; | 144 | }; |
@@ -171,9 +181,6 @@ extern int pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *); | |||
171 | extern void pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *); | 181 | extern void pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *); |
172 | 182 | ||
173 | /* nfs4proc.c */ | 183 | /* nfs4proc.c */ |
174 | extern int nfs4_proc_getdevicelist(struct nfs_server *server, | ||
175 | const struct nfs_fh *fh, | ||
176 | struct pnfs_devicelist *devlist); | ||
177 | extern int nfs4_proc_getdeviceinfo(struct nfs_server *server, | 184 | extern int nfs4_proc_getdeviceinfo(struct nfs_server *server, |
178 | struct pnfs_device *dev, | 185 | struct pnfs_device *dev, |
179 | struct rpc_cred *cred); | 186 | struct rpc_cred *cred); |
@@ -219,6 +226,7 @@ void pnfs_roc_release(struct inode *ino); | |||
219 | void pnfs_roc_set_barrier(struct inode *ino, u32 barrier); | 226 | void pnfs_roc_set_barrier(struct inode *ino, u32 barrier); |
220 | bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task); | 227 | bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task); |
221 | void pnfs_set_layoutcommit(struct nfs_pgio_header *); | 228 | void pnfs_set_layoutcommit(struct nfs_pgio_header *); |
229 | void pnfs_commit_set_layoutcommit(struct nfs_commit_data *data); | ||
222 | void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data); | 230 | void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data); |
223 | int pnfs_layoutcommit_inode(struct inode *inode, bool sync); | 231 | int pnfs_layoutcommit_inode(struct inode *inode, bool sync); |
224 | int _pnfs_return_layout(struct inode *); | 232 | int _pnfs_return_layout(struct inode *); |
@@ -255,11 +263,12 @@ struct nfs4_deviceid_node { | |||
255 | atomic_t ref; | 263 | atomic_t ref; |
256 | }; | 264 | }; |
257 | 265 | ||
258 | struct nfs4_deviceid_node *nfs4_find_get_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *); | 266 | struct nfs4_deviceid_node * |
267 | nfs4_find_get_deviceid(struct nfs_server *server, | ||
268 | const struct nfs4_deviceid *id, struct rpc_cred *cred, | ||
269 | gfp_t gfp_mask); | ||
259 | void nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *); | 270 | void nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *); |
260 | void nfs4_init_deviceid_node(struct nfs4_deviceid_node *, | 271 | void nfs4_init_deviceid_node(struct nfs4_deviceid_node *, struct nfs_server *, |
261 | const struct pnfs_layoutdriver_type *, | ||
262 | const struct nfs_client *, | ||
263 | const struct nfs4_deviceid *); | 272 | const struct nfs4_deviceid *); |
264 | struct nfs4_deviceid_node *nfs4_insert_deviceid_node(struct nfs4_deviceid_node *); | 273 | struct nfs4_deviceid_node *nfs4_insert_deviceid_node(struct nfs4_deviceid_node *); |
265 | bool nfs4_put_deviceid_node(struct nfs4_deviceid_node *); | 274 | bool nfs4_put_deviceid_node(struct nfs4_deviceid_node *); |
@@ -267,6 +276,13 @@ void nfs4_mark_deviceid_unavailable(struct nfs4_deviceid_node *node); | |||
267 | bool nfs4_test_deviceid_unavailable(struct nfs4_deviceid_node *node); | 276 | bool nfs4_test_deviceid_unavailable(struct nfs4_deviceid_node *node); |
268 | void nfs4_deviceid_purge_client(const struct nfs_client *); | 277 | void nfs4_deviceid_purge_client(const struct nfs_client *); |
269 | 278 | ||
279 | static inline struct nfs4_deviceid_node * | ||
280 | nfs4_get_deviceid(struct nfs4_deviceid_node *d) | ||
281 | { | ||
282 | atomic_inc(&d->ref); | ||
283 | return d; | ||
284 | } | ||
285 | |||
270 | static inline struct pnfs_layout_segment * | 286 | static inline struct pnfs_layout_segment * |
271 | pnfs_get_lseg(struct pnfs_layout_segment *lseg) | 287 | pnfs_get_lseg(struct pnfs_layout_segment *lseg) |
272 | { | 288 | { |
@@ -368,6 +384,14 @@ pnfs_ld_layoutret_on_setattr(struct inode *inode) | |||
368 | } | 384 | } |
369 | 385 | ||
370 | static inline bool | 386 | static inline bool |
387 | pnfs_ld_read_whole_page(struct inode *inode) | ||
388 | { | ||
389 | if (!pnfs_enabled_sb(NFS_SERVER(inode))) | ||
390 | return false; | ||
391 | return NFS_SERVER(inode)->pnfs_curr_ld->flags & PNFS_READ_WHOLE_PAGE; | ||
392 | } | ||
393 | |||
394 | static inline bool | ||
371 | pnfs_layoutcommit_outstanding(struct inode *inode) | 395 | pnfs_layoutcommit_outstanding(struct inode *inode) |
372 | { | 396 | { |
373 | struct nfs_inode *nfsi = NFS_I(inode); | 397 | struct nfs_inode *nfsi = NFS_I(inode); |
@@ -443,6 +467,12 @@ pnfs_ld_layoutret_on_setattr(struct inode *inode) | |||
443 | } | 467 | } |
444 | 468 | ||
445 | static inline bool | 469 | static inline bool |
470 | pnfs_ld_read_whole_page(struct inode *inode) | ||
471 | { | ||
472 | return false; | ||
473 | } | ||
474 | |||
475 | static inline bool | ||
446 | pnfs_roc(struct inode *ino) | 476 | pnfs_roc(struct inode *ino) |
447 | { | 477 | { |
448 | return false; | 478 | return false; |
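
The header changes above leave deviceid nodes with two reference helpers: the new inline nfs4_get_deviceid(), which is a bare atomic_inc(), and the existing nfs4_put_deviceid_node(). A minimal user-space sketch of that get/put discipline, with device_get()/device_put() as hypothetical stand-ins for the kernel helpers:

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct device_node {                    /* stand-in for struct nfs4_deviceid_node */
	atomic_int ref;
};

static struct device_node *device_get(struct device_node *d)
{
	atomic_fetch_add(&d->ref, 1);   /* mirrors atomic_inc(&d->ref) */
	return d;
}

static void device_put(struct device_node *d)
{
	/* free on the 1 -> 0 transition, as nfs4_put_deviceid_node does
	 * once the node has been unhashed */
	if (atomic_fetch_sub(&d->ref, 1) == 1)
		free(d);
}

int main(void)
{
	struct device_node *d = malloc(sizeof(*d));

	atomic_init(&d->ref, 1);        /* initial reference held by the cache */
	device_get(d);                  /* lookup path takes its own reference */
	device_put(d);                  /* caller drops it; node survives */
	printf("ref = %d\n", atomic_load(&d->ref));   /* ref = 1 */
	device_put(d);                  /* last put frees the node */
	return 0;
}
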
diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c index 6da209bd9408..aa2ec0015183 100644 --- a/fs/nfs/pnfs_dev.c +++ b/fs/nfs/pnfs_dev.c | |||
@@ -29,6 +29,9 @@ | |||
29 | */ | 29 | */ |
30 | 30 | ||
31 | #include <linux/export.h> | 31 | #include <linux/export.h> |
32 | #include <linux/nfs_fs.h> | ||
33 | #include "nfs4session.h" | ||
34 | #include "internal.h" | ||
32 | #include "pnfs.h" | 35 | #include "pnfs.h" |
33 | 36 | ||
34 | #define NFSDBG_FACILITY NFSDBG_PNFS | 37 | #define NFSDBG_FACILITY NFSDBG_PNFS |
@@ -89,6 +92,74 @@ _lookup_deviceid(const struct pnfs_layoutdriver_type *ld, | |||
89 | return NULL; | 92 | return NULL; |
90 | } | 93 | } |
91 | 94 | ||
95 | static struct nfs4_deviceid_node * | ||
96 | nfs4_get_device_info(struct nfs_server *server, | ||
97 | const struct nfs4_deviceid *dev_id, | ||
98 | struct rpc_cred *cred, gfp_t gfp_flags) | ||
99 | { | ||
100 | struct nfs4_deviceid_node *d = NULL; | ||
101 | struct pnfs_device *pdev = NULL; | ||
102 | struct page **pages = NULL; | ||
103 | u32 max_resp_sz; | ||
104 | int max_pages; | ||
105 | int rc, i; | ||
106 | |||
107 | /* | ||
108 | * Use the session max response size as the basis for setting | ||
109 | * GETDEVICEINFO's maxcount | ||
110 | */ | ||
111 | max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz; | ||
112 | if (server->pnfs_curr_ld->max_deviceinfo_size && | ||
113 | server->pnfs_curr_ld->max_deviceinfo_size < max_resp_sz) | ||
114 | max_resp_sz = server->pnfs_curr_ld->max_deviceinfo_size; | ||
115 | max_pages = nfs_page_array_len(0, max_resp_sz); | ||
116 | dprintk("%s: server %p max_resp_sz %u max_pages %d\n", | ||
117 | __func__, server, max_resp_sz, max_pages); | ||
118 | |||
119 | pdev = kzalloc(sizeof(*pdev), gfp_flags); | ||
120 | if (!pdev) | ||
121 | return NULL; | ||
122 | |||
123 | pages = kcalloc(max_pages, sizeof(struct page *), gfp_flags); | ||
124 | if (!pages) | ||
125 | goto out_free_pdev; | ||
126 | |||
127 | for (i = 0; i < max_pages; i++) { | ||
128 | pages[i] = alloc_page(gfp_flags); | ||
129 | if (!pages[i]) | ||
130 | goto out_free_pages; | ||
131 | } | ||
132 | |||
133 | memcpy(&pdev->dev_id, dev_id, sizeof(*dev_id)); | ||
134 | pdev->layout_type = server->pnfs_curr_ld->id; | ||
135 | pdev->pages = pages; | ||
136 | pdev->pgbase = 0; | ||
137 | pdev->pglen = max_resp_sz; | ||
138 | pdev->mincount = 0; | ||
139 | pdev->maxcount = max_resp_sz - nfs41_maxgetdevinfo_overhead; | ||
140 | |||
141 | rc = nfs4_proc_getdeviceinfo(server, pdev, cred); | ||
142 | dprintk("%s getdevice info returns %d\n", __func__, rc); | ||
143 | if (rc) | ||
144 | goto out_free_pages; | ||
145 | |||
146 | /* | ||
147 | * Found new device, need to decode it and then add it to the | ||
148 | * list of known devices for this mountpoint. | ||
149 | */ | ||
150 | d = server->pnfs_curr_ld->alloc_deviceid_node(server, pdev, | ||
151 | gfp_flags); | ||
152 | |||
153 | out_free_pages: | ||
154 | for (i = 0; i < max_pages; i++) | ||
155 | __free_page(pages[i]); | ||
156 | kfree(pages); | ||
157 | out_free_pdev: | ||
158 | kfree(pdev); | ||
159 | dprintk("<-- %s d %p\n", __func__, d); | ||
160 | return d; | ||
161 | } | ||
162 | |||
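
As a sanity check on the sizing logic in nfs4_get_device_info(), assume a 64 KiB session max_resp_sz and 4 KiB pages; nfs_page_array_len(0, len) then reduces to a round-up division, and the overhead constant below is a placeholder for nfs41_maxgetdevinfo_overhead rather than its real value:

#include <stdio.h>

#define PAGE_SIZE 4096u

/* simplified model of nfs_page_array_len(0, len) */
static unsigned page_array_len(unsigned len)
{
	return (len + PAGE_SIZE - 1) / PAGE_SIZE;
}

int main(void)
{
	unsigned max_resp_sz = 65536;   /* assumed fc_attrs.max_resp_sz */
	unsigned overhead = 20;         /* placeholder, not the kernel constant */

	printf("max_pages = %u\n", page_array_len(max_resp_sz));   /* 16 */
	printf("maxcount  = %u\n", max_resp_sz - overhead);
	return 0;
}
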
92 | /* | 163 | /* |
93 | * Lookup a deviceid in cache and get a reference count on it if found | 164 | * Lookup a deviceid in cache and get a reference count on it if found |
94 | * | 165 | * |
@@ -96,14 +167,14 @@ _lookup_deviceid(const struct pnfs_layoutdriver_type *ld, | |||
96 | * @id deviceid to look up | 167 | * @id deviceid to look up |
97 | */ | 168 | */ |
98 | static struct nfs4_deviceid_node * | 169 | static struct nfs4_deviceid_node * |
99 | _find_get_deviceid(const struct pnfs_layoutdriver_type *ld, | 170 | __nfs4_find_get_deviceid(struct nfs_server *server, |
100 | const struct nfs_client *clp, const struct nfs4_deviceid *id, | 171 | const struct nfs4_deviceid *id, long hash) |
101 | long hash) | ||
102 | { | 172 | { |
103 | struct nfs4_deviceid_node *d; | 173 | struct nfs4_deviceid_node *d; |
104 | 174 | ||
105 | rcu_read_lock(); | 175 | rcu_read_lock(); |
106 | d = _lookup_deviceid(ld, clp, id, hash); | 176 | d = _lookup_deviceid(server->pnfs_curr_ld, server->nfs_client, id, |
177 | hash); | ||
107 | if (d != NULL) | 178 | if (d != NULL) |
108 | atomic_inc(&d->ref); | 179 | atomic_inc(&d->ref); |
109 | rcu_read_unlock(); | 180 | rcu_read_unlock(); |
@@ -111,10 +182,33 @@ _find_get_deviceid(const struct pnfs_layoutdriver_type *ld, | |||
111 | } | 182 | } |
112 | 183 | ||
113 | struct nfs4_deviceid_node * | 184 | struct nfs4_deviceid_node * |
114 | nfs4_find_get_deviceid(const struct pnfs_layoutdriver_type *ld, | 185 | nfs4_find_get_deviceid(struct nfs_server *server, |
115 | const struct nfs_client *clp, const struct nfs4_deviceid *id) | 186 | const struct nfs4_deviceid *id, struct rpc_cred *cred, |
187 | gfp_t gfp_mask) | ||
116 | { | 188 | { |
117 | return _find_get_deviceid(ld, clp, id, nfs4_deviceid_hash(id)); | 189 | long hash = nfs4_deviceid_hash(id); |
190 | struct nfs4_deviceid_node *d, *new; | ||
191 | |||
192 | d = __nfs4_find_get_deviceid(server, id, hash); | ||
193 | if (d) | ||
194 | return d; | ||
195 | |||
196 | new = nfs4_get_device_info(server, id, cred, gfp_mask); | ||
197 | if (!new) | ||
198 | return new; | ||
199 | |||
200 | spin_lock(&nfs4_deviceid_lock); | ||
201 | d = __nfs4_find_get_deviceid(server, id, hash); | ||
202 | if (d) { | ||
203 | spin_unlock(&nfs4_deviceid_lock); | ||
204 | server->pnfs_curr_ld->free_deviceid_node(new); | ||
205 | return d; | ||
206 | } | ||
207 | hlist_add_head_rcu(&new->node, &nfs4_deviceid_cache[hash]); | ||
208 | atomic_inc(&new->ref); | ||
209 | spin_unlock(&nfs4_deviceid_lock); | ||
210 | |||
211 | return new; | ||
118 | } | 212 | } |
119 | EXPORT_SYMBOL_GPL(nfs4_find_get_deviceid); | 213 | EXPORT_SYMBOL_GPL(nfs4_find_get_deviceid); |
120 | 214 | ||
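
The rewritten nfs4_find_get_deviceid() is the usual optimistic-allocation shape: look up first, do the expensive work (a GETDEVICEINFO round trip) with no lock held, then re-check under nfs4_deviceid_lock and discard the new node if another thread won the race. A generic, runnable sketch of the same pattern using a mutex where the kernel uses RCU plus a spinlock; all names are illustrative:

#include <pthread.h>
#include <stdlib.h>

struct node { int key; struct node *next; };

static struct node *table;
static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;

static struct node *lookup_locked(int key)
{
	struct node *n;

	for (n = table; n; n = n->next)
		if (n->key == key)
			return n;
	return NULL;
}

static struct node *find_or_insert(int key)
{
	struct node *n, *new;

	pthread_mutex_lock(&table_lock);   /* the kernel uses rcu_read_lock() here */
	n = lookup_locked(key);
	pthread_mutex_unlock(&table_lock);
	if (n)
		return n;

	new = calloc(1, sizeof(*new));     /* slow work done with no lock held,
					    * like the GETDEVICEINFO round trip */
	if (!new)
		return NULL;
	new->key = key;

	pthread_mutex_lock(&table_lock);
	n = lookup_locked(key);            /* re-check: another thread may have
					    * inserted while we were unlocked */
	if (n) {
		pthread_mutex_unlock(&table_lock);
		free(new);                 /* lost the race: discard our copy */
		return n;
	}
	new->next = table;                 /* won: publish under the lock */
	table = new;
	pthread_mutex_unlock(&table_lock);
	return new;
}
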
@@ -151,15 +245,13 @@ nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *ld, | |||
151 | EXPORT_SYMBOL_GPL(nfs4_delete_deviceid); | 245 | EXPORT_SYMBOL_GPL(nfs4_delete_deviceid); |
152 | 246 | ||
153 | void | 247 | void |
154 | nfs4_init_deviceid_node(struct nfs4_deviceid_node *d, | 248 | nfs4_init_deviceid_node(struct nfs4_deviceid_node *d, struct nfs_server *server, |
155 | const struct pnfs_layoutdriver_type *ld, | ||
156 | const struct nfs_client *nfs_client, | ||
157 | const struct nfs4_deviceid *id) | 249 | const struct nfs4_deviceid *id) |
158 | { | 250 | { |
159 | INIT_HLIST_NODE(&d->node); | 251 | INIT_HLIST_NODE(&d->node); |
160 | INIT_HLIST_NODE(&d->tmpnode); | 252 | INIT_HLIST_NODE(&d->tmpnode); |
161 | d->ld = ld; | 253 | d->ld = server->pnfs_curr_ld; |
162 | d->nfs_client = nfs_client; | 254 | d->nfs_client = server->nfs_client; |
163 | d->flags = 0; | 255 | d->flags = 0; |
164 | d->deviceid = *id; | 256 | d->deviceid = *id; |
165 | atomic_set(&d->ref, 1); | 257 | atomic_set(&d->ref, 1); |
@@ -167,39 +259,6 @@ nfs4_init_deviceid_node(struct nfs4_deviceid_node *d, | |||
167 | EXPORT_SYMBOL_GPL(nfs4_init_deviceid_node); | 259 | EXPORT_SYMBOL_GPL(nfs4_init_deviceid_node); |
168 | 260 | ||
169 | /* | 261 | /* |
170 | * Uniquely initialize and insert a deviceid node into cache | ||
171 | * | ||
172 | * @new new deviceid node | ||
173 | * Note that the caller must set up the following members: | ||
174 | * new->ld | ||
175 | * new->nfs_client | ||
176 | * new->deviceid | ||
177 | * | ||
178 | * @ret the inserted node, if none found, otherwise, the found entry. | ||
179 | */ | ||
180 | struct nfs4_deviceid_node * | ||
181 | nfs4_insert_deviceid_node(struct nfs4_deviceid_node *new) | ||
182 | { | ||
183 | struct nfs4_deviceid_node *d; | ||
184 | long hash; | ||
185 | |||
186 | spin_lock(&nfs4_deviceid_lock); | ||
187 | hash = nfs4_deviceid_hash(&new->deviceid); | ||
188 | d = _find_get_deviceid(new->ld, new->nfs_client, &new->deviceid, hash); | ||
189 | if (d) { | ||
190 | spin_unlock(&nfs4_deviceid_lock); | ||
191 | return d; | ||
192 | } | ||
193 | |||
194 | hlist_add_head_rcu(&new->node, &nfs4_deviceid_cache[hash]); | ||
195 | spin_unlock(&nfs4_deviceid_lock); | ||
196 | atomic_inc(&new->ref); | ||
197 | |||
198 | return new; | ||
199 | } | ||
200 | EXPORT_SYMBOL_GPL(nfs4_insert_deviceid_node); | ||
201 | |||
202 | /* | ||
203 | * Dereference a deviceid node and delete it when its reference count drops | 262 | * Dereference a deviceid node and delete it when its reference count drops |
204 | * to zero. | 263 | * to zero. |
205 | * | 264 | * |
@@ -299,4 +358,3 @@ nfs4_deviceid_mark_client_invalid(struct nfs_client *clp) | |||
299 | } | 358 | } |
300 | rcu_read_unlock(); | 359 | rcu_read_unlock(); |
301 | } | 360 | } |
302 | |||
diff --git a/fs/nfs/super.c b/fs/nfs/super.c index e4499d5b51e8..31a11b0e885d 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c | |||
@@ -2065,11 +2065,6 @@ static int nfs23_validate_mount_data(void *options, | |||
2065 | return NFS_TEXT_DATA; | 2065 | return NFS_TEXT_DATA; |
2066 | } | 2066 | } |
2067 | 2067 | ||
2068 | #if !IS_ENABLED(CONFIG_NFS_V3) | ||
2069 | if (args->version == 3) | ||
2070 | goto out_v3_not_compiled; | ||
2071 | #endif /* !CONFIG_NFS_V3 */ | ||
2072 | |||
2073 | return 0; | 2068 | return 0; |
2074 | 2069 | ||
2075 | out_no_data: | 2070 | out_no_data: |
@@ -2085,12 +2080,6 @@ out_no_sec: | |||
2085 | dfprintk(MOUNT, "NFS: nfs_mount_data version supports only AUTH_SYS\n"); | 2080 | dfprintk(MOUNT, "NFS: nfs_mount_data version supports only AUTH_SYS\n"); |
2086 | return -EINVAL; | 2081 | return -EINVAL; |
2087 | 2082 | ||
2088 | #if !IS_ENABLED(CONFIG_NFS_V3) | ||
2089 | out_v3_not_compiled: | ||
2090 | dfprintk(MOUNT, "NFS: NFSv3 is not compiled into kernel\n"); | ||
2091 | return -EPROTONOSUPPORT; | ||
2092 | #endif /* !CONFIG_NFS_V3 */ | ||
2093 | |||
2094 | out_nomem: | 2083 | out_nomem: |
2095 | dfprintk(MOUNT, "NFS: not enough memory to handle mount options\n"); | 2084 | dfprintk(MOUNT, "NFS: not enough memory to handle mount options\n"); |
2096 | return -ENOMEM; | 2085 | return -ENOMEM; |
diff --git a/fs/nfs/write.c b/fs/nfs/write.c index e3b5cf28bdc5..12493846a2d3 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c | |||
@@ -49,6 +49,9 @@ static const struct nfs_rw_ops nfs_rw_write_ops; | |||
49 | static void nfs_clear_request_commit(struct nfs_page *req); | 49 | static void nfs_clear_request_commit(struct nfs_page *req); |
50 | static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo, | 50 | static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo, |
51 | struct inode *inode); | 51 | struct inode *inode); |
52 | static struct nfs_page * | ||
53 | nfs_page_search_commits_for_head_request_locked(struct nfs_inode *nfsi, | ||
54 | struct page *page); | ||
52 | 55 | ||
53 | static struct kmem_cache *nfs_wdata_cachep; | 56 | static struct kmem_cache *nfs_wdata_cachep; |
54 | static mempool_t *nfs_wdata_mempool; | 57 | static mempool_t *nfs_wdata_mempool; |
@@ -95,38 +98,6 @@ static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error) | |||
95 | } | 98 | } |
96 | 99 | ||
97 | /* | 100 | /* |
98 | * nfs_page_search_commits_for_head_request_locked | ||
99 | * | ||
100 | * Search through commit lists on @inode for the head request for @page. | ||
101 | * Must be called while holding the inode (which is cinfo) lock. | ||
102 | * | ||
103 | * Returns the head request if found, or NULL if not found. | ||
104 | */ | ||
105 | static struct nfs_page * | ||
106 | nfs_page_search_commits_for_head_request_locked(struct nfs_inode *nfsi, | ||
107 | struct page *page) | ||
108 | { | ||
109 | struct nfs_page *freq, *t; | ||
110 | struct nfs_commit_info cinfo; | ||
111 | struct inode *inode = &nfsi->vfs_inode; | ||
112 | |||
113 | nfs_init_cinfo_from_inode(&cinfo, inode); | ||
114 | |||
115 | /* search through pnfs commit lists */ | ||
116 | freq = pnfs_search_commit_reqs(inode, &cinfo, page); | ||
117 | if (freq) | ||
118 | return freq->wb_head; | ||
119 | |||
120 | /* Linearly search the commit list for the correct request */ | ||
121 | list_for_each_entry_safe(freq, t, &cinfo.mds->list, wb_list) { | ||
122 | if (freq->wb_page == page) | ||
123 | return freq->wb_head; | ||
124 | } | ||
125 | |||
126 | return NULL; | ||
127 | } | ||
128 | |||
129 | /* | ||
130 | * nfs_page_find_head_request_locked - find head request associated with @page | 101 | * nfs_page_find_head_request_locked - find head request associated with @page |
131 | * | 102 | * |
132 | * must be called while holding the inode lock. | 103 | * must be called while holding the inode lock. |
@@ -241,7 +212,7 @@ static bool nfs_page_group_covers_page(struct nfs_page *req) | |||
241 | unsigned int pos = 0; | 212 | unsigned int pos = 0; |
242 | unsigned int len = nfs_page_length(req->wb_page); | 213 | unsigned int len = nfs_page_length(req->wb_page); |
243 | 214 | ||
244 | nfs_page_group_lock(req, true); | 215 | nfs_page_group_lock(req, false); |
245 | 216 | ||
246 | do { | 217 | do { |
247 | tmp = nfs_page_group_search_locked(req->wb_head, pos); | 218 | tmp = nfs_page_group_search_locked(req->wb_head, pos); |
@@ -271,11 +242,14 @@ static void nfs_mark_uptodate(struct nfs_page *req) | |||
271 | 242 | ||
272 | static int wb_priority(struct writeback_control *wbc) | 243 | static int wb_priority(struct writeback_control *wbc) |
273 | { | 244 | { |
245 | int ret = 0; | ||
274 | if (wbc->for_reclaim) | 246 | if (wbc->for_reclaim) |
275 | return FLUSH_HIGHPRI | FLUSH_STABLE; | 247 | return FLUSH_HIGHPRI | FLUSH_STABLE; |
248 | if (wbc->sync_mode == WB_SYNC_ALL) | ||
249 | ret = FLUSH_COND_STABLE; | ||
276 | if (wbc->for_kupdate || wbc->for_background) | 250 | if (wbc->for_kupdate || wbc->for_background) |
277 | return FLUSH_LOWPRI | FLUSH_COND_STABLE; | 251 | ret |= FLUSH_LOWPRI; |
278 | return FLUSH_COND_STABLE; | 252 | return ret; |
279 | } | 253 | } |
280 | 254 | ||
281 | /* | 255 | /* |
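
wb_priority() now composes its result instead of returning early, so FLUSH_COND_STABLE is only set for WB_SYNC_ALL writeback rather than unconditionally for kupdate/background flushes. A compact model of the new logic (flag values here are illustrative, not the kernel's):

#include <stdio.h>

enum { FLUSH_STABLE = 1, FLUSH_LOWPRI = 2, FLUSH_HIGHPRI = 4,
       FLUSH_COND_STABLE = 8 };                  /* illustrative values */

struct wbc { int for_reclaim, sync_all, for_kupdate, for_background; };

static int wb_priority(const struct wbc *wbc)
{
	int ret = 0;

	if (wbc->for_reclaim)
		return FLUSH_HIGHPRI | FLUSH_STABLE;
	if (wbc->sync_all)                       /* WB_SYNC_ALL */
		ret = FLUSH_COND_STABLE;
	if (wbc->for_kupdate || wbc->for_background)
		ret |= FLUSH_LOWPRI;
	return ret;
}

int main(void)
{
	struct wbc bg_sync = { .sync_all = 1, .for_background = 1 };
	struct wbc bg_none = { .for_background = 1 };

	/* background WB_SYNC_ALL gets LOWPRI | COND_STABLE; plain background
	 * writeback now gets only LOWPRI, where the old code always added
	 * COND_STABLE regardless of sync_mode */
	printf("%d %d\n", wb_priority(&bg_sync), wb_priority(&bg_none));   /* 10 2 */
	return 0;
}
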
@@ -478,10 +452,23 @@ try_again: | |||
478 | return NULL; | 452 | return NULL; |
479 | } | 453 | } |
480 | 454 | ||
481 | /* lock each request in the page group */ | 455 | /* holding inode lock, so always make a non-blocking call to try the |
482 | ret = nfs_page_group_lock(head, false); | 456 | * page group lock */ |
483 | if (ret < 0) | 457 | ret = nfs_page_group_lock(head, true); |
458 | if (ret < 0) { | ||
459 | spin_unlock(&inode->i_lock); | ||
460 | |||
461 | if (!nonblock && ret == -EAGAIN) { | ||
462 | nfs_page_group_lock_wait(head); | ||
463 | nfs_release_request(head); | ||
464 | goto try_again; | ||
465 | } | ||
466 | |||
467 | nfs_release_request(head); | ||
484 | return ERR_PTR(ret); | 468 | return ERR_PTR(ret); |
469 | } | ||
470 | |||
471 | /* lock each request in the page group */ | ||
485 | subreq = head; | 472 | subreq = head; |
486 | do { | 473 | do { |
487 | /* | 474 | /* |
@@ -718,6 +705,8 @@ static void nfs_inode_remove_request(struct nfs_page *req) | |||
718 | if (likely(!PageSwapCache(head->wb_page))) { | 705 | if (likely(!PageSwapCache(head->wb_page))) { |
719 | set_page_private(head->wb_page, 0); | 706 | set_page_private(head->wb_page, 0); |
720 | ClearPagePrivate(head->wb_page); | 707 | ClearPagePrivate(head->wb_page); |
708 | smp_mb__after_atomic(); | ||
709 | wake_up_page(head->wb_page, PG_private); | ||
721 | clear_bit(PG_MAPPED, &head->wb_flags); | 710 | clear_bit(PG_MAPPED, &head->wb_flags); |
722 | } | 711 | } |
723 | nfsi->npages--; | 712 | nfsi->npages--; |
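
The added smp_mb__after_atomic()/wake_up_page() pair makes sure a sleeper in wait_on_page_bit(page, PG_private) sees the cleared flag once it wakes. The condition-variable sketch below is the user-space analogue of that clear-then-wake ordering, with the mutex supplying the barrier the kernel gets from smp_mb__after_atomic():

#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static bool page_private = true;   /* stands in for PG_private */

static void release_page(void)     /* waker: the nfs_inode_remove_request side */
{
	pthread_mutex_lock(&lock);
	page_private = false;      /* ClearPagePrivate() */
	pthread_mutex_unlock(&lock);
	pthread_cond_broadcast(&cond);   /* wake_up_page(page, PG_private) */
}

static void wait_for_page(void)    /* waiter: the wait_on_page_bit() side */
{
	pthread_mutex_lock(&lock);
	while (page_private)
		pthread_cond_wait(&cond, &lock);
	pthread_mutex_unlock(&lock);
}
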
@@ -736,7 +725,38 @@ nfs_mark_request_dirty(struct nfs_page *req) | |||
736 | __set_page_dirty_nobuffers(req->wb_page); | 725 | __set_page_dirty_nobuffers(req->wb_page); |
737 | } | 726 | } |
738 | 727 | ||
739 | #if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) | 728 | /* |
729 | * nfs_page_search_commits_for_head_request_locked | ||
730 | * | ||
731 | * Search through commit lists on @inode for the head request for @page. | ||
732 | * Must be called while holding the inode (which is cinfo) lock. | ||
733 | * | ||
734 | * Returns the head request if found, or NULL if not found. | ||
735 | */ | ||
736 | static struct nfs_page * | ||
737 | nfs_page_search_commits_for_head_request_locked(struct nfs_inode *nfsi, | ||
738 | struct page *page) | ||
739 | { | ||
740 | struct nfs_page *freq, *t; | ||
741 | struct nfs_commit_info cinfo; | ||
742 | struct inode *inode = &nfsi->vfs_inode; | ||
743 | |||
744 | nfs_init_cinfo_from_inode(&cinfo, inode); | ||
745 | |||
746 | /* search through pnfs commit lists */ | ||
747 | freq = pnfs_search_commit_reqs(inode, &cinfo, page); | ||
748 | if (freq) | ||
749 | return freq->wb_head; | ||
750 | |||
751 | /* Linearly search the commit list for the correct request */ | ||
752 | list_for_each_entry_safe(freq, t, &cinfo.mds->list, wb_list) { | ||
753 | if (freq->wb_page == page) | ||
754 | return freq->wb_head; | ||
755 | } | ||
756 | |||
757 | return NULL; | ||
758 | } | ||
759 | |||
740 | /** | 760 | /** |
741 | * nfs_request_add_commit_list - add request to a commit list | 761 | * nfs_request_add_commit_list - add request to a commit list |
742 | * @req: pointer to a struct nfs_page | 762 | * @req: pointer to a struct nfs_page |
@@ -854,36 +874,6 @@ int nfs_write_need_commit(struct nfs_pgio_header *hdr) | |||
854 | return hdr->verf.committed != NFS_FILE_SYNC; | 874 | return hdr->verf.committed != NFS_FILE_SYNC; |
855 | } | 875 | } |
856 | 876 | ||
857 | #else | ||
858 | static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo, | ||
859 | struct inode *inode) | ||
860 | { | ||
861 | } | ||
862 | |||
863 | void nfs_init_cinfo(struct nfs_commit_info *cinfo, | ||
864 | struct inode *inode, | ||
865 | struct nfs_direct_req *dreq) | ||
866 | { | ||
867 | } | ||
868 | |||
869 | void | ||
870 | nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg, | ||
871 | struct nfs_commit_info *cinfo) | ||
872 | { | ||
873 | } | ||
874 | |||
875 | static void | ||
876 | nfs_clear_request_commit(struct nfs_page *req) | ||
877 | { | ||
878 | } | ||
879 | |||
880 | int nfs_write_need_commit(struct nfs_pgio_header *hdr) | ||
881 | { | ||
882 | return 0; | ||
883 | } | ||
884 | |||
885 | #endif | ||
886 | |||
887 | static void nfs_write_completion(struct nfs_pgio_header *hdr) | 877 | static void nfs_write_completion(struct nfs_pgio_header *hdr) |
888 | { | 878 | { |
889 | struct nfs_commit_info cinfo; | 879 | struct nfs_commit_info cinfo; |
@@ -919,7 +909,6 @@ out: | |||
919 | hdr->release(hdr); | 909 | hdr->release(hdr); |
920 | } | 910 | } |
921 | 911 | ||
922 | #if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) | ||
923 | unsigned long | 912 | unsigned long |
924 | nfs_reqs_to_commit(struct nfs_commit_info *cinfo) | 913 | nfs_reqs_to_commit(struct nfs_commit_info *cinfo) |
925 | { | 914 | { |
@@ -976,19 +965,6 @@ nfs_scan_commit(struct inode *inode, struct list_head *dst, | |||
976 | return ret; | 965 | return ret; |
977 | } | 966 | } |
978 | 967 | ||
979 | #else | ||
980 | unsigned long nfs_reqs_to_commit(struct nfs_commit_info *cinfo) | ||
981 | { | ||
982 | return 0; | ||
983 | } | ||
984 | |||
985 | int nfs_scan_commit(struct inode *inode, struct list_head *dst, | ||
986 | struct nfs_commit_info *cinfo) | ||
987 | { | ||
988 | return 0; | ||
989 | } | ||
990 | #endif | ||
991 | |||
992 | /* | 968 | /* |
993 | * Search for an existing write request, and attempt to update | 969 | * Search for an existing write request, and attempt to update |
994 | * it to reflect a new dirty region on a given page. | 970 | * it to reflect a new dirty region on a given page. |
@@ -1381,7 +1357,6 @@ static int nfs_writeback_done(struct rpc_task *task, | |||
1381 | return status; | 1357 | return status; |
1382 | nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, hdr->res.count); | 1358 | nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, hdr->res.count); |
1383 | 1359 | ||
1384 | #if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) | ||
1385 | if (hdr->res.verf->committed < hdr->args.stable && | 1360 | if (hdr->res.verf->committed < hdr->args.stable && |
1386 | task->tk_status >= 0) { | 1361 | task->tk_status >= 0) { |
1387 | /* We tried a write call, but the server did not | 1362 | /* We tried a write call, but the server did not |
@@ -1403,7 +1378,6 @@ static int nfs_writeback_done(struct rpc_task *task, | |||
1403 | complain = jiffies + 300 * HZ; | 1378 | complain = jiffies + 300 * HZ; |
1404 | } | 1379 | } |
1405 | } | 1380 | } |
1406 | #endif | ||
1407 | 1381 | ||
1408 | /* Deal with the suid/sgid bit corner case */ | 1382 | /* Deal with the suid/sgid bit corner case */ |
1409 | if (nfs_should_remove_suid(inode)) | 1383 | if (nfs_should_remove_suid(inode)) |
@@ -1456,7 +1430,6 @@ static void nfs_writeback_result(struct rpc_task *task, | |||
1456 | } | 1430 | } |
1457 | 1431 | ||
1458 | 1432 | ||
1459 | #if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) | ||
1460 | static int nfs_commit_set_lock(struct nfs_inode *nfsi, int may_wait) | 1433 | static int nfs_commit_set_lock(struct nfs_inode *nfsi, int may_wait) |
1461 | { | 1434 | { |
1462 | int ret; | 1435 | int ret; |
@@ -1525,6 +1498,18 @@ int nfs_initiate_commit(struct rpc_clnt *clnt, struct nfs_commit_data *data, | |||
1525 | } | 1498 | } |
1526 | EXPORT_SYMBOL_GPL(nfs_initiate_commit); | 1499 | EXPORT_SYMBOL_GPL(nfs_initiate_commit); |
1527 | 1500 | ||
1501 | static loff_t nfs_get_lwb(struct list_head *head) | ||
1502 | { | ||
1503 | loff_t lwb = 0; | ||
1504 | struct nfs_page *req; | ||
1505 | |||
1506 | list_for_each_entry(req, head, wb_list) | ||
1507 | if (lwb < (req_offset(req) + req->wb_bytes)) | ||
1508 | lwb = req_offset(req) + req->wb_bytes; | ||
1509 | |||
1510 | return lwb; | ||
1511 | } | ||
1512 | |||
1528 | /* | 1513 | /* |
1529 | * Set up the argument/result storage required for the RPC call. | 1514 | * Set up the argument/result storage required for the RPC call. |
1530 | */ | 1515 | */ |
@@ -1544,6 +1529,9 @@ void nfs_init_commit(struct nfs_commit_data *data, | |||
1544 | data->inode = inode; | 1529 | data->inode = inode; |
1545 | data->cred = first->wb_context->cred; | 1530 | data->cred = first->wb_context->cred; |
1546 | data->lseg = lseg; /* reference transferred */ | 1531 | data->lseg = lseg; /* reference transferred */ |
1532 | /* only set lwb for pnfs commit */ | ||
1533 | if (lseg) | ||
1534 | data->lwb = nfs_get_lwb(&data->pages); | ||
1547 | data->mds_ops = &nfs_commit_ops; | 1535 | data->mds_ops = &nfs_commit_ops; |
1548 | data->completion_ops = cinfo->completion_ops; | 1536 | data->completion_ops = cinfo->completion_ops; |
1549 | data->dreq = cinfo->dreq; | 1537 | data->dreq = cinfo->dreq; |
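
nfs_init_commit() now records a last write byte (lwb) only when a layout segment is present, and nfs_get_lwb() computes it as the highest request end offset on the commit list. With hypothetical requests of 4096 bytes at offset 0 and 2048 bytes at offset 8192, lwb comes out to 10240, as this runnable rendering of the same scan confirms:

#include <stdio.h>

struct req { long long offset; unsigned bytes; };

static long long get_lwb(const struct req *reqs, int n)
{
	long long lwb = 0;
	int i;

	for (i = 0; i < n; i++)
		if (lwb < reqs[i].offset + reqs[i].bytes)
			lwb = reqs[i].offset + reqs[i].bytes;
	return lwb;
}

int main(void)
{
	struct req reqs[] = { { 0, 4096 }, { 8192, 2048 } };

	printf("lwb = %lld\n", get_lwb(reqs, 2));   /* lwb = 10240 */
	return 0;
}
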
@@ -1623,6 +1611,7 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data) | |||
1623 | struct nfs_page *req; | 1611 | struct nfs_page *req; |
1624 | int status = data->task.tk_status; | 1612 | int status = data->task.tk_status; |
1625 | struct nfs_commit_info cinfo; | 1613 | struct nfs_commit_info cinfo; |
1614 | struct nfs_server *nfss; | ||
1626 | 1615 | ||
1627 | while (!list_empty(&data->pages)) { | 1616 | while (!list_empty(&data->pages)) { |
1628 | req = nfs_list_entry(data->pages.next); | 1617 | req = nfs_list_entry(data->pages.next); |
@@ -1656,6 +1645,10 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data) | |||
1656 | next: | 1645 | next: |
1657 | nfs_unlock_and_release_request(req); | 1646 | nfs_unlock_and_release_request(req); |
1658 | } | 1647 | } |
1648 | nfss = NFS_SERVER(data->inode); | ||
1649 | if (atomic_long_read(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH) | ||
1650 | clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC); | ||
1651 | |||
1659 | nfs_init_cinfo(&cinfo, data->inode, data->dreq); | 1652 | nfs_init_cinfo(&cinfo, data->inode, data->dreq); |
1660 | if (atomic_dec_and_test(&cinfo.mds->rpcs_out)) | 1653 | if (atomic_dec_and_test(&cinfo.mds->rpcs_out)) |
1661 | nfs_commit_clear_lock(NFS_I(data->inode)); | 1654 | nfs_commit_clear_lock(NFS_I(data->inode)); |
@@ -1765,12 +1758,6 @@ out_mark_dirty: | |||
1765 | __mark_inode_dirty(inode, I_DIRTY_DATASYNC); | 1758 | __mark_inode_dirty(inode, I_DIRTY_DATASYNC); |
1766 | return ret; | 1759 | return ret; |
1767 | } | 1760 | } |
1768 | #else | ||
1769 | static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_control *wbc) | ||
1770 | { | ||
1771 | return 0; | ||
1772 | } | ||
1773 | #endif | ||
1774 | 1761 | ||
1775 | int nfs_write_inode(struct inode *inode, struct writeback_control *wbc) | 1762 | int nfs_write_inode(struct inode *inode, struct writeback_control *wbc) |
1776 | { | 1763 | { |
diff --git a/fs/nfs_common/Makefile b/fs/nfs_common/Makefile index f689ed82af3a..d153ca3ea577 100644 --- a/fs/nfs_common/Makefile +++ b/fs/nfs_common/Makefile | |||
@@ -3,5 +3,6 @@ | |||
3 | # | 3 | # |
4 | 4 | ||
5 | obj-$(CONFIG_NFS_ACL_SUPPORT) += nfs_acl.o | 5 | obj-$(CONFIG_NFS_ACL_SUPPORT) += nfs_acl.o |
6 | |||
7 | nfs_acl-objs := nfsacl.o | 6 | nfs_acl-objs := nfsacl.o |
7 | |||
8 | obj-$(CONFIG_GRACE_PERIOD) += grace.o | ||
diff --git a/fs/lockd/grace.c b/fs/nfs_common/grace.c index 6d1ee7204c88..ae6e58ea4de5 100644 --- a/fs/lockd/grace.c +++ b/fs/nfs_common/grace.c | |||
@@ -1,17 +1,20 @@ | |||
1 | /* | 1 | /* |
2 | * Common code for control of lockd and nfsv4 grace periods. | 2 | * Common code for control of lockd and nfsv4 grace periods. |
3 | * | ||
4 | * Transplanted from lockd code | ||
3 | */ | 5 | */ |
4 | 6 | ||
5 | #include <linux/module.h> | 7 | #include <linux/module.h> |
6 | #include <linux/lockd/bind.h> | ||
7 | #include <net/net_namespace.h> | 8 | #include <net/net_namespace.h> |
9 | #include <net/netns/generic.h> | ||
10 | #include <linux/fs.h> | ||
8 | 11 | ||
9 | #include "netns.h" | 12 | static int grace_net_id; |
10 | |||
11 | static DEFINE_SPINLOCK(grace_lock); | 13 | static DEFINE_SPINLOCK(grace_lock); |
12 | 14 | ||
13 | /** | 15 | /** |
14 | * locks_start_grace | 16 | * locks_start_grace |
17 | * @net: net namespace that this lock manager belongs to | ||
15 | * @lm: who this grace period is for | 18 | * @lm: who this grace period is for |
16 | * | 19 | * |
17 | * A grace period is a period during which locks should not be given | 20 | * A grace period is a period during which locks should not be given |
@@ -21,18 +24,20 @@ static DEFINE_SPINLOCK(grace_lock); | |||
21 | * | 24 | * |
22 | * This function is called to start a grace period. | 25 | * This function is called to start a grace period. |
23 | */ | 26 | */ |
24 | void locks_start_grace(struct net *net, struct lock_manager *lm) | 27 | void |
28 | locks_start_grace(struct net *net, struct lock_manager *lm) | ||
25 | { | 29 | { |
26 | struct lockd_net *ln = net_generic(net, lockd_net_id); | 30 | struct list_head *grace_list = net_generic(net, grace_net_id); |
27 | 31 | ||
28 | spin_lock(&grace_lock); | 32 | spin_lock(&grace_lock); |
29 | list_add(&lm->list, &ln->grace_list); | 33 | list_add(&lm->list, grace_list); |
30 | spin_unlock(&grace_lock); | 34 | spin_unlock(&grace_lock); |
31 | } | 35 | } |
32 | EXPORT_SYMBOL_GPL(locks_start_grace); | 36 | EXPORT_SYMBOL_GPL(locks_start_grace); |
33 | 37 | ||
34 | /** | 38 | /** |
35 | * locks_end_grace | 39 | * locks_end_grace |
40 | * @net: net namespace that this lock manager belongs to | ||
36 | * @lm: who this grace period is for | 41 | * @lm: who this grace period is for |
37 | * | 42 | * |
38 | * Call this function to state that the given lock manager is ready to | 43 | * Call this function to state that the given lock manager is ready to |
@@ -41,7 +46,8 @@ EXPORT_SYMBOL_GPL(locks_start_grace); | |||
41 | * Note that callers count on it being safe to call this more than once, | 46 | * Note that callers count on it being safe to call this more than once, |
42 | * and the second call should be a no-op. | 47 | * and the second call should be a no-op. |
43 | */ | 48 | */ |
44 | void locks_end_grace(struct lock_manager *lm) | 49 | void |
50 | locks_end_grace(struct lock_manager *lm) | ||
45 | { | 51 | { |
46 | spin_lock(&grace_lock); | 52 | spin_lock(&grace_lock); |
47 | list_del_init(&lm->list); | 53 | list_del_init(&lm->list); |
@@ -56,10 +62,52 @@ EXPORT_SYMBOL_GPL(locks_end_grace); | |||
56 | * to answer ordinary lock requests, and when they should accept only | 62 | * to answer ordinary lock requests, and when they should accept only |
57 | * lock reclaims. | 63 | * lock reclaims. |
58 | */ | 64 | */ |
59 | int locks_in_grace(struct net *net) | 65 | int |
66 | locks_in_grace(struct net *net) | ||
60 | { | 67 | { |
61 | struct lockd_net *ln = net_generic(net, lockd_net_id); | 68 | struct list_head *grace_list = net_generic(net, grace_net_id); |
62 | 69 | ||
63 | return !list_empty(&ln->grace_list); | 70 | return !list_empty(grace_list); |
64 | } | 71 | } |
65 | EXPORT_SYMBOL_GPL(locks_in_grace); | 72 | EXPORT_SYMBOL_GPL(locks_in_grace); |
73 | |||
74 | static int __net_init | ||
75 | grace_init_net(struct net *net) | ||
76 | { | ||
77 | struct list_head *grace_list = net_generic(net, grace_net_id); | ||
78 | |||
79 | INIT_LIST_HEAD(grace_list); | ||
80 | return 0; | ||
81 | } | ||
82 | |||
83 | static void __net_exit | ||
84 | grace_exit_net(struct net *net) | ||
85 | { | ||
86 | struct list_head *grace_list = net_generic(net, grace_net_id); | ||
87 | |||
88 | BUG_ON(!list_empty(grace_list)); | ||
89 | } | ||
90 | |||
91 | static struct pernet_operations grace_net_ops = { | ||
92 | .init = grace_init_net, | ||
93 | .exit = grace_exit_net, | ||
94 | .id = &grace_net_id, | ||
95 | .size = sizeof(struct list_head), | ||
96 | }; | ||
97 | |||
98 | static int __init | ||
99 | init_grace(void) | ||
100 | { | ||
101 | return register_pernet_subsys(&grace_net_ops); | ||
102 | } | ||
103 | |||
104 | static void __exit | ||
105 | exit_grace(void) | ||
106 | { | ||
107 | unregister_pernet_subsys(&grace_net_ops); | ||
108 | } | ||
109 | |||
110 | MODULE_AUTHOR("Jeff Layton <jlayton@primarydata.com>"); | ||
111 | MODULE_LICENSE("GPL"); | ||
112 | module_init(init_grace); | ||
113 | module_exit(exit_grace); | ||
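
With the grace-period code now a standalone per-netns module, a lock manager's whole interaction with it is the three exported calls. A kernel-style sketch of a hypothetical consumer (the demo_* names are invented; lockd and nfsd are the real callers):

#include <linux/fs.h>
#include <net/net_namespace.h>

static struct lock_manager demo_manager;   /* hypothetical consumer */

static void demo_startup(struct net *net)
{
	/* enter the grace period: only reclaims are honoured from here */
	locks_start_grace(net, &demo_manager);
}

static int demo_handle_lock(struct net *net, bool is_reclaim)
{
	/* ordinary lock requests are refused while any manager in this
	 * namespace is still in grace */
	if (locks_in_grace(net) && !is_reclaim)
		return -EAGAIN;   /* caller maps this to an NLM/NFSv4 denial */
	return 0;
}

static void demo_grace_over(struct net *net)
{
	/* safe to call more than once, per the comment above */
	locks_end_grace(&demo_manager);
}
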
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig index f3586b645d7d..73395156bdb4 100644 --- a/fs/nfsd/Kconfig +++ b/fs/nfsd/Kconfig | |||
@@ -71,6 +71,7 @@ config NFSD_V4 | |||
71 | select FS_POSIX_ACL | 71 | select FS_POSIX_ACL |
72 | select SUNRPC_GSS | 72 | select SUNRPC_GSS |
73 | select CRYPTO | 73 | select CRYPTO |
74 | select GRACE_PERIOD | ||
74 | help | 75 | help |
75 | This option enables support in your system's NFS server for | 76 | This option enables support in your system's NFS server for |
76 | version 4 of the NFS protocol (RFC 3530). | 77 | version 4 of the NFS protocol (RFC 3530). |
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index e0be57b0f79b..ed2b1151b171 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c | |||
@@ -49,12 +49,6 @@ static void nfsd4_mark_cb_fault(struct nfs4_client *, int reason); | |||
49 | 49 | ||
50 | /* Index of predefined Linux callback client operations */ | 50 | /* Index of predefined Linux callback client operations */ |
51 | 51 | ||
52 | enum { | ||
53 | NFSPROC4_CLNT_CB_NULL = 0, | ||
54 | NFSPROC4_CLNT_CB_RECALL, | ||
55 | NFSPROC4_CLNT_CB_SEQUENCE, | ||
56 | }; | ||
57 | |||
58 | struct nfs4_cb_compound_hdr { | 52 | struct nfs4_cb_compound_hdr { |
59 | /* args */ | 53 | /* args */ |
60 | u32 ident; /* minorversion 0 only */ | 54 | u32 ident; /* minorversion 0 only */ |
@@ -494,7 +488,7 @@ static void nfs4_xdr_enc_cb_null(struct rpc_rqst *req, struct xdr_stream *xdr, | |||
494 | static void nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, struct xdr_stream *xdr, | 488 | static void nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, struct xdr_stream *xdr, |
495 | const struct nfsd4_callback *cb) | 489 | const struct nfsd4_callback *cb) |
496 | { | 490 | { |
497 | const struct nfs4_delegation *args = cb->cb_op; | 491 | const struct nfs4_delegation *dp = cb_to_delegation(cb); |
498 | struct nfs4_cb_compound_hdr hdr = { | 492 | struct nfs4_cb_compound_hdr hdr = { |
499 | .ident = cb->cb_clp->cl_cb_ident, | 493 | .ident = cb->cb_clp->cl_cb_ident, |
500 | .minorversion = cb->cb_minorversion, | 494 | .minorversion = cb->cb_minorversion, |
@@ -502,7 +496,7 @@ static void nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, struct xdr_stream *xdr, | |||
502 | 496 | ||
503 | encode_cb_compound4args(xdr, &hdr); | 497 | encode_cb_compound4args(xdr, &hdr); |
504 | encode_cb_sequence4args(xdr, cb, &hdr); | 498 | encode_cb_sequence4args(xdr, cb, &hdr); |
505 | encode_cb_recall4args(xdr, args, &hdr); | 499 | encode_cb_recall4args(xdr, dp, &hdr); |
506 | encode_cb_nops(&hdr); | 500 | encode_cb_nops(&hdr); |
507 | } | 501 | } |
508 | 502 | ||
@@ -746,27 +740,6 @@ static const struct rpc_call_ops nfsd4_cb_probe_ops = { | |||
746 | 740 | ||
747 | static struct workqueue_struct *callback_wq; | 741 | static struct workqueue_struct *callback_wq; |
748 | 742 | ||
749 | static void run_nfsd4_cb(struct nfsd4_callback *cb) | ||
750 | { | ||
751 | queue_work(callback_wq, &cb->cb_work); | ||
752 | } | ||
753 | |||
754 | static void do_probe_callback(struct nfs4_client *clp) | ||
755 | { | ||
756 | struct nfsd4_callback *cb = &clp->cl_cb_null; | ||
757 | |||
758 | cb->cb_op = NULL; | ||
759 | cb->cb_clp = clp; | ||
760 | |||
761 | cb->cb_msg.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL]; | ||
762 | cb->cb_msg.rpc_argp = NULL; | ||
763 | cb->cb_msg.rpc_resp = NULL; | ||
764 | |||
765 | cb->cb_ops = &nfsd4_cb_probe_ops; | ||
766 | |||
767 | run_nfsd4_cb(cb); | ||
768 | } | ||
769 | |||
770 | /* | 743 | /* |
771 | * Poke the callback thread to process any updates to the callback | 744 | * Poke the callback thread to process any updates to the callback |
772 | * parameters, and send a null probe. | 745 | * parameters, and send a null probe. |
@@ -775,7 +748,7 @@ void nfsd4_probe_callback(struct nfs4_client *clp) | |||
775 | { | 748 | { |
776 | clp->cl_cb_state = NFSD4_CB_UNKNOWN; | 749 | clp->cl_cb_state = NFSD4_CB_UNKNOWN; |
777 | set_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_flags); | 750 | set_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_flags); |
778 | do_probe_callback(clp); | 751 | nfsd4_run_cb(&clp->cl_cb_null); |
779 | } | 752 | } |
780 | 753 | ||
781 | void nfsd4_probe_callback_sync(struct nfs4_client *clp) | 754 | void nfsd4_probe_callback_sync(struct nfs4_client *clp) |
@@ -847,23 +820,9 @@ static void nfsd4_cb_done(struct rpc_task *task, void *calldata) | |||
847 | rpc_wake_up_next(&clp->cl_cb_waitq); | 820 | rpc_wake_up_next(&clp->cl_cb_waitq); |
848 | dprintk("%s: freed slot, new seqid=%d\n", __func__, | 821 | dprintk("%s: freed slot, new seqid=%d\n", __func__, |
849 | clp->cl_cb_session->se_cb_seq_nr); | 822 | clp->cl_cb_session->se_cb_seq_nr); |
850 | |||
851 | /* We're done looking into the sequence information */ | ||
852 | task->tk_msg.rpc_resp = NULL; | ||
853 | } | 823 | } |
854 | } | ||
855 | |||
856 | |||
857 | static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata) | ||
858 | { | ||
859 | struct nfsd4_callback *cb = calldata; | ||
860 | struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall); | ||
861 | struct nfs4_client *clp = cb->cb_clp; | ||
862 | struct rpc_clnt *current_rpc_client = clp->cl_cb_client; | ||
863 | |||
864 | nfsd4_cb_done(task, calldata); | ||
865 | 824 | ||
866 | if (current_rpc_client != task->tk_client) { | 825 | if (clp->cl_cb_client != task->tk_client) { |
867 | /* We're shutting down or changing cl_cb_client; leave | 826 | /* We're shutting down or changing cl_cb_client; leave |
868 | * it to nfsd4_process_cb_update to restart the call if | 827 | * it to nfsd4_process_cb_update to restart the call if |
869 | * necessary. */ | 828 | * necessary. */ |
@@ -872,47 +831,42 @@ static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata) | |||
872 | 831 | ||
873 | if (cb->cb_done) | 832 | if (cb->cb_done) |
874 | return; | 833 | return; |
875 | switch (task->tk_status) { | 834 | |
835 | switch (cb->cb_ops->done(cb, task)) { | ||
876 | case 0: | 836 | case 0: |
877 | cb->cb_done = true; | 837 | task->tk_status = 0; |
838 | rpc_restart_call_prepare(task); | ||
878 | return; | 839 | return; |
879 | case -EBADHANDLE: | 840 | case 1: |
880 | case -NFS4ERR_BAD_STATEID: | ||
881 | /* Race: client probably got cb_recall | ||
882 | * before open reply granting delegation */ | ||
883 | break; | 841 | break; |
884 | default: | 842 | case -1: |
885 | /* Network partition? */ | 843 | /* Network partition? */ |
886 | nfsd4_mark_cb_down(clp, task->tk_status); | 844 | nfsd4_mark_cb_down(clp, task->tk_status); |
845 | break; | ||
846 | default: | ||
847 | BUG(); | ||
887 | } | 848 | } |
888 | if (dp->dl_retries--) { | ||
889 | rpc_delay(task, 2*HZ); | ||
890 | task->tk_status = 0; | ||
891 | rpc_restart_call_prepare(task); | ||
892 | return; | ||
893 | } | ||
894 | nfsd4_mark_cb_down(clp, task->tk_status); | ||
895 | cb->cb_done = true; | 849 | cb->cb_done = true; |
896 | } | 850 | } |
897 | 851 | ||
898 | static void nfsd4_cb_recall_release(void *calldata) | 852 | static void nfsd4_cb_release(void *calldata) |
899 | { | 853 | { |
900 | struct nfsd4_callback *cb = calldata; | 854 | struct nfsd4_callback *cb = calldata; |
901 | struct nfs4_client *clp = cb->cb_clp; | 855 | struct nfs4_client *clp = cb->cb_clp; |
902 | struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall); | ||
903 | 856 | ||
904 | if (cb->cb_done) { | 857 | if (cb->cb_done) { |
905 | spin_lock(&clp->cl_lock); | 858 | spin_lock(&clp->cl_lock); |
906 | list_del(&cb->cb_per_client); | 859 | list_del(&cb->cb_per_client); |
907 | spin_unlock(&clp->cl_lock); | 860 | spin_unlock(&clp->cl_lock); |
908 | nfs4_put_stid(&dp->dl_stid); | 861 | |
862 | cb->cb_ops->release(cb); | ||
909 | } | 863 | } |
910 | } | 864 | } |
911 | 865 | ||
912 | static const struct rpc_call_ops nfsd4_cb_recall_ops = { | 866 | static const struct rpc_call_ops nfsd4_cb_ops = { |
913 | .rpc_call_prepare = nfsd4_cb_prepare, | 867 | .rpc_call_prepare = nfsd4_cb_prepare, |
914 | .rpc_call_done = nfsd4_cb_recall_done, | 868 | .rpc_call_done = nfsd4_cb_done, |
915 | .rpc_release = nfsd4_cb_recall_release, | 869 | .rpc_release = nfsd4_cb_release, |
916 | }; | 870 | }; |
917 | 871 | ||
918 | int nfsd4_create_callback_queue(void) | 872 | int nfsd4_create_callback_queue(void) |
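
nfsd4_cb_done() now drives a tri-state contract with cb_ops->done(): 0 restarts the RPC, 1 ends the callback, and -1 marks the backchannel down. A hedged sketch of a recall-style done method honouring that contract, reconstructed from the logic deleted above; the function name and its wiring into an ops table are assumptions:

static int demo_cb_recall_done(struct nfsd4_callback *cb,
			       struct rpc_task *task)
{
	struct nfs4_delegation *dp = cb_to_delegation(cb);

	switch (task->tk_status) {
	case 0:
		return 1;                  /* success: callback is finished */
	case -EBADHANDLE:
	case -NFS4ERR_BAD_STATEID:
		/* Race: client probably got cb_recall before the open
		 * reply granting the delegation; nothing to retry */
		return 1;
	default:
		if (dp->dl_retries--) {
			rpc_delay(task, 2 * HZ);
			return 0;          /* ask generic code to restart */
		}
		return -1;                 /* give up: mark channel down */
	}
}
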
@@ -937,16 +891,10 @@ void nfsd4_shutdown_callback(struct nfs4_client *clp) | |||
937 | * instead, nfsd4_run_cb_null() will detect the killed | 891 | * instead, nfsd4_run_cb_null() will detect the killed |
938 | * client, destroy the rpc client, and stop: | 892 | * client, destroy the rpc client, and stop: |
939 | */ | 893 | */ |
940 | do_probe_callback(clp); | 894 | nfsd4_run_cb(&clp->cl_cb_null); |
941 | flush_workqueue(callback_wq); | 895 | flush_workqueue(callback_wq); |
942 | } | 896 | } |
943 | 897 | ||
944 | static void nfsd4_release_cb(struct nfsd4_callback *cb) | ||
945 | { | ||
946 | if (cb->cb_ops->rpc_release) | ||
947 | cb->cb_ops->rpc_release(cb); | ||
948 | } | ||
949 | |||
950 | /* requires cl_lock: */ | 898 | /* requires cl_lock: */ |
951 | static struct nfsd4_conn * __nfsd4_find_backchannel(struct nfs4_client *clp) | 899 | static struct nfsd4_conn * __nfsd4_find_backchannel(struct nfs4_client *clp) |
952 | { | 900 | { |
@@ -1009,63 +957,49 @@ static void nfsd4_process_cb_update(struct nfsd4_callback *cb) | |||
1009 | } | 957 | } |
1010 | /* Yay, the callback channel's back! Restart any callbacks: */ | 958 | /* Yay, the callback channel's back! Restart any callbacks: */ |
1011 | list_for_each_entry(cb, &clp->cl_callbacks, cb_per_client) | 959 | list_for_each_entry(cb, &clp->cl_callbacks, cb_per_client) |
1012 | run_nfsd4_cb(cb); | 960 | queue_work(callback_wq, &cb->cb_work); |
1013 | } | 961 | } |
1014 | 962 | ||
1015 | static void | 963 | static void |
1016 | nfsd4_run_callback_rpc(struct nfsd4_callback *cb) | 964 | nfsd4_run_cb_work(struct work_struct *work) |
1017 | { | 965 | { |
966 | struct nfsd4_callback *cb = | ||
967 | container_of(work, struct nfsd4_callback, cb_work); | ||
1018 | struct nfs4_client *clp = cb->cb_clp; | 968 | struct nfs4_client *clp = cb->cb_clp; |
1019 | struct rpc_clnt *clnt; | 969 | struct rpc_clnt *clnt; |
1020 | 970 | ||
971 | if (cb->cb_ops && cb->cb_ops->prepare) | ||
972 | cb->cb_ops->prepare(cb); | ||
973 | |||
1021 | if (clp->cl_flags & NFSD4_CLIENT_CB_FLAG_MASK) | 974 | if (clp->cl_flags & NFSD4_CLIENT_CB_FLAG_MASK) |
1022 | nfsd4_process_cb_update(cb); | 975 | nfsd4_process_cb_update(cb); |
1023 | 976 | ||
1024 | clnt = clp->cl_cb_client; | 977 | clnt = clp->cl_cb_client; |
1025 | if (!clnt) { | 978 | if (!clnt) { |
1026 | /* Callback channel broken, or client killed; give up: */ | 979 | /* Callback channel broken, or client killed; give up: */ |
1027 | nfsd4_release_cb(cb); | 980 | if (cb->cb_ops && cb->cb_ops->release) |
981 | cb->cb_ops->release(cb); | ||
1028 | return; | 982 | return; |
1029 | } | 983 | } |
1030 | cb->cb_msg.rpc_cred = clp->cl_cb_cred; | 984 | cb->cb_msg.rpc_cred = clp->cl_cb_cred; |
1031 | rpc_call_async(clnt, &cb->cb_msg, RPC_TASK_SOFT | RPC_TASK_SOFTCONN, | 985 | rpc_call_async(clnt, &cb->cb_msg, RPC_TASK_SOFT | RPC_TASK_SOFTCONN, |
1032 | cb->cb_ops, cb); | 986 | cb->cb_ops ? &nfsd4_cb_ops : &nfsd4_cb_probe_ops, cb); |
1033 | } | 987 | } |
1034 | 988 | ||
1035 | void | 989 | void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp, |
1036 | nfsd4_run_cb_null(struct work_struct *w) | 990 | struct nfsd4_callback_ops *ops, enum nfsd4_cb_op op) |
1037 | { | 991 | { |
1038 | struct nfsd4_callback *cb = container_of(w, struct nfsd4_callback, | ||
1039 | cb_work); | ||
1040 | nfsd4_run_callback_rpc(cb); | ||
1041 | } | ||
1042 | |||
1043 | void | ||
1044 | nfsd4_run_cb_recall(struct work_struct *w) | ||
1045 | { | ||
1046 | struct nfsd4_callback *cb = container_of(w, struct nfsd4_callback, | ||
1047 | cb_work); | ||
1048 | |||
1049 | nfsd4_prepare_cb_recall(cb->cb_op); | ||
1050 | nfsd4_run_callback_rpc(cb); | ||
1051 | } | ||
1052 | |||
1053 | void nfsd4_cb_recall(struct nfs4_delegation *dp) | ||
1054 | { | ||
1055 | struct nfsd4_callback *cb = &dp->dl_recall; | ||
1056 | struct nfs4_client *clp = dp->dl_stid.sc_client; | ||
1057 | |||
1058 | dp->dl_retries = 1; | ||
1059 | cb->cb_op = dp; | ||
1060 | cb->cb_clp = clp; | 992 | cb->cb_clp = clp; |
1061 | cb->cb_msg.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_RECALL]; | 993 | cb->cb_msg.rpc_proc = &nfs4_cb_procedures[op]; |
1062 | cb->cb_msg.rpc_argp = cb; | 994 | cb->cb_msg.rpc_argp = cb; |
1063 | cb->cb_msg.rpc_resp = cb; | 995 | cb->cb_msg.rpc_resp = cb; |
1064 | 996 | cb->cb_ops = ops; | |
1065 | cb->cb_ops = &nfsd4_cb_recall_ops; | 997 | INIT_WORK(&cb->cb_work, nfsd4_run_cb_work); |
1066 | |||
1067 | INIT_LIST_HEAD(&cb->cb_per_client); | 998 | INIT_LIST_HEAD(&cb->cb_per_client); |
1068 | cb->cb_done = true; | 999 | cb->cb_done = true; |
1000 | } | ||
1069 | 1001 | ||
1070 | run_nfsd4_cb(&dp->dl_recall); | 1002 | void nfsd4_run_cb(struct nfsd4_callback *cb) |
1003 | { | ||
1004 | queue_work(callback_wq, &cb->cb_work); | ||
1071 | } | 1005 | } |
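
Under the new scheme a caller initializes its callback once and queues it explicitly when needed. A sketch of how the delegation-recall path would use the helpers, assuming NFSPROC4_CLNT_CB_RECALL survives in a shared header and demo_cb_recall_ops is the hypothetical ops table from the sketch above:

/* presumed one-time setup when the delegation is created */
nfsd4_init_cb(&dp->dl_recall, dp->dl_stid.sc_client,
	      &demo_cb_recall_ops,            /* hypothetical ops table */
	      NFSPROC4_CLNT_CB_RECALL);

/* later, when the server decides to recall the delegation */
dp->dl_retries = 1;
nfsd4_run_cb(&dp->dl_recall);               /* queue_work on callback_wq */
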
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index a0ab0a847d69..e1b3d3d472da 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c | |||
@@ -215,7 +215,8 @@ idtoname_parse(struct cache_detail *cd, char *buf, int buflen) | |||
215 | memset(&ent, 0, sizeof(ent)); | 215 | memset(&ent, 0, sizeof(ent)); |
216 | 216 | ||
217 | /* Authentication name */ | 217 | /* Authentication name */ |
218 | if (qword_get(&buf, buf1, PAGE_SIZE) <= 0) | 218 | len = qword_get(&buf, buf1, PAGE_SIZE); |
219 | if (len <= 0 || len >= IDMAP_NAMESZ) | ||
219 | goto out; | 220 | goto out; |
220 | memcpy(ent.authname, buf1, sizeof(ent.authname)); | 221 | memcpy(ent.authname, buf1, sizeof(ent.authname)); |
221 | 222 | ||
@@ -245,12 +246,10 @@ idtoname_parse(struct cache_detail *cd, char *buf, int buflen) | |||
245 | /* Name */ | 246 | /* Name */ |
246 | error = -EINVAL; | 247 | error = -EINVAL; |
247 | len = qword_get(&buf, buf1, PAGE_SIZE); | 248 | len = qword_get(&buf, buf1, PAGE_SIZE); |
248 | if (len < 0) | 249 | if (len < 0 || len >= IDMAP_NAMESZ) |
249 | goto out; | 250 | goto out; |
250 | if (len == 0) | 251 | if (len == 0) |
251 | set_bit(CACHE_NEGATIVE, &ent.h.flags); | 252 | set_bit(CACHE_NEGATIVE, &ent.h.flags); |
252 | else if (len >= IDMAP_NAMESZ) | ||
253 | goto out; | ||
254 | else | 253 | else |
255 | memcpy(ent.name, buf1, sizeof(ent.name)); | 254 | memcpy(ent.name, buf1, sizeof(ent.name)); |
256 | error = -ENOMEM; | 255 | error = -ENOMEM; |
@@ -259,15 +258,12 @@ idtoname_parse(struct cache_detail *cd, char *buf, int buflen) | |||
259 | goto out; | 258 | goto out; |
260 | 259 | ||
261 | cache_put(&res->h, cd); | 260 | cache_put(&res->h, cd); |
262 | |||
263 | error = 0; | 261 | error = 0; |
264 | out: | 262 | out: |
265 | kfree(buf1); | 263 | kfree(buf1); |
266 | |||
267 | return error; | 264 | return error; |
268 | } | 265 | } |
269 | 266 | ||
270 | |||
271 | static struct ent * | 267 | static struct ent * |
272 | idtoname_lookup(struct cache_detail *cd, struct ent *item) | 268 | idtoname_lookup(struct cache_detail *cd, struct ent *item) |
273 | { | 269 | { |
@@ -368,7 +364,7 @@ nametoid_parse(struct cache_detail *cd, char *buf, int buflen) | |||
368 | { | 364 | { |
369 | struct ent ent, *res; | 365 | struct ent ent, *res; |
370 | char *buf1; | 366 | char *buf1; |
371 | int error = -EINVAL; | 367 | int len, error = -EINVAL; |
372 | 368 | ||
373 | if (buf[buflen - 1] != '\n') | 369 | if (buf[buflen - 1] != '\n') |
374 | return (-EINVAL); | 370 | return (-EINVAL); |
@@ -381,7 +377,8 @@ nametoid_parse(struct cache_detail *cd, char *buf, int buflen) | |||
381 | memset(&ent, 0, sizeof(ent)); | 377 | memset(&ent, 0, sizeof(ent)); |
382 | 378 | ||
383 | /* Authentication name */ | 379 | /* Authentication name */ |
384 | if (qword_get(&buf, buf1, PAGE_SIZE) <= 0) | 380 | len = qword_get(&buf, buf1, PAGE_SIZE); |
381 | if (len <= 0 || len >= IDMAP_NAMESZ) | ||
385 | goto out; | 382 | goto out; |
386 | memcpy(ent.authname, buf1, sizeof(ent.authname)); | 383 | memcpy(ent.authname, buf1, sizeof(ent.authname)); |
387 | 384 | ||
@@ -392,8 +389,8 @@ nametoid_parse(struct cache_detail *cd, char *buf, int buflen) | |||
392 | IDMAP_TYPE_USER : IDMAP_TYPE_GROUP; | 389 | IDMAP_TYPE_USER : IDMAP_TYPE_GROUP; |
393 | 390 | ||
394 | /* Name */ | 391 | /* Name */ |
395 | error = qword_get(&buf, buf1, PAGE_SIZE); | 392 | len = qword_get(&buf, buf1, PAGE_SIZE); |
396 | if (error <= 0 || error >= IDMAP_NAMESZ) | 393 | if (len <= 0 || len >= IDMAP_NAMESZ) |
397 | goto out; | 394 | goto out; |
398 | memcpy(ent.name, buf1, sizeof(ent.name)); | 395 | memcpy(ent.name, buf1, sizeof(ent.name)); |
399 | 396 | ||
@@ -421,7 +418,6 @@ nametoid_parse(struct cache_detail *cd, char *buf, int buflen) | |||
421 | error = 0; | 418 | error = 0; |
422 | out: | 419 | out: |
423 | kfree(buf1); | 420 | kfree(buf1); |
424 | |||
425 | return (error); | 421 | return (error); |
426 | } | 422 | } |
427 | 423 | ||
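
Every idmap fix above enforces the same invariant: a token returned by qword_get() must fit, NUL terminator included, in its IDMAP_NAMESZ-sized field before the fixed-size memcpy() runs, which is why the checks are strict (len >= IDMAP_NAMESZ, not len > IDMAP_NAMESZ). A runnable miniature with a shrunken buffer:

#include <stdio.h>
#include <string.h>

#define IDMAP_NAMESZ 8   /* shrunk for the demo; the kernel uses 128 */

static int parse_name(char dst[IDMAP_NAMESZ], const char *tok)
{
	size_t len = strlen(tok);   /* stands in for qword_get()'s return */

	if (len == 0 || len >= IDMAP_NAMESZ)
		return -1;          /* no room for the NUL terminator */
	memcpy(dst, tok, len + 1);
	return 0;
}

int main(void)
{
	char name[IDMAP_NAMESZ];

	printf("%d\n", parse_name(name, "alice"));     /*  0: 5 chars fit */
	printf("%d\n", parse_name(name, "sevenchr"));  /* -1: 8 chars, rejected */
	return 0;
}
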
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 5e0dc528a0e8..cdeb3cfd6f32 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c | |||
@@ -1013,6 +1013,49 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
1013 | return status; | 1013 | return status; |
1014 | } | 1014 | } |
1015 | 1015 | ||
1016 | static __be32 | ||
1017 | nfsd4_seek(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | ||
1018 | struct nfsd4_seek *seek) | ||
1019 | { | ||
1020 | int whence; | ||
1021 | __be32 status; | ||
1022 | struct file *file; | ||
1023 | |||
1024 | status = nfs4_preprocess_stateid_op(SVC_NET(rqstp), cstate, | ||
1025 | &seek->seek_stateid, | ||
1026 | RD_STATE, &file); | ||
1027 | if (status) { | ||
1028 | dprintk("NFSD: nfsd4_seek: couldn't process stateid!\n"); | ||
1029 | return status; | ||
1030 | } | ||
1031 | |||
1032 | switch (seek->seek_whence) { | ||
1033 | case NFS4_CONTENT_DATA: | ||
1034 | whence = SEEK_DATA; | ||
1035 | break; | ||
1036 | case NFS4_CONTENT_HOLE: | ||
1037 | whence = SEEK_HOLE; | ||
1038 | break; | ||
1039 | default: | ||
1040 | status = nfserr_union_notsupp; | ||
1041 | goto out; | ||
1042 | } | ||
1043 | |||
1044 | /* | ||
1045 | * Note: This call does change file->f_pos, but nothing in NFSD | ||
1046 | * should ever file->f_pos. | ||
1047 | */ | ||
1048 | seek->seek_pos = vfs_llseek(file, seek->seek_offset, whence); | ||
1049 | if (seek->seek_pos < 0) | ||
1050 | status = nfserrno(seek->seek_pos); | ||
1051 | else if (seek->seek_pos >= i_size_read(file_inode(file))) | ||
1052 | seek->seek_eof = true; | ||
1053 | |||
1054 | out: | ||
1055 | fput(file); | ||
1056 | return status; | ||
1057 | } | ||
1058 | |||
1016 | /* This routine never returns NFS_OK! If there are no other errors, it | 1059 | /* This routine never returns NFS_OK! If there are no other errors, it |
1017 | * will return NFSERR_SAME or NFSERR_NOT_SAME depending on whether the | 1060 | * will return NFSERR_SAME or NFSERR_NOT_SAME depending on whether the |
1018 | * attributes matched. VERIFY is implemented by mapping NFSERR_SAME | 1061 | * attributes matched. VERIFY is implemented by mapping NFSERR_SAME |
@@ -1881,6 +1924,12 @@ static struct nfsd4_operation nfsd4_ops[] = { | |||
1881 | .op_get_currentstateid = (stateid_getter)nfsd4_get_freestateid, | 1924 | .op_get_currentstateid = (stateid_getter)nfsd4_get_freestateid, |
1882 | .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, | 1925 | .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, |
1883 | }, | 1926 | }, |
1927 | |||
1928 | /* NFSv4.2 operations */ | ||
1929 | [OP_SEEK] = { | ||
1930 | .op_func = (nfsd4op_func)nfsd4_seek, | ||
1931 | .op_name = "OP_SEEK", | ||
1932 | }, | ||
1884 | }; | 1933 | }; |
1885 | 1934 | ||
1886 | int nfsd4_max_reply(struct svc_rqst *rqstp, struct nfsd4_op *op) | 1935 | int nfsd4_max_reply(struct svc_rqst *rqstp, struct nfsd4_op *op) |
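
OP_SEEK is a thin shim over vfs_llseek() with SEEK_DATA/SEEK_HOLE, so its semantics can be exercised from user space with plain lseek(2) on a sparse file. Filesystems without sparse-file support simply report the whole file as data; the demo below assumes the Linux-specific whence values:

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int fd = open("sparse.tmp", O_RDWR | O_CREAT | O_TRUNC, 0600);

	if (fd < 0)
		return 1;
	pwrite(fd, "data", 4, 1 << 20);     /* data at 1 MiB, hole before it */

	/* first data at/after offset 0: what OP_SEEK(NFS4_CONTENT_DATA)
	 * reports in seek_pos (may land on a block boundary <= 1 MiB) */
	printf("data: %lld\n", (long long)lseek(fd, 0, SEEK_DATA));

	/* first hole at/after the data: NFS4_CONTENT_HOLE; at or past
	 * EOF the server also sets seek_eof */
	printf("hole: %lld\n", (long long)lseek(fd, 1 << 20, SEEK_HOLE));

	close(fd);
	unlink("sparse.tmp");
	return 0;
}
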
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 9c271f42604a..ea95a2bc21b5 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c | |||
@@ -58,7 +58,7 @@ struct nfsd4_client_tracking_ops { | |||
58 | void (*create)(struct nfs4_client *); | 58 | void (*create)(struct nfs4_client *); |
59 | void (*remove)(struct nfs4_client *); | 59 | void (*remove)(struct nfs4_client *); |
60 | int (*check)(struct nfs4_client *); | 60 | int (*check)(struct nfs4_client *); |
61 | void (*grace_done)(struct nfsd_net *, time_t); | 61 | void (*grace_done)(struct nfsd_net *); |
62 | }; | 62 | }; |
63 | 63 | ||
64 | /* Globals */ | 64 | /* Globals */ |
@@ -188,7 +188,7 @@ nfsd4_create_clid_dir(struct nfs4_client *clp) | |||
188 | 188 | ||
189 | status = mnt_want_write_file(nn->rec_file); | 189 | status = mnt_want_write_file(nn->rec_file); |
190 | if (status) | 190 | if (status) |
191 | return; | 191 | goto out_creds; |
192 | 192 | ||
193 | dir = nn->rec_file->f_path.dentry; | 193 | dir = nn->rec_file->f_path.dentry; |
194 | /* lock the parent */ | 194 | /* lock the parent */ |
@@ -228,6 +228,7 @@ out_unlock: | |||
228 | user_recovery_dirname); | 228 | user_recovery_dirname); |
229 | } | 229 | } |
230 | mnt_drop_write_file(nn->rec_file); | 230 | mnt_drop_write_file(nn->rec_file); |
231 | out_creds: | ||
231 | nfs4_reset_creds(original_cred); | 232 | nfs4_reset_creds(original_cred); |
232 | } | 233 | } |
233 | 234 | ||
@@ -392,7 +393,7 @@ purge_old(struct dentry *parent, struct dentry *child, struct nfsd_net *nn) | |||
392 | } | 393 | } |
393 | 394 | ||
394 | static void | 395 | static void |
395 | nfsd4_recdir_purge_old(struct nfsd_net *nn, time_t boot_time) | 396 | nfsd4_recdir_purge_old(struct nfsd_net *nn) |
396 | { | 397 | { |
397 | int status; | 398 | int status; |
398 | 399 | ||
@@ -479,6 +480,16 @@ nfsd4_init_recdir(struct net *net) | |||
479 | return status; | 480 | return status; |
480 | } | 481 | } |
481 | 482 | ||
483 | static void | ||
484 | nfsd4_shutdown_recdir(struct net *net) | ||
485 | { | ||
486 | struct nfsd_net *nn = net_generic(net, nfsd_net_id); | ||
487 | |||
488 | if (!nn->rec_file) | ||
489 | return; | ||
490 | fput(nn->rec_file); | ||
491 | nn->rec_file = NULL; | ||
492 | } | ||
482 | 493 | ||
483 | static int | 494 | static int |
484 | nfs4_legacy_state_init(struct net *net) | 495 | nfs4_legacy_state_init(struct net *net) |
@@ -512,10 +523,13 @@ nfsd4_load_reboot_recovery_data(struct net *net) | |||
512 | int status; | 523 | int status; |
513 | 524 | ||
514 | status = nfsd4_init_recdir(net); | 525 | status = nfsd4_init_recdir(net); |
515 | if (!status) | ||
516 | status = nfsd4_recdir_load(net); | ||
517 | if (status) | 526 | if (status) |
518 | printk(KERN_ERR "NFSD: Failure reading reboot recovery data\n"); | 527 | return status; |
528 | |||
529 | status = nfsd4_recdir_load(net); | ||
530 | if (status) | ||
531 | nfsd4_shutdown_recdir(net); | ||
532 | |||
519 | return status; | 533 | return status; |
520 | } | 534 | } |
521 | 535 | ||
@@ -546,21 +560,12 @@ err: | |||
546 | } | 560 | } |
547 | 561 | ||
548 | static void | 562 | static void |
549 | nfsd4_shutdown_recdir(struct nfsd_net *nn) | ||
550 | { | ||
551 | if (!nn->rec_file) | ||
552 | return; | ||
553 | fput(nn->rec_file); | ||
554 | nn->rec_file = NULL; | ||
555 | } | ||
556 | |||
557 | static void | ||
558 | nfsd4_legacy_tracking_exit(struct net *net) | 563 | nfsd4_legacy_tracking_exit(struct net *net) |
559 | { | 564 | { |
560 | struct nfsd_net *nn = net_generic(net, nfsd_net_id); | 565 | struct nfsd_net *nn = net_generic(net, nfsd_net_id); |
561 | 566 | ||
562 | nfs4_release_reclaim(nn); | 567 | nfs4_release_reclaim(nn); |
563 | nfsd4_shutdown_recdir(nn); | 568 | nfsd4_shutdown_recdir(net); |
564 | nfs4_legacy_state_shutdown(net); | 569 | nfs4_legacy_state_shutdown(net); |
565 | } | 570 | } |
566 | 571 | ||
@@ -1016,7 +1021,7 @@ nfsd4_cld_check(struct nfs4_client *clp) | |||
1016 | } | 1021 | } |
1017 | 1022 | ||
1018 | static void | 1023 | static void |
1019 | nfsd4_cld_grace_done(struct nfsd_net *nn, time_t boot_time) | 1024 | nfsd4_cld_grace_done(struct nfsd_net *nn) |
1020 | { | 1025 | { |
1021 | int ret; | 1026 | int ret; |
1022 | struct cld_upcall *cup; | 1027 | struct cld_upcall *cup; |
@@ -1029,7 +1034,7 @@ nfsd4_cld_grace_done(struct nfsd_net *nn, time_t boot_time) | |||
1029 | } | 1034 | } |
1030 | 1035 | ||
1031 | cup->cu_msg.cm_cmd = Cld_GraceDone; | 1036 | cup->cu_msg.cm_cmd = Cld_GraceDone; |
1032 | cup->cu_msg.cm_u.cm_gracetime = (int64_t)boot_time; | 1037 | cup->cu_msg.cm_u.cm_gracetime = (int64_t)nn->boot_time; |
1033 | ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg); | 1038 | ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg); |
1034 | if (!ret) | 1039 | if (!ret) |
1035 | ret = cup->cu_msg.cm_status; | 1040 | ret = cup->cu_msg.cm_status; |
@@ -1062,6 +1067,8 @@ MODULE_PARM_DESC(cltrack_legacy_disable, | |||
1062 | 1067 | ||
1063 | #define LEGACY_TOPDIR_ENV_PREFIX "NFSDCLTRACK_LEGACY_TOPDIR=" | 1068 | #define LEGACY_TOPDIR_ENV_PREFIX "NFSDCLTRACK_LEGACY_TOPDIR=" |
1064 | #define LEGACY_RECDIR_ENV_PREFIX "NFSDCLTRACK_LEGACY_RECDIR=" | 1069 | #define LEGACY_RECDIR_ENV_PREFIX "NFSDCLTRACK_LEGACY_RECDIR=" |
1070 | #define HAS_SESSION_ENV_PREFIX "NFSDCLTRACK_CLIENT_HAS_SESSION=" | ||
1071 | #define GRACE_START_ENV_PREFIX "NFSDCLTRACK_GRACE_START=" | ||
1065 | 1072 | ||
1066 | static char * | 1073 | static char * |
1067 | nfsd4_cltrack_legacy_topdir(void) | 1074 | nfsd4_cltrack_legacy_topdir(void) |
@@ -1126,10 +1133,60 @@ nfsd4_cltrack_legacy_recdir(const struct xdr_netobj *name) | |||
1126 | return result; | 1133 | return result; |
1127 | } | 1134 | } |
1128 | 1135 | ||
1136 | static char * | ||
1137 | nfsd4_cltrack_client_has_session(struct nfs4_client *clp) | ||
1138 | { | ||
1139 | int copied; | ||
1140 | size_t len; | ||
1141 | char *result; | ||
1142 | |||
1143 | /* prefix + Y/N character + terminating NULL */ | ||
1144 | len = strlen(HAS_SESSION_ENV_PREFIX) + 1 + 1; | ||
1145 | |||
1146 | result = kmalloc(len, GFP_KERNEL); | ||
1147 | if (!result) | ||
1148 | return result; | ||
1149 | |||
1150 | copied = snprintf(result, len, HAS_SESSION_ENV_PREFIX "%c", | ||
1151 | clp->cl_minorversion ? 'Y' : 'N'); | ||
1152 | if (copied >= len) { | ||
1153 | /* just return nothing if output was truncated */ | ||
1154 | kfree(result); | ||
1155 | return NULL; | ||
1156 | } | ||
1157 | |||
1158 | return result; | ||
1159 | } | ||
1160 | |||
1161 | static char * | ||
1162 | nfsd4_cltrack_grace_start(time_t grace_start) | ||
1163 | { | ||
1164 | int copied; | ||
1165 | size_t len; | ||
1166 | char *result; | ||
1167 | |||
1168 | /* prefix + max width of int64_t string + terminating NULL */ | ||
1169 | len = strlen(GRACE_START_ENV_PREFIX) + 22 + 1; | ||
1170 | |||
1171 | result = kmalloc(len, GFP_KERNEL); | ||
1172 | if (!result) | ||
1173 | return result; | ||
1174 | |||
1175 | copied = snprintf(result, len, GRACE_START_ENV_PREFIX "%ld", | ||
1176 | grace_start); | ||
1177 | if (copied >= len) { | ||
1178 | /* just return nothing if output was truncated */ | ||
1179 | kfree(result); | ||
1180 | return NULL; | ||
1181 | } | ||
1182 | |||
1183 | return result; | ||
1184 | } | ||
1185 | |||
1129 | static int | 1186 | static int |
1130 | nfsd4_umh_cltrack_upcall(char *cmd, char *arg, char *legacy) | 1187 | nfsd4_umh_cltrack_upcall(char *cmd, char *arg, char *env0, char *env1) |
1131 | { | 1188 | { |
1132 | char *envp[2]; | 1189 | char *envp[3]; |
1133 | char *argv[4]; | 1190 | char *argv[4]; |
1134 | int ret; | 1191 | int ret; |
1135 | 1192 | ||
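The two nfsd4_cltrack_*() helpers above share one formatting idiom: size the buffer from the prefix length, then treat an snprintf() return of len or more as truncation and hand back NULL rather than a clipped string. A standalone sketch of the same idiom, with hypothetical names and userspace allocation:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Build "PREFIX<value>" or return NULL; mirrors the truncation guard
 * used by the helpers above (a sketch, not the kernel code itself). */
static char *make_env(const char *prefix, long value)
{
	/* prefix + max width of a signed 64-bit decimal + NUL */
	size_t len = strlen(prefix) + 22 + 1;
	char *s = malloc(len);

	if (!s)
		return NULL;
	if ((size_t)snprintf(s, len, "%s%ld", prefix, value) >= len) {
		free(s);	/* truncated: return nothing */
		return NULL;
	}
	return s;
}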
@@ -1140,10 +1197,12 @@ nfsd4_umh_cltrack_upcall(char *cmd, char *arg, char *legacy) | |||
1140 | 1197 | ||
1141 | dprintk("%s: cmd: %s\n", __func__, cmd); | 1198 | dprintk("%s: cmd: %s\n", __func__, cmd); |
1142 | dprintk("%s: arg: %s\n", __func__, arg ? arg : "(null)"); | 1199 | dprintk("%s: arg: %s\n", __func__, arg ? arg : "(null)"); |
1143 | dprintk("%s: legacy: %s\n", __func__, legacy ? legacy : "(null)"); | 1200 | dprintk("%s: env0: %s\n", __func__, env0 ? env0 : "(null)"); |
1201 | dprintk("%s: env1: %s\n", __func__, env1 ? env1 : "(null)"); | ||
1144 | 1202 | ||
1145 | envp[0] = legacy; | 1203 | envp[0] = env0; |
1146 | envp[1] = NULL; | 1204 | envp[1] = env1; |
1205 | envp[2] = NULL; | ||
1147 | 1206 | ||
1148 | argv[0] = (char *)cltrack_prog; | 1207 | argv[0] = (char *)cltrack_prog; |
1149 | argv[1] = cmd; | 1208 | argv[1] = cmd; |
@@ -1187,28 +1246,78 @@ bin_to_hex_dup(const unsigned char *src, int srclen) | |||
1187 | } | 1246 | } |
1188 | 1247 | ||
1189 | static int | 1248 | static int |
1190 | nfsd4_umh_cltrack_init(struct net __attribute__((unused)) *net) | 1249 | nfsd4_umh_cltrack_init(struct net *net) |
1191 | { | 1250 | { |
1251 | int ret; | ||
1252 | struct nfsd_net *nn = net_generic(net, nfsd_net_id); | ||
1253 | char *grace_start = nfsd4_cltrack_grace_start(nn->boot_time); | ||
1254 | |||
1192 | /* XXX: The usermode helper is not working in a container yet. */ | 1255 | /* XXX: The usermode helper is not working in a container yet. */ |
1193 | if (net != &init_net) { | 1256 | if (net != &init_net) { |
1194 | WARN(1, KERN_ERR "NFSD: attempt to initialize umh client " | 1257 | WARN(1, KERN_ERR "NFSD: attempt to initialize umh client " |
1195 | "tracking in a container!\n"); | 1258 | "tracking in a container!\n"); |
1196 | return -EINVAL; | 1259 | return -EINVAL; |
1197 | } | 1260 | } |
1198 | return nfsd4_umh_cltrack_upcall("init", NULL, NULL); | 1261 | |
1262 | ret = nfsd4_umh_cltrack_upcall("init", NULL, grace_start, NULL); | ||
1263 | kfree(grace_start); | ||
1264 | return ret; | ||
1265 | } | ||
1266 | |||
1267 | static void | ||
1268 | nfsd4_cltrack_upcall_lock(struct nfs4_client *clp) | ||
1269 | { | ||
1270 | wait_on_bit_lock(&clp->cl_flags, NFSD4_CLIENT_UPCALL_LOCK, | ||
1271 | TASK_UNINTERRUPTIBLE); | ||
1272 | } | ||
1273 | |||
1274 | static void | ||
1275 | nfsd4_cltrack_upcall_unlock(struct nfs4_client *clp) | ||
1276 | { | ||
1277 | smp_mb__before_atomic(); | ||
1278 | clear_bit(NFSD4_CLIENT_UPCALL_LOCK, &clp->cl_flags); | ||
1279 | smp_mb__after_atomic(); | ||
1280 | wake_up_bit(&clp->cl_flags, NFSD4_CLIENT_UPCALL_LOCK); | ||
1199 | } | 1281 | } |
1200 | 1282 | ||
1201 | static void | 1283 | static void |
1202 | nfsd4_umh_cltrack_create(struct nfs4_client *clp) | 1284 | nfsd4_umh_cltrack_create(struct nfs4_client *clp) |
1203 | { | 1285 | { |
1204 | char *hexid; | 1286 | char *hexid, *has_session, *grace_start; |
1287 | struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); | ||
1288 | |||
1289 | /* | ||
1290 | * With v4.0 clients, there's little difference in outcome between a | ||
1291 | * create and check operation, and we can end up calling into this | ||
1292 | * function multiple times per client (once for each openowner). So, | ||
1293 | * for v4.0 clients skip upcalling once the client has been recorded | ||
1294 | * on stable storage. | ||
1295 | * | ||
1296 | * For v4.1+ clients, the outcome of the two operations is different, | ||
1297 | * so we must ensure that we upcall for the create operation. v4.1+ | ||
1298 | * clients call this on RECLAIM_COMPLETE though, so we should only end | ||
1299 | * up doing a single create upcall per client. | ||
1300 | */ | ||
1301 | if (clp->cl_minorversion == 0 && | ||
1302 | test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags)) | ||
1303 | return; | ||
1205 | 1304 | ||
1206 | hexid = bin_to_hex_dup(clp->cl_name.data, clp->cl_name.len); | 1305 | hexid = bin_to_hex_dup(clp->cl_name.data, clp->cl_name.len); |
1207 | if (!hexid) { | 1306 | if (!hexid) { |
1208 | dprintk("%s: can't allocate memory for upcall!\n", __func__); | 1307 | dprintk("%s: can't allocate memory for upcall!\n", __func__); |
1209 | return; | 1308 | return; |
1210 | } | 1309 | } |
1211 | nfsd4_umh_cltrack_upcall("create", hexid, NULL); | 1310 | |
1311 | has_session = nfsd4_cltrack_client_has_session(clp); | ||
1312 | grace_start = nfsd4_cltrack_grace_start(nn->boot_time); | ||
1313 | |||
1314 | nfsd4_cltrack_upcall_lock(clp); | ||
1315 | if (!nfsd4_umh_cltrack_upcall("create", hexid, has_session, grace_start)) | ||
1316 | set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags); | ||
1317 | nfsd4_cltrack_upcall_unlock(clp); | ||
1318 | |||
1319 | kfree(has_session); | ||
1320 | kfree(grace_start); | ||
1212 | kfree(hexid); | 1321 | kfree(hexid); |
1213 | } | 1322 | } |
1214 | 1323 | ||
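The upcall lock introduced above is a bit-lock: wait_on_bit_lock() sleeps until it wins the bit, and the unlock side pairs clear_bit() with memory barriers and wake_up_bit(). Note also the check-lock-check shape in the create path: NFSD4_CLIENT_STABLE is tested cheaply outside the lock, and the upcall runs under it, so concurrent openowners produce at most one upcall. A compressed sketch of that shape (MY_DONE, MY_LOCK and do_slow_upcall() are placeholders, not nfsd names):

#include <linux/bitops.h>
#include <linux/sched.h>
#include <linux/wait.h>

#define MY_DONE	0	/* action already recorded */
#define MY_LOCK	1	/* serializes the slow path */

extern int do_slow_upcall(void);	/* placeholder for the upcall */

static void record_once(unsigned long *flags)
{
	if (test_bit(MY_DONE, flags))		/* cheap unlocked test */
		return;
	wait_on_bit_lock(flags, MY_LOCK, TASK_UNINTERRUPTIBLE);
	if (!test_bit(MY_DONE, flags) &&	/* recheck under the bit-lock */
	    do_slow_upcall() == 0)
		set_bit(MY_DONE, flags);
	smp_mb__before_atomic();
	clear_bit(MY_LOCK, flags);
	smp_mb__after_atomic();
	wake_up_bit(flags, MY_LOCK);
}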
@@ -1217,12 +1326,21 @@ nfsd4_umh_cltrack_remove(struct nfs4_client *clp) | |||
1217 | { | 1326 | { |
1218 | char *hexid; | 1327 | char *hexid; |
1219 | 1328 | ||
1329 | if (!test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags)) | ||
1330 | return; | ||
1331 | |||
1220 | hexid = bin_to_hex_dup(clp->cl_name.data, clp->cl_name.len); | 1332 | hexid = bin_to_hex_dup(clp->cl_name.data, clp->cl_name.len); |
1221 | if (!hexid) { | 1333 | if (!hexid) { |
1222 | dprintk("%s: can't allocate memory for upcall!\n", __func__); | 1334 | dprintk("%s: can't allocate memory for upcall!\n", __func__); |
1223 | return; | 1335 | return; |
1224 | } | 1336 | } |
1225 | nfsd4_umh_cltrack_upcall("remove", hexid, NULL); | 1337 | |
1338 | nfsd4_cltrack_upcall_lock(clp); | ||
1339 | if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags) && | ||
1340 | nfsd4_umh_cltrack_upcall("remove", hexid, NULL, NULL) == 0) | ||
1341 | clear_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags); | ||
1342 | nfsd4_cltrack_upcall_unlock(clp); | ||
1343 | |||
1226 | kfree(hexid); | 1344 | kfree(hexid); |
1227 | } | 1345 | } |
1228 | 1346 | ||
@@ -1230,30 +1348,45 @@ static int | |||
1230 | nfsd4_umh_cltrack_check(struct nfs4_client *clp) | 1348 | nfsd4_umh_cltrack_check(struct nfs4_client *clp) |
1231 | { | 1349 | { |
1232 | int ret; | 1350 | int ret; |
1233 | char *hexid, *legacy; | 1351 | char *hexid, *has_session, *legacy; |
1352 | |||
1353 | if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags)) | ||
1354 | return 0; | ||
1234 | 1355 | ||
1235 | hexid = bin_to_hex_dup(clp->cl_name.data, clp->cl_name.len); | 1356 | hexid = bin_to_hex_dup(clp->cl_name.data, clp->cl_name.len); |
1236 | if (!hexid) { | 1357 | if (!hexid) { |
1237 | dprintk("%s: can't allocate memory for upcall!\n", __func__); | 1358 | dprintk("%s: can't allocate memory for upcall!\n", __func__); |
1238 | return -ENOMEM; | 1359 | return -ENOMEM; |
1239 | } | 1360 | } |
1361 | |||
1362 | has_session = nfsd4_cltrack_client_has_session(clp); | ||
1240 | legacy = nfsd4_cltrack_legacy_recdir(&clp->cl_name); | 1363 | legacy = nfsd4_cltrack_legacy_recdir(&clp->cl_name); |
1241 | ret = nfsd4_umh_cltrack_upcall("check", hexid, legacy); | 1364 | |
1365 | nfsd4_cltrack_upcall_lock(clp); | ||
1366 | if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags)) { | ||
1367 | ret = 0; | ||
1368 | } else { | ||
1369 | ret = nfsd4_umh_cltrack_upcall("check", hexid, has_session, legacy); | ||
1370 | if (ret == 0) | ||
1371 | set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags); | ||
1372 | } | ||
1373 | nfsd4_cltrack_upcall_unlock(clp); | ||
1374 | kfree(has_session); | ||
1242 | kfree(legacy); | 1375 | kfree(legacy); |
1243 | kfree(hexid); | 1376 | kfree(hexid); |
1377 | |||
1244 | return ret; | 1378 | return ret; |
1245 | } | 1379 | } |
1246 | 1380 | ||
1247 | static void | 1381 | static void |
1248 | nfsd4_umh_cltrack_grace_done(struct nfsd_net __attribute__((unused)) *nn, | 1382 | nfsd4_umh_cltrack_grace_done(struct nfsd_net *nn) |
1249 | time_t boot_time) | ||
1250 | { | 1383 | { |
1251 | char *legacy; | 1384 | char *legacy; |
1252 | char timestr[22]; /* FIXME: better way to determine max size? */ | 1385 | char timestr[22]; /* FIXME: better way to determine max size? */ |
1253 | 1386 | ||
1254 | sprintf(timestr, "%ld", boot_time); | 1387 | sprintf(timestr, "%ld", nn->boot_time); |
1255 | legacy = nfsd4_cltrack_legacy_topdir(); | 1388 | legacy = nfsd4_cltrack_legacy_topdir(); |
1256 | nfsd4_umh_cltrack_upcall("gracedone", timestr, legacy); | 1389 | nfsd4_umh_cltrack_upcall("gracedone", timestr, legacy, NULL); |
1257 | kfree(legacy); | 1390 | kfree(legacy); |
1258 | } | 1391 | } |
1259 | 1392 | ||
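On the FIXME above: 22 bytes is in fact enough for any value printed with "%ld" on a 64-bit machine, since the widest value, -9223372036854775808, is 19 digits plus a sign, and the NUL brings the total to 21. A compile-time expression of that bound, as an illustration only:

/* 20 chars for the widest 64-bit value, plus the NUL, fits in 22 */
_Static_assert(sizeof("-9223372036854775808") <= 22,
	       "timestr[22] is large enough");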
@@ -1356,10 +1489,10 @@ nfsd4_client_record_check(struct nfs4_client *clp) | |||
1356 | } | 1489 | } |
1357 | 1490 | ||
1358 | void | 1491 | void |
1359 | nfsd4_record_grace_done(struct nfsd_net *nn, time_t boot_time) | 1492 | nfsd4_record_grace_done(struct nfsd_net *nn) |
1360 | { | 1493 | { |
1361 | if (nn->client_tracking_ops) | 1494 | if (nn->client_tracking_ops) |
1362 | nn->client_tracking_ops->grace_done(nn, boot_time); | 1495 | nn->client_tracking_ops->grace_done(nn); |
1363 | } | 1496 | } |
1364 | 1497 | ||
1365 | static int | 1498 | static int |
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index d1b851548b7a..e9c3afe4b5d3 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c | |||
@@ -96,6 +96,8 @@ static struct kmem_cache *deleg_slab; | |||
96 | 96 | ||
97 | static void free_session(struct nfsd4_session *); | 97 | static void free_session(struct nfsd4_session *); |
98 | 98 | ||
99 | static struct nfsd4_callback_ops nfsd4_cb_recall_ops; | ||
100 | |||
99 | static bool is_session_dead(struct nfsd4_session *ses) | 101 | static bool is_session_dead(struct nfsd4_session *ses) |
100 | { | 102 | { |
101 | return ses->se_flags & NFS4_SESSION_DEAD; | 103 | return ses->se_flags & NFS4_SESSION_DEAD; |
@@ -650,7 +652,9 @@ alloc_init_deleg(struct nfs4_client *clp, struct svc_fh *current_fh) | |||
650 | INIT_LIST_HEAD(&dp->dl_perclnt); | 652 | INIT_LIST_HEAD(&dp->dl_perclnt); |
651 | INIT_LIST_HEAD(&dp->dl_recall_lru); | 653 | INIT_LIST_HEAD(&dp->dl_recall_lru); |
652 | dp->dl_type = NFS4_OPEN_DELEGATE_READ; | 654 | dp->dl_type = NFS4_OPEN_DELEGATE_READ; |
653 | INIT_WORK(&dp->dl_recall.cb_work, nfsd4_run_cb_recall); | 655 | dp->dl_retries = 1; |
656 | nfsd4_init_cb(&dp->dl_recall, dp->dl_stid.sc_client, | ||
657 | &nfsd4_cb_recall_ops, NFSPROC4_CLNT_CB_RECALL); | ||
654 | return dp; | 658 | return dp; |
655 | out_dec: | 659 | out_dec: |
656 | atomic_long_dec(&num_delegations); | 660 | atomic_long_dec(&num_delegations); |
@@ -1870,7 +1874,7 @@ static struct nfs4_client *create_client(struct xdr_netobj name, | |||
1870 | free_client(clp); | 1874 | free_client(clp); |
1871 | return NULL; | 1875 | return NULL; |
1872 | } | 1876 | } |
1873 | INIT_WORK(&clp->cl_cb_null.cb_work, nfsd4_run_cb_null); | 1877 | nfsd4_init_cb(&clp->cl_cb_null, clp, NULL, NFSPROC4_CLNT_CB_NULL); |
1874 | clp->cl_time = get_seconds(); | 1878 | clp->cl_time = get_seconds(); |
1875 | clear_bit(0, &clp->cl_cb_slot_busy); | 1879 | clear_bit(0, &clp->cl_cb_slot_busy); |
1876 | copy_verf(clp, verf); | 1880 | copy_verf(clp, verf); |
@@ -3355,8 +3359,9 @@ nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type) | |||
3355 | return ret; | 3359 | return ret; |
3356 | } | 3360 | } |
3357 | 3361 | ||
3358 | void nfsd4_prepare_cb_recall(struct nfs4_delegation *dp) | 3362 | static void nfsd4_cb_recall_prepare(struct nfsd4_callback *cb) |
3359 | { | 3363 | { |
3364 | struct nfs4_delegation *dp = cb_to_delegation(cb); | ||
3360 | struct nfsd_net *nn = net_generic(dp->dl_stid.sc_client->net, | 3365 | struct nfsd_net *nn = net_generic(dp->dl_stid.sc_client->net, |
3361 | nfsd_net_id); | 3366 | nfsd_net_id); |
3362 | 3367 | ||
@@ -3377,6 +3382,43 @@ void nfsd4_prepare_cb_recall(struct nfs4_delegation *dp) | |||
3377 | spin_unlock(&state_lock); | 3382 | spin_unlock(&state_lock); |
3378 | } | 3383 | } |
3379 | 3384 | ||
3385 | static int nfsd4_cb_recall_done(struct nfsd4_callback *cb, | ||
3386 | struct rpc_task *task) | ||
3387 | { | ||
3388 | struct nfs4_delegation *dp = cb_to_delegation(cb); | ||
3389 | |||
3390 | switch (task->tk_status) { | ||
3391 | case 0: | ||
3392 | return 1; | ||
3393 | case -EBADHANDLE: | ||
3394 | case -NFS4ERR_BAD_STATEID: | ||
3395 | /* | ||
3396 | * Race: client probably got cb_recall before open reply | ||
3397 | * granting delegation. | ||
3398 | */ | ||
3399 | if (dp->dl_retries--) { | ||
3400 | rpc_delay(task, 2 * HZ); | ||
3401 | return 0; | ||
3402 | } | ||
3403 | /*FALLTHRU*/ | ||
3404 | default: | ||
3405 | return -1; | ||
3406 | } | ||
3407 | } | ||
3408 | |||
3409 | static void nfsd4_cb_recall_release(struct nfsd4_callback *cb) | ||
3410 | { | ||
3411 | struct nfs4_delegation *dp = cb_to_delegation(cb); | ||
3412 | |||
3413 | nfs4_put_stid(&dp->dl_stid); | ||
3414 | } | ||
3415 | |||
3416 | static struct nfsd4_callback_ops nfsd4_cb_recall_ops = { | ||
3417 | .prepare = nfsd4_cb_recall_prepare, | ||
3418 | .done = nfsd4_cb_recall_done, | ||
3419 | .release = nfsd4_cb_recall_release, | ||
3420 | }; | ||
3421 | |||
3380 | static void nfsd_break_one_deleg(struct nfs4_delegation *dp) | 3422 | static void nfsd_break_one_deleg(struct nfs4_delegation *dp) |
3381 | { | 3423 | { |
3382 | /* | 3424 | /* |
@@ -3387,7 +3429,7 @@ static void nfsd_break_one_deleg(struct nfs4_delegation *dp) | |||
3387 | * it's safe to take a reference. | 3429 | * it's safe to take a reference. |
3388 | */ | 3430 | */ |
3389 | atomic_inc(&dp->dl_stid.sc_count); | 3431 | atomic_inc(&dp->dl_stid.sc_count); |
3390 | nfsd4_cb_recall(dp); | 3432 | nfsd4_run_cb(&dp->dl_recall); |
3391 | } | 3433 | } |
3392 | 3434 | ||
3393 | /* Called from break_lease() with i_lock held. */ | 3435 | /* Called from break_lease() with i_lock held. */ |
@@ -4113,7 +4155,7 @@ out: | |||
4113 | return status; | 4155 | return status; |
4114 | } | 4156 | } |
4115 | 4157 | ||
4116 | static void | 4158 | void |
4117 | nfsd4_end_grace(struct nfsd_net *nn) | 4159 | nfsd4_end_grace(struct nfsd_net *nn) |
4118 | { | 4160 | { |
4119 | /* do nothing if grace period already ended */ | 4161 | /* do nothing if grace period already ended */ |
@@ -4122,14 +4164,28 @@ nfsd4_end_grace(struct nfsd_net *nn) | |||
4122 | 4164 | ||
4123 | dprintk("NFSD: end of grace period\n"); | 4165 | dprintk("NFSD: end of grace period\n"); |
4124 | nn->grace_ended = true; | 4166 | nn->grace_ended = true; |
4125 | nfsd4_record_grace_done(nn, nn->boot_time); | 4167 | /* |
4168 | * If the server goes down again right now, an NFSv4 | ||
4169 | * client will still be allowed to reclaim after it comes back up, | ||
4170 | * even if it hasn't yet had a chance to reclaim state this time. | ||
4171 | * | ||
4172 | */ | ||
4173 | nfsd4_record_grace_done(nn); | ||
4174 | /* | ||
4175 | * At this point, NFSv4 clients can still reclaim. But if the | ||
4176 | * server crashes, any that have not yet reclaimed will be out | ||
4177 | * of luck on the next boot. | ||
4178 | * | ||
4179 | * (NFSv4.1+ clients are considered to have reclaimed once they | ||
4180 | * call RECLAIM_COMPLETE. NFSv4.0 clients are considered to | ||
4181 | * have reclaimed after their first OPEN.) | ||
4182 | */ | ||
4126 | locks_end_grace(&nn->nfsd4_manager); | 4183 | locks_end_grace(&nn->nfsd4_manager); |
4127 | /* | 4184 | /* |
4128 | * Now that every NFSv4 client has had the chance to recover and | 4185 | * At this point, and once lockd and/or any other containers |
4129 | * to see the (possibly new, possibly shorter) lease time, we | 4186 | * exit their grace period, further reclaims will fail and |
4130 | * can safely set the next grace time to the current lease time: | 4187 | * regular locking can resume. |
4131 | */ | 4188 | */ |
4132 | nn->nfsd4_grace = nn->nfsd4_lease; | ||
4133 | } | 4189 | } |
4134 | 4190 | ||
4135 | static time_t | 4191 | static time_t |
@@ -5664,6 +5720,9 @@ nfs4_check_open_reclaim(clientid_t *clid, | |||
5664 | if (status) | 5720 | if (status) |
5665 | return nfserr_reclaim_bad; | 5721 | return nfserr_reclaim_bad; |
5666 | 5722 | ||
5723 | if (test_bit(NFSD4_CLIENT_RECLAIM_COMPLETE, &cstate->clp->cl_flags)) | ||
5724 | return nfserr_no_grace; | ||
5725 | |||
5667 | if (nfsd4_client_record_check(cstate->clp)) | 5726 | if (nfsd4_client_record_check(cstate->clp)) |
5668 | return nfserr_reclaim_bad; | 5727 | return nfserr_reclaim_bad; |
5669 | 5728 | ||
@@ -6361,10 +6420,10 @@ nfs4_state_start_net(struct net *net) | |||
6361 | ret = nfs4_state_create_net(net); | 6420 | ret = nfs4_state_create_net(net); |
6362 | if (ret) | 6421 | if (ret) |
6363 | return ret; | 6422 | return ret; |
6364 | nfsd4_client_tracking_init(net); | ||
6365 | nn->boot_time = get_seconds(); | 6423 | nn->boot_time = get_seconds(); |
6366 | locks_start_grace(net, &nn->nfsd4_manager); | ||
6367 | nn->grace_ended = false; | 6424 | nn->grace_ended = false; |
6425 | locks_start_grace(net, &nn->nfsd4_manager); | ||
6426 | nfsd4_client_tracking_init(net); | ||
6368 | printk(KERN_INFO "NFSD: starting %ld-second grace period (net %p)\n", | 6427 | printk(KERN_INFO "NFSD: starting %ld-second grace period (net %p)\n", |
6369 | nn->nfsd4_grace, net); | 6428 | nn->nfsd4_grace, net); |
6370 | queue_delayed_work(laundry_wq, &nn->laundromat_work, nn->nfsd4_grace * HZ); | 6429 | queue_delayed_work(laundry_wq, &nn->laundromat_work, nn->nfsd4_grace * HZ); |
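Summarizing the nfsd4_callback_ops contract as the recall code above uses it (inferred from this hunk, so treat it as a reading aid rather than a specification): .prepare runs before the RPC is sent, .done returns 1 to finish, 0 to have the sunrpc layer retry the task (typically after an rpc_delay()), and a negative value to give up, after which .release drops the reference. A trimmed done-callback in that style; my_retries is a placeholder:

#include <linux/errno.h>
#include <linux/sunrpc/clnt.h>
#include "state.h"			/* for struct nfsd4_callback */

static int my_retries = 1;		/* placeholder retry budget */

static int my_cb_done(struct nfsd4_callback *cb, struct rpc_task *task)
{
	switch (task->tk_status) {
	case 0:
		return 1;			/* success: run ->release */
	case -EBADHANDLE:
		if (my_retries-- > 0) {
			rpc_delay(task, 2 * HZ);/* back off two seconds */
			return 0;		/* ask sunrpc to retry */
		}
		/* fall through */
	default:
		return -1;			/* permanent failure */
	}
}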
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index e771a1a7c6f1..eeea7a90eb87 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c | |||
@@ -1514,6 +1514,22 @@ static __be32 nfsd4_decode_reclaim_complete(struct nfsd4_compoundargs *argp, str | |||
1514 | } | 1514 | } |
1515 | 1515 | ||
1516 | static __be32 | 1516 | static __be32 |
1517 | nfsd4_decode_seek(struct nfsd4_compoundargs *argp, struct nfsd4_seek *seek) | ||
1518 | { | ||
1519 | DECODE_HEAD; | ||
1520 | |||
1521 | status = nfsd4_decode_stateid(argp, &seek->seek_stateid); | ||
1522 | if (status) | ||
1523 | return status; | ||
1524 | |||
1525 | READ_BUF(8 + 4); | ||
1526 | p = xdr_decode_hyper(p, &seek->seek_offset); | ||
1527 | seek->seek_whence = be32_to_cpup(p); | ||
1528 | |||
1529 | DECODE_TAIL; | ||
1530 | } | ||
1531 | |||
1532 | static __be32 | ||
1517 | nfsd4_decode_noop(struct nfsd4_compoundargs *argp, void *p) | 1533 | nfsd4_decode_noop(struct nfsd4_compoundargs *argp, void *p) |
1518 | { | 1534 | { |
1519 | return nfs_ok; | 1535 | return nfs_ok; |
@@ -1586,6 +1602,20 @@ static nfsd4_dec nfsd4_dec_ops[] = { | |||
1586 | [OP_WANT_DELEGATION] = (nfsd4_dec)nfsd4_decode_notsupp, | 1602 | [OP_WANT_DELEGATION] = (nfsd4_dec)nfsd4_decode_notsupp, |
1587 | [OP_DESTROY_CLIENTID] = (nfsd4_dec)nfsd4_decode_destroy_clientid, | 1603 | [OP_DESTROY_CLIENTID] = (nfsd4_dec)nfsd4_decode_destroy_clientid, |
1588 | [OP_RECLAIM_COMPLETE] = (nfsd4_dec)nfsd4_decode_reclaim_complete, | 1604 | [OP_RECLAIM_COMPLETE] = (nfsd4_dec)nfsd4_decode_reclaim_complete, |
1605 | |||
1606 | /* new operations for NFSv4.2 */ | ||
1607 | [OP_ALLOCATE] = (nfsd4_dec)nfsd4_decode_notsupp, | ||
1608 | [OP_COPY] = (nfsd4_dec)nfsd4_decode_notsupp, | ||
1609 | [OP_COPY_NOTIFY] = (nfsd4_dec)nfsd4_decode_notsupp, | ||
1610 | [OP_DEALLOCATE] = (nfsd4_dec)nfsd4_decode_notsupp, | ||
1611 | [OP_IO_ADVISE] = (nfsd4_dec)nfsd4_decode_notsupp, | ||
1612 | [OP_LAYOUTERROR] = (nfsd4_dec)nfsd4_decode_notsupp, | ||
1613 | [OP_LAYOUTSTATS] = (nfsd4_dec)nfsd4_decode_notsupp, | ||
1614 | [OP_OFFLOAD_CANCEL] = (nfsd4_dec)nfsd4_decode_notsupp, | ||
1615 | [OP_OFFLOAD_STATUS] = (nfsd4_dec)nfsd4_decode_notsupp, | ||
1616 | [OP_READ_PLUS] = (nfsd4_dec)nfsd4_decode_notsupp, | ||
1617 | [OP_SEEK] = (nfsd4_dec)nfsd4_decode_seek, | ||
1618 | [OP_WRITE_SAME] = (nfsd4_dec)nfsd4_decode_notsupp, | ||
1589 | }; | 1619 | }; |
1590 | 1620 | ||
1591 | static inline bool | 1621 | static inline bool |
@@ -2658,6 +2688,7 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen, | |||
2658 | struct xdr_stream *xdr = cd->xdr; | 2688 | struct xdr_stream *xdr = cd->xdr; |
2659 | int start_offset = xdr->buf->len; | 2689 | int start_offset = xdr->buf->len; |
2660 | int cookie_offset; | 2690 | int cookie_offset; |
2691 | u32 name_and_cookie; | ||
2661 | int entry_bytes; | 2692 | int entry_bytes; |
2662 | __be32 nfserr = nfserr_toosmall; | 2693 | __be32 nfserr = nfserr_toosmall; |
2663 | __be64 wire_offset; | 2694 | __be64 wire_offset; |
@@ -2719,7 +2750,14 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen, | |||
2719 | cd->rd_maxcount -= entry_bytes; | 2750 | cd->rd_maxcount -= entry_bytes; |
2720 | if (!cd->rd_dircount) | 2751 | if (!cd->rd_dircount) |
2721 | goto fail; | 2752 | goto fail; |
2722 | cd->rd_dircount--; | 2753 | /* |
2754 | * RFC 3530 14.2.24 describes rd_dircount as only a "hint", so | ||
2755 | * let's always let through the first entry, at least: | ||
2756 | */ | ||
2757 | name_and_cookie = 4 * XDR_QUADLEN(namlen) + 8; | ||
2758 | if (name_and_cookie > cd->rd_dircount && cd->cookie_offset) | ||
2759 | goto fail; | ||
2760 | cd->rd_dircount -= min(cd->rd_dircount, name_and_cookie); | ||
2723 | cd->cookie_offset = cookie_offset; | 2761 | cd->cookie_offset = cookie_offset; |
2724 | skip_entry: | 2762 | skip_entry: |
2725 | cd->common.err = nfs_ok; | 2763 | cd->common.err = nfs_ok; |
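The per-entry charge above is the XDR-padded name plus an 8-byte cookie; XDR_QUADLEN() rounds a byte count up to 4-byte XDR words. Its usual definition and a worked example, stated here as an assumption matching the arithmetic in the hunk:

#define XDR_QUADLEN(l)	(((l) + 3) >> 2)	/* bytes -> 4-byte words */

/* a 5-byte name: XDR_QUADLEN(5) == 2 words == 8 bytes on the wire,
 * plus the 8-byte cookie, so name_and_cookie == 16 */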
@@ -3097,7 +3135,8 @@ static __be32 nfsd4_encode_splice_read( | |||
3097 | 3135 | ||
3098 | buf->page_len = maxcount; | 3136 | buf->page_len = maxcount; |
3099 | buf->len += maxcount; | 3137 | buf->len += maxcount; |
3100 | xdr->page_ptr += (maxcount + PAGE_SIZE - 1) / PAGE_SIZE; | 3138 | xdr->page_ptr += (buf->page_base + maxcount + PAGE_SIZE - 1) |
3139 | / PAGE_SIZE; | ||
3101 | 3140 | ||
3102 | /* Use rest of head for padding and remaining ops: */ | 3141 | /* Use rest of head for padding and remaining ops: */ |
3103 | buf->tail[0].iov_base = xdr->p; | 3142 | buf->tail[0].iov_base = xdr->p; |
@@ -3322,6 +3361,10 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 | |||
3322 | } | 3361 | } |
3323 | maxcount = min_t(int, maxcount-16, bytes_left); | 3362 | maxcount = min_t(int, maxcount-16, bytes_left); |
3324 | 3363 | ||
3364 | /* RFC 3530 14.2.24 allows us to ignore dircount when it's 0: */ | ||
3365 | if (!readdir->rd_dircount) | ||
3366 | readdir->rd_dircount = INT_MAX; | ||
3367 | |||
3325 | readdir->xdr = xdr; | 3368 | readdir->xdr = xdr; |
3326 | readdir->rd_maxcount = maxcount; | 3369 | readdir->rd_maxcount = maxcount; |
3327 | readdir->common.err = 0; | 3370 | readdir->common.err = 0; |
@@ -3752,6 +3795,22 @@ nfsd4_encode_test_stateid(struct nfsd4_compoundres *resp, __be32 nfserr, | |||
3752 | } | 3795 | } |
3753 | 3796 | ||
3754 | static __be32 | 3797 | static __be32 |
3798 | nfsd4_encode_seek(struct nfsd4_compoundres *resp, __be32 nfserr, | ||
3799 | struct nfsd4_seek *seek) | ||
3800 | { | ||
3801 | __be32 *p; | ||
3802 | |||
3803 | if (nfserr) | ||
3804 | return nfserr; | ||
3805 | |||
3806 | p = xdr_reserve_space(&resp->xdr, 4 + 8); | ||
3807 | *p++ = cpu_to_be32(seek->seek_eof); | ||
3808 | p = xdr_encode_hyper(p, seek->seek_pos); | ||
3809 | |||
3810 | return nfserr; | ||
3811 | } | ||
3812 | |||
3813 | static __be32 | ||
3755 | nfsd4_encode_noop(struct nfsd4_compoundres *resp, __be32 nfserr, void *p) | 3814 | nfsd4_encode_noop(struct nfsd4_compoundres *resp, __be32 nfserr, void *p) |
3756 | { | 3815 | { |
3757 | return nfserr; | 3816 | return nfserr; |
@@ -3823,6 +3882,20 @@ static nfsd4_enc nfsd4_enc_ops[] = { | |||
3823 | [OP_WANT_DELEGATION] = (nfsd4_enc)nfsd4_encode_noop, | 3882 | [OP_WANT_DELEGATION] = (nfsd4_enc)nfsd4_encode_noop, |
3824 | [OP_DESTROY_CLIENTID] = (nfsd4_enc)nfsd4_encode_noop, | 3883 | [OP_DESTROY_CLIENTID] = (nfsd4_enc)nfsd4_encode_noop, |
3825 | [OP_RECLAIM_COMPLETE] = (nfsd4_enc)nfsd4_encode_noop, | 3884 | [OP_RECLAIM_COMPLETE] = (nfsd4_enc)nfsd4_encode_noop, |
3885 | |||
3886 | /* NFSv4.2 operations */ | ||
3887 | [OP_ALLOCATE] = (nfsd4_enc)nfsd4_encode_noop, | ||
3888 | [OP_COPY] = (nfsd4_enc)nfsd4_encode_noop, | ||
3889 | [OP_COPY_NOTIFY] = (nfsd4_enc)nfsd4_encode_noop, | ||
3890 | [OP_DEALLOCATE] = (nfsd4_enc)nfsd4_encode_noop, | ||
3891 | [OP_IO_ADVISE] = (nfsd4_enc)nfsd4_encode_noop, | ||
3892 | [OP_LAYOUTERROR] = (nfsd4_enc)nfsd4_encode_noop, | ||
3893 | [OP_LAYOUTSTATS] = (nfsd4_enc)nfsd4_encode_noop, | ||
3894 | [OP_OFFLOAD_CANCEL] = (nfsd4_enc)nfsd4_encode_noop, | ||
3895 | [OP_OFFLOAD_STATUS] = (nfsd4_enc)nfsd4_encode_noop, | ||
3896 | [OP_READ_PLUS] = (nfsd4_enc)nfsd4_encode_noop, | ||
3897 | [OP_SEEK] = (nfsd4_enc)nfsd4_encode_seek, | ||
3898 | [OP_WRITE_SAME] = (nfsd4_enc)nfsd4_encode_noop, | ||
3826 | }; | 3899 | }; |
3827 | 3900 | ||
3828 | /* | 3901 | /* |
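For context on the SEEK plumbing added above: the request carries a stateid, an 8-byte offset, and a 4-byte whence, while the reply carries a 4-byte eof flag and an 8-byte position. From userspace on an NFSv4.2 mount this surfaces as lseek() with SEEK_DATA/SEEK_HOLE; a minimal client-side sketch, with a hypothetical path:

#define _GNU_SOURCE		/* for SEEK_DATA / SEEK_HOLE */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/mnt/nfs/sparsefile", O_RDONLY);
	if (fd < 0)
		return 1;
	off_t data = lseek(fd, 0, SEEK_DATA);	/* next data from 0 */
	off_t hole = lseek(fd, 0, SEEK_HOLE);	/* next hole from 0 */
	printf("data@%lld hole@%lld\n", (long long)data, (long long)hole);
	close(fd);
	return 0;
}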
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 4e042105fb6e..ca73ca79a0ee 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c | |||
@@ -49,6 +49,7 @@ enum { | |||
49 | NFSD_Leasetime, | 49 | NFSD_Leasetime, |
50 | NFSD_Gracetime, | 50 | NFSD_Gracetime, |
51 | NFSD_RecoveryDir, | 51 | NFSD_RecoveryDir, |
52 | NFSD_V4EndGrace, | ||
52 | #endif | 53 | #endif |
53 | }; | 54 | }; |
54 | 55 | ||
@@ -68,6 +69,7 @@ static ssize_t write_maxconn(struct file *file, char *buf, size_t size); | |||
68 | static ssize_t write_leasetime(struct file *file, char *buf, size_t size); | 69 | static ssize_t write_leasetime(struct file *file, char *buf, size_t size); |
69 | static ssize_t write_gracetime(struct file *file, char *buf, size_t size); | 70 | static ssize_t write_gracetime(struct file *file, char *buf, size_t size); |
70 | static ssize_t write_recoverydir(struct file *file, char *buf, size_t size); | 71 | static ssize_t write_recoverydir(struct file *file, char *buf, size_t size); |
72 | static ssize_t write_v4_end_grace(struct file *file, char *buf, size_t size); | ||
71 | #endif | 73 | #endif |
72 | 74 | ||
73 | static ssize_t (*write_op[])(struct file *, char *, size_t) = { | 75 | static ssize_t (*write_op[])(struct file *, char *, size_t) = { |
@@ -84,6 +86,7 @@ static ssize_t (*write_op[])(struct file *, char *, size_t) = { | |||
84 | [NFSD_Leasetime] = write_leasetime, | 86 | [NFSD_Leasetime] = write_leasetime, |
85 | [NFSD_Gracetime] = write_gracetime, | 87 | [NFSD_Gracetime] = write_gracetime, |
86 | [NFSD_RecoveryDir] = write_recoverydir, | 88 | [NFSD_RecoveryDir] = write_recoverydir, |
89 | [NFSD_V4EndGrace] = write_v4_end_grace, | ||
87 | #endif | 90 | #endif |
88 | }; | 91 | }; |
89 | 92 | ||
@@ -1077,6 +1080,47 @@ static ssize_t write_recoverydir(struct file *file, char *buf, size_t size) | |||
1077 | return rv; | 1080 | return rv; |
1078 | } | 1081 | } |
1079 | 1082 | ||
1083 | /** | ||
1084 | * write_v4_end_grace - release grace period for nfsd's v4.x lock manager | ||
1085 | * | ||
1086 | * Input: | ||
1087 | * buf: ignored | ||
1088 | * size: zero | ||
1089 | * OR | ||
1090 | * | ||
1091 | * Input: | ||
1092 | * buf: any value | ||
1093 | * size: non-zero length of C string in @buf | ||
1094 | * Output: | ||
1095 | * passed-in buffer filled with "Y" or "N", a newline, and a | ||
1096 | * terminating NUL. This indicates whether | ||
1097 | * the grace period has ended in the current net | ||
1098 | * namespace. Return code is the size in bytes of the | ||
1099 | * string. Writing a string that starts with 'Y', 'y', or | ||
1100 | * '1' to the file will end the grace period for nfsd's v4 | ||
1101 | * lock manager. | ||
1102 | */ | ||
1103 | static ssize_t write_v4_end_grace(struct file *file, char *buf, size_t size) | ||
1104 | { | ||
1105 | struct net *net = file->f_dentry->d_sb->s_fs_info; | ||
1106 | struct nfsd_net *nn = net_generic(net, nfsd_net_id); | ||
1107 | |||
1108 | if (size > 0) { | ||
1109 | switch(buf[0]) { | ||
1110 | case 'Y': | ||
1111 | case 'y': | ||
1112 | case '1': | ||
1113 | nfsd4_end_grace(nn); | ||
1114 | break; | ||
1115 | default: | ||
1116 | return -EINVAL; | ||
1117 | } | ||
1118 | } | ||
1119 | |||
1120 | return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%c\n", | ||
1121 | nn->grace_ended ? 'Y' : 'N'); | ||
1122 | } | ||
1123 | |||
1080 | #endif | 1124 | #endif |
1081 | 1125 | ||
1082 | /*----------------------------------------------------------------------------*/ | 1126 | /*----------------------------------------------------------------------------*/ |
@@ -1110,6 +1154,7 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent) | |||
1110 | [NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR}, | 1154 | [NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR}, |
1111 | [NFSD_Gracetime] = {"nfsv4gracetime", &transaction_ops, S_IWUSR|S_IRUSR}, | 1155 | [NFSD_Gracetime] = {"nfsv4gracetime", &transaction_ops, S_IWUSR|S_IRUSR}, |
1112 | [NFSD_RecoveryDir] = {"nfsv4recoverydir", &transaction_ops, S_IWUSR|S_IRUSR}, | 1156 | [NFSD_RecoveryDir] = {"nfsv4recoverydir", &transaction_ops, S_IWUSR|S_IRUSR}, |
1157 | [NFSD_V4EndGrace] = {"v4_end_grace", &transaction_ops, S_IWUSR|S_IRUGO}, | ||
1113 | #endif | 1158 | #endif |
1114 | /* last one */ {""} | 1159 | /* last one */ {""} |
1115 | }; | 1160 | }; |
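With the table entry above in place, the control file appears wherever the nfsd filesystem is mounted, conventionally /proc/fs/nfsd. A small sketch of ending the grace period from userspace, assuming that mount point:

#include <fcntl.h>
#include <unistd.h>

/* Returns 0 on success; writing 'Y', 'y' or '1' ends the v4 grace
 * period, while any other first byte yields -EINVAL from the kernel. */
int end_nfsd_grace(void)
{
	int fd = open("/proc/fs/nfsd/v4_end_grace", O_WRONLY);
	ssize_t n;

	if (fd < 0)
		return -1;
	n = write(fd, "Y\n", 2);
	close(fd);
	return n == 2 ? 0 : -1;
}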
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index e883a5868be6..88026fc6a981 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c | |||
@@ -209,8 +209,10 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp) | |||
209 | * fix that case easily. | 209 | * fix that case easily. |
210 | */ | 210 | */ |
211 | struct cred *new = prepare_creds(); | 211 | struct cred *new = prepare_creds(); |
212 | if (!new) | 212 | if (!new) { |
213 | return nfserrno(-ENOMEM); | 213 | error = nfserrno(-ENOMEM); |
214 | goto out; | ||
215 | } | ||
214 | new->cap_effective = | 216 | new->cap_effective = |
215 | cap_raise_nfsd_set(new->cap_effective, | 217 | cap_raise_nfsd_set(new->cap_effective, |
216 | new->cap_permitted); | 218 | new->cap_permitted); |
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 64f291a25a8c..2712042a66b1 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h | |||
@@ -62,16 +62,21 @@ typedef struct { | |||
62 | (s)->si_generation | 62 | (s)->si_generation |
63 | 63 | ||
64 | struct nfsd4_callback { | 64 | struct nfsd4_callback { |
65 | void *cb_op; | ||
66 | struct nfs4_client *cb_clp; | 65 | struct nfs4_client *cb_clp; |
67 | struct list_head cb_per_client; | 66 | struct list_head cb_per_client; |
68 | u32 cb_minorversion; | 67 | u32 cb_minorversion; |
69 | struct rpc_message cb_msg; | 68 | struct rpc_message cb_msg; |
70 | const struct rpc_call_ops *cb_ops; | 69 | struct nfsd4_callback_ops *cb_ops; |
71 | struct work_struct cb_work; | 70 | struct work_struct cb_work; |
72 | bool cb_done; | 71 | bool cb_done; |
73 | }; | 72 | }; |
74 | 73 | ||
74 | struct nfsd4_callback_ops { | ||
75 | void (*prepare)(struct nfsd4_callback *); | ||
76 | int (*done)(struct nfsd4_callback *, struct rpc_task *); | ||
77 | void (*release)(struct nfsd4_callback *); | ||
78 | }; | ||
79 | |||
75 | /* | 80 | /* |
76 | * A core object that represents a "common" stateid. These are generally | 81 | * A core object that represents a "common" stateid. These are generally |
77 | * embedded within the different (more specific) stateid objects and contain | 82 | * embedded within the different (more specific) stateid objects and contain |
@@ -127,6 +132,9 @@ struct nfs4_delegation { | |||
127 | struct nfsd4_callback dl_recall; | 132 | struct nfsd4_callback dl_recall; |
128 | }; | 133 | }; |
129 | 134 | ||
135 | #define cb_to_delegation(cb) \ | ||
136 | container_of(cb, struct nfs4_delegation, dl_recall) | ||
137 | |||
130 | /* client delegation callback info */ | 138 | /* client delegation callback info */ |
131 | struct nfs4_cb_conn { | 139 | struct nfs4_cb_conn { |
132 | /* SETCLIENTID info */ | 140 | /* SETCLIENTID info */ |
@@ -306,6 +314,7 @@ struct nfs4_client { | |||
306 | #define NFSD4_CLIENT_STABLE (2) /* client on stable storage */ | 314 | #define NFSD4_CLIENT_STABLE (2) /* client on stable storage */ |
307 | #define NFSD4_CLIENT_RECLAIM_COMPLETE (3) /* reclaim_complete done */ | 315 | #define NFSD4_CLIENT_RECLAIM_COMPLETE (3) /* reclaim_complete done */ |
308 | #define NFSD4_CLIENT_CONFIRMED (4) /* client is confirmed */ | 316 | #define NFSD4_CLIENT_CONFIRMED (4) /* client is confirmed */ |
317 | #define NFSD4_CLIENT_UPCALL_LOCK (5) /* upcall serialization */ | ||
309 | #define NFSD4_CLIENT_CB_FLAG_MASK (1 << NFSD4_CLIENT_CB_UPDATE | \ | 318 | #define NFSD4_CLIENT_CB_FLAG_MASK (1 << NFSD4_CLIENT_CB_UPDATE | \ |
310 | 1 << NFSD4_CLIENT_CB_KILL) | 319 | 1 << NFSD4_CLIENT_CB_KILL) |
311 | unsigned long cl_flags; | 320 | unsigned long cl_flags; |
@@ -516,6 +525,13 @@ static inline struct nfs4_ol_stateid *openlockstateid(struct nfs4_stid *s) | |||
516 | #define RD_STATE 0x00000010 | 525 | #define RD_STATE 0x00000010 |
517 | #define WR_STATE 0x00000020 | 526 | #define WR_STATE 0x00000020 |
518 | 527 | ||
528 | enum nfsd4_cb_op { | ||
529 | NFSPROC4_CLNT_CB_NULL = 0, | ||
530 | NFSPROC4_CLNT_CB_RECALL, | ||
531 | NFSPROC4_CLNT_CB_SEQUENCE, | ||
532 | }; | ||
533 | |||
534 | |||
519 | struct nfsd4_compound_state; | 535 | struct nfsd4_compound_state; |
520 | struct nfsd_net; | 536 | struct nfsd_net; |
521 | 537 | ||
@@ -530,12 +546,12 @@ extern struct nfs4_client_reclaim *nfsd4_find_reclaim_client(const char *recdir, | |||
530 | extern __be32 nfs4_check_open_reclaim(clientid_t *clid, | 546 | extern __be32 nfs4_check_open_reclaim(clientid_t *clid, |
531 | struct nfsd4_compound_state *cstate, struct nfsd_net *nn); | 547 | struct nfsd4_compound_state *cstate, struct nfsd_net *nn); |
532 | extern int set_callback_cred(void); | 548 | extern int set_callback_cred(void); |
533 | void nfsd4_run_cb_null(struct work_struct *w); | ||
534 | void nfsd4_run_cb_recall(struct work_struct *w); | ||
535 | extern void nfsd4_probe_callback(struct nfs4_client *clp); | 549 | extern void nfsd4_probe_callback(struct nfs4_client *clp); |
536 | extern void nfsd4_probe_callback_sync(struct nfs4_client *clp); | 550 | extern void nfsd4_probe_callback_sync(struct nfs4_client *clp); |
537 | extern void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *); | 551 | extern void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *); |
538 | extern void nfsd4_cb_recall(struct nfs4_delegation *dp); | 552 | extern void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp, |
553 | struct nfsd4_callback_ops *ops, enum nfsd4_cb_op op); | ||
554 | extern void nfsd4_run_cb(struct nfsd4_callback *cb); | ||
539 | extern int nfsd4_create_callback_queue(void); | 555 | extern int nfsd4_create_callback_queue(void); |
540 | extern void nfsd4_destroy_callback_queue(void); | 556 | extern void nfsd4_destroy_callback_queue(void); |
541 | extern void nfsd4_shutdown_callback(struct nfs4_client *); | 557 | extern void nfsd4_shutdown_callback(struct nfs4_client *); |
@@ -544,13 +560,16 @@ extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(const char *name, | |||
544 | struct nfsd_net *nn); | 560 | struct nfsd_net *nn); |
545 | extern bool nfs4_has_reclaimed_state(const char *name, struct nfsd_net *nn); | 561 | extern bool nfs4_has_reclaimed_state(const char *name, struct nfsd_net *nn); |
546 | 562 | ||
563 | /* grace period management */ | ||
564 | void nfsd4_end_grace(struct nfsd_net *nn); | ||
565 | |||
547 | /* nfs4recover operations */ | 566 | /* nfs4recover operations */ |
548 | extern int nfsd4_client_tracking_init(struct net *net); | 567 | extern int nfsd4_client_tracking_init(struct net *net); |
549 | extern void nfsd4_client_tracking_exit(struct net *net); | 568 | extern void nfsd4_client_tracking_exit(struct net *net); |
550 | extern void nfsd4_client_record_create(struct nfs4_client *clp); | 569 | extern void nfsd4_client_record_create(struct nfs4_client *clp); |
551 | extern void nfsd4_client_record_remove(struct nfs4_client *clp); | 570 | extern void nfsd4_client_record_remove(struct nfs4_client *clp); |
552 | extern int nfsd4_client_record_check(struct nfs4_client *clp); | 571 | extern int nfsd4_client_record_check(struct nfs4_client *clp); |
553 | extern void nfsd4_record_grace_done(struct nfsd_net *nn, time_t boot_time); | 572 | extern void nfsd4_record_grace_done(struct nfsd_net *nn); |
554 | 573 | ||
555 | /* nfs fault injection functions */ | 574 | /* nfs fault injection functions */ |
556 | #ifdef CONFIG_NFSD_FAULT_INJECTION | 575 | #ifdef CONFIG_NFSD_FAULT_INJECTION |
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index f501a9b5c9df..965cffd17a0c 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c | |||
@@ -445,6 +445,16 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, | |||
445 | if (err) | 445 | if (err) |
446 | goto out; | 446 | goto out; |
447 | size_change = 1; | 447 | size_change = 1; |
448 | |||
449 | /* | ||
450 | * RFC5661, Section 18.30.4: | ||
451 | * Changing the size of a file with SETATTR indirectly | ||
452 | * changes the time_modify and change attributes. | ||
453 | * | ||
454 | * (and similar for the older RFCs) | ||
455 | */ | ||
456 | if (iap->ia_size != i_size_read(inode)) | ||
457 | iap->ia_valid |= ATTR_MTIME; | ||
448 | } | 458 | } |
449 | 459 | ||
450 | iap->ia_valid |= ATTR_CTIME; | 460 | iap->ia_valid |= ATTR_CTIME; |
@@ -649,6 +659,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, | |||
649 | { | 659 | { |
650 | struct path path; | 660 | struct path path; |
651 | struct inode *inode; | 661 | struct inode *inode; |
662 | struct file *file; | ||
652 | int flags = O_RDONLY|O_LARGEFILE; | 663 | int flags = O_RDONLY|O_LARGEFILE; |
653 | __be32 err; | 664 | __be32 err; |
654 | int host_err = 0; | 665 | int host_err = 0; |
@@ -703,19 +714,25 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, | |||
703 | else | 714 | else |
704 | flags = O_WRONLY|O_LARGEFILE; | 715 | flags = O_WRONLY|O_LARGEFILE; |
705 | } | 716 | } |
706 | *filp = dentry_open(&path, flags, current_cred()); | ||
707 | if (IS_ERR(*filp)) { | ||
708 | host_err = PTR_ERR(*filp); | ||
709 | *filp = NULL; | ||
710 | } else { | ||
711 | host_err = ima_file_check(*filp, may_flags); | ||
712 | 717 | ||
713 | if (may_flags & NFSD_MAY_64BIT_COOKIE) | 718 | file = dentry_open(&path, flags, current_cred()); |
714 | (*filp)->f_mode |= FMODE_64BITHASH; | 719 | if (IS_ERR(file)) { |
715 | else | 720 | host_err = PTR_ERR(file); |
716 | (*filp)->f_mode |= FMODE_32BITHASH; | 721 | goto out_nfserr; |
717 | } | 722 | } |
718 | 723 | ||
724 | host_err = ima_file_check(file, may_flags); | ||
725 | if (host_err) { | ||
726 | nfsd_close(file); | ||
727 | goto out_nfserr; | ||
728 | } | ||
729 | |||
730 | if (may_flags & NFSD_MAY_64BIT_COOKIE) | ||
731 | file->f_mode |= FMODE_64BITHASH; | ||
732 | else | ||
733 | file->f_mode |= FMODE_32BITHASH; | ||
734 | |||
735 | *filp = file; | ||
719 | out_nfserr: | 736 | out_nfserr: |
720 | err = nfserrno(host_err); | 737 | err = nfserrno(host_err); |
721 | out: | 738 | out: |
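The reshuffled nfsd_open() tail follows a publish-only-on-success shape: build the file, unwind at each failure point, and assign to the caller's pointer last, so *filp can never hold a half-initialized file. The same shape in miniature; all names here are hypothetical:

#include <errno.h>

struct thing;				/* placeholder type */
extern struct thing *alloc_thing(void);
extern int check_thing(struct thing *);	/* nonzero on failure */
extern void free_thing(struct thing *);

int make_thing(struct thing **out)
{
	struct thing *t = alloc_thing();

	if (!t)
		return -ENOMEM;
	if (check_thing(t)) {
		free_thing(t);		/* unwind before reporting */
		return -EPERM;
	}
	*out = t;			/* publish only once complete */
	return 0;
}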
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 465e7799742a..5720e9457f33 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h | |||
@@ -428,6 +428,17 @@ struct nfsd4_reclaim_complete { | |||
428 | u32 rca_one_fs; | 428 | u32 rca_one_fs; |
429 | }; | 429 | }; |
430 | 430 | ||
431 | struct nfsd4_seek { | ||
432 | /* request */ | ||
433 | stateid_t seek_stateid; | ||
434 | loff_t seek_offset; | ||
435 | u32 seek_whence; | ||
436 | |||
437 | /* response */ | ||
438 | u32 seek_eof; | ||
439 | loff_t seek_pos; | ||
440 | }; | ||
441 | |||
431 | struct nfsd4_op { | 442 | struct nfsd4_op { |
432 | int opnum; | 443 | int opnum; |
433 | __be32 status; | 444 | __be32 status; |
@@ -473,6 +484,9 @@ struct nfsd4_op { | |||
473 | struct nfsd4_reclaim_complete reclaim_complete; | 484 | struct nfsd4_reclaim_complete reclaim_complete; |
474 | struct nfsd4_test_stateid test_stateid; | 485 | struct nfsd4_test_stateid test_stateid; |
475 | struct nfsd4_free_stateid free_stateid; | 486 | struct nfsd4_free_stateid free_stateid; |
487 | |||
488 | /* NFSv4.2 */ | ||
489 | struct nfsd4_seek seek; | ||
476 | } u; | 490 | } u; |
477 | struct nfs4_replay * replay; | 491 | struct nfs4_replay * replay; |
478 | }; | 492 | }; |
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 6252b173a465..d071e7f23de2 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c | |||
@@ -24,6 +24,7 @@ | |||
24 | #include <linux/buffer_head.h> | 24 | #include <linux/buffer_head.h> |
25 | #include <linux/gfp.h> | 25 | #include <linux/gfp.h> |
26 | #include <linux/mpage.h> | 26 | #include <linux/mpage.h> |
27 | #include <linux/pagemap.h> | ||
27 | #include <linux/writeback.h> | 28 | #include <linux/writeback.h> |
28 | #include <linux/aio.h> | 29 | #include <linux/aio.h> |
29 | #include "nilfs.h" | 30 | #include "nilfs.h" |
@@ -219,10 +220,10 @@ static int nilfs_writepage(struct page *page, struct writeback_control *wbc) | |||
219 | 220 | ||
220 | static int nilfs_set_page_dirty(struct page *page) | 221 | static int nilfs_set_page_dirty(struct page *page) |
221 | { | 222 | { |
223 | struct inode *inode = page->mapping->host; | ||
222 | int ret = __set_page_dirty_nobuffers(page); | 224 | int ret = __set_page_dirty_nobuffers(page); |
223 | 225 | ||
224 | if (page_has_buffers(page)) { | 226 | if (page_has_buffers(page)) { |
225 | struct inode *inode = page->mapping->host; | ||
226 | unsigned nr_dirty = 0; | 227 | unsigned nr_dirty = 0; |
227 | struct buffer_head *bh, *head; | 228 | struct buffer_head *bh, *head; |
228 | 229 | ||
@@ -245,6 +246,10 @@ static int nilfs_set_page_dirty(struct page *page) | |||
245 | 246 | ||
246 | if (nr_dirty) | 247 | if (nr_dirty) |
247 | nilfs_set_file_dirty(inode, nr_dirty); | 248 | nilfs_set_file_dirty(inode, nr_dirty); |
249 | } else if (ret) { | ||
250 | unsigned nr_dirty = 1 << (PAGE_CACHE_SHIFT - inode->i_blkbits); | ||
251 | |||
252 | nilfs_set_file_dirty(inode, nr_dirty); | ||
248 | } | 253 | } |
249 | return ret; | 254 | return ret; |
250 | } | 255 | } |
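The new else-branch accounts a full page's worth of blocks when __set_page_dirty_nobuffers() dirties a buffer-less page: with 4 KiB pages (PAGE_CACHE_SHIFT == 12) and 1 KiB filesystem blocks (i_blkbits == 10), that is 1 << (12 - 10) == 4 blocks. A one-line check of the arithmetic, with the values assumed for illustration:

_Static_assert((1 << (12 - 10)) == 4, "4 blocks per 4 KiB page");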
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index b13992a41bd9..c991616acca9 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c | |||
@@ -78,7 +78,7 @@ static int create_fd(struct fsnotify_group *group, | |||
78 | 78 | ||
79 | pr_debug("%s: group=%p event=%p\n", __func__, group, event); | 79 | pr_debug("%s: group=%p event=%p\n", __func__, group, event); |
80 | 80 | ||
81 | client_fd = get_unused_fd(); | 81 | client_fd = get_unused_fd_flags(group->fanotify_data.f_flags); |
82 | if (client_fd < 0) | 82 | if (client_fd < 0) |
83 | return client_fd; | 83 | return client_fd; |
84 | 84 | ||
diff --git a/fs/notify/fdinfo.c b/fs/notify/fdinfo.c index 238a5930cb3c..9d7e2b9659cb 100644 --- a/fs/notify/fdinfo.c +++ b/fs/notify/fdinfo.c | |||
@@ -42,7 +42,7 @@ static int show_mark_fhandle(struct seq_file *m, struct inode *inode) | |||
42 | { | 42 | { |
43 | struct { | 43 | struct { |
44 | struct file_handle handle; | 44 | struct file_handle handle; |
45 | u8 pad[64]; | 45 | u8 pad[MAX_HANDLE_SZ]; |
46 | } f; | 46 | } f; |
47 | int size, ret, i; | 47 | int size, ret, i; |
48 | 48 | ||
@@ -50,7 +50,7 @@ static int show_mark_fhandle(struct seq_file *m, struct inode *inode) | |||
50 | size = f.handle.handle_bytes >> 2; | 50 | size = f.handle.handle_bytes >> 2; |
51 | 51 | ||
52 | ret = exportfs_encode_inode_fh(inode, (struct fid *)f.handle.f_handle, &size, 0); | 52 | ret = exportfs_encode_inode_fh(inode, (struct fid *)f.handle.f_handle, &size, 0); |
53 | if ((ret == 255) || (ret == -ENOSPC)) { | 53 | if ((ret == FILEID_INVALID) || (ret < 0)) { |
54 | WARN_ONCE(1, "Can't encode file handle for inotify: %d\n", ret); | 54 | WARN_ONCE(1, "Can't encode file handle for inotify: %d\n", ret); |
55 | return 0; | 55 | return 0; |
56 | } | 56 | } |
diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h index 85e7d2b431d9..9c0898c4cfe1 100644 --- a/fs/notify/fsnotify.h +++ b/fs/notify/fsnotify.h | |||
@@ -23,9 +23,6 @@ extern int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark, | |||
23 | struct fsnotify_group *group, struct vfsmount *mnt, | 23 | struct fsnotify_group *group, struct vfsmount *mnt, |
24 | int allow_dups); | 24 | int allow_dups); |
25 | 25 | ||
26 | /* final kfree of a group */ | ||
27 | extern void fsnotify_final_destroy_group(struct fsnotify_group *group); | ||
28 | |||
29 | /* vfsmount specific destruction of a mark */ | 26 | /* vfsmount specific destruction of a mark */ |
30 | extern void fsnotify_destroy_vfsmount_mark(struct fsnotify_mark *mark); | 27 | extern void fsnotify_destroy_vfsmount_mark(struct fsnotify_mark *mark); |
31 | /* inode specific destruction of a mark */ | 28 | /* inode specific destruction of a mark */ |
diff --git a/fs/notify/group.c b/fs/notify/group.c index ad1995980456..d16b62cb2854 100644 --- a/fs/notify/group.c +++ b/fs/notify/group.c | |||
@@ -31,7 +31,7 @@ | |||
31 | /* | 31 | /* |
32 | * Final freeing of a group | 32 | * Final freeing of a group |
33 | */ | 33 | */ |
34 | void fsnotify_final_destroy_group(struct fsnotify_group *group) | 34 | static void fsnotify_final_destroy_group(struct fsnotify_group *group) |
35 | { | 35 | { |
36 | if (group->ops->free_group_priv) | 36 | if (group->ops->free_group_priv) |
37 | group->ops->free_group_priv(group); | 37 | group->ops->free_group_priv(group); |
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c index 0f88bc0b4e6c..7d888d77d59a 100644 --- a/fs/notify/inotify/inotify_fsnotify.c +++ b/fs/notify/inotify/inotify_fsnotify.c | |||
@@ -165,8 +165,10 @@ static void inotify_free_group_priv(struct fsnotify_group *group) | |||
165 | /* ideally the idr is empty and we won't hit the BUG in the callback */ | 165 | /* ideally the idr is empty and we won't hit the BUG in the callback */ |
166 | idr_for_each(&group->inotify_data.idr, idr_callback, group); | 166 | idr_for_each(&group->inotify_data.idr, idr_callback, group); |
167 | idr_destroy(&group->inotify_data.idr); | 167 | idr_destroy(&group->inotify_data.idr); |
168 | atomic_dec(&group->inotify_data.user->inotify_devs); | 168 | if (group->inotify_data.user) { |
169 | free_uid(group->inotify_data.user); | 169 | atomic_dec(&group->inotify_data.user->inotify_devs); |
170 | free_uid(group->inotify_data.user); | ||
171 | } | ||
170 | } | 172 | } |
171 | 173 | ||
172 | static void inotify_free_event(struct fsnotify_event *fsn_event) | 174 | static void inotify_free_event(struct fsnotify_event *fsn_event) |
diff --git a/fs/ntfs/debug.c b/fs/ntfs/debug.c index dd6103cc93c1..825a54e8f490 100644 --- a/fs/ntfs/debug.c +++ b/fs/ntfs/debug.c | |||
@@ -112,7 +112,7 @@ void __ntfs_error(const char *function, const struct super_block *sb, | |||
112 | /* If 1, output debug messages, and if 0, don't. */ | 112 | /* If 1, output debug messages, and if 0, don't. */ |
113 | int debug_msgs = 0; | 113 | int debug_msgs = 0; |
114 | 114 | ||
115 | void __ntfs_debug (const char *file, int line, const char *function, | 115 | void __ntfs_debug(const char *file, int line, const char *function, |
116 | const char *fmt, ...) | 116 | const char *fmt, ...) |
117 | { | 117 | { |
118 | struct va_format vaf; | 118 | struct va_format vaf; |
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index f5ec1ce7a532..643faa44f22b 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c | |||
@@ -1,7 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | * file.c - NTFS kernel file operations. Part of the Linux-NTFS project. | 2 | * file.c - NTFS kernel file operations. Part of the Linux-NTFS project. |
3 | * | 3 | * |
4 | * Copyright (c) 2001-2011 Anton Altaparmakov and Tuxera Inc. | 4 | * Copyright (c) 2001-2014 Anton Altaparmakov and Tuxera Inc. |
5 | * | 5 | * |
6 | * This program/include file is free software; you can redistribute it and/or | 6 | * This program/include file is free software; you can redistribute it and/or |
7 | * modify it under the terms of the GNU General Public License as published | 7 | * modify it under the terms of the GNU General Public License as published |
@@ -410,7 +410,8 @@ static inline int __ntfs_grab_cache_pages(struct address_space *mapping, | |||
410 | BUG_ON(!nr_pages); | 410 | BUG_ON(!nr_pages); |
411 | err = nr = 0; | 411 | err = nr = 0; |
412 | do { | 412 | do { |
413 | pages[nr] = find_lock_page(mapping, index); | 413 | pages[nr] = find_get_page_flags(mapping, index, FGP_LOCK | |
414 | FGP_ACCESSED); | ||
414 | if (!pages[nr]) { | 415 | if (!pages[nr]) { |
415 | if (!*cached_page) { | 416 | if (!*cached_page) { |
416 | *cached_page = page_cache_alloc(mapping); | 417 | *cached_page = page_cache_alloc(mapping); |
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c index 6c3296e546c3..9e1e112074fb 100644 --- a/fs/ntfs/super.c +++ b/fs/ntfs/super.c | |||
@@ -3208,7 +3208,7 @@ static void __exit exit_ntfs_fs(void) | |||
3208 | } | 3208 | } |
3209 | 3209 | ||
3210 | MODULE_AUTHOR("Anton Altaparmakov <anton@tuxera.com>"); | 3210 | MODULE_AUTHOR("Anton Altaparmakov <anton@tuxera.com>"); |
3211 | MODULE_DESCRIPTION("NTFS 1.2/3.x driver - Copyright (c) 2001-2011 Anton Altaparmakov and Tuxera Inc."); | 3211 | MODULE_DESCRIPTION("NTFS 1.2/3.x driver - Copyright (c) 2001-2014 Anton Altaparmakov and Tuxera Inc."); |
3212 | MODULE_VERSION(NTFS_VERSION); | 3212 | MODULE_VERSION(NTFS_VERSION); |
3213 | MODULE_LICENSE("GPL"); | 3213 | MODULE_LICENSE("GPL"); |
3214 | #ifdef DEBUG | 3214 | #ifdef DEBUG |
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 4a231a166cf8..1ef547e49373 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c | |||
@@ -1481,8 +1481,16 @@ static int ocfs2_write_begin_inline(struct address_space *mapping, | |||
1481 | handle_t *handle; | 1481 | handle_t *handle; |
1482 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)wc->w_di_bh->b_data; | 1482 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)wc->w_di_bh->b_data; |
1483 | 1483 | ||
1484 | handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); | ||
1485 | if (IS_ERR(handle)) { | ||
1486 | ret = PTR_ERR(handle); | ||
1487 | mlog_errno(ret); | ||
1488 | goto out; | ||
1489 | } | ||
1490 | |||
1484 | page = find_or_create_page(mapping, 0, GFP_NOFS); | 1491 | page = find_or_create_page(mapping, 0, GFP_NOFS); |
1485 | if (!page) { | 1492 | if (!page) { |
1493 | ocfs2_commit_trans(osb, handle); | ||
1486 | ret = -ENOMEM; | 1494 | ret = -ENOMEM; |
1487 | mlog_errno(ret); | 1495 | mlog_errno(ret); |
1488 | goto out; | 1496 | goto out; |
@@ -1494,13 +1502,6 @@ static int ocfs2_write_begin_inline(struct address_space *mapping, | |||
1494 | wc->w_pages[0] = wc->w_target_page = page; | 1502 | wc->w_pages[0] = wc->w_target_page = page; |
1495 | wc->w_num_pages = 1; | 1503 | wc->w_num_pages = 1; |
1496 | 1504 | ||
1497 | handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); | ||
1498 | if (IS_ERR(handle)) { | ||
1499 | ret = PTR_ERR(handle); | ||
1500 | mlog_errno(ret); | ||
1501 | goto out; | ||
1502 | } | ||
1503 | |||
1504 | ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), wc->w_di_bh, | 1505 | ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), wc->w_di_bh, |
1505 | OCFS2_JOURNAL_ACCESS_WRITE); | 1506 | OCFS2_JOURNAL_ACCESS_WRITE); |
1506 | if (ret) { | 1507 | if (ret) { |
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 73039295d0d1..d13385448168 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c | |||
@@ -2572,6 +2572,25 @@ int o2hb_check_node_heartbeating(u8 node_num) | |||
2572 | } | 2572 | } |
2573 | EXPORT_SYMBOL_GPL(o2hb_check_node_heartbeating); | 2573 | EXPORT_SYMBOL_GPL(o2hb_check_node_heartbeating); |
2574 | 2574 | ||
2575 | int o2hb_check_node_heartbeating_no_sem(u8 node_num) | ||
2576 | { | ||
2577 | unsigned long testing_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; | ||
2578 | unsigned long flags; | ||
2579 | |||
2580 | spin_lock_irqsave(&o2hb_live_lock, flags); | ||
2581 | o2hb_fill_node_map_from_callback(testing_map, sizeof(testing_map)); | ||
2582 | spin_unlock_irqrestore(&o2hb_live_lock, flags); | ||
2583 | if (!test_bit(node_num, testing_map)) { | ||
2584 | mlog(ML_HEARTBEAT, | ||
2585 | "node (%u) does not have heartbeating enabled.\n", | ||
2586 | node_num); | ||
2587 | return 0; | ||
2588 | } | ||
2589 | |||
2590 | return 1; | ||
2591 | } | ||
2592 | EXPORT_SYMBOL_GPL(o2hb_check_node_heartbeating_no_sem); | ||
2593 | |||
2575 | int o2hb_check_node_heartbeating_from_callback(u8 node_num) | 2594 | int o2hb_check_node_heartbeating_from_callback(u8 node_num) |
2576 | { | 2595 | { |
2577 | unsigned long testing_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; | 2596 | unsigned long testing_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; |
diff --git a/fs/ocfs2/cluster/heartbeat.h b/fs/ocfs2/cluster/heartbeat.h index 00ad8e8fea51..3ef5137dc362 100644 --- a/fs/ocfs2/cluster/heartbeat.h +++ b/fs/ocfs2/cluster/heartbeat.h | |||
@@ -80,6 +80,7 @@ void o2hb_fill_node_map(unsigned long *map, | |||
80 | void o2hb_exit(void); | 80 | void o2hb_exit(void); |
81 | int o2hb_init(void); | 81 | int o2hb_init(void); |
82 | int o2hb_check_node_heartbeating(u8 node_num); | 82 | int o2hb_check_node_heartbeating(u8 node_num); |
83 | int o2hb_check_node_heartbeating_no_sem(u8 node_num); | ||
83 | int o2hb_check_node_heartbeating_from_callback(u8 node_num); | 84 | int o2hb_check_node_heartbeating_from_callback(u8 node_num); |
84 | int o2hb_check_local_node_heartbeating(void); | 85 | int o2hb_check_local_node_heartbeating(void); |
85 | void o2hb_stop_all_regions(void); | 86 | void o2hb_stop_all_regions(void); |
diff --git a/fs/ocfs2/cluster/netdebug.c b/fs/ocfs2/cluster/netdebug.c index 73ba81928bce..27d1242c8383 100644 --- a/fs/ocfs2/cluster/netdebug.c +++ b/fs/ocfs2/cluster/netdebug.c | |||
@@ -185,29 +185,13 @@ static const struct seq_operations nst_seq_ops = { | |||
185 | static int nst_fop_open(struct inode *inode, struct file *file) | 185 | static int nst_fop_open(struct inode *inode, struct file *file) |
186 | { | 186 | { |
187 | struct o2net_send_tracking *dummy_nst; | 187 | struct o2net_send_tracking *dummy_nst; |
188 | struct seq_file *seq; | ||
189 | int ret; | ||
190 | 188 | ||
191 | dummy_nst = kmalloc(sizeof(struct o2net_send_tracking), GFP_KERNEL); | 189 | dummy_nst = __seq_open_private(file, &nst_seq_ops, sizeof(*dummy_nst)); |
192 | if (dummy_nst == NULL) { | 190 | if (!dummy_nst) |
193 | ret = -ENOMEM; | 191 | return -ENOMEM; |
194 | goto out; | ||
195 | } | ||
196 | dummy_nst->st_task = NULL; | ||
197 | |||
198 | ret = seq_open(file, &nst_seq_ops); | ||
199 | if (ret) | ||
200 | goto out; | ||
201 | |||
202 | seq = file->private_data; | ||
203 | seq->private = dummy_nst; | ||
204 | o2net_debug_add_nst(dummy_nst); | 192 | o2net_debug_add_nst(dummy_nst); |
205 | 193 | ||
206 | dummy_nst = NULL; | 194 | return 0; |
207 | |||
208 | out: | ||
209 | kfree(dummy_nst); | ||
210 | return ret; | ||
211 | } | 195 | } |
212 | 196 | ||
213 | static int nst_fop_release(struct inode *inode, struct file *file) | 197 | static int nst_fop_release(struct inode *inode, struct file *file) |
@@ -412,33 +396,27 @@ static const struct seq_operations sc_seq_ops = { | |||
412 | .show = sc_seq_show, | 396 | .show = sc_seq_show, |
413 | }; | 397 | }; |
414 | 398 | ||
415 | static int sc_common_open(struct file *file, struct o2net_sock_debug *sd) | 399 | static int sc_common_open(struct file *file, int ctxt) |
416 | { | 400 | { |
401 | struct o2net_sock_debug *sd; | ||
417 | struct o2net_sock_container *dummy_sc; | 402 | struct o2net_sock_container *dummy_sc; |
418 | struct seq_file *seq; | ||
419 | int ret; | ||
420 | 403 | ||
421 | dummy_sc = kmalloc(sizeof(struct o2net_sock_container), GFP_KERNEL); | 404 | dummy_sc = kzalloc(sizeof(*dummy_sc), GFP_KERNEL); |
422 | if (dummy_sc == NULL) { | 405 | if (!dummy_sc) |
423 | ret = -ENOMEM; | 406 | return -ENOMEM; |
424 | goto out; | ||
425 | } | ||
426 | dummy_sc->sc_page = NULL; | ||
427 | 407 | ||
428 | ret = seq_open(file, &sc_seq_ops); | 408 | sd = __seq_open_private(file, &sc_seq_ops, sizeof(*sd)); |
429 | if (ret) | 409 | if (!sd) { |
430 | goto out; | 410 | kfree(dummy_sc); |
411 | return -ENOMEM; | ||
412 | } | ||
431 | 413 | ||
432 | seq = file->private_data; | 414 | sd->dbg_ctxt = ctxt; |
433 | seq->private = sd; | ||
434 | sd->dbg_sock = dummy_sc; | 415 | sd->dbg_sock = dummy_sc; |
435 | o2net_debug_add_sc(dummy_sc); | ||
436 | 416 | ||
437 | dummy_sc = NULL; | 417 | o2net_debug_add_sc(dummy_sc); |
438 | 418 | ||
439 | out: | 419 | return 0; |
440 | kfree(dummy_sc); | ||
441 | return ret; | ||
442 | } | 420 | } |
443 | 421 | ||
444 | static int sc_fop_release(struct inode *inode, struct file *file) | 422 | static int sc_fop_release(struct inode *inode, struct file *file) |
@@ -453,16 +431,7 @@ static int sc_fop_release(struct inode *inode, struct file *file) | |||
453 | 431 | ||
454 | static int stats_fop_open(struct inode *inode, struct file *file) | 432 | static int stats_fop_open(struct inode *inode, struct file *file) |
455 | { | 433 | { |
456 | struct o2net_sock_debug *sd; | 434 | return sc_common_open(file, SHOW_SOCK_STATS); |
457 | |||
458 | sd = kmalloc(sizeof(struct o2net_sock_debug), GFP_KERNEL); | ||
459 | if (sd == NULL) | ||
460 | return -ENOMEM; | ||
461 | |||
462 | sd->dbg_ctxt = SHOW_SOCK_STATS; | ||
463 | sd->dbg_sock = NULL; | ||
464 | |||
465 | return sc_common_open(file, sd); | ||
466 | } | 435 | } |
467 | 436 | ||
468 | static const struct file_operations stats_seq_fops = { | 437 | static const struct file_operations stats_seq_fops = { |
@@ -474,16 +443,7 @@ static const struct file_operations stats_seq_fops = { | |||
474 | 443 | ||
475 | static int sc_fop_open(struct inode *inode, struct file *file) | 444 | static int sc_fop_open(struct inode *inode, struct file *file) |
476 | { | 445 | { |
477 | struct o2net_sock_debug *sd; | 446 | return sc_common_open(file, SHOW_SOCK_CONTAINERS); |
478 | |||
479 | sd = kmalloc(sizeof(struct o2net_sock_debug), GFP_KERNEL); | ||
480 | if (sd == NULL) | ||
481 | return -ENOMEM; | ||
482 | |||
483 | sd->dbg_ctxt = SHOW_SOCK_CONTAINERS; | ||
484 | sd->dbg_sock = NULL; | ||
485 | |||
486 | return sc_common_open(file, sd); | ||
487 | } | 447 | } |
488 | 448 | ||
489 | static const struct file_operations sc_seq_fops = { | 449 | static const struct file_operations sc_seq_fops = { |
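
All three open paths above collapse onto __seq_open_private() from <linux/seq_file.h>, which allocates a zeroed private buffer of the given size, calls seq_open(), and stores the buffer in seq->private in one step. A minimal sketch of the pattern, with hypothetical my_iter/my_seq_ops names:

#include <linux/seq_file.h>

struct my_iter {
	int pos;
};

static const struct seq_operations my_seq_ops; /* start/next/stop/show elided */

static int my_debug_open(struct inode *inode, struct file *file)
{
	struct my_iter *it;

	/* kzalloc + seq_open() + seq->private assignment in one call */
	it = __seq_open_private(file, &my_seq_ops, sizeof(*it));
	if (!it)
		return -ENOMEM;

	it->pos = 0;	/* buffer arrives zeroed; shown for clarity */
	return 0;
}

The matching release side is seq_release_private(), which frees the private buffer again.
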
diff --git a/fs/ocfs2/cluster/quorum.c b/fs/ocfs2/cluster/quorum.c index 1ec141e758d7..62e8ec619b4c 100644 --- a/fs/ocfs2/cluster/quorum.c +++ b/fs/ocfs2/cluster/quorum.c | |||
@@ -160,9 +160,18 @@ static void o2quo_make_decision(struct work_struct *work) | |||
160 | } | 160 | } |
161 | 161 | ||
162 | out: | 162 | out: |
163 | spin_unlock(&qs->qs_lock); | 163 | if (fence) { |
164 | if (fence) | 164 | spin_unlock(&qs->qs_lock); |
165 | o2quo_fence_self(); | 165 | o2quo_fence_self(); |
166 | } else { | ||
167 | mlog(ML_NOTICE, "not fencing this node, heartbeating: %d, " | ||
168 | "connected: %d, lowest: %d (%sreachable)\n", | ||
169 | qs->qs_heartbeating, qs->qs_connected, lowest_hb, | ||
170 | lowest_reachable ? "" : "un"); | ||
171 | spin_unlock(&qs->qs_lock); | ||
172 | |||
173 | } | ||
174 | |||
166 | } | 175 | } |
167 | 176 | ||
168 | static void o2quo_set_hold(struct o2quo_state *qs, u8 node) | 177 | static void o2quo_set_hold(struct o2quo_state *qs, u8 node) |
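
The reshuffled unlock above keeps qs_lock held while the mlog() reads qs_heartbeating, qs_connected and lowest_hb, so the logged values form one consistent snapshot. A sketch of that rule with a hypothetical state structure:

/*
 * Sketch: sample shared fields under the lock, release it, then
 * report. struct my_state and its fields are hypothetical.
 */
struct my_state {
	spinlock_t lock;
	int heartbeating, connected;
};

static void report(struct my_state *s)
{
	int hb, conn;

	spin_lock(&s->lock);
	hb = s->heartbeating;	/* consistent snapshot ... */
	conn = s->connected;	/* ... of both fields */
	spin_unlock(&s->lock);

	pr_notice("heartbeating: %d, connected: %d\n", hb, conn);
}
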
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index 681691bc233a..97de0fbd9f78 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c | |||
@@ -536,7 +536,7 @@ static void o2net_set_nn_state(struct o2net_node *nn, | |||
536 | if (nn->nn_persistent_error || nn->nn_sc_valid) | 536 | if (nn->nn_persistent_error || nn->nn_sc_valid) |
537 | wake_up(&nn->nn_sc_wq); | 537 | wake_up(&nn->nn_sc_wq); |
538 | 538 | ||
539 | if (!was_err && nn->nn_persistent_error) { | 539 | if (was_valid && !was_err && nn->nn_persistent_error) { |
540 | o2quo_conn_err(o2net_num_from_nn(nn)); | 540 | o2quo_conn_err(o2net_num_from_nn(nn)); |
541 | queue_delayed_work(o2net_wq, &nn->nn_still_up, | 541 | queue_delayed_work(o2net_wq, &nn->nn_still_up, |
542 | msecs_to_jiffies(O2NET_QUORUM_DELAY_MS)); | 542 | msecs_to_jiffies(O2NET_QUORUM_DELAY_MS)); |
@@ -1480,6 +1480,14 @@ static int o2net_set_nodelay(struct socket *sock) | |||
1480 | return ret; | 1480 | return ret; |
1481 | } | 1481 | } |
1482 | 1482 | ||
1483 | static int o2net_set_usertimeout(struct socket *sock) | ||
1484 | { | ||
1485 | int user_timeout = O2NET_TCP_USER_TIMEOUT; | ||
1486 | |||
1487 | return kernel_setsockopt(sock, SOL_TCP, TCP_USER_TIMEOUT, | ||
1488 | (char *)&user_timeout, sizeof(user_timeout)); | ||
1489 | } | ||
1490 | |||
1483 | static void o2net_initialize_handshake(void) | 1491 | static void o2net_initialize_handshake(void) |
1484 | { | 1492 | { |
1485 | o2net_hand->o2hb_heartbeat_timeout_ms = cpu_to_be32( | 1493 | o2net_hand->o2hb_heartbeat_timeout_ms = cpu_to_be32( |
@@ -1536,16 +1544,20 @@ static void o2net_idle_timer(unsigned long data) | |||
1536 | #endif | 1544 | #endif |
1537 | 1545 | ||
1538 | printk(KERN_NOTICE "o2net: Connection to " SC_NODEF_FMT " has been " | 1546 | printk(KERN_NOTICE "o2net: Connection to " SC_NODEF_FMT " has been " |
1539 | "idle for %lu.%lu secs, shutting it down.\n", SC_NODEF_ARGS(sc), | 1547 | "idle for %lu.%lu secs.\n", |
1540 | msecs / 1000, msecs % 1000); | 1548 | SC_NODEF_ARGS(sc), msecs / 1000, msecs % 1000); |
1541 | 1549 | ||
1542 | /* | 1550 | /* idle timeout happened; don't shut down the connection, but |
1543 | * Initialize the nn_timeout so that the next connection attempt | 1551 | * make the fence decision. Maybe the connection can recover before |
1544 | * will continue in o2net_start_connect. | 1552 | * the decision is made. |
1545 | */ | 1553 | */ |
1546 | atomic_set(&nn->nn_timeout, 1); | 1554 | atomic_set(&nn->nn_timeout, 1); |
1555 | o2quo_conn_err(o2net_num_from_nn(nn)); | ||
1556 | queue_delayed_work(o2net_wq, &nn->nn_still_up, | ||
1557 | msecs_to_jiffies(O2NET_QUORUM_DELAY_MS)); | ||
1558 | |||
1559 | o2net_sc_reset_idle_timer(sc); | ||
1547 | 1560 | ||
1548 | o2net_sc_queue_work(sc, &sc->sc_shutdown_work); | ||
1549 | } | 1561 | } |
1550 | 1562 | ||
1551 | static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc) | 1563 | static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc) |
@@ -1560,6 +1572,15 @@ static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc) | |||
1560 | 1572 | ||
1561 | static void o2net_sc_postpone_idle(struct o2net_sock_container *sc) | 1573 | static void o2net_sc_postpone_idle(struct o2net_sock_container *sc) |
1562 | { | 1574 | { |
1575 | struct o2net_node *nn = o2net_nn_from_num(sc->sc_node->nd_num); | ||
1576 | |||
1577 | /* clear the fence decision since the connection recovered from timeout */ | ||
1578 | if (atomic_read(&nn->nn_timeout)) { | ||
1579 | o2quo_conn_up(o2net_num_from_nn(nn)); | ||
1580 | cancel_delayed_work(&nn->nn_still_up); | ||
1581 | atomic_set(&nn->nn_timeout, 0); | ||
1582 | } | ||
1583 | |||
1563 | /* Only push out an existing timer */ | 1584 | /* Only push out an existing timer */ |
1564 | if (timer_pending(&sc->sc_idle_timeout)) | 1585 | if (timer_pending(&sc->sc_idle_timeout)) |
1565 | o2net_sc_reset_idle_timer(sc); | 1586 | o2net_sc_reset_idle_timer(sc); |
@@ -1580,7 +1601,15 @@ static void o2net_start_connect(struct work_struct *work) | |||
1580 | struct sockaddr_in myaddr = {0, }, remoteaddr = {0, }; | 1601 | struct sockaddr_in myaddr = {0, }, remoteaddr = {0, }; |
1581 | int ret = 0, stop; | 1602 | int ret = 0, stop; |
1582 | unsigned int timeout; | 1603 | unsigned int timeout; |
1604 | unsigned int noio_flag; | ||
1583 | 1605 | ||
1606 | /* | ||
1607 | * sock_create allocates the sock with GFP_KERNEL. We must set | ||
1608 | * the per-process flag PF_MEMALLOC_NOIO so that all allocations done | ||
1609 | * by this process are done as if GFP_NOIO was specified, so we | ||
1610 | * do not reenter the filesystem while doing memory reclaim. | ||
1611 | */ | ||
1612 | noio_flag = memalloc_noio_save(); | ||
1584 | /* if we're greater we initiate tx, otherwise we accept */ | 1613 | /* if we're greater we initiate tx, otherwise we accept */ |
1585 | if (o2nm_this_node() <= o2net_num_from_nn(nn)) | 1614 | if (o2nm_this_node() <= o2net_num_from_nn(nn)) |
1586 | goto out; | 1615 | goto out; |
@@ -1650,6 +1679,12 @@ static void o2net_start_connect(struct work_struct *work) | |||
1650 | goto out; | 1679 | goto out; |
1651 | } | 1680 | } |
1652 | 1681 | ||
1682 | ret = o2net_set_usertimeout(sock); | ||
1683 | if (ret) { | ||
1684 | mlog(ML_ERROR, "set TCP_USER_TIMEOUT failed with %d\n", ret); | ||
1685 | goto out; | ||
1686 | } | ||
1687 | |||
1653 | o2net_register_callbacks(sc->sc_sock->sk, sc); | 1688 | o2net_register_callbacks(sc->sc_sock->sk, sc); |
1654 | 1689 | ||
1655 | spin_lock(&nn->nn_lock); | 1690 | spin_lock(&nn->nn_lock); |
@@ -1683,6 +1718,7 @@ out: | |||
1683 | if (mynode) | 1718 | if (mynode) |
1684 | o2nm_node_put(mynode); | 1719 | o2nm_node_put(mynode); |
1685 | 1720 | ||
1721 | memalloc_noio_restore(noio_flag); | ||
1686 | return; | 1722 | return; |
1687 | } | 1723 | } |
1688 | 1724 | ||
@@ -1694,7 +1730,8 @@ static void o2net_connect_expired(struct work_struct *work) | |||
1694 | spin_lock(&nn->nn_lock); | 1730 | spin_lock(&nn->nn_lock); |
1695 | if (!nn->nn_sc_valid) { | 1731 | if (!nn->nn_sc_valid) { |
1696 | printk(KERN_NOTICE "o2net: No connection established with " | 1732 | printk(KERN_NOTICE "o2net: No connection established with " |
1697 | "node %u after %u.%u seconds, giving up.\n", | 1733 | "node %u after %u.%u seconds, check network and" |
1734 | " cluster configuration.\n", | ||
1698 | o2net_num_from_nn(nn), | 1735 | o2net_num_from_nn(nn), |
1699 | o2net_idle_timeout() / 1000, | 1736 | o2net_idle_timeout() / 1000, |
1700 | o2net_idle_timeout() % 1000); | 1737 | o2net_idle_timeout() % 1000); |
@@ -1808,6 +1845,15 @@ static int o2net_accept_one(struct socket *sock, int *more) | |||
1808 | struct o2nm_node *local_node = NULL; | 1845 | struct o2nm_node *local_node = NULL; |
1809 | struct o2net_sock_container *sc = NULL; | 1846 | struct o2net_sock_container *sc = NULL; |
1810 | struct o2net_node *nn; | 1847 | struct o2net_node *nn; |
1848 | unsigned int noio_flag; | ||
1849 | |||
1850 | /* | ||
1851 | * sock_create_lite allocates the sock with GFP_KERNEL. We must set | ||
1852 | * the per-process flag PF_MEMALLOC_NOIO so that all allocations done | ||
1853 | * by this process are done as if GFP_NOIO was specified, so we | ||
1854 | * do not reenter the filesystem while doing memory reclaim. | ||
1855 | */ | ||
1856 | noio_flag = memalloc_noio_save(); | ||
1811 | 1857 | ||
1812 | BUG_ON(sock == NULL); | 1858 | BUG_ON(sock == NULL); |
1813 | *more = 0; | 1859 | *more = 0; |
@@ -1831,6 +1877,12 @@ static int o2net_accept_one(struct socket *sock, int *more) | |||
1831 | goto out; | 1877 | goto out; |
1832 | } | 1878 | } |
1833 | 1879 | ||
1880 | ret = o2net_set_usertimeout(new_sock); | ||
1881 | if (ret) { | ||
1882 | mlog(ML_ERROR, "set TCP_USER_TIMEOUT failed with %d\n", ret); | ||
1883 | goto out; | ||
1884 | } | ||
1885 | |||
1834 | slen = sizeof(sin); | 1886 | slen = sizeof(sin); |
1835 | ret = new_sock->ops->getname(new_sock, (struct sockaddr *) &sin, | 1887 | ret = new_sock->ops->getname(new_sock, (struct sockaddr *) &sin, |
1836 | &slen, 1); | 1888 | &slen, 1); |
@@ -1918,6 +1970,8 @@ out: | |||
1918 | o2nm_node_put(local_node); | 1970 | o2nm_node_put(local_node); |
1919 | if (sc) | 1971 | if (sc) |
1920 | sc_put(sc); | 1972 | sc_put(sc); |
1973 | |||
1974 | memalloc_noio_restore(noio_flag); | ||
1921 | return ret; | 1975 | return ret; |
1922 | } | 1976 | } |
1923 | 1977 | ||
@@ -2113,17 +2167,13 @@ int o2net_init(void) | |||
2113 | o2quo_init(); | 2167 | o2quo_init(); |
2114 | 2168 | ||
2115 | if (o2net_debugfs_init()) | 2169 | if (o2net_debugfs_init()) |
2116 | return -ENOMEM; | 2170 | goto out; |
2117 | 2171 | ||
2118 | o2net_hand = kzalloc(sizeof(struct o2net_handshake), GFP_KERNEL); | 2172 | o2net_hand = kzalloc(sizeof(struct o2net_handshake), GFP_KERNEL); |
2119 | o2net_keep_req = kzalloc(sizeof(struct o2net_msg), GFP_KERNEL); | 2173 | o2net_keep_req = kzalloc(sizeof(struct o2net_msg), GFP_KERNEL); |
2120 | o2net_keep_resp = kzalloc(sizeof(struct o2net_msg), GFP_KERNEL); | 2174 | o2net_keep_resp = kzalloc(sizeof(struct o2net_msg), GFP_KERNEL); |
2121 | if (!o2net_hand || !o2net_keep_req || !o2net_keep_resp) { | 2175 | if (!o2net_hand || !o2net_keep_req || !o2net_keep_resp) |
2122 | kfree(o2net_hand); | 2176 | goto out; |
2123 | kfree(o2net_keep_req); | ||
2124 | kfree(o2net_keep_resp); | ||
2125 | return -ENOMEM; | ||
2126 | } | ||
2127 | 2177 | ||
2128 | o2net_hand->protocol_version = cpu_to_be64(O2NET_PROTOCOL_VERSION); | 2178 | o2net_hand->protocol_version = cpu_to_be64(O2NET_PROTOCOL_VERSION); |
2129 | o2net_hand->connector_id = cpu_to_be64(1); | 2179 | o2net_hand->connector_id = cpu_to_be64(1); |
@@ -2148,6 +2198,14 @@ int o2net_init(void) | |||
2148 | } | 2198 | } |
2149 | 2199 | ||
2150 | return 0; | 2200 | return 0; |
2201 | |||
2202 | out: | ||
2203 | kfree(o2net_hand); | ||
2204 | kfree(o2net_keep_req); | ||
2205 | kfree(o2net_keep_resp); | ||
2206 | |||
2207 | o2quo_exit(); | ||
2208 | return -ENOMEM; | ||
2151 | } | 2209 | } |
2152 | 2210 | ||
2153 | void o2net_exit(void) | 2211 | void o2net_exit(void) |
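
Both the connect and accept paths above bracket their socket allocation with memalloc_noio_save()/memalloc_noio_restore() (declared in <linux/sched.h> in kernels of this vintage), which sets PF_MEMALLOC_NOIO on the current task so that intervening GFP_KERNEL allocations behave as GFP_NOIO. A minimal sketch of the bracket:

/*
 * Sketch of the PF_MEMALLOC_NOIO bracket: every allocation between
 * save and restore is degraded to GFP_NOIO, so memory reclaim
 * cannot re-enter the filesystem from this path.
 */
static int alloc_without_fs_reclaim(void)
{
	unsigned int noio_flag;
	void *buf;

	noio_flag = memalloc_noio_save();
	buf = kmalloc(PAGE_SIZE, GFP_KERNEL);	/* effectively GFP_NOIO */
	memalloc_noio_restore(noio_flag);

	if (!buf)
		return -ENOMEM;
	kfree(buf);
	return 0;
}
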
diff --git a/fs/ocfs2/cluster/tcp.h b/fs/ocfs2/cluster/tcp.h index 5bada2a69b50..c571e849fda4 100644 --- a/fs/ocfs2/cluster/tcp.h +++ b/fs/ocfs2/cluster/tcp.h | |||
@@ -63,6 +63,7 @@ typedef void (o2net_post_msg_handler_func)(int status, void *data, | |||
63 | #define O2NET_KEEPALIVE_DELAY_MS_DEFAULT 2000 | 63 | #define O2NET_KEEPALIVE_DELAY_MS_DEFAULT 2000 |
64 | #define O2NET_IDLE_TIMEOUT_MS_DEFAULT 30000 | 64 | #define O2NET_IDLE_TIMEOUT_MS_DEFAULT 30000 |
65 | 65 | ||
66 | #define O2NET_TCP_USER_TIMEOUT 0x7fffffff | ||
66 | 67 | ||
67 | /* TODO: figure this out.... */ | 68 | /* TODO: figure this out.... */ |
68 | static inline int o2net_link_down(int err, struct socket *sock) | 69 | static inline int o2net_link_down(int err, struct socket *sock) |
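
O2NET_TCP_USER_TIMEOUT is INT_MAX milliseconds, which effectively disables the kernel's own unacked-data timeout and leaves connection teardown to o2net's idle timer. For reference, the userspace analogue of what o2net_set_usertimeout() does through kernel_setsockopt():

/* Userspace sketch: TCP_USER_TIMEOUT bounds, in milliseconds, how
 * long transmitted data may remain unacknowledged before the
 * connection is forcibly closed. */
#include <netinet/tcp.h>
#include <sys/socket.h>

static int set_user_timeout(int fd, unsigned int ms)
{
	return setsockopt(fd, IPPROTO_TCP, TCP_USER_TIMEOUT,
			  &ms, sizeof(ms));
}
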
diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c index 18f13c2e4a10..149eb556b8c6 100644 --- a/fs/ocfs2/dlm/dlmdebug.c +++ b/fs/ocfs2/dlm/dlmdebug.c | |||
@@ -647,41 +647,30 @@ static const struct seq_operations debug_lockres_ops = { | |||
647 | static int debug_lockres_open(struct inode *inode, struct file *file) | 647 | static int debug_lockres_open(struct inode *inode, struct file *file) |
648 | { | 648 | { |
649 | struct dlm_ctxt *dlm = inode->i_private; | 649 | struct dlm_ctxt *dlm = inode->i_private; |
650 | int ret = -ENOMEM; | 650 | struct debug_lockres *dl; |
651 | struct seq_file *seq; | 651 | void *buf; |
652 | struct debug_lockres *dl = NULL; | ||
653 | 652 | ||
654 | dl = kzalloc(sizeof(struct debug_lockres), GFP_KERNEL); | 653 | buf = kmalloc(PAGE_SIZE, GFP_KERNEL); |
655 | if (!dl) { | 654 | if (!buf) |
656 | mlog_errno(ret); | ||
657 | goto bail; | 655 | goto bail; |
658 | } | ||
659 | 656 | ||
660 | dl->dl_len = PAGE_SIZE; | 657 | dl = __seq_open_private(file, &debug_lockres_ops, sizeof(*dl)); |
661 | dl->dl_buf = kmalloc(dl->dl_len, GFP_KERNEL); | 658 | if (!dl) |
662 | if (!dl->dl_buf) { | 659 | goto bailfree; |
663 | mlog_errno(ret); | ||
664 | goto bail; | ||
665 | } | ||
666 | 660 | ||
667 | ret = seq_open(file, &debug_lockres_ops); | 661 | dl->dl_len = PAGE_SIZE; |
668 | if (ret) { | 662 | dl->dl_buf = buf; |
669 | mlog_errno(ret); | ||
670 | goto bail; | ||
671 | } | ||
672 | |||
673 | seq = file->private_data; | ||
674 | seq->private = dl; | ||
675 | 663 | ||
676 | dlm_grab(dlm); | 664 | dlm_grab(dlm); |
677 | dl->dl_ctxt = dlm; | 665 | dl->dl_ctxt = dlm; |
678 | 666 | ||
679 | return 0; | 667 | return 0; |
668 | |||
669 | bailfree: | ||
670 | kfree(buf); | ||
680 | bail: | 671 | bail: |
681 | if (dl) | 672 | mlog_errno(-ENOMEM); |
682 | kfree(dl->dl_buf); | 673 | return -ENOMEM; |
683 | kfree(dl); | ||
684 | return ret; | ||
685 | } | 674 | } |
686 | 675 | ||
687 | static int debug_lockres_release(struct inode *inode, struct file *file) | 676 | static int debug_lockres_release(struct inode *inode, struct file *file) |
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index 3fcf205ee900..02d315fef432 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c | |||
@@ -839,7 +839,7 @@ static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data, | |||
839 | * to back off and try again. This gives heartbeat a chance | 839 | * to back off and try again. This gives heartbeat a chance |
840 | * to catch up. | 840 | * to catch up. |
841 | */ | 841 | */ |
842 | if (!o2hb_check_node_heartbeating(query->node_idx)) { | 842 | if (!o2hb_check_node_heartbeating_no_sem(query->node_idx)) { |
843 | mlog(0, "node %u is not in our live map yet\n", | 843 | mlog(0, "node %u is not in our live map yet\n", |
844 | query->node_idx); | 844 | query->node_idx); |
845 | 845 | ||
@@ -1975,24 +1975,22 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain, | |||
1975 | 1975 | ||
1976 | dlm = kzalloc(sizeof(*dlm), GFP_KERNEL); | 1976 | dlm = kzalloc(sizeof(*dlm), GFP_KERNEL); |
1977 | if (!dlm) { | 1977 | if (!dlm) { |
1978 | mlog_errno(-ENOMEM); | 1978 | ret = -ENOMEM; |
1979 | mlog_errno(ret); | ||
1979 | goto leave; | 1980 | goto leave; |
1980 | } | 1981 | } |
1981 | 1982 | ||
1982 | dlm->name = kstrdup(domain, GFP_KERNEL); | 1983 | dlm->name = kstrdup(domain, GFP_KERNEL); |
1983 | if (dlm->name == NULL) { | 1984 | if (dlm->name == NULL) { |
1984 | mlog_errno(-ENOMEM); | 1985 | ret = -ENOMEM; |
1985 | kfree(dlm); | 1986 | mlog_errno(ret); |
1986 | dlm = NULL; | ||
1987 | goto leave; | 1987 | goto leave; |
1988 | } | 1988 | } |
1989 | 1989 | ||
1990 | dlm->lockres_hash = (struct hlist_head **)dlm_alloc_pagevec(DLM_HASH_PAGES); | 1990 | dlm->lockres_hash = (struct hlist_head **)dlm_alloc_pagevec(DLM_HASH_PAGES); |
1991 | if (!dlm->lockres_hash) { | 1991 | if (!dlm->lockres_hash) { |
1992 | mlog_errno(-ENOMEM); | 1992 | ret = -ENOMEM; |
1993 | kfree(dlm->name); | 1993 | mlog_errno(ret); |
1994 | kfree(dlm); | ||
1995 | dlm = NULL; | ||
1996 | goto leave; | 1994 | goto leave; |
1997 | } | 1995 | } |
1998 | 1996 | ||
@@ -2002,11 +2000,8 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain, | |||
2002 | dlm->master_hash = (struct hlist_head **) | 2000 | dlm->master_hash = (struct hlist_head **) |
2003 | dlm_alloc_pagevec(DLM_HASH_PAGES); | 2001 | dlm_alloc_pagevec(DLM_HASH_PAGES); |
2004 | if (!dlm->master_hash) { | 2002 | if (!dlm->master_hash) { |
2005 | mlog_errno(-ENOMEM); | 2003 | ret = -ENOMEM; |
2006 | dlm_free_pagevec((void **)dlm->lockres_hash, DLM_HASH_PAGES); | 2004 | mlog_errno(ret); |
2007 | kfree(dlm->name); | ||
2008 | kfree(dlm); | ||
2009 | dlm = NULL; | ||
2010 | goto leave; | 2005 | goto leave; |
2011 | } | 2006 | } |
2012 | 2007 | ||
@@ -2017,14 +2012,8 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain, | |||
2017 | dlm->node_num = o2nm_this_node(); | 2012 | dlm->node_num = o2nm_this_node(); |
2018 | 2013 | ||
2019 | ret = dlm_create_debugfs_subroot(dlm); | 2014 | ret = dlm_create_debugfs_subroot(dlm); |
2020 | if (ret < 0) { | 2015 | if (ret < 0) |
2021 | dlm_free_pagevec((void **)dlm->master_hash, DLM_HASH_PAGES); | ||
2022 | dlm_free_pagevec((void **)dlm->lockres_hash, DLM_HASH_PAGES); | ||
2023 | kfree(dlm->name); | ||
2024 | kfree(dlm); | ||
2025 | dlm = NULL; | ||
2026 | goto leave; | 2016 | goto leave; |
2027 | } | ||
2028 | 2017 | ||
2029 | spin_lock_init(&dlm->spinlock); | 2018 | spin_lock_init(&dlm->spinlock); |
2030 | spin_lock_init(&dlm->master_lock); | 2019 | spin_lock_init(&dlm->master_lock); |
@@ -2085,6 +2074,19 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain, | |||
2085 | atomic_read(&dlm->dlm_refs.refcount)); | 2074 | atomic_read(&dlm->dlm_refs.refcount)); |
2086 | 2075 | ||
2087 | leave: | 2076 | leave: |
2077 | if (ret < 0 && dlm) { | ||
2078 | if (dlm->master_hash) | ||
2079 | dlm_free_pagevec((void **)dlm->master_hash, | ||
2080 | DLM_HASH_PAGES); | ||
2081 | |||
2082 | if (dlm->lockres_hash) | ||
2083 | dlm_free_pagevec((void **)dlm->lockres_hash, | ||
2084 | DLM_HASH_PAGES); | ||
2085 | |||
2086 | kfree(dlm->name); | ||
2087 | kfree(dlm); | ||
2088 | dlm = NULL; | ||
2089 | } | ||
2088 | return dlm; | 2090 | return dlm; |
2089 | } | 2091 | } |
2090 | 2092 | ||
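
The dlm_alloc_ctxt() rework above replaces per-failure-site cleanup with one block at the leave: label that frees whatever was actually allocated, checking each pointer and relying on kfree(NULL) being a no-op. A sketch of the shape, with a hypothetical struct thing:

struct thing {
	char *name;
};

/* Sketch: every failure sets ret and jumps to one label; the label
 * unwinds only what exists. */
static struct thing *alloc_thing(void)
{
	struct thing *t;
	int ret = 0;

	t = kzalloc(sizeof(*t), GFP_KERNEL);
	if (!t) {
		ret = -ENOMEM;
		goto leave;
	}

	t->name = kstrdup("thing", GFP_KERNEL);
	if (!t->name) {
		ret = -ENOMEM;
		goto leave;
	}

leave:
	if (ret < 0 && t) {
		kfree(t->name);		/* kfree(NULL) is a no-op */
		kfree(t);
		t = NULL;
	}
	return t;
}
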
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 3ec906ef5d9a..215e41abf101 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c | |||
@@ -625,9 +625,6 @@ struct dlm_lock_resource *dlm_new_lockres(struct dlm_ctxt *dlm, | |||
625 | return res; | 625 | return res; |
626 | 626 | ||
627 | error: | 627 | error: |
628 | if (res && res->lockname.name) | ||
629 | kmem_cache_free(dlm_lockname_cache, (void *)res->lockname.name); | ||
630 | |||
631 | if (res) | 628 | if (res) |
632 | kmem_cache_free(dlm_lockres_cache, res); | 629 | kmem_cache_free(dlm_lockres_cache, res); |
633 | return NULL; | 630 | return NULL; |
@@ -655,12 +652,9 @@ void dlm_lockres_clear_refmap_bit(struct dlm_ctxt *dlm, | |||
655 | clear_bit(bit, res->refmap); | 652 | clear_bit(bit, res->refmap); |
656 | } | 653 | } |
657 | 654 | ||
658 | 655 | static void __dlm_lockres_grab_inflight_ref(struct dlm_ctxt *dlm, | |
659 | void dlm_lockres_grab_inflight_ref(struct dlm_ctxt *dlm, | ||
660 | struct dlm_lock_resource *res) | 656 | struct dlm_lock_resource *res) |
661 | { | 657 | { |
662 | assert_spin_locked(&res->spinlock); | ||
663 | |||
664 | res->inflight_locks++; | 658 | res->inflight_locks++; |
665 | 659 | ||
666 | mlog(0, "%s: res %.*s, inflight++: now %u, %ps()\n", dlm->name, | 660 | mlog(0, "%s: res %.*s, inflight++: now %u, %ps()\n", dlm->name, |
@@ -668,6 +662,13 @@ void dlm_lockres_grab_inflight_ref(struct dlm_ctxt *dlm, | |||
668 | __builtin_return_address(0)); | 662 | __builtin_return_address(0)); |
669 | } | 663 | } |
670 | 664 | ||
665 | void dlm_lockres_grab_inflight_ref(struct dlm_ctxt *dlm, | ||
666 | struct dlm_lock_resource *res) | ||
667 | { | ||
668 | assert_spin_locked(&res->spinlock); | ||
669 | __dlm_lockres_grab_inflight_ref(dlm, res); | ||
670 | } | ||
671 | |||
671 | void dlm_lockres_drop_inflight_ref(struct dlm_ctxt *dlm, | 672 | void dlm_lockres_drop_inflight_ref(struct dlm_ctxt *dlm, |
672 | struct dlm_lock_resource *res) | 673 | struct dlm_lock_resource *res) |
673 | { | 674 | { |
@@ -894,10 +895,8 @@ lookup: | |||
894 | /* finally add the lockres to its hash bucket */ | 895 | /* finally add the lockres to its hash bucket */ |
895 | __dlm_insert_lockres(dlm, res); | 896 | __dlm_insert_lockres(dlm, res); |
896 | 897 | ||
897 | /* Grab inflight ref to pin the resource */ | 898 | /* since this lockres is new it does not require the spinlock */ |
898 | spin_lock(&res->spinlock); | 899 | __dlm_lockres_grab_inflight_ref(dlm, res); |
899 | dlm_lockres_grab_inflight_ref(dlm, res); | ||
900 | spin_unlock(&res->spinlock); | ||
901 | 900 | ||
902 | /* get an extra ref on the mle in case this is a BLOCK | 901 | /* get an extra ref on the mle in case this is a BLOCK |
903 | * if so, the creator of the BLOCK may try to put the last | 902 | * if so, the creator of the BLOCK may try to put the last |
@@ -2037,6 +2036,10 @@ kill: | |||
2037 | "and killing the other node now! This node is OK and can continue.\n"); | 2036 | "and killing the other node now! This node is OK and can continue.\n"); |
2038 | __dlm_print_one_lock_resource(res); | 2037 | __dlm_print_one_lock_resource(res); |
2039 | spin_unlock(&res->spinlock); | 2038 | spin_unlock(&res->spinlock); |
2039 | spin_lock(&dlm->master_lock); | ||
2040 | if (mle) | ||
2041 | __dlm_put_mle(mle); | ||
2042 | spin_unlock(&dlm->master_lock); | ||
2040 | spin_unlock(&dlm->spinlock); | 2043 | spin_unlock(&dlm->spinlock); |
2041 | *ret_data = (void *)res; | 2044 | *ret_data = (void *)res; |
2042 | dlm_put(dlm); | 2045 | dlm_put(dlm); |
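
The split above is the usual locked/__unlocked helper pair: the double-underscore helper does the work and documents, but cannot check, that the caller provides exclusion, while the public wrapper asserts the lock is held. The one call site that has just created the lockres can then take the bare helper. A generic sketch with a hypothetical struct res:

#include <linux/spinlock.h>

struct res {
	spinlock_t lock;
	unsigned int count;
};

static void __bump(struct res *r)
{
	r->count++;	/* caller guarantees exclusive access */
}

static void bump(struct res *r)
{
	assert_spin_locked(&r->lock);	/* debug-only check */
	__bump(r);
}

An object that no other CPU can see yet may call __bump() directly, which is what the dlm_get_lock_resource hunk does for the freshly inserted lockres.
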
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index 45067faf5695..3365839d2971 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c | |||
@@ -1710,9 +1710,12 @@ int dlm_master_requery_handler(struct o2net_msg *msg, u32 len, void *data, | |||
1710 | BUG(); | 1710 | BUG(); |
1711 | } else | 1711 | } else |
1712 | __dlm_lockres_grab_inflight_worker(dlm, res); | 1712 | __dlm_lockres_grab_inflight_worker(dlm, res); |
1713 | } else /* put.. incase we are not the master */ | 1713 | spin_unlock(&res->spinlock); |
1714 | } else { | ||
1715 | /* put, in case we are not the master */ | ||
1716 | spin_unlock(&res->spinlock); | ||
1714 | dlm_lockres_put(res); | 1717 | dlm_lockres_put(res); |
1715 | spin_unlock(&res->spinlock); | 1718 | } |
1716 | } | 1719 | } |
1717 | spin_unlock(&dlm->spinlock); | 1720 | spin_unlock(&dlm->spinlock); |
1718 | 1721 | ||
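
The reordering above exists because dlm_lockres_put() can drop the last reference and free the resource, so unlocking res->spinlock afterwards would touch freed memory. Distilled into a sketch:

static void unhash_and_put(struct dlm_lock_resource *res)
{
	/* correct: unlock first, because the put may free res */
	spin_unlock(&res->spinlock);
	dlm_lockres_put(res);

	/* buggy order (what the patch removes): use-after-free
	 * when the put drops the last reference
	 *
	 *	dlm_lockres_put(res);
	 *	spin_unlock(&res->spinlock);
	 */
}
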
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 52cfe99ae056..21262f2b1654 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c | |||
@@ -2892,37 +2892,24 @@ static int ocfs2_dlm_debug_release(struct inode *inode, struct file *file) | |||
2892 | 2892 | ||
2893 | static int ocfs2_dlm_debug_open(struct inode *inode, struct file *file) | 2893 | static int ocfs2_dlm_debug_open(struct inode *inode, struct file *file) |
2894 | { | 2894 | { |
2895 | int ret; | ||
2896 | struct ocfs2_dlm_seq_priv *priv; | 2895 | struct ocfs2_dlm_seq_priv *priv; |
2897 | struct seq_file *seq; | ||
2898 | struct ocfs2_super *osb; | 2896 | struct ocfs2_super *osb; |
2899 | 2897 | ||
2900 | priv = kzalloc(sizeof(struct ocfs2_dlm_seq_priv), GFP_KERNEL); | 2898 | priv = __seq_open_private(file, &ocfs2_dlm_seq_ops, sizeof(*priv)); |
2901 | if (!priv) { | 2899 | if (!priv) { |
2902 | ret = -ENOMEM; | 2900 | mlog_errno(-ENOMEM); |
2903 | mlog_errno(ret); | 2901 | return -ENOMEM; |
2904 | goto out; | ||
2905 | } | 2902 | } |
2903 | |||
2906 | osb = inode->i_private; | 2904 | osb = inode->i_private; |
2907 | ocfs2_get_dlm_debug(osb->osb_dlm_debug); | 2905 | ocfs2_get_dlm_debug(osb->osb_dlm_debug); |
2908 | priv->p_dlm_debug = osb->osb_dlm_debug; | 2906 | priv->p_dlm_debug = osb->osb_dlm_debug; |
2909 | INIT_LIST_HEAD(&priv->p_iter_res.l_debug_list); | 2907 | INIT_LIST_HEAD(&priv->p_iter_res.l_debug_list); |
2910 | 2908 | ||
2911 | ret = seq_open(file, &ocfs2_dlm_seq_ops); | ||
2912 | if (ret) { | ||
2913 | kfree(priv); | ||
2914 | mlog_errno(ret); | ||
2915 | goto out; | ||
2916 | } | ||
2917 | |||
2918 | seq = file->private_data; | ||
2919 | seq->private = priv; | ||
2920 | |||
2921 | ocfs2_add_lockres_tracking(&priv->p_iter_res, | 2909 | ocfs2_add_lockres_tracking(&priv->p_iter_res, |
2922 | priv->p_dlm_debug); | 2910 | priv->p_dlm_debug); |
2923 | 2911 | ||
2924 | out: | 2912 | return 0; |
2925 | return ret; | ||
2926 | } | 2913 | } |
2927 | 2914 | ||
2928 | static const struct file_operations ocfs2_dlm_debug_fops = { | 2915 | static const struct file_operations ocfs2_dlm_debug_fops = { |
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 2930e231f3f9..324dc93ac896 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c | |||
@@ -760,7 +760,7 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from, | |||
760 | struct address_space *mapping = inode->i_mapping; | 760 | struct address_space *mapping = inode->i_mapping; |
761 | struct page *page; | 761 | struct page *page; |
762 | unsigned long index = abs_from >> PAGE_CACHE_SHIFT; | 762 | unsigned long index = abs_from >> PAGE_CACHE_SHIFT; |
763 | handle_t *handle = NULL; | 763 | handle_t *handle; |
764 | int ret = 0; | 764 | int ret = 0; |
765 | unsigned zero_from, zero_to, block_start, block_end; | 765 | unsigned zero_from, zero_to, block_start, block_end; |
766 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; | 766 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; |
@@ -769,11 +769,17 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from, | |||
769 | BUG_ON(abs_to > (((u64)index + 1) << PAGE_CACHE_SHIFT)); | 769 | BUG_ON(abs_to > (((u64)index + 1) << PAGE_CACHE_SHIFT)); |
770 | BUG_ON(abs_from & (inode->i_blkbits - 1)); | 770 | BUG_ON(abs_from & (inode->i_blkbits - 1)); |
771 | 771 | ||
772 | handle = ocfs2_zero_start_ordered_transaction(inode, di_bh); | ||
773 | if (IS_ERR(handle)) { | ||
774 | ret = PTR_ERR(handle); | ||
775 | goto out; | ||
776 | } | ||
777 | |||
772 | page = find_or_create_page(mapping, index, GFP_NOFS); | 778 | page = find_or_create_page(mapping, index, GFP_NOFS); |
773 | if (!page) { | 779 | if (!page) { |
774 | ret = -ENOMEM; | 780 | ret = -ENOMEM; |
775 | mlog_errno(ret); | 781 | mlog_errno(ret); |
776 | goto out; | 782 | goto out_commit_trans; |
777 | } | 783 | } |
778 | 784 | ||
779 | /* Get the offsets within the page that we want to zero */ | 785 | /* Get the offsets within the page that we want to zero */ |
@@ -805,15 +811,6 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from, | |||
805 | goto out_unlock; | 811 | goto out_unlock; |
806 | } | 812 | } |
807 | 813 | ||
808 | if (!handle) { | ||
809 | handle = ocfs2_zero_start_ordered_transaction(inode, | ||
810 | di_bh); | ||
811 | if (IS_ERR(handle)) { | ||
812 | ret = PTR_ERR(handle); | ||
813 | handle = NULL; | ||
814 | break; | ||
815 | } | ||
816 | } | ||
817 | 814 | ||
818 | /* must not update i_size! */ | 815 | /* must not update i_size! */ |
819 | ret = block_commit_write(page, block_start + 1, | 816 | ret = block_commit_write(page, block_start + 1, |
@@ -824,27 +821,29 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from, | |||
824 | ret = 0; | 821 | ret = 0; |
825 | } | 822 | } |
826 | 823 | ||
824 | /* | ||
825 | * fs-writeback will release dirty pages whose offsets are beyond | ||
826 | * the inode size without taking the page lock; the release happens at | ||
827 | * block_write_full_page(). | ||
828 | */ | ||
829 | i_size_write(inode, abs_to); | ||
830 | inode->i_blocks = ocfs2_inode_sector_count(inode); | ||
831 | di->i_size = cpu_to_le64((u64)i_size_read(inode)); | ||
832 | inode->i_mtime = inode->i_ctime = CURRENT_TIME; | ||
833 | di->i_mtime = di->i_ctime = cpu_to_le64(inode->i_mtime.tv_sec); | ||
834 | di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec); | ||
835 | di->i_mtime_nsec = di->i_ctime_nsec; | ||
827 | if (handle) { | 836 | if (handle) { |
828 | /* | ||
829 | * fs-writeback will release the dirty pages without page lock | ||
830 | * whose offset are over inode size, the release happens at | ||
831 | * block_write_full_page(). | ||
832 | */ | ||
833 | i_size_write(inode, abs_to); | ||
834 | inode->i_blocks = ocfs2_inode_sector_count(inode); | ||
835 | di->i_size = cpu_to_le64((u64)i_size_read(inode)); | ||
836 | inode->i_mtime = inode->i_ctime = CURRENT_TIME; | ||
837 | di->i_mtime = di->i_ctime = cpu_to_le64(inode->i_mtime.tv_sec); | ||
838 | di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec); | ||
839 | di->i_mtime_nsec = di->i_ctime_nsec; | ||
840 | ocfs2_journal_dirty(handle, di_bh); | 837 | ocfs2_journal_dirty(handle, di_bh); |
841 | ocfs2_update_inode_fsync_trans(handle, inode, 1); | 838 | ocfs2_update_inode_fsync_trans(handle, inode, 1); |
842 | ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); | ||
843 | } | 839 | } |
844 | 840 | ||
845 | out_unlock: | 841 | out_unlock: |
846 | unlock_page(page); | 842 | unlock_page(page); |
847 | page_cache_release(page); | 843 | page_cache_release(page); |
844 | out_commit_trans: | ||
845 | if (handle) | ||
846 | ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); | ||
848 | out: | 847 | out: |
849 | return ret; | 848 | return ret; |
850 | } | 849 | } |
@@ -1253,7 +1252,7 @@ bail: | |||
1253 | brelse(bh); | 1252 | brelse(bh); |
1254 | 1253 | ||
1255 | /* Release quota pointers in case we acquired them */ | 1254 | /* Release quota pointers in case we acquired them */ |
1256 | for (qtype = 0; qtype < MAXQUOTAS; qtype++) | 1255 | for (qtype = 0; qtype < OCFS2_MAXQUOTAS; qtype++) |
1257 | dqput(transfer_to[qtype]); | 1256 | dqput(transfer_to[qtype]); |
1258 | 1257 | ||
1259 | if (!status && attr->ia_valid & ATTR_MODE) { | 1258 | if (!status && attr->ia_valid & ATTR_MODE) { |
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h index a6c991c0fc98..a9b76de46047 100644 --- a/fs/ocfs2/inode.h +++ b/fs/ocfs2/inode.h | |||
@@ -162,7 +162,7 @@ static inline blkcnt_t ocfs2_inode_sector_count(struct inode *inode) | |||
162 | { | 162 | { |
163 | int c_to_s_bits = OCFS2_SB(inode->i_sb)->s_clustersize_bits - 9; | 163 | int c_to_s_bits = OCFS2_SB(inode->i_sb)->s_clustersize_bits - 9; |
164 | 164 | ||
165 | return (blkcnt_t)(OCFS2_I(inode)->ip_clusters << c_to_s_bits); | 165 | return (blkcnt_t)OCFS2_I(inode)->ip_clusters << c_to_s_bits; |
166 | } | 166 | } |
167 | 167 | ||
168 | /* Validate that a bh contains a valid inode */ | 168 | /* Validate that a bh contains a valid inode */ |
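
The small-looking change above matters: ip_clusters is a 32-bit quantity, and without the cast the shift is performed in 32 bits, silently dropping high bits before the result is widened to blkcnt_t. The move_extents.c hunk below fixes the same class of bug around le32_to_cpu(). A minimal demonstration:

#include <linux/types.h>

static u64 sectors_wrong(u32 clusters, int shift)
{
	/* 32-bit shift overflows first, then the truncated value widens */
	return (u64)(clusters << shift);
}

static u64 sectors_right(u32 clusters, int shift)
{
	/* widen first, so the shift happens in 64 bits */
	return (u64)clusters << shift;
}
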
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c index 6f66b3751ace..53e6c40ed4c6 100644 --- a/fs/ocfs2/ioctl.c +++ b/fs/ocfs2/ioctl.c | |||
@@ -35,9 +35,8 @@ | |||
35 | copy_to_user((typeof(a) __user *)b, &(a), sizeof(a)) | 35 | copy_to_user((typeof(a) __user *)b, &(a), sizeof(a)) |
36 | 36 | ||
37 | /* | 37 | /* |
38 | * This call is void because we are already reporting an error that may | 38 | * This is just a best-effort to tell userspace that this request |
39 | * be -EFAULT. The error will be returned from the ioctl(2) call. It's | 39 | * caused the error. |
40 | * just a best-effort to tell userspace that this request caused the error. | ||
41 | */ | 40 | */ |
42 | static inline void o2info_set_request_error(struct ocfs2_info_request *kreq, | 41 | static inline void o2info_set_request_error(struct ocfs2_info_request *kreq, |
43 | struct ocfs2_info_request __user *req) | 42 | struct ocfs2_info_request __user *req) |
@@ -146,136 +145,105 @@ bail: | |||
146 | static int ocfs2_info_handle_blocksize(struct inode *inode, | 145 | static int ocfs2_info_handle_blocksize(struct inode *inode, |
147 | struct ocfs2_info_request __user *req) | 146 | struct ocfs2_info_request __user *req) |
148 | { | 147 | { |
149 | int status = -EFAULT; | ||
150 | struct ocfs2_info_blocksize oib; | 148 | struct ocfs2_info_blocksize oib; |
151 | 149 | ||
152 | if (o2info_from_user(oib, req)) | 150 | if (o2info_from_user(oib, req)) |
153 | goto bail; | 151 | return -EFAULT; |
154 | 152 | ||
155 | oib.ib_blocksize = inode->i_sb->s_blocksize; | 153 | oib.ib_blocksize = inode->i_sb->s_blocksize; |
156 | 154 | ||
157 | o2info_set_request_filled(&oib.ib_req); | 155 | o2info_set_request_filled(&oib.ib_req); |
158 | 156 | ||
159 | if (o2info_to_user(oib, req)) | 157 | if (o2info_to_user(oib, req)) |
160 | goto bail; | 158 | return -EFAULT; |
161 | |||
162 | status = 0; | ||
163 | bail: | ||
164 | if (status) | ||
165 | o2info_set_request_error(&oib.ib_req, req); | ||
166 | 159 | ||
167 | return status; | 160 | return 0; |
168 | } | 161 | } |
169 | 162 | ||
170 | static int ocfs2_info_handle_clustersize(struct inode *inode, | 163 | static int ocfs2_info_handle_clustersize(struct inode *inode, |
171 | struct ocfs2_info_request __user *req) | 164 | struct ocfs2_info_request __user *req) |
172 | { | 165 | { |
173 | int status = -EFAULT; | ||
174 | struct ocfs2_info_clustersize oic; | 166 | struct ocfs2_info_clustersize oic; |
175 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 167 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
176 | 168 | ||
177 | if (o2info_from_user(oic, req)) | 169 | if (o2info_from_user(oic, req)) |
178 | goto bail; | 170 | return -EFAULT; |
179 | 171 | ||
180 | oic.ic_clustersize = osb->s_clustersize; | 172 | oic.ic_clustersize = osb->s_clustersize; |
181 | 173 | ||
182 | o2info_set_request_filled(&oic.ic_req); | 174 | o2info_set_request_filled(&oic.ic_req); |
183 | 175 | ||
184 | if (o2info_to_user(oic, req)) | 176 | if (o2info_to_user(oic, req)) |
185 | goto bail; | 177 | return -EFAULT; |
186 | |||
187 | status = 0; | ||
188 | bail: | ||
189 | if (status) | ||
190 | o2info_set_request_error(&oic.ic_req, req); | ||
191 | 178 | ||
192 | return status; | 179 | return 0; |
193 | } | 180 | } |
194 | 181 | ||
195 | static int ocfs2_info_handle_maxslots(struct inode *inode, | 182 | static int ocfs2_info_handle_maxslots(struct inode *inode, |
196 | struct ocfs2_info_request __user *req) | 183 | struct ocfs2_info_request __user *req) |
197 | { | 184 | { |
198 | int status = -EFAULT; | ||
199 | struct ocfs2_info_maxslots oim; | 185 | struct ocfs2_info_maxslots oim; |
200 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 186 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
201 | 187 | ||
202 | if (o2info_from_user(oim, req)) | 188 | if (o2info_from_user(oim, req)) |
203 | goto bail; | 189 | return -EFAULT; |
204 | 190 | ||
205 | oim.im_max_slots = osb->max_slots; | 191 | oim.im_max_slots = osb->max_slots; |
206 | 192 | ||
207 | o2info_set_request_filled(&oim.im_req); | 193 | o2info_set_request_filled(&oim.im_req); |
208 | 194 | ||
209 | if (o2info_to_user(oim, req)) | 195 | if (o2info_to_user(oim, req)) |
210 | goto bail; | 196 | return -EFAULT; |
211 | 197 | ||
212 | status = 0; | 198 | return 0; |
213 | bail: | ||
214 | if (status) | ||
215 | o2info_set_request_error(&oim.im_req, req); | ||
216 | |||
217 | return status; | ||
218 | } | 199 | } |
219 | 200 | ||
220 | static int ocfs2_info_handle_label(struct inode *inode, | 201 | static int ocfs2_info_handle_label(struct inode *inode, |
221 | struct ocfs2_info_request __user *req) | 202 | struct ocfs2_info_request __user *req) |
222 | { | 203 | { |
223 | int status = -EFAULT; | ||
224 | struct ocfs2_info_label oil; | 204 | struct ocfs2_info_label oil; |
225 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 205 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
226 | 206 | ||
227 | if (o2info_from_user(oil, req)) | 207 | if (o2info_from_user(oil, req)) |
228 | goto bail; | 208 | return -EFAULT; |
229 | 209 | ||
230 | memcpy(oil.il_label, osb->vol_label, OCFS2_MAX_VOL_LABEL_LEN); | 210 | memcpy(oil.il_label, osb->vol_label, OCFS2_MAX_VOL_LABEL_LEN); |
231 | 211 | ||
232 | o2info_set_request_filled(&oil.il_req); | 212 | o2info_set_request_filled(&oil.il_req); |
233 | 213 | ||
234 | if (o2info_to_user(oil, req)) | 214 | if (o2info_to_user(oil, req)) |
235 | goto bail; | 215 | return -EFAULT; |
236 | 216 | ||
237 | status = 0; | 217 | return 0; |
238 | bail: | ||
239 | if (status) | ||
240 | o2info_set_request_error(&oil.il_req, req); | ||
241 | |||
242 | return status; | ||
243 | } | 218 | } |
244 | 219 | ||
245 | static int ocfs2_info_handle_uuid(struct inode *inode, | 220 | static int ocfs2_info_handle_uuid(struct inode *inode, |
246 | struct ocfs2_info_request __user *req) | 221 | struct ocfs2_info_request __user *req) |
247 | { | 222 | { |
248 | int status = -EFAULT; | ||
249 | struct ocfs2_info_uuid oiu; | 223 | struct ocfs2_info_uuid oiu; |
250 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 224 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
251 | 225 | ||
252 | if (o2info_from_user(oiu, req)) | 226 | if (o2info_from_user(oiu, req)) |
253 | goto bail; | 227 | return -EFAULT; |
254 | 228 | ||
255 | memcpy(oiu.iu_uuid_str, osb->uuid_str, OCFS2_TEXT_UUID_LEN + 1); | 229 | memcpy(oiu.iu_uuid_str, osb->uuid_str, OCFS2_TEXT_UUID_LEN + 1); |
256 | 230 | ||
257 | o2info_set_request_filled(&oiu.iu_req); | 231 | o2info_set_request_filled(&oiu.iu_req); |
258 | 232 | ||
259 | if (o2info_to_user(oiu, req)) | 233 | if (o2info_to_user(oiu, req)) |
260 | goto bail; | 234 | return -EFAULT; |
261 | |||
262 | status = 0; | ||
263 | bail: | ||
264 | if (status) | ||
265 | o2info_set_request_error(&oiu.iu_req, req); | ||
266 | 235 | ||
267 | return status; | 236 | return 0; |
268 | } | 237 | } |
269 | 238 | ||
270 | static int ocfs2_info_handle_fs_features(struct inode *inode, | 239 | static int ocfs2_info_handle_fs_features(struct inode *inode, |
271 | struct ocfs2_info_request __user *req) | 240 | struct ocfs2_info_request __user *req) |
272 | { | 241 | { |
273 | int status = -EFAULT; | ||
274 | struct ocfs2_info_fs_features oif; | 242 | struct ocfs2_info_fs_features oif; |
275 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 243 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
276 | 244 | ||
277 | if (o2info_from_user(oif, req)) | 245 | if (o2info_from_user(oif, req)) |
278 | goto bail; | 246 | return -EFAULT; |
279 | 247 | ||
280 | oif.if_compat_features = osb->s_feature_compat; | 248 | oif.if_compat_features = osb->s_feature_compat; |
281 | oif.if_incompat_features = osb->s_feature_incompat; | 249 | oif.if_incompat_features = osb->s_feature_incompat; |
@@ -284,39 +252,28 @@ static int ocfs2_info_handle_fs_features(struct inode *inode, | |||
284 | o2info_set_request_filled(&oif.if_req); | 252 | o2info_set_request_filled(&oif.if_req); |
285 | 253 | ||
286 | if (o2info_to_user(oif, req)) | 254 | if (o2info_to_user(oif, req)) |
287 | goto bail; | 255 | return -EFAULT; |
288 | 256 | ||
289 | status = 0; | 257 | return 0; |
290 | bail: | ||
291 | if (status) | ||
292 | o2info_set_request_error(&oif.if_req, req); | ||
293 | |||
294 | return status; | ||
295 | } | 258 | } |
296 | 259 | ||
297 | static int ocfs2_info_handle_journal_size(struct inode *inode, | 260 | static int ocfs2_info_handle_journal_size(struct inode *inode, |
298 | struct ocfs2_info_request __user *req) | 261 | struct ocfs2_info_request __user *req) |
299 | { | 262 | { |
300 | int status = -EFAULT; | ||
301 | struct ocfs2_info_journal_size oij; | 263 | struct ocfs2_info_journal_size oij; |
302 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 264 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
303 | 265 | ||
304 | if (o2info_from_user(oij, req)) | 266 | if (o2info_from_user(oij, req)) |
305 | goto bail; | 267 | return -EFAULT; |
306 | 268 | ||
307 | oij.ij_journal_size = i_size_read(osb->journal->j_inode); | 269 | oij.ij_journal_size = i_size_read(osb->journal->j_inode); |
308 | 270 | ||
309 | o2info_set_request_filled(&oij.ij_req); | 271 | o2info_set_request_filled(&oij.ij_req); |
310 | 272 | ||
311 | if (o2info_to_user(oij, req)) | 273 | if (o2info_to_user(oij, req)) |
312 | goto bail; | 274 | return -EFAULT; |
313 | 275 | ||
314 | status = 0; | 276 | return 0; |
315 | bail: | ||
316 | if (status) | ||
317 | o2info_set_request_error(&oij.ij_req, req); | ||
318 | |||
319 | return status; | ||
320 | } | 277 | } |
321 | 278 | ||
322 | static int ocfs2_info_scan_inode_alloc(struct ocfs2_super *osb, | 279 | static int ocfs2_info_scan_inode_alloc(struct ocfs2_super *osb, |
@@ -373,7 +330,7 @@ static int ocfs2_info_handle_freeinode(struct inode *inode, | |||
373 | u32 i; | 330 | u32 i; |
374 | u64 blkno = -1; | 331 | u64 blkno = -1; |
375 | char namebuf[40]; | 332 | char namebuf[40]; |
376 | int status = -EFAULT, type = INODE_ALLOC_SYSTEM_INODE; | 333 | int status, type = INODE_ALLOC_SYSTEM_INODE; |
377 | struct ocfs2_info_freeinode *oifi = NULL; | 334 | struct ocfs2_info_freeinode *oifi = NULL; |
378 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 335 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
379 | struct inode *inode_alloc = NULL; | 336 | struct inode *inode_alloc = NULL; |
@@ -385,8 +342,10 @@ static int ocfs2_info_handle_freeinode(struct inode *inode, | |||
385 | goto out_err; | 342 | goto out_err; |
386 | } | 343 | } |
387 | 344 | ||
388 | if (o2info_from_user(*oifi, req)) | 345 | if (o2info_from_user(*oifi, req)) { |
389 | goto bail; | 346 | status = -EFAULT; |
347 | goto out_free; | ||
348 | } | ||
390 | 349 | ||
391 | oifi->ifi_slotnum = osb->max_slots; | 350 | oifi->ifi_slotnum = osb->max_slots; |
392 | 351 | ||
@@ -424,14 +383,16 @@ static int ocfs2_info_handle_freeinode(struct inode *inode, | |||
424 | 383 | ||
425 | o2info_set_request_filled(&oifi->ifi_req); | 384 | o2info_set_request_filled(&oifi->ifi_req); |
426 | 385 | ||
427 | if (o2info_to_user(*oifi, req)) | 386 | if (o2info_to_user(*oifi, req)) { |
428 | goto bail; | 387 | status = -EFAULT; |
388 | goto out_free; | ||
389 | } | ||
429 | 390 | ||
430 | status = 0; | 391 | status = 0; |
431 | bail: | 392 | bail: |
432 | if (status) | 393 | if (status) |
433 | o2info_set_request_error(&oifi->ifi_req, req); | 394 | o2info_set_request_error(&oifi->ifi_req, req); |
434 | 395 | out_free: | |
435 | kfree(oifi); | 396 | kfree(oifi); |
436 | out_err: | 397 | out_err: |
437 | return status; | 398 | return status; |
@@ -658,7 +619,7 @@ static int ocfs2_info_handle_freefrag(struct inode *inode, | |||
658 | { | 619 | { |
659 | u64 blkno = -1; | 620 | u64 blkno = -1; |
660 | char namebuf[40]; | 621 | char namebuf[40]; |
661 | int status = -EFAULT, type = GLOBAL_BITMAP_SYSTEM_INODE; | 622 | int status, type = GLOBAL_BITMAP_SYSTEM_INODE; |
662 | 623 | ||
663 | struct ocfs2_info_freefrag *oiff; | 624 | struct ocfs2_info_freefrag *oiff; |
664 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 625 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
@@ -671,8 +632,10 @@ static int ocfs2_info_handle_freefrag(struct inode *inode, | |||
671 | goto out_err; | 632 | goto out_err; |
672 | } | 633 | } |
673 | 634 | ||
674 | if (o2info_from_user(*oiff, req)) | 635 | if (o2info_from_user(*oiff, req)) { |
675 | goto bail; | 636 | status = -EFAULT; |
637 | goto out_free; | ||
638 | } | ||
676 | /* | 639 | /* |
677 | * chunksize from userspace should be power of 2. | 640 | * chunksize from userspace should be power of 2. |
678 | */ | 641 | */ |
@@ -711,14 +674,14 @@ static int ocfs2_info_handle_freefrag(struct inode *inode, | |||
711 | 674 | ||
712 | if (o2info_to_user(*oiff, req)) { | 675 | if (o2info_to_user(*oiff, req)) { |
713 | status = -EFAULT; | 676 | status = -EFAULT; |
714 | goto bail; | 677 | goto out_free; |
715 | } | 678 | } |
716 | 679 | ||
717 | status = 0; | 680 | status = 0; |
718 | bail: | 681 | bail: |
719 | if (status) | 682 | if (status) |
720 | o2info_set_request_error(&oiff->iff_req, req); | 683 | o2info_set_request_error(&oiff->iff_req, req); |
721 | 684 | out_free: | |
722 | kfree(oiff); | 685 | kfree(oiff); |
723 | out_err: | 686 | out_err: |
724 | return status; | 687 | return status; |
@@ -727,23 +690,17 @@ out_err: | |||
727 | static int ocfs2_info_handle_unknown(struct inode *inode, | 690 | static int ocfs2_info_handle_unknown(struct inode *inode, |
728 | struct ocfs2_info_request __user *req) | 691 | struct ocfs2_info_request __user *req) |
729 | { | 692 | { |
730 | int status = -EFAULT; | ||
731 | struct ocfs2_info_request oir; | 693 | struct ocfs2_info_request oir; |
732 | 694 | ||
733 | if (o2info_from_user(oir, req)) | 695 | if (o2info_from_user(oir, req)) |
734 | goto bail; | 696 | return -EFAULT; |
735 | 697 | ||
736 | o2info_clear_request_filled(&oir); | 698 | o2info_clear_request_filled(&oir); |
737 | 699 | ||
738 | if (o2info_to_user(oir, req)) | 700 | if (o2info_to_user(oir, req)) |
739 | goto bail; | 701 | return -EFAULT; |
740 | 702 | ||
741 | status = 0; | 703 | return 0; |
742 | bail: | ||
743 | if (status) | ||
744 | o2info_set_request_error(&oir, req); | ||
745 | |||
746 | return status; | ||
747 | } | 704 | } |
748 | 705 | ||
749 | /* | 706 | /* |
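
The simple o2info handlers above all shrink to the same shape: copy the request in, fill it, copy it back, with early -EFAULT returns instead of the status/bail scaffolding; reporting the request error presumably moves to the common dispatch path, since only the two allocating handlers still call o2info_set_request_error(). A sketch of the resulting shape with a hypothetical struct my_req:

#include <linux/uaccess.h>

struct my_req {
	__u64 value;
};

static int handle_my_req(struct inode *inode,
			 struct my_req __user *ureq)
{
	struct my_req req;

	if (copy_from_user(&req, ureq, sizeof(req)))
		return -EFAULT;

	req.value = inode->i_sb->s_blocksize;

	if (copy_to_user(ureq, &req, sizeof(req)))
		return -EFAULT;

	return 0;
}
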
diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c index 6219aaadeb08..74caffeeee1d 100644 --- a/fs/ocfs2/move_extents.c +++ b/fs/ocfs2/move_extents.c | |||
@@ -404,7 +404,7 @@ static int ocfs2_find_victim_alloc_group(struct inode *inode, | |||
404 | * 'vict_blkno' was out of the valid range. | 404 | * 'vict_blkno' was out of the valid range. |
405 | */ | 405 | */ |
406 | if ((vict_blkno < le64_to_cpu(rec->c_blkno)) || | 406 | if ((vict_blkno < le64_to_cpu(rec->c_blkno)) || |
407 | (vict_blkno >= (le32_to_cpu(ac_dinode->id1.bitmap1.i_total) << | 407 | (vict_blkno >= ((u64)le32_to_cpu(ac_dinode->id1.bitmap1.i_total) << |
408 | bits_per_unit))) { | 408 | bits_per_unit))) { |
409 | ret = -EINVAL; | 409 | ret = -EINVAL; |
410 | goto out; | 410 | goto out; |
diff --git a/fs/ocfs2/quota.h b/fs/ocfs2/quota.h index f266d67df3c6..1eae330193a6 100644 --- a/fs/ocfs2/quota.h +++ b/fs/ocfs2/quota.h | |||
@@ -17,6 +17,9 @@ | |||
17 | 17 | ||
18 | #include "ocfs2.h" | 18 | #include "ocfs2.h" |
19 | 19 | ||
20 | /* Number of quota types we support */ | ||
21 | #define OCFS2_MAXQUOTAS 2 | ||
22 | |||
20 | /* | 23 | /* |
21 | * In-memory structures | 24 | * In-memory structures |
22 | */ | 25 | */ |
@@ -39,7 +42,7 @@ struct ocfs2_recovery_chunk { | |||
39 | }; | 42 | }; |
40 | 43 | ||
41 | struct ocfs2_quota_recovery { | 44 | struct ocfs2_quota_recovery { |
42 | struct list_head r_list[MAXQUOTAS]; /* List of chunks to recover */ | 45 | struct list_head r_list[OCFS2_MAXQUOTAS]; /* List of chunks to recover */ |
43 | }; | 46 | }; |
44 | 47 | ||
45 | /* In-memory structure with quota header information */ | 48 | /* In-memory structure with quota header information */ |
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c index b990a62cff50..c93d67220887 100644 --- a/fs/ocfs2/quota_global.c +++ b/fs/ocfs2/quota_global.c | |||
@@ -336,8 +336,8 @@ void ocfs2_unlock_global_qf(struct ocfs2_mem_dqinfo *oinfo, int ex) | |||
336 | int ocfs2_global_read_info(struct super_block *sb, int type) | 336 | int ocfs2_global_read_info(struct super_block *sb, int type) |
337 | { | 337 | { |
338 | struct inode *gqinode = NULL; | 338 | struct inode *gqinode = NULL; |
339 | unsigned int ino[MAXQUOTAS] = { USER_QUOTA_SYSTEM_INODE, | 339 | unsigned int ino[OCFS2_MAXQUOTAS] = { USER_QUOTA_SYSTEM_INODE, |
340 | GROUP_QUOTA_SYSTEM_INODE }; | 340 | GROUP_QUOTA_SYSTEM_INODE }; |
341 | struct ocfs2_global_disk_dqinfo dinfo; | 341 | struct ocfs2_global_disk_dqinfo dinfo; |
342 | struct mem_dqinfo *info = sb_dqinfo(sb, type); | 342 | struct mem_dqinfo *info = sb_dqinfo(sb, type); |
343 | struct ocfs2_mem_dqinfo *oinfo = info->dqi_priv; | 343 | struct ocfs2_mem_dqinfo *oinfo = info->dqi_priv; |
diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c index 2001862bf2b1..10b653930ee2 100644 --- a/fs/ocfs2/quota_local.c +++ b/fs/ocfs2/quota_local.c | |||
@@ -166,12 +166,12 @@ static int ocfs2_read_quota_block(struct inode *inode, u64 v_block, | |||
166 | /* Check whether we understand format of quota files */ | 166 | /* Check whether we understand format of quota files */ |
167 | static int ocfs2_local_check_quota_file(struct super_block *sb, int type) | 167 | static int ocfs2_local_check_quota_file(struct super_block *sb, int type) |
168 | { | 168 | { |
169 | unsigned int lmagics[MAXQUOTAS] = OCFS2_LOCAL_QMAGICS; | 169 | unsigned int lmagics[OCFS2_MAXQUOTAS] = OCFS2_LOCAL_QMAGICS; |
170 | unsigned int lversions[MAXQUOTAS] = OCFS2_LOCAL_QVERSIONS; | 170 | unsigned int lversions[OCFS2_MAXQUOTAS] = OCFS2_LOCAL_QVERSIONS; |
171 | unsigned int gmagics[MAXQUOTAS] = OCFS2_GLOBAL_QMAGICS; | 171 | unsigned int gmagics[OCFS2_MAXQUOTAS] = OCFS2_GLOBAL_QMAGICS; |
172 | unsigned int gversions[MAXQUOTAS] = OCFS2_GLOBAL_QVERSIONS; | 172 | unsigned int gversions[OCFS2_MAXQUOTAS] = OCFS2_GLOBAL_QVERSIONS; |
173 | unsigned int ino[MAXQUOTAS] = { USER_QUOTA_SYSTEM_INODE, | 173 | unsigned int ino[OCFS2_MAXQUOTAS] = { USER_QUOTA_SYSTEM_INODE, |
174 | GROUP_QUOTA_SYSTEM_INODE }; | 174 | GROUP_QUOTA_SYSTEM_INODE }; |
175 | struct buffer_head *bh = NULL; | 175 | struct buffer_head *bh = NULL; |
176 | struct inode *linode = sb_dqopt(sb)->files[type]; | 176 | struct inode *linode = sb_dqopt(sb)->files[type]; |
177 | struct inode *ginode = NULL; | 177 | struct inode *ginode = NULL; |
@@ -336,7 +336,7 @@ void ocfs2_free_quota_recovery(struct ocfs2_quota_recovery *rec) | |||
336 | { | 336 | { |
337 | int type; | 337 | int type; |
338 | 338 | ||
339 | for (type = 0; type < MAXQUOTAS; type++) | 339 | for (type = 0; type < OCFS2_MAXQUOTAS; type++) |
340 | free_recovery_list(&(rec->r_list[type])); | 340 | free_recovery_list(&(rec->r_list[type])); |
341 | kfree(rec); | 341 | kfree(rec); |
342 | } | 342 | } |
@@ -382,7 +382,7 @@ static struct ocfs2_quota_recovery *ocfs2_alloc_quota_recovery(void) | |||
382 | rec = kmalloc(sizeof(struct ocfs2_quota_recovery), GFP_NOFS); | 382 | rec = kmalloc(sizeof(struct ocfs2_quota_recovery), GFP_NOFS); |
383 | if (!rec) | 383 | if (!rec) |
384 | return NULL; | 384 | return NULL; |
385 | for (type = 0; type < MAXQUOTAS; type++) | 385 | for (type = 0; type < OCFS2_MAXQUOTAS; type++) |
386 | INIT_LIST_HEAD(&(rec->r_list[type])); | 386 | INIT_LIST_HEAD(&(rec->r_list[type])); |
387 | return rec; | 387 | return rec; |
388 | } | 388 | } |
@@ -392,10 +392,11 @@ struct ocfs2_quota_recovery *ocfs2_begin_quota_recovery( | |||
392 | struct ocfs2_super *osb, | 392 | struct ocfs2_super *osb, |
393 | int slot_num) | 393 | int slot_num) |
394 | { | 394 | { |
395 | unsigned int feature[MAXQUOTAS] = { OCFS2_FEATURE_RO_COMPAT_USRQUOTA, | 395 | unsigned int feature[OCFS2_MAXQUOTAS] = { |
396 | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA}; | 396 | OCFS2_FEATURE_RO_COMPAT_USRQUOTA, |
397 | unsigned int ino[MAXQUOTAS] = { LOCAL_USER_QUOTA_SYSTEM_INODE, | 397 | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA}; |
398 | LOCAL_GROUP_QUOTA_SYSTEM_INODE }; | 398 | unsigned int ino[OCFS2_MAXQUOTAS] = { LOCAL_USER_QUOTA_SYSTEM_INODE, |
399 | LOCAL_GROUP_QUOTA_SYSTEM_INODE }; | ||
399 | struct super_block *sb = osb->sb; | 400 | struct super_block *sb = osb->sb; |
400 | struct ocfs2_local_disk_dqinfo *ldinfo; | 401 | struct ocfs2_local_disk_dqinfo *ldinfo; |
401 | struct inode *lqinode; | 402 | struct inode *lqinode; |
@@ -412,7 +413,7 @@ struct ocfs2_quota_recovery *ocfs2_begin_quota_recovery( | |||
412 | return ERR_PTR(-ENOMEM); | 413 | return ERR_PTR(-ENOMEM); |
413 | /* First init... */ | 414 | /* First init... */ |
414 | 415 | ||
415 | for (type = 0; type < MAXQUOTAS; type++) { | 416 | for (type = 0; type < OCFS2_MAXQUOTAS; type++) { |
416 | if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, feature[type])) | 417 | if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, feature[type])) |
417 | continue; | 418 | continue; |
418 | /* At this point, journal of the slot is already replayed so | 419 | /* At this point, journal of the slot is already replayed so |
@@ -589,8 +590,8 @@ int ocfs2_finish_quota_recovery(struct ocfs2_super *osb, | |||
589 | struct ocfs2_quota_recovery *rec, | 590 | struct ocfs2_quota_recovery *rec, |
590 | int slot_num) | 591 | int slot_num) |
591 | { | 592 | { |
592 | unsigned int ino[MAXQUOTAS] = { LOCAL_USER_QUOTA_SYSTEM_INODE, | 593 | unsigned int ino[OCFS2_MAXQUOTAS] = { LOCAL_USER_QUOTA_SYSTEM_INODE, |
593 | LOCAL_GROUP_QUOTA_SYSTEM_INODE }; | 594 | LOCAL_GROUP_QUOTA_SYSTEM_INODE }; |
594 | struct super_block *sb = osb->sb; | 595 | struct super_block *sb = osb->sb; |
595 | struct ocfs2_local_disk_dqinfo *ldinfo; | 596 | struct ocfs2_local_disk_dqinfo *ldinfo; |
596 | struct buffer_head *bh; | 597 | struct buffer_head *bh; |
@@ -604,7 +605,7 @@ int ocfs2_finish_quota_recovery(struct ocfs2_super *osb, | |||
604 | "slot %u\n", osb->dev_str, slot_num); | 605 | "slot %u\n", osb->dev_str, slot_num); |
605 | 606 | ||
606 | mutex_lock(&sb_dqopt(sb)->dqonoff_mutex); | 607 | mutex_lock(&sb_dqopt(sb)->dqonoff_mutex); |
607 | for (type = 0; type < MAXQUOTAS; type++) { | 608 | for (type = 0; type < OCFS2_MAXQUOTAS; type++) { |
608 | if (list_empty(&(rec->r_list[type]))) | 609 | if (list_empty(&(rec->r_list[type]))) |
609 | continue; | 610 | continue; |
610 | trace_ocfs2_finish_quota_recovery(slot_num); | 611 | trace_ocfs2_finish_quota_recovery(slot_num); |
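
The quota hunks above, together with the matching ones in fs/ocfs2/super.c further down, size every per-type array with a filesystem-local OCFS2_MAXQUOTAS instead of the kernel-wide MAXQUOTAS; the reiserfs hunks below introduce REISERFS_MAXQUOTAS for the same reason. ocfs2 implements only user and group quotas, so its arrays can stay at two entries even if the global constant ever grows. A minimal userspace sketch of the idiom, using hypothetical names:

    #include <stdio.h>

    #define MAXQUOTAS    3  /* hypothetical global: user, group, project */
    #define FS_MAXQUOTAS 2  /* this filesystem knows only user and group */

    int main(void)
    {
        /* sized by the fs-local constant: no dead third slot to
         * initialise, iterate over, or free if MAXQUOTAS grows */
        const char *qf_names[FS_MAXQUOTAS] = { "quota.user", "quota.group" };

        for (int type = 0; type < FS_MAXQUOTAS; type++)
            printf("type %d -> %s\n", type, qf_names[type]);
        return 0;
    }
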
diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c index 13a8537d8e8b..720aa389e0ea 100644 --- a/fs/ocfs2/stack_user.c +++ b/fs/ocfs2/stack_user.c | |||
@@ -591,7 +591,7 @@ static int ocfs2_control_release(struct inode *inode, struct file *file) | |||
591 | */ | 591 | */ |
592 | ocfs2_control_this_node = -1; | 592 | ocfs2_control_this_node = -1; |
593 | running_proto.pv_major = 0; | 593 | running_proto.pv_major = 0; |
594 | running_proto.pv_major = 0; | 594 | running_proto.pv_minor = 0; |
595 | } | 595 | } |
596 | 596 | ||
597 | out: | 597 | out: |
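
The stack_user.c hunk is a copy-and-paste fix: pv_major was zeroed twice while pv_minor kept its old value, so a stale minor version survived the reset. One defensive alternative, shown here as a sketch rather than as the kernel's style, is to reset the whole struct in one assignment so no field can be missed:

    #include <assert.h>

    struct proto_version {
        unsigned char pv_major;
        unsigned char pv_minor;
    };

    static struct proto_version running_proto;

    static void reset_running_proto(void)
    {
        /* compound-literal assignment zeroes every member at once */
        running_proto = (struct proto_version){0};
    }

    int main(void)
    {
        running_proto.pv_major = 1;
        running_proto.pv_minor = 8;
        reset_running_proto();
        assert(running_proto.pv_major == 0 && running_proto.pv_minor == 0);
        return 0;
    }
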
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index ddb662b32447..93c85bc745e1 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c | |||
@@ -899,11 +899,12 @@ static int ocfs2_susp_quotas(struct ocfs2_super *osb, int unsuspend) | |||
899 | { | 899 | { |
900 | int type; | 900 | int type; |
901 | struct super_block *sb = osb->sb; | 901 | struct super_block *sb = osb->sb; |
902 | unsigned int feature[MAXQUOTAS] = { OCFS2_FEATURE_RO_COMPAT_USRQUOTA, | 902 | unsigned int feature[OCFS2_MAXQUOTAS] = { |
903 | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA}; | 903 | OCFS2_FEATURE_RO_COMPAT_USRQUOTA, |
904 | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA}; | ||
904 | int status = 0; | 905 | int status = 0; |
905 | 906 | ||
906 | for (type = 0; type < MAXQUOTAS; type++) { | 907 | for (type = 0; type < OCFS2_MAXQUOTAS; type++) { |
907 | if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, feature[type])) | 908 | if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, feature[type])) |
908 | continue; | 909 | continue; |
909 | if (unsuspend) | 910 | if (unsuspend) |
@@ -927,17 +928,19 @@ static int ocfs2_susp_quotas(struct ocfs2_super *osb, int unsuspend) | |||
927 | 928 | ||
928 | static int ocfs2_enable_quotas(struct ocfs2_super *osb) | 929 | static int ocfs2_enable_quotas(struct ocfs2_super *osb) |
929 | { | 930 | { |
930 | struct inode *inode[MAXQUOTAS] = { NULL, NULL }; | 931 | struct inode *inode[OCFS2_MAXQUOTAS] = { NULL, NULL }; |
931 | struct super_block *sb = osb->sb; | 932 | struct super_block *sb = osb->sb; |
932 | unsigned int feature[MAXQUOTAS] = { OCFS2_FEATURE_RO_COMPAT_USRQUOTA, | 933 | unsigned int feature[OCFS2_MAXQUOTAS] = { |
933 | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA}; | 934 | OCFS2_FEATURE_RO_COMPAT_USRQUOTA, |
934 | unsigned int ino[MAXQUOTAS] = { LOCAL_USER_QUOTA_SYSTEM_INODE, | 935 | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA}; |
936 | unsigned int ino[OCFS2_MAXQUOTAS] = { | ||
937 | LOCAL_USER_QUOTA_SYSTEM_INODE, | ||
935 | LOCAL_GROUP_QUOTA_SYSTEM_INODE }; | 938 | LOCAL_GROUP_QUOTA_SYSTEM_INODE }; |
936 | int status; | 939 | int status; |
937 | int type; | 940 | int type; |
938 | 941 | ||
939 | sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE | DQUOT_NEGATIVE_USAGE; | 942 | sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE | DQUOT_NEGATIVE_USAGE; |
940 | for (type = 0; type < MAXQUOTAS; type++) { | 943 | for (type = 0; type < OCFS2_MAXQUOTAS; type++) { |
941 | if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, feature[type])) | 944 | if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, feature[type])) |
942 | continue; | 945 | continue; |
943 | inode[type] = ocfs2_get_system_file_inode(osb, ino[type], | 946 | inode[type] = ocfs2_get_system_file_inode(osb, ino[type], |
@@ -952,12 +955,12 @@ static int ocfs2_enable_quotas(struct ocfs2_super *osb) | |||
952 | goto out_quota_off; | 955 | goto out_quota_off; |
953 | } | 956 | } |
954 | 957 | ||
955 | for (type = 0; type < MAXQUOTAS; type++) | 958 | for (type = 0; type < OCFS2_MAXQUOTAS; type++) |
956 | iput(inode[type]); | 959 | iput(inode[type]); |
957 | return 0; | 960 | return 0; |
958 | out_quota_off: | 961 | out_quota_off: |
959 | ocfs2_disable_quotas(osb); | 962 | ocfs2_disable_quotas(osb); |
960 | for (type = 0; type < MAXQUOTAS; type++) | 963 | for (type = 0; type < OCFS2_MAXQUOTAS; type++) |
961 | iput(inode[type]); | 964 | iput(inode[type]); |
962 | mlog_errno(status); | 965 | mlog_errno(status); |
963 | return status; | 966 | return status; |
@@ -972,7 +975,7 @@ static void ocfs2_disable_quotas(struct ocfs2_super *osb) | |||
972 | 975 | ||
973 | /* We mostly ignore errors in this function because there's not much | 976 | /* We mostly ignore errors in this function because there's not much |
974 | * we can do when we see them */ | 977 | * we can do when we see them */ |
975 | for (type = 0; type < MAXQUOTAS; type++) { | 978 | for (type = 0; type < OCFS2_MAXQUOTAS; type++) { |
976 | if (!sb_has_quota_loaded(sb, type)) | 979 | if (!sb_has_quota_loaded(sb, type)) |
977 | continue; | 980 | continue; |
978 | /* Cancel periodic syncing before we grab dqonoff_mutex */ | 981 | /* Cancel periodic syncing before we grab dqonoff_mutex */ |
@@ -993,8 +996,9 @@ static void ocfs2_disable_quotas(struct ocfs2_super *osb) | |||
993 | /* Handle quota on quotactl */ | 996 | /* Handle quota on quotactl */ |
994 | static int ocfs2_quota_on(struct super_block *sb, int type, int format_id) | 997 | static int ocfs2_quota_on(struct super_block *sb, int type, int format_id) |
995 | { | 998 | { |
996 | unsigned int feature[MAXQUOTAS] = { OCFS2_FEATURE_RO_COMPAT_USRQUOTA, | 999 | unsigned int feature[OCFS2_MAXQUOTAS] = { |
997 | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA}; | 1000 | OCFS2_FEATURE_RO_COMPAT_USRQUOTA, |
1001 | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA}; | ||
998 | 1002 | ||
999 | if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, feature[type])) | 1003 | if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, feature[type])) |
1000 | return -EINVAL; | 1004 | return -EINVAL; |
@@ -2532,6 +2536,7 @@ static void ocfs2_delete_osb(struct ocfs2_super *osb) | |||
2532 | kfree(osb->journal); | 2536 | kfree(osb->journal); |
2533 | kfree(osb->local_alloc_copy); | 2537 | kfree(osb->local_alloc_copy); |
2534 | kfree(osb->uuid_str); | 2538 | kfree(osb->uuid_str); |
2539 | kfree(osb->vol_label); | ||
2535 | ocfs2_put_dlm_debug(osb->osb_dlm_debug); | 2540 | ocfs2_put_dlm_debug(osb->osb_dlm_debug); |
2536 | memset(osb, 0, sizeof(struct ocfs2_super)); | 2541 | memset(osb, 0, sizeof(struct ocfs2_super)); |
2537 | } | 2542 | } |
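
The one-line addition to ocfs2_delete_osb() plugs a small leak: vol_label is allocated when the super block is set up but was never freed at teardown. The rule it restores is that every allocated member gets freed in the delete path, with no guards needed since kfree(NULL) is a no-op. A freestanding sketch (libc free() accepts NULL the same way):

    #include <stdlib.h>
    #include <string.h>

    struct osb_like {
        char *journal;
        char *uuid_str;
        char *vol_label;
    };

    static void delete_osb(struct osb_like *osb)
    {
        /* unconditional frees are safe on NULL members; forgetting
         * one, as with vol_label before this fix, leaks on every
         * unmount */
        free(osb->journal);
        free(osb->uuid_str);
        free(osb->vol_label);
        memset(osb, 0, sizeof(*osb));
    }

    int main(void)
    {
        struct osb_like osb = {0};

        osb.vol_label = malloc(64);
        delete_osb(&osb);
        return 0;
    }
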
diff --git a/fs/pnode.c b/fs/pnode.c index 302bf22c4a30..aae331a5d03b 100644 --- a/fs/pnode.c +++ b/fs/pnode.c | |||
@@ -381,6 +381,7 @@ static void __propagate_umount(struct mount *mnt) | |||
381 | * other children | 381 | * other children |
382 | */ | 382 | */ |
383 | if (child && list_empty(&child->mnt_mounts)) { | 383 | if (child && list_empty(&child->mnt_mounts)) { |
384 | list_del_init(&child->mnt_child); | ||
384 | hlist_del_init_rcu(&child->mnt_hash); | 385 | hlist_del_init_rcu(&child->mnt_hash); |
385 | hlist_add_before_rcu(&child->mnt_hash, &mnt->mnt_hash); | 386 | hlist_add_before_rcu(&child->mnt_hash, &mnt->mnt_hash); |
386 | } | 387 | } |
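
The pnode.c hunk unlinks the child from its parent's child list before the mount is rehashed for the propagated unmount, so it no longer looks attached once it has logically been removed. list_del_init(), unlike a plain delete, leaves the node self-linked, so later emptiness checks on it stay well defined. A freestanding sketch of the kernel's circular list showing that property:

    #include <assert.h>

    struct list_head { struct list_head *next, *prev; };

    static void INIT_LIST_HEAD(struct list_head *h) { h->next = h->prev = h; }

    static void list_add(struct list_head *new, struct list_head *head)
    {
        new->next = head->next;
        new->prev = head;
        head->next->prev = new;
        head->next = new;
    }

    static void list_del_init(struct list_head *e)
    {
        e->prev->next = e->next;
        e->next->prev = e->prev;
        INIT_LIST_HEAD(e);  /* node ends up safely "empty", not dangling */
    }

    static int list_empty(const struct list_head *h) { return h->next == h; }

    int main(void)
    {
        struct list_head mounts, child;

        INIT_LIST_HEAD(&mounts);
        list_add(&child, &mounts);
        assert(!list_empty(&mounts));

        list_del_init(&child);      /* detach before moving elsewhere */
        assert(list_empty(&mounts));
        assert(list_empty(&child)); /* re-initialised: empty and reusable */
        return 0;
    }
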
diff --git a/fs/proc/base.c b/fs/proc/base.c index baf852b648ad..950100e326a1 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c | |||
@@ -376,37 +376,6 @@ static const struct file_operations proc_lstats_operations = { | |||
376 | 376 | ||
377 | #endif | 377 | #endif |
378 | 378 | ||
379 | #ifdef CONFIG_CGROUPS | ||
380 | static int cgroup_open(struct inode *inode, struct file *file) | ||
381 | { | ||
382 | struct pid *pid = PROC_I(inode)->pid; | ||
383 | return single_open(file, proc_cgroup_show, pid); | ||
384 | } | ||
385 | |||
386 | static const struct file_operations proc_cgroup_operations = { | ||
387 | .open = cgroup_open, | ||
388 | .read = seq_read, | ||
389 | .llseek = seq_lseek, | ||
390 | .release = single_release, | ||
391 | }; | ||
392 | #endif | ||
393 | |||
394 | #ifdef CONFIG_PROC_PID_CPUSET | ||
395 | |||
396 | static int cpuset_open(struct inode *inode, struct file *file) | ||
397 | { | ||
398 | struct pid *pid = PROC_I(inode)->pid; | ||
399 | return single_open(file, proc_cpuset_show, pid); | ||
400 | } | ||
401 | |||
402 | static const struct file_operations proc_cpuset_operations = { | ||
403 | .open = cpuset_open, | ||
404 | .read = seq_read, | ||
405 | .llseek = seq_lseek, | ||
406 | .release = single_release, | ||
407 | }; | ||
408 | #endif | ||
409 | |||
410 | static int proc_oom_score(struct seq_file *m, struct pid_namespace *ns, | 379 | static int proc_oom_score(struct seq_file *m, struct pid_namespace *ns, |
411 | struct pid *pid, struct task_struct *task) | 380 | struct pid *pid, struct task_struct *task) |
412 | { | 381 | { |
@@ -632,29 +601,35 @@ static const struct file_operations proc_single_file_operations = { | |||
632 | .release = single_release, | 601 | .release = single_release, |
633 | }; | 602 | }; |
634 | 603 | ||
635 | static int __mem_open(struct inode *inode, struct file *file, unsigned int mode) | 604 | |
605 | struct mm_struct *proc_mem_open(struct inode *inode, unsigned int mode) | ||
636 | { | 606 | { |
637 | struct task_struct *task = get_proc_task(file_inode(file)); | 607 | struct task_struct *task = get_proc_task(inode); |
638 | struct mm_struct *mm; | 608 | struct mm_struct *mm = ERR_PTR(-ESRCH); |
639 | 609 | ||
640 | if (!task) | 610 | if (task) { |
641 | return -ESRCH; | 611 | mm = mm_access(task, mode); |
612 | put_task_struct(task); | ||
642 | 613 | ||
643 | mm = mm_access(task, mode); | 614 | if (!IS_ERR_OR_NULL(mm)) { |
644 | put_task_struct(task); | 615 | /* ensure this mm_struct can't be freed */ |
616 | atomic_inc(&mm->mm_count); | ||
617 | /* but do not pin its memory */ | ||
618 | mmput(mm); | ||
619 | } | ||
620 | } | ||
621 | |||
622 | return mm; | ||
623 | } | ||
624 | |||
625 | static int __mem_open(struct inode *inode, struct file *file, unsigned int mode) | ||
626 | { | ||
627 | struct mm_struct *mm = proc_mem_open(inode, mode); | ||
645 | 628 | ||
646 | if (IS_ERR(mm)) | 629 | if (IS_ERR(mm)) |
647 | return PTR_ERR(mm); | 630 | return PTR_ERR(mm); |
648 | 631 | ||
649 | if (mm) { | ||
650 | /* ensure this mm_struct can't be freed */ | ||
651 | atomic_inc(&mm->mm_count); | ||
652 | /* but do not pin its memory */ | ||
653 | mmput(mm); | ||
654 | } | ||
655 | |||
656 | file->private_data = mm; | 632 | file->private_data = mm; |
657 | |||
658 | return 0; | 633 | return 0; |
659 | } | 634 | } |
660 | 635 | ||
@@ -2573,10 +2548,10 @@ static const struct pid_entry tgid_base_stuff[] = { | |||
2573 | REG("latency", S_IRUGO, proc_lstats_operations), | 2548 | REG("latency", S_IRUGO, proc_lstats_operations), |
2574 | #endif | 2549 | #endif |
2575 | #ifdef CONFIG_PROC_PID_CPUSET | 2550 | #ifdef CONFIG_PROC_PID_CPUSET |
2576 | REG("cpuset", S_IRUGO, proc_cpuset_operations), | 2551 | ONE("cpuset", S_IRUGO, proc_cpuset_show), |
2577 | #endif | 2552 | #endif |
2578 | #ifdef CONFIG_CGROUPS | 2553 | #ifdef CONFIG_CGROUPS |
2579 | REG("cgroup", S_IRUGO, proc_cgroup_operations), | 2554 | ONE("cgroup", S_IRUGO, proc_cgroup_show), |
2580 | #endif | 2555 | #endif |
2581 | ONE("oom_score", S_IRUGO, proc_oom_score), | 2556 | ONE("oom_score", S_IRUGO, proc_oom_score), |
2582 | REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations), | 2557 | REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations), |
@@ -2919,10 +2894,10 @@ static const struct pid_entry tid_base_stuff[] = { | |||
2919 | REG("latency", S_IRUGO, proc_lstats_operations), | 2894 | REG("latency", S_IRUGO, proc_lstats_operations), |
2920 | #endif | 2895 | #endif |
2921 | #ifdef CONFIG_PROC_PID_CPUSET | 2896 | #ifdef CONFIG_PROC_PID_CPUSET |
2922 | REG("cpuset", S_IRUGO, proc_cpuset_operations), | 2897 | ONE("cpuset", S_IRUGO, proc_cpuset_show), |
2923 | #endif | 2898 | #endif |
2924 | #ifdef CONFIG_CGROUPS | 2899 | #ifdef CONFIG_CGROUPS |
2925 | REG("cgroup", S_IRUGO, proc_cgroup_operations), | 2900 | ONE("cgroup", S_IRUGO, proc_cgroup_show), |
2926 | #endif | 2901 | #endif |
2927 | ONE("oom_score", S_IRUGO, proc_oom_score), | 2902 | ONE("oom_score", S_IRUGO, proc_oom_score), |
2928 | REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations), | 2903 | REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations), |
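
proc_mem_open() centralises a two-step refcount trade: mm_access() hands back an mm pinned by mm_users, which also keeps the whole address space alive; taking mm_count and then calling mmput() swaps that for the lighter pin, so the struct mm_struct stays valid for the lifetime of the open file while the task's memory can still be torn down on exit. A userspace model with hypothetical names (the task's own mm_users reference is omitted for brevity):

    #include <assert.h>
    #include <stdlib.h>

    /* "users" pins the expensive payload, "count" pins only the struct */
    struct mm_like {
        int count;      /* struct freed when this reaches 0 (mmdrop) */
        int users;      /* payload freed when this reaches 0 (mmput) */
        char *payload;
    };

    static void put_user_ref(struct mm_like *mm)    /* like mmput() */
    {
        if (--mm->users == 0) {
            free(mm->payload);      /* tear down the "address space" */
            mm->payload = NULL;
        }
    }

    static void put_count_ref(struct mm_like *mm)   /* like mmdrop() */
    {
        if (--mm->count == 0)
            free(mm);
    }

    int main(void)
    {
        struct mm_like *mm = calloc(1, sizeof(*mm));

        mm->count = 1;                  /* baseline ref held by the task */
        mm->users = 1;                  /* our pin, as from mm_access()  */
        mm->payload = malloc(16);

        /* the proc_mem_open() pattern: swap the heavy pin for a light one */
        mm->count++;                    /* atomic_inc(&mm->mm_count) */
        put_user_ref(mm);               /* mmput(mm)                 */
        assert(mm->payload == NULL);    /* memory may now be torn down */

        put_count_ref(mm);              /* our release path: mmdrop()  */
        put_count_ref(mm);              /* task exit drops the last ref */
        return 0;
    }
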
diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 7da13e49128a..aa7a0ee182e1 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h | |||
@@ -268,8 +268,9 @@ extern int proc_remount(struct super_block *, int *, char *); | |||
268 | * task_[no]mmu.c | 268 | * task_[no]mmu.c |
269 | */ | 269 | */ |
270 | struct proc_maps_private { | 270 | struct proc_maps_private { |
271 | struct pid *pid; | 271 | struct inode *inode; |
272 | struct task_struct *task; | 272 | struct task_struct *task; |
273 | struct mm_struct *mm; | ||
273 | #ifdef CONFIG_MMU | 274 | #ifdef CONFIG_MMU |
274 | struct vm_area_struct *tail_vma; | 275 | struct vm_area_struct *tail_vma; |
275 | #endif | 276 | #endif |
@@ -278,6 +279,8 @@ struct proc_maps_private { | |||
278 | #endif | 279 | #endif |
279 | }; | 280 | }; |
280 | 281 | ||
282 | struct mm_struct *proc_mem_open(struct inode *inode, unsigned int mode); | ||
283 | |||
281 | extern const struct file_operations proc_pid_maps_operations; | 284 | extern const struct file_operations proc_pid_maps_operations; |
282 | extern const struct file_operations proc_tid_maps_operations; | 285 | extern const struct file_operations proc_tid_maps_operations; |
283 | extern const struct file_operations proc_pid_numa_maps_operations; | 286 | extern const struct file_operations proc_pid_numa_maps_operations; |
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index 6df8d0722c97..91a4e6426321 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c | |||
@@ -610,8 +610,10 @@ static void __init proc_kcore_text_init(void) | |||
610 | struct kcore_list kcore_modules; | 610 | struct kcore_list kcore_modules; |
611 | static void __init add_modules_range(void) | 611 | static void __init add_modules_range(void) |
612 | { | 612 | { |
613 | kclist_add(&kcore_modules, (void *)MODULES_VADDR, | 613 | if (MODULES_VADDR != VMALLOC_START && MODULES_END != VMALLOC_END) { |
614 | kclist_add(&kcore_modules, (void *)MODULES_VADDR, | ||
614 | MODULES_END - MODULES_VADDR, KCORE_VMALLOC); | 615 | MODULES_END - MODULES_VADDR, KCORE_VMALLOC); |
616 | } | ||
615 | } | 617 | } |
616 | #else | 618 | #else |
617 | static void __init add_modules_range(void) | 619 | static void __init add_modules_range(void) |
diff --git a/fs/proc/page.c b/fs/proc/page.c index e647c55275d9..1e3187da1fed 100644 --- a/fs/proc/page.c +++ b/fs/proc/page.c | |||
@@ -133,6 +133,9 @@ u64 stable_page_flags(struct page *page) | |||
133 | if (PageBuddy(page)) | 133 | if (PageBuddy(page)) |
134 | u |= 1 << KPF_BUDDY; | 134 | u |= 1 << KPF_BUDDY; |
135 | 135 | ||
136 | if (PageBalloon(page)) | ||
137 | u |= 1 << KPF_BALLOON; | ||
138 | |||
136 | u |= kpf_copy_bit(k, KPF_LOCKED, PG_locked); | 139 | u |= kpf_copy_bit(k, KPF_LOCKED, PG_locked); |
137 | 140 | ||
138 | u |= kpf_copy_bit(k, KPF_SLAB, PG_slab); | 141 | u |= kpf_copy_bit(k, KPF_SLAB, PG_slab); |
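
With the page.c hunk, /proc/kpageflags reports balloon-inflated pages. Each entry in that file is a u64 bitmask indexed by page frame number; a small reader, assuming KPF_BALLOON is bit 23 as in the uapi kernel-page-flags.h of this era, might look like:

    #include <fcntl.h>
    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <unistd.h>

    #define KPF_BALLOON 23  /* assumed bit number from kernel-page-flags.h */

    int main(int argc, char **argv)
    {
        uint64_t pfn, flags;
        int fd;

        if (argc != 2) {
            fprintf(stderr, "usage: %s <pfn>\n", argv[0]);
            return 1;
        }
        pfn = strtoull(argv[1], NULL, 0);

        fd = open("/proc/kpageflags", O_RDONLY);    /* needs root */
        if (fd < 0 ||
            pread(fd, &flags, sizeof(flags),
                  pfn * sizeof(flags)) != (ssize_t)sizeof(flags)) {
            perror("kpageflags");
            return 1;
        }
        printf("pfn %" PRIu64 ": balloon=%d\n", pfn,
               (int)((flags >> KPF_BALLOON) & 1));
        close(fd);
        return 0;
    }
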
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index dfc791c42d64..b7a7dc963a35 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c | |||
@@ -87,32 +87,14 @@ unsigned long task_statm(struct mm_struct *mm, | |||
87 | 87 | ||
88 | #ifdef CONFIG_NUMA | 88 | #ifdef CONFIG_NUMA |
89 | /* | 89 | /* |
90 | * These functions are for numa_maps but called in generic **maps seq_file | 90 | * Save get_task_policy() for show_numa_map(). |
91 | * ->start(), ->stop() ops. | ||
92 | * | ||
93 | * numa_maps scans all vmas under mmap_sem and checks their mempolicy. | ||
94 | * Each mempolicy object is controlled by reference counting. The problem here | ||
95 | * is how to avoid accessing dead mempolicy object. | ||
96 | * | ||
97 | * Because we're holding mmap_sem while reading seq_file, it's safe to access | ||
98 | * each vma's mempolicy, no vma object will ever drop its ref to a mempolicy. | ||
99 | * | ||
100 | * A task's mempolicy (task->mempolicy) has different behavior. task->mempolicy | ||
101 | * is set and replaced under mmap_sem but unrefed and cleared under task_lock(). | ||
102 | * So, without task_lock(), we cannot trust get_vma_policy() because we cannot | ||
103 | * guarantee the task never exits under us. But taking task_lock() around | ||
104 | * get_vma_policy() causes a lock order problem. | ||
105 | * | ||
106 | * To access task->mempolicy without lock, we hold a reference count of an | ||
107 | * object pointed by task->mempolicy and remember it. This will guarantee | ||
108 | * that task->mempolicy points to an alive object or NULL in numa_maps accesses. | ||
109 | */ | 91 | */ |
110 | static void hold_task_mempolicy(struct proc_maps_private *priv) | 92 | static void hold_task_mempolicy(struct proc_maps_private *priv) |
111 | { | 93 | { |
112 | struct task_struct *task = priv->task; | 94 | struct task_struct *task = priv->task; |
113 | 95 | ||
114 | task_lock(task); | 96 | task_lock(task); |
115 | priv->task_mempolicy = task->mempolicy; | 97 | priv->task_mempolicy = get_task_policy(task); |
116 | mpol_get(priv->task_mempolicy); | 98 | mpol_get(priv->task_mempolicy); |
117 | task_unlock(task); | 99 | task_unlock(task); |
118 | } | 100 | } |
@@ -129,124 +111,154 @@ static void release_task_mempolicy(struct proc_maps_private *priv) | |||
129 | } | 111 | } |
130 | #endif | 112 | #endif |
131 | 113 | ||
132 | static void vma_stop(struct proc_maps_private *priv, struct vm_area_struct *vma) | 114 | static void vma_stop(struct proc_maps_private *priv) |
133 | { | 115 | { |
134 | if (vma && vma != priv->tail_vma) { | 116 | struct mm_struct *mm = priv->mm; |
135 | struct mm_struct *mm = vma->vm_mm; | 117 | |
136 | release_task_mempolicy(priv); | 118 | release_task_mempolicy(priv); |
137 | up_read(&mm->mmap_sem); | 119 | up_read(&mm->mmap_sem); |
138 | mmput(mm); | 120 | mmput(mm); |
139 | } | 121 | } |
122 | |||
123 | static struct vm_area_struct * | ||
124 | m_next_vma(struct proc_maps_private *priv, struct vm_area_struct *vma) | ||
125 | { | ||
126 | if (vma == priv->tail_vma) | ||
127 | return NULL; | ||
128 | return vma->vm_next ?: priv->tail_vma; | ||
129 | } | ||
130 | |||
131 | static void m_cache_vma(struct seq_file *m, struct vm_area_struct *vma) | ||
132 | { | ||
133 | if (m->count < m->size) /* vma is copied successfully */ | ||
134 | m->version = m_next_vma(m->private, vma) ? vma->vm_start : -1UL; | ||
140 | } | 135 | } |
141 | 136 | ||
142 | static void *m_start(struct seq_file *m, loff_t *pos) | 137 | static void *m_start(struct seq_file *m, loff_t *ppos) |
143 | { | 138 | { |
144 | struct proc_maps_private *priv = m->private; | 139 | struct proc_maps_private *priv = m->private; |
145 | unsigned long last_addr = m->version; | 140 | unsigned long last_addr = m->version; |
146 | struct mm_struct *mm; | 141 | struct mm_struct *mm; |
147 | struct vm_area_struct *vma, *tail_vma = NULL; | 142 | struct vm_area_struct *vma; |
148 | loff_t l = *pos; | 143 | unsigned int pos = *ppos; |
149 | |||
150 | /* Clear the per syscall fields in priv */ | ||
151 | priv->task = NULL; | ||
152 | priv->tail_vma = NULL; | ||
153 | |||
154 | /* | ||
155 | * We remember last_addr rather than next_addr to hit with | ||
156 | * vmacache most of the time. We have zero last_addr at | ||
157 | * the beginning and also after lseek. We will have -1 last_addr | ||
158 | * after the end of the vmas. | ||
159 | */ | ||
160 | 144 | ||
145 | /* See m_cache_vma(). Zero at the start or after lseek. */ | ||
161 | if (last_addr == -1UL) | 146 | if (last_addr == -1UL) |
162 | return NULL; | 147 | return NULL; |
163 | 148 | ||
164 | priv->task = get_pid_task(priv->pid, PIDTYPE_PID); | 149 | priv->task = get_proc_task(priv->inode); |
165 | if (!priv->task) | 150 | if (!priv->task) |
166 | return ERR_PTR(-ESRCH); | 151 | return ERR_PTR(-ESRCH); |
167 | 152 | ||
168 | mm = mm_access(priv->task, PTRACE_MODE_READ); | 153 | mm = priv->mm; |
169 | if (!mm || IS_ERR(mm)) | 154 | if (!mm || !atomic_inc_not_zero(&mm->mm_users)) |
170 | return mm; | 155 | return NULL; |
171 | down_read(&mm->mmap_sem); | ||
172 | 156 | ||
173 | tail_vma = get_gate_vma(priv->task->mm); | 157 | down_read(&mm->mmap_sem); |
174 | priv->tail_vma = tail_vma; | ||
175 | hold_task_mempolicy(priv); | 158 | hold_task_mempolicy(priv); |
176 | /* Start with last addr hint */ | 159 | priv->tail_vma = get_gate_vma(mm); |
177 | vma = find_vma(mm, last_addr); | 160 | |
178 | if (last_addr && vma) { | 161 | if (last_addr) { |
179 | vma = vma->vm_next; | 162 | vma = find_vma(mm, last_addr); |
180 | goto out; | 163 | if (vma && (vma = m_next_vma(priv, vma))) |
164 | return vma; | ||
181 | } | 165 | } |
182 | 166 | ||
183 | /* | 167 | m->version = 0; |
184 | * Check the vma index is within the range and do | 168 | if (pos < mm->map_count) { |
185 | * sequential scan until m_index. | 169 | for (vma = mm->mmap; pos; pos--) { |
186 | */ | 170 | m->version = vma->vm_start; |
187 | vma = NULL; | ||
188 | if ((unsigned long)l < mm->map_count) { | ||
189 | vma = mm->mmap; | ||
190 | while (l-- && vma) | ||
191 | vma = vma->vm_next; | 171 | vma = vma->vm_next; |
192 | goto out; | 172 | } |
173 | return vma; | ||
193 | } | 174 | } |
194 | 175 | ||
195 | if (l != mm->map_count) | 176 | /* we do not bother to update m->version in this case */ |
196 | tail_vma = NULL; /* After gate vma */ | 177 | if (pos == mm->map_count && priv->tail_vma) |
197 | 178 | return priv->tail_vma; | |
198 | out: | ||
199 | if (vma) | ||
200 | return vma; | ||
201 | 179 | ||
202 | release_task_mempolicy(priv); | 180 | vma_stop(priv); |
203 | /* End of vmas has been reached */ | 181 | return NULL; |
204 | m->version = (tail_vma != NULL)? 0: -1UL; | ||
205 | up_read(&mm->mmap_sem); | ||
206 | mmput(mm); | ||
207 | return tail_vma; | ||
208 | } | 182 | } |
209 | 183 | ||
210 | static void *m_next(struct seq_file *m, void *v, loff_t *pos) | 184 | static void *m_next(struct seq_file *m, void *v, loff_t *pos) |
211 | { | 185 | { |
212 | struct proc_maps_private *priv = m->private; | 186 | struct proc_maps_private *priv = m->private; |
213 | struct vm_area_struct *vma = v; | 187 | struct vm_area_struct *next; |
214 | struct vm_area_struct *tail_vma = priv->tail_vma; | ||
215 | 188 | ||
216 | (*pos)++; | 189 | (*pos)++; |
217 | if (vma && (vma != tail_vma) && vma->vm_next) | 190 | next = m_next_vma(priv, v); |
218 | return vma->vm_next; | 191 | if (!next) |
219 | vma_stop(priv, vma); | 192 | vma_stop(priv); |
220 | return (vma != tail_vma)? tail_vma: NULL; | 193 | return next; |
221 | } | 194 | } |
222 | 195 | ||
223 | static void m_stop(struct seq_file *m, void *v) | 196 | static void m_stop(struct seq_file *m, void *v) |
224 | { | 197 | { |
225 | struct proc_maps_private *priv = m->private; | 198 | struct proc_maps_private *priv = m->private; |
226 | struct vm_area_struct *vma = v; | ||
227 | 199 | ||
228 | if (!IS_ERR(vma)) | 200 | if (!IS_ERR_OR_NULL(v)) |
229 | vma_stop(priv, vma); | 201 | vma_stop(priv); |
230 | if (priv->task) | 202 | if (priv->task) { |
231 | put_task_struct(priv->task); | 203 | put_task_struct(priv->task); |
204 | priv->task = NULL; | ||
205 | } | ||
206 | } | ||
207 | |||
208 | static int proc_maps_open(struct inode *inode, struct file *file, | ||
209 | const struct seq_operations *ops, int psize) | ||
210 | { | ||
211 | struct proc_maps_private *priv = __seq_open_private(file, ops, psize); | ||
212 | |||
213 | if (!priv) | ||
214 | return -ENOMEM; | ||
215 | |||
216 | priv->inode = inode; | ||
217 | priv->mm = proc_mem_open(inode, PTRACE_MODE_READ); | ||
218 | if (IS_ERR(priv->mm)) { | ||
219 | int err = PTR_ERR(priv->mm); | ||
220 | |||
221 | seq_release_private(inode, file); | ||
222 | return err; | ||
223 | } | ||
224 | |||
225 | return 0; | ||
226 | } | ||
227 | |||
228 | static int proc_map_release(struct inode *inode, struct file *file) | ||
229 | { | ||
230 | struct seq_file *seq = file->private_data; | ||
231 | struct proc_maps_private *priv = seq->private; | ||
232 | |||
233 | if (priv->mm) | ||
234 | mmdrop(priv->mm); | ||
235 | |||
236 | return seq_release_private(inode, file); | ||
232 | } | 237 | } |
233 | 238 | ||
234 | static int do_maps_open(struct inode *inode, struct file *file, | 239 | static int do_maps_open(struct inode *inode, struct file *file, |
235 | const struct seq_operations *ops) | 240 | const struct seq_operations *ops) |
236 | { | 241 | { |
237 | struct proc_maps_private *priv; | 242 | return proc_maps_open(inode, file, ops, |
238 | int ret = -ENOMEM; | 243 | sizeof(struct proc_maps_private)); |
239 | priv = kzalloc(sizeof(*priv), GFP_KERNEL); | 244 | } |
240 | if (priv) { | 245 | |
241 | priv->pid = proc_pid(inode); | 246 | static pid_t pid_of_stack(struct proc_maps_private *priv, |
242 | ret = seq_open(file, ops); | 247 | struct vm_area_struct *vma, bool is_pid) |
243 | if (!ret) { | 248 | { |
244 | struct seq_file *m = file->private_data; | 249 | struct inode *inode = priv->inode; |
245 | m->private = priv; | 250 | struct task_struct *task; |
246 | } else { | 251 | pid_t ret = 0; |
247 | kfree(priv); | 252 | |
248 | } | 253 | rcu_read_lock(); |
254 | task = pid_task(proc_pid(inode), PIDTYPE_PID); | ||
255 | if (task) { | ||
256 | task = task_of_stack(task, vma, is_pid); | ||
257 | if (task) | ||
258 | ret = task_pid_nr_ns(task, inode->i_sb->s_fs_info); | ||
249 | } | 259 | } |
260 | rcu_read_unlock(); | ||
261 | |||
250 | return ret; | 262 | return ret; |
251 | } | 263 | } |
252 | 264 | ||
@@ -256,7 +268,6 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid) | |||
256 | struct mm_struct *mm = vma->vm_mm; | 268 | struct mm_struct *mm = vma->vm_mm; |
257 | struct file *file = vma->vm_file; | 269 | struct file *file = vma->vm_file; |
258 | struct proc_maps_private *priv = m->private; | 270 | struct proc_maps_private *priv = m->private; |
259 | struct task_struct *task = priv->task; | ||
260 | vm_flags_t flags = vma->vm_flags; | 271 | vm_flags_t flags = vma->vm_flags; |
261 | unsigned long ino = 0; | 272 | unsigned long ino = 0; |
262 | unsigned long long pgoff = 0; | 273 | unsigned long long pgoff = 0; |
@@ -321,8 +332,7 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid) | |||
321 | goto done; | 332 | goto done; |
322 | } | 333 | } |
323 | 334 | ||
324 | tid = vm_is_stack(task, vma, is_pid); | 335 | tid = pid_of_stack(priv, vma, is_pid); |
325 | |||
326 | if (tid != 0) { | 336 | if (tid != 0) { |
327 | /* | 337 | /* |
328 | * Thread stack in /proc/PID/task/TID/maps or | 338 | * Thread stack in /proc/PID/task/TID/maps or |
@@ -349,15 +359,8 @@ done: | |||
349 | 359 | ||
350 | static int show_map(struct seq_file *m, void *v, int is_pid) | 360 | static int show_map(struct seq_file *m, void *v, int is_pid) |
351 | { | 361 | { |
352 | struct vm_area_struct *vma = v; | 362 | show_map_vma(m, v, is_pid); |
353 | struct proc_maps_private *priv = m->private; | 363 | m_cache_vma(m, v); |
354 | struct task_struct *task = priv->task; | ||
355 | |||
356 | show_map_vma(m, vma, is_pid); | ||
357 | |||
358 | if (m->count < m->size) /* vma is copied successfully */ | ||
359 | m->version = (vma != get_gate_vma(task->mm)) | ||
360 | ? vma->vm_start : 0; | ||
361 | return 0; | 364 | return 0; |
362 | } | 365 | } |
363 | 366 | ||
@@ -399,14 +402,14 @@ const struct file_operations proc_pid_maps_operations = { | |||
399 | .open = pid_maps_open, | 402 | .open = pid_maps_open, |
400 | .read = seq_read, | 403 | .read = seq_read, |
401 | .llseek = seq_lseek, | 404 | .llseek = seq_lseek, |
402 | .release = seq_release_private, | 405 | .release = proc_map_release, |
403 | }; | 406 | }; |
404 | 407 | ||
405 | const struct file_operations proc_tid_maps_operations = { | 408 | const struct file_operations proc_tid_maps_operations = { |
406 | .open = tid_maps_open, | 409 | .open = tid_maps_open, |
407 | .read = seq_read, | 410 | .read = seq_read, |
408 | .llseek = seq_lseek, | 411 | .llseek = seq_lseek, |
409 | .release = seq_release_private, | 412 | .release = proc_map_release, |
410 | }; | 413 | }; |
411 | 414 | ||
412 | /* | 415 | /* |
@@ -583,8 +586,6 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma) | |||
583 | 586 | ||
584 | static int show_smap(struct seq_file *m, void *v, int is_pid) | 587 | static int show_smap(struct seq_file *m, void *v, int is_pid) |
585 | { | 588 | { |
586 | struct proc_maps_private *priv = m->private; | ||
587 | struct task_struct *task = priv->task; | ||
588 | struct vm_area_struct *vma = v; | 589 | struct vm_area_struct *vma = v; |
589 | struct mem_size_stats mss; | 590 | struct mem_size_stats mss; |
590 | struct mm_walk smaps_walk = { | 591 | struct mm_walk smaps_walk = { |
@@ -637,10 +638,7 @@ static int show_smap(struct seq_file *m, void *v, int is_pid) | |||
637 | mss.nonlinear >> 10); | 638 | mss.nonlinear >> 10); |
638 | 639 | ||
639 | show_smap_vma_flags(m, vma); | 640 | show_smap_vma_flags(m, vma); |
640 | 641 | m_cache_vma(m, vma); | |
641 | if (m->count < m->size) /* vma is copied successfully */ | ||
642 | m->version = (vma != get_gate_vma(task->mm)) | ||
643 | ? vma->vm_start : 0; | ||
644 | return 0; | 642 | return 0; |
645 | } | 643 | } |
646 | 644 | ||
@@ -682,14 +680,14 @@ const struct file_operations proc_pid_smaps_operations = { | |||
682 | .open = pid_smaps_open, | 680 | .open = pid_smaps_open, |
683 | .read = seq_read, | 681 | .read = seq_read, |
684 | .llseek = seq_lseek, | 682 | .llseek = seq_lseek, |
685 | .release = seq_release_private, | 683 | .release = proc_map_release, |
686 | }; | 684 | }; |
687 | 685 | ||
688 | const struct file_operations proc_tid_smaps_operations = { | 686 | const struct file_operations proc_tid_smaps_operations = { |
689 | .open = tid_smaps_open, | 687 | .open = tid_smaps_open, |
690 | .read = seq_read, | 688 | .read = seq_read, |
691 | .llseek = seq_lseek, | 689 | .llseek = seq_lseek, |
692 | .release = seq_release_private, | 690 | .release = proc_map_release, |
693 | }; | 691 | }; |
694 | 692 | ||
695 | /* | 693 | /* |
@@ -931,23 +929,32 @@ static int pagemap_pte_hole(unsigned long start, unsigned long end, | |||
931 | while (addr < end) { | 929 | while (addr < end) { |
932 | struct vm_area_struct *vma = find_vma(walk->mm, addr); | 930 | struct vm_area_struct *vma = find_vma(walk->mm, addr); |
933 | pagemap_entry_t pme = make_pme(PM_NOT_PRESENT(pm->v2)); | 931 | pagemap_entry_t pme = make_pme(PM_NOT_PRESENT(pm->v2)); |
934 | unsigned long vm_end; | 932 | /* End of address space hole, which we mark as non-present. */ |
935 | 933 | unsigned long hole_end; | |
936 | if (!vma) { | 934 | |
937 | vm_end = end; | 935 | if (vma) |
938 | } else { | 936 | hole_end = min(end, vma->vm_start); |
939 | vm_end = min(end, vma->vm_end); | 937 | else |
940 | if (vma->vm_flags & VM_SOFTDIRTY) | 938 | hole_end = end; |
941 | pme.pme |= PM_STATUS2(pm->v2, __PM_SOFT_DIRTY); | 939 | |
940 | for (; addr < hole_end; addr += PAGE_SIZE) { | ||
941 | err = add_to_pagemap(addr, &pme, pm); | ||
942 | if (err) | ||
943 | goto out; | ||
942 | } | 944 | } |
943 | 945 | ||
944 | for (; addr < vm_end; addr += PAGE_SIZE) { | 946 | if (!vma) |
947 | break; | ||
948 | |||
949 | /* Addresses in the VMA. */ | ||
950 | if (vma->vm_flags & VM_SOFTDIRTY) | ||
951 | pme.pme |= PM_STATUS2(pm->v2, __PM_SOFT_DIRTY); | ||
952 | for (; addr < min(end, vma->vm_end); addr += PAGE_SIZE) { | ||
945 | err = add_to_pagemap(addr, &pme, pm); | 953 | err = add_to_pagemap(addr, &pme, pm); |
946 | if (err) | 954 | if (err) |
947 | goto out; | 955 | goto out; |
948 | } | 956 | } |
949 | } | 957 | } |
950 | |||
951 | out: | 958 | out: |
952 | return err; | 959 | return err; |
953 | } | 960 | } |
@@ -1020,7 +1027,6 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, | |||
1020 | spinlock_t *ptl; | 1027 | spinlock_t *ptl; |
1021 | pte_t *pte; | 1028 | pte_t *pte; |
1022 | int err = 0; | 1029 | int err = 0; |
1023 | pagemap_entry_t pme = make_pme(PM_NOT_PRESENT(pm->v2)); | ||
1024 | 1030 | ||
1025 | /* find the first VMA at or above 'addr' */ | 1031 | /* find the first VMA at or above 'addr' */ |
1026 | vma = find_vma(walk->mm, addr); | 1032 | vma = find_vma(walk->mm, addr); |
@@ -1034,6 +1040,7 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, | |||
1034 | 1040 | ||
1035 | for (; addr != end; addr += PAGE_SIZE) { | 1041 | for (; addr != end; addr += PAGE_SIZE) { |
1036 | unsigned long offset; | 1042 | unsigned long offset; |
1043 | pagemap_entry_t pme; | ||
1037 | 1044 | ||
1038 | offset = (addr & ~PAGEMAP_WALK_MASK) >> | 1045 | offset = (addr & ~PAGEMAP_WALK_MASK) >> |
1039 | PAGE_SHIFT; | 1046 | PAGE_SHIFT; |
@@ -1048,32 +1055,51 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, | |||
1048 | 1055 | ||
1049 | if (pmd_trans_unstable(pmd)) | 1056 | if (pmd_trans_unstable(pmd)) |
1050 | return 0; | 1057 | return 0; |
1051 | for (; addr != end; addr += PAGE_SIZE) { | 1058 | |
1052 | int flags2; | 1059 | while (1) { |
1053 | 1060 | /* End of address space hole, which we mark as non-present. */ | |
1054 | /* check to see if we've left 'vma' behind | 1061 | unsigned long hole_end; |
1055 | * and need a new, higher one */ | 1062 | |
1056 | if (vma && (addr >= vma->vm_end)) { | 1063 | if (vma) |
1057 | vma = find_vma(walk->mm, addr); | 1064 | hole_end = min(end, vma->vm_start); |
1058 | if (vma && (vma->vm_flags & VM_SOFTDIRTY)) | 1065 | else |
1059 | flags2 = __PM_SOFT_DIRTY; | 1066 | hole_end = end; |
1060 | else | 1067 | |
1061 | flags2 = 0; | 1068 | for (; addr < hole_end; addr += PAGE_SIZE) { |
1062 | pme = make_pme(PM_NOT_PRESENT(pm->v2) | PM_STATUS2(pm->v2, flags2)); | 1069 | pagemap_entry_t pme = make_pme(PM_NOT_PRESENT(pm->v2)); |
1070 | |||
1071 | err = add_to_pagemap(addr, &pme, pm); | ||
1072 | if (err) | ||
1073 | return err; | ||
1063 | } | 1074 | } |
1064 | 1075 | ||
1065 | /* check that 'vma' actually covers this address, | 1076 | if (!vma || vma->vm_start >= end) |
1066 | * and that it isn't a huge page vma */ | 1077 | break; |
1067 | if (vma && (vma->vm_start <= addr) && | 1078 | /* |
1068 | !is_vm_hugetlb_page(vma)) { | 1079 | * We can't possibly be in a hugetlb VMA. In general, |
1080 | * for a mm_walk with a pmd_entry and a hugetlb_entry, | ||
1081 | * the pmd_entry can only be called on addresses in a | ||
1082 | * hugetlb if the walk starts in a non-hugetlb VMA and | ||
1083 | * spans a hugepage VMA. Since pagemap_read walks are | ||
1084 | * PMD-sized and PMD-aligned, this will never be true. | ||
1085 | */ | ||
1086 | BUG_ON(is_vm_hugetlb_page(vma)); | ||
1087 | |||
1088 | /* Addresses in the VMA. */ | ||
1089 | for (; addr < min(end, vma->vm_end); addr += PAGE_SIZE) { | ||
1090 | pagemap_entry_t pme; | ||
1069 | pte = pte_offset_map(pmd, addr); | 1091 | pte = pte_offset_map(pmd, addr); |
1070 | pte_to_pagemap_entry(&pme, pm, vma, addr, *pte); | 1092 | pte_to_pagemap_entry(&pme, pm, vma, addr, *pte); |
1071 | /* unmap before userspace copy */ | ||
1072 | pte_unmap(pte); | 1093 | pte_unmap(pte); |
1094 | err = add_to_pagemap(addr, &pme, pm); | ||
1095 | if (err) | ||
1096 | return err; | ||
1073 | } | 1097 | } |
1074 | err = add_to_pagemap(addr, &pme, pm); | 1098 | |
1075 | if (err) | 1099 | if (addr == end) |
1076 | return err; | 1100 | break; |
1101 | |||
1102 | vma = find_vma(walk->mm, addr); | ||
1077 | } | 1103 | } |
1078 | 1104 | ||
1079 | cond_resched(); | 1105 | cond_resched(); |
@@ -1406,7 +1432,6 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid) | |||
1406 | struct vm_area_struct *vma = v; | 1432 | struct vm_area_struct *vma = v; |
1407 | struct numa_maps *md = &numa_priv->md; | 1433 | struct numa_maps *md = &numa_priv->md; |
1408 | struct file *file = vma->vm_file; | 1434 | struct file *file = vma->vm_file; |
1409 | struct task_struct *task = proc_priv->task; | ||
1410 | struct mm_struct *mm = vma->vm_mm; | 1435 | struct mm_struct *mm = vma->vm_mm; |
1411 | struct mm_walk walk = {}; | 1436 | struct mm_walk walk = {}; |
1412 | struct mempolicy *pol; | 1437 | struct mempolicy *pol; |
@@ -1426,9 +1451,13 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid) | |||
1426 | walk.private = md; | 1451 | walk.private = md; |
1427 | walk.mm = mm; | 1452 | walk.mm = mm; |
1428 | 1453 | ||
1429 | pol = get_vma_policy(task, vma, vma->vm_start); | 1454 | pol = __get_vma_policy(vma, vma->vm_start); |
1430 | mpol_to_str(buffer, sizeof(buffer), pol); | 1455 | if (pol) { |
1431 | mpol_cond_put(pol); | 1456 | mpol_to_str(buffer, sizeof(buffer), pol); |
1457 | mpol_cond_put(pol); | ||
1458 | } else { | ||
1459 | mpol_to_str(buffer, sizeof(buffer), proc_priv->task_mempolicy); | ||
1460 | } | ||
1432 | 1461 | ||
1433 | seq_printf(m, "%08lx %s", vma->vm_start, buffer); | 1462 | seq_printf(m, "%08lx %s", vma->vm_start, buffer); |
1434 | 1463 | ||
@@ -1438,7 +1467,7 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid) | |||
1438 | } else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) { | 1467 | } else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) { |
1439 | seq_puts(m, " heap"); | 1468 | seq_puts(m, " heap"); |
1440 | } else { | 1469 | } else { |
1441 | pid_t tid = vm_is_stack(task, vma, is_pid); | 1470 | pid_t tid = pid_of_stack(proc_priv, vma, is_pid); |
1442 | if (tid != 0) { | 1471 | if (tid != 0) { |
1443 | /* | 1472 | /* |
1444 | * Thread stack in /proc/PID/task/TID/maps or | 1473 | * Thread stack in /proc/PID/task/TID/maps or |
@@ -1486,9 +1515,7 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid) | |||
1486 | seq_printf(m, " N%d=%lu", nid, md->node[nid]); | 1515 | seq_printf(m, " N%d=%lu", nid, md->node[nid]); |
1487 | out: | 1516 | out: |
1488 | seq_putc(m, '\n'); | 1517 | seq_putc(m, '\n'); |
1489 | 1518 | m_cache_vma(m, vma); | |
1490 | if (m->count < m->size) | ||
1491 | m->version = (vma != proc_priv->tail_vma) ? vma->vm_start : 0; | ||
1492 | return 0; | 1519 | return 0; |
1493 | } | 1520 | } |
1494 | 1521 | ||
@@ -1519,20 +1546,8 @@ static const struct seq_operations proc_tid_numa_maps_op = { | |||
1519 | static int numa_maps_open(struct inode *inode, struct file *file, | 1546 | static int numa_maps_open(struct inode *inode, struct file *file, |
1520 | const struct seq_operations *ops) | 1547 | const struct seq_operations *ops) |
1521 | { | 1548 | { |
1522 | struct numa_maps_private *priv; | 1549 | return proc_maps_open(inode, file, ops, |
1523 | int ret = -ENOMEM; | 1550 | sizeof(struct numa_maps_private)); |
1524 | priv = kzalloc(sizeof(*priv), GFP_KERNEL); | ||
1525 | if (priv) { | ||
1526 | priv->proc_maps.pid = proc_pid(inode); | ||
1527 | ret = seq_open(file, ops); | ||
1528 | if (!ret) { | ||
1529 | struct seq_file *m = file->private_data; | ||
1530 | m->private = priv; | ||
1531 | } else { | ||
1532 | kfree(priv); | ||
1533 | } | ||
1534 | } | ||
1535 | return ret; | ||
1536 | } | 1551 | } |
1537 | 1552 | ||
1538 | static int pid_numa_maps_open(struct inode *inode, struct file *file) | 1553 | static int pid_numa_maps_open(struct inode *inode, struct file *file) |
@@ -1549,13 +1564,13 @@ const struct file_operations proc_pid_numa_maps_operations = { | |||
1549 | .open = pid_numa_maps_open, | 1564 | .open = pid_numa_maps_open, |
1550 | .read = seq_read, | 1565 | .read = seq_read, |
1551 | .llseek = seq_lseek, | 1566 | .llseek = seq_lseek, |
1552 | .release = seq_release_private, | 1567 | .release = proc_map_release, |
1553 | }; | 1568 | }; |
1554 | 1569 | ||
1555 | const struct file_operations proc_tid_numa_maps_operations = { | 1570 | const struct file_operations proc_tid_numa_maps_operations = { |
1556 | .open = tid_numa_maps_open, | 1571 | .open = tid_numa_maps_open, |
1557 | .read = seq_read, | 1572 | .read = seq_read, |
1558 | .llseek = seq_lseek, | 1573 | .llseek = seq_lseek, |
1559 | .release = seq_release_private, | 1574 | .release = proc_map_release, |
1560 | }; | 1575 | }; |
1561 | #endif /* CONFIG_NUMA */ | 1576 | #endif /* CONFIG_NUMA */ |
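
The rewritten m_start() concentrates the seq_file resume logic: m_cache_vma() saves the start address of the vma that was just emitted in m->version (when it fit in the buffer), so the next read can find_vma() back to it and step to its successor instead of replaying *ppos links from the head of mm->mmap. A self-contained sketch of resuming an iterator from a cached key, with hypothetical names and a sorted array standing in for the vma tree:

    #include <stdio.h>

    static const unsigned long starts[] = { 0x1000, 0x5000, 0x9000, 0xc000 };
    #define NVMAS ((int)(sizeof(starts) / sizeof(starts[0])))

    /* like find_vma(): index of the first entry with start >= addr */
    static int find_idx(unsigned long addr)
    {
        for (int i = 0; i < NVMAS; i++)
            if (starts[i] >= addr)
                return i;
        return -1;
    }

    int main(void)
    {
        unsigned long version = 0;      /* m->version: 0 = from the top */

        for (int shown = 0; shown < NVMAS; shown++) {
            int i;

            if (version)
                /* resume: jump back to the cached key and advance;
                 * O(log n) with a real tree instead of this array */
                i = find_idx(version) + 1;
            else
                i = 0;                  /* cold start or after lseek */

            printf("entry %#lx\n", starts[i]);
            version = starts[i];        /* what m_cache_vma() does */
        }
        return 0;
    }
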
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index 678455d2d683..599ec2e20104 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c | |||
@@ -123,6 +123,25 @@ unsigned long task_statm(struct mm_struct *mm, | |||
123 | return size; | 123 | return size; |
124 | } | 124 | } |
125 | 125 | ||
126 | static pid_t pid_of_stack(struct proc_maps_private *priv, | ||
127 | struct vm_area_struct *vma, bool is_pid) | ||
128 | { | ||
129 | struct inode *inode = priv->inode; | ||
130 | struct task_struct *task; | ||
131 | pid_t ret = 0; | ||
132 | |||
133 | rcu_read_lock(); | ||
134 | task = pid_task(proc_pid(inode), PIDTYPE_PID); | ||
135 | if (task) { | ||
136 | task = task_of_stack(task, vma, is_pid); | ||
137 | if (task) | ||
138 | ret = task_pid_nr_ns(task, inode->i_sb->s_fs_info); | ||
139 | } | ||
140 | rcu_read_unlock(); | ||
141 | |||
142 | return ret; | ||
143 | } | ||
144 | |||
126 | /* | 145 | /* |
127 | * display a single VMA to a sequenced file | 146 | * display a single VMA to a sequenced file |
128 | */ | 147 | */ |
@@ -163,7 +182,7 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma, | |||
163 | seq_pad(m, ' '); | 182 | seq_pad(m, ' '); |
164 | seq_path(m, &file->f_path, ""); | 183 | seq_path(m, &file->f_path, ""); |
165 | } else if (mm) { | 184 | } else if (mm) { |
166 | pid_t tid = vm_is_stack(priv->task, vma, is_pid); | 185 | pid_t tid = pid_of_stack(priv, vma, is_pid); |
167 | 186 | ||
168 | if (tid != 0) { | 187 | if (tid != 0) { |
169 | seq_pad(m, ' '); | 188 | seq_pad(m, ' '); |
@@ -212,22 +231,22 @@ static void *m_start(struct seq_file *m, loff_t *pos) | |||
212 | loff_t n = *pos; | 231 | loff_t n = *pos; |
213 | 232 | ||
214 | /* pin the task and mm whilst we play with them */ | 233 | /* pin the task and mm whilst we play with them */ |
215 | priv->task = get_pid_task(priv->pid, PIDTYPE_PID); | 234 | priv->task = get_proc_task(priv->inode); |
216 | if (!priv->task) | 235 | if (!priv->task) |
217 | return ERR_PTR(-ESRCH); | 236 | return ERR_PTR(-ESRCH); |
218 | 237 | ||
219 | mm = mm_access(priv->task, PTRACE_MODE_READ); | 238 | mm = priv->mm; |
220 | if (!mm || IS_ERR(mm)) { | 239 | if (!mm || !atomic_inc_not_zero(&mm->mm_users)) |
221 | put_task_struct(priv->task); | 240 | return NULL; |
222 | priv->task = NULL; | ||
223 | return mm; | ||
224 | } | ||
225 | down_read(&mm->mmap_sem); | ||
226 | 241 | ||
242 | down_read(&mm->mmap_sem); | ||
227 | /* start from the Nth VMA */ | 243 | /* start from the Nth VMA */ |
228 | for (p = rb_first(&mm->mm_rb); p; p = rb_next(p)) | 244 | for (p = rb_first(&mm->mm_rb); p; p = rb_next(p)) |
229 | if (n-- == 0) | 245 | if (n-- == 0) |
230 | return p; | 246 | return p; |
247 | |||
248 | up_read(&mm->mmap_sem); | ||
249 | mmput(mm); | ||
231 | return NULL; | 250 | return NULL; |
232 | } | 251 | } |
233 | 252 | ||
@@ -235,11 +254,13 @@ static void m_stop(struct seq_file *m, void *_vml) | |||
235 | { | 254 | { |
236 | struct proc_maps_private *priv = m->private; | 255 | struct proc_maps_private *priv = m->private; |
237 | 256 | ||
257 | if (!IS_ERR_OR_NULL(_vml)) { | ||
258 | up_read(&priv->mm->mmap_sem); | ||
259 | mmput(priv->mm); | ||
260 | } | ||
238 | if (priv->task) { | 261 | if (priv->task) { |
239 | struct mm_struct *mm = priv->task->mm; | ||
240 | up_read(&mm->mmap_sem); | ||
241 | mmput(mm); | ||
242 | put_task_struct(priv->task); | 262 | put_task_struct(priv->task); |
263 | priv->task = NULL; | ||
243 | } | 264 | } |
244 | } | 265 | } |
245 | 266 | ||
@@ -269,20 +290,33 @@ static int maps_open(struct inode *inode, struct file *file, | |||
269 | const struct seq_operations *ops) | 290 | const struct seq_operations *ops) |
270 | { | 291 | { |
271 | struct proc_maps_private *priv; | 292 | struct proc_maps_private *priv; |
272 | int ret = -ENOMEM; | 293 | |
273 | 294 | priv = __seq_open_private(file, ops, sizeof(*priv)); | |
274 | priv = kzalloc(sizeof(*priv), GFP_KERNEL); | 295 | if (!priv) |
275 | if (priv) { | 296 | return -ENOMEM; |
276 | priv->pid = proc_pid(inode); | 297 | |
277 | ret = seq_open(file, ops); | 298 | priv->inode = inode; |
278 | if (!ret) { | 299 | priv->mm = proc_mem_open(inode, PTRACE_MODE_READ); |
279 | struct seq_file *m = file->private_data; | 300 | if (IS_ERR(priv->mm)) { |
280 | m->private = priv; | 301 | int err = PTR_ERR(priv->mm); |
281 | } else { | 302 | |
282 | kfree(priv); | 303 | seq_release_private(inode, file); |
283 | } | 304 | return err; |
284 | } | 305 | } |
285 | return ret; | 306 | |
307 | return 0; | ||
308 | } | ||
309 | |||
310 | |||
311 | static int map_release(struct inode *inode, struct file *file) | ||
312 | { | ||
313 | struct seq_file *seq = file->private_data; | ||
314 | struct proc_maps_private *priv = seq->private; | ||
315 | |||
316 | if (priv->mm) | ||
317 | mmdrop(priv->mm); | ||
318 | |||
319 | return seq_release_private(inode, file); | ||
286 | } | 320 | } |
287 | 321 | ||
288 | static int pid_maps_open(struct inode *inode, struct file *file) | 322 | static int pid_maps_open(struct inode *inode, struct file *file) |
@@ -299,13 +333,13 @@ const struct file_operations proc_pid_maps_operations = { | |||
299 | .open = pid_maps_open, | 333 | .open = pid_maps_open, |
300 | .read = seq_read, | 334 | .read = seq_read, |
301 | .llseek = seq_lseek, | 335 | .llseek = seq_lseek, |
302 | .release = seq_release_private, | 336 | .release = map_release, |
303 | }; | 337 | }; |
304 | 338 | ||
305 | const struct file_operations proc_tid_maps_operations = { | 339 | const struct file_operations proc_tid_maps_operations = { |
306 | .open = tid_maps_open, | 340 | .open = tid_maps_open, |
307 | .read = seq_read, | 341 | .read = seq_read, |
308 | .llseek = seq_lseek, | 342 | .llseek = seq_lseek, |
309 | .release = seq_release_private, | 343 | .release = map_release, |
310 | }; | 344 | }; |
311 | 345 | ||
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index f2d0eee9d1f1..8b663b2d9562 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c | |||
@@ -2725,7 +2725,7 @@ static int __init dquot_init(void) | |||
2725 | panic("Cannot create dquot hash table"); | 2725 | panic("Cannot create dquot hash table"); |
2726 | 2726 | ||
2727 | for (i = 0; i < _DQST_DQSTAT_LAST; i++) { | 2727 | for (i = 0; i < _DQST_DQSTAT_LAST; i++) { |
2728 | ret = percpu_counter_init(&dqstats.counter[i], 0); | 2728 | ret = percpu_counter_init(&dqstats.counter[i], 0, GFP_KERNEL); |
2729 | if (ret) | 2729 | if (ret) |
2730 | panic("Cannot create dquot stat counters"); | 2730 | panic("Cannot create dquot stat counters"); |
2731 | } | 2731 | } |
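
This hunk, like the fs/super.c one below, reflects percpu_counter_init() growing a gfp_t argument so each caller states whether the counter's per-cpu storage may be allocated with a sleeping allocation; a boot-time caller such as dquot_init() simply passes GFP_KERNEL. A toy userspace model of a percpu counter, not the kernel's implementation, where a may_block flag stands in for the gfp argument:

    #include <stdio.h>
    #include <stdlib.h>

    #define NR_CPUS 4

    /* per-cpu deltas folded into one global value on read */
    struct pcpu_counter {
        long global;
        long *percpu;   /* allocated at init time */
    };

    /* 'may_block' models the new gfp_t: callers declare the
     * allocation context up front */
    static int pcpu_counter_init(struct pcpu_counter *c, long amount,
                                 int may_block)
    {
        (void)may_block;        /* a real kernel picks an allocator here */
        c->global = amount;
        c->percpu = calloc(NR_CPUS, sizeof(*c->percpu));
        return c->percpu ? 0 : -1;
    }

    static void pcpu_counter_add(struct pcpu_counter *c, int cpu, long delta)
    {
        c->percpu[cpu] += delta;        /* cheap, contention-free path */
    }

    static long pcpu_counter_sum(const struct pcpu_counter *c)
    {
        long sum = c->global;

        for (int cpu = 0; cpu < NR_CPUS; cpu++)
            sum += c->percpu[cpu];
        return sum;
    }

    int main(void)
    {
        struct pcpu_counter c;

        if (pcpu_counter_init(&c, 0, 1))
            return 1;
        pcpu_counter_add(&c, 0, 5);
        pcpu_counter_add(&c, 3, -2);
        printf("sum=%ld\n", pcpu_counter_sum(&c));  /* prints sum=3 */
        free(c.percpu);
        return 0;
    }
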
diff --git a/fs/reiserfs/reiserfs.h b/fs/reiserfs/reiserfs.h index 735c2c2b4536..1894d96ccb7c 100644 --- a/fs/reiserfs/reiserfs.h +++ b/fs/reiserfs/reiserfs.h | |||
@@ -506,6 +506,9 @@ typedef struct reiserfs_proc_info_data { | |||
506 | } reiserfs_proc_info_data_t; | 506 | } reiserfs_proc_info_data_t; |
507 | #endif | 507 | #endif |
508 | 508 | ||
509 | /* Number of quota types we support */ | ||
510 | #define REISERFS_MAXQUOTAS 2 | ||
511 | |||
509 | /* reiserfs union of in-core super block data */ | 512 | /* reiserfs union of in-core super block data */ |
510 | struct reiserfs_sb_info { | 513 | struct reiserfs_sb_info { |
511 | /* Buffer containing the super block */ | 514 | /* Buffer containing the super block */ |
@@ -615,7 +618,7 @@ struct reiserfs_sb_info { | |||
615 | spinlock_t old_work_lock; /* protects old_work and work_queued */ | 618 | spinlock_t old_work_lock; /* protects old_work and work_queued */ |
616 | 619 | ||
617 | #ifdef CONFIG_QUOTA | 620 | #ifdef CONFIG_QUOTA |
618 | char *s_qf_names[MAXQUOTAS]; | 621 | char *s_qf_names[REISERFS_MAXQUOTAS]; |
619 | int s_jquota_fmt; | 622 | int s_jquota_fmt; |
620 | #endif | 623 | #endif |
621 | char *s_jdev; /* Stored jdev for mount option showing */ | 624 | char *s_jdev; /* Stored jdev for mount option showing */ |
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index d46e88a33b02..f1376c92cf74 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c | |||
@@ -206,7 +206,7 @@ static int finish_unfinished(struct super_block *s) | |||
206 | #ifdef CONFIG_QUOTA | 206 | #ifdef CONFIG_QUOTA |
207 | int i; | 207 | int i; |
208 | int ms_active_set; | 208 | int ms_active_set; |
209 | int quota_enabled[MAXQUOTAS]; | 209 | int quota_enabled[REISERFS_MAXQUOTAS]; |
210 | #endif | 210 | #endif |
211 | 211 | ||
212 | /* compose key to look for "save" links */ | 212 | /* compose key to look for "save" links */ |
@@ -227,7 +227,7 @@ static int finish_unfinished(struct super_block *s) | |||
227 | s->s_flags |= MS_ACTIVE; | 227 | s->s_flags |= MS_ACTIVE; |
228 | } | 228 | } |
229 | /* Turn on quotas so that they are updated correctly */ | 229 | /* Turn on quotas so that they are updated correctly */ |
230 | for (i = 0; i < MAXQUOTAS; i++) { | 230 | for (i = 0; i < REISERFS_MAXQUOTAS; i++) { |
231 | quota_enabled[i] = 1; | 231 | quota_enabled[i] = 1; |
232 | if (REISERFS_SB(s)->s_qf_names[i]) { | 232 | if (REISERFS_SB(s)->s_qf_names[i]) { |
233 | int ret; | 233 | int ret; |
@@ -370,7 +370,7 @@ static int finish_unfinished(struct super_block *s) | |||
370 | #ifdef CONFIG_QUOTA | 370 | #ifdef CONFIG_QUOTA |
371 | /* Turn quotas off */ | 371 | /* Turn quotas off */ |
372 | reiserfs_write_unlock(s); | 372 | reiserfs_write_unlock(s); |
373 | for (i = 0; i < MAXQUOTAS; i++) { | 373 | for (i = 0; i < REISERFS_MAXQUOTAS; i++) { |
374 | if (sb_dqopt(s)->files[i] && quota_enabled[i]) | 374 | if (sb_dqopt(s)->files[i] && quota_enabled[i]) |
375 | dquot_quota_off(s, i); | 375 | dquot_quota_off(s, i); |
376 | } | 376 | } |
@@ -1360,7 +1360,7 @@ static void handle_quota_files(struct super_block *s, char **qf_names, | |||
1360 | { | 1360 | { |
1361 | int i; | 1361 | int i; |
1362 | 1362 | ||
1363 | for (i = 0; i < MAXQUOTAS; i++) { | 1363 | for (i = 0; i < REISERFS_MAXQUOTAS; i++) { |
1364 | if (qf_names[i] != REISERFS_SB(s)->s_qf_names[i]) | 1364 | if (qf_names[i] != REISERFS_SB(s)->s_qf_names[i]) |
1365 | kfree(REISERFS_SB(s)->s_qf_names[i]); | 1365 | kfree(REISERFS_SB(s)->s_qf_names[i]); |
1366 | REISERFS_SB(s)->s_qf_names[i] = qf_names[i]; | 1366 | REISERFS_SB(s)->s_qf_names[i] = qf_names[i]; |
@@ -1381,7 +1381,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) | |||
1381 | struct reiserfs_journal *journal = SB_JOURNAL(s); | 1381 | struct reiserfs_journal *journal = SB_JOURNAL(s); |
1382 | char *new_opts = kstrdup(arg, GFP_KERNEL); | 1382 | char *new_opts = kstrdup(arg, GFP_KERNEL); |
1383 | int err; | 1383 | int err; |
1384 | char *qf_names[MAXQUOTAS]; | 1384 | char *qf_names[REISERFS_MAXQUOTAS]; |
1385 | unsigned int qfmt = 0; | 1385 | unsigned int qfmt = 0; |
1386 | #ifdef CONFIG_QUOTA | 1386 | #ifdef CONFIG_QUOTA |
1387 | int i; | 1387 | int i; |
@@ -1400,7 +1400,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) | |||
1400 | (s, arg, &mount_options, &blocks, NULL, &commit_max_age, | 1400 | (s, arg, &mount_options, &blocks, NULL, &commit_max_age, |
1401 | qf_names, &qfmt)) { | 1401 | qf_names, &qfmt)) { |
1402 | #ifdef CONFIG_QUOTA | 1402 | #ifdef CONFIG_QUOTA |
1403 | for (i = 0; i < MAXQUOTAS; i++) | 1403 | for (i = 0; i < REISERFS_MAXQUOTAS; i++) |
1404 | if (qf_names[i] != REISERFS_SB(s)->s_qf_names[i]) | 1404 | if (qf_names[i] != REISERFS_SB(s)->s_qf_names[i]) |
1405 | kfree(qf_names[i]); | 1405 | kfree(qf_names[i]); |
1406 | #endif | 1406 | #endif |
@@ -1844,7 +1844,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) | |||
1844 | char *jdev_name; | 1844 | char *jdev_name; |
1845 | struct reiserfs_sb_info *sbi; | 1845 | struct reiserfs_sb_info *sbi; |
1846 | int errval = -EINVAL; | 1846 | int errval = -EINVAL; |
1847 | char *qf_names[MAXQUOTAS] = {}; | 1847 | char *qf_names[REISERFS_MAXQUOTAS] = {}; |
1848 | unsigned int qfmt = 0; | 1848 | unsigned int qfmt = 0; |
1849 | 1849 | ||
1850 | save_mount_options(s, data); | 1850 | save_mount_options(s, data); |
@@ -2169,7 +2169,7 @@ error_unlocked: | |||
2169 | #ifdef CONFIG_QUOTA | 2169 | #ifdef CONFIG_QUOTA |
2170 | { | 2170 | { |
2171 | int j; | 2171 | int j; |
2172 | for (j = 0; j < MAXQUOTAS; j++) | 2172 | for (j = 0; j < REISERFS_MAXQUOTAS; j++) |
2173 | kfree(qf_names[j]); | 2173 | kfree(qf_names[j]); |
2174 | } | 2174 | } |
2175 | #endif | 2175 | #endif |
diff --git a/fs/stack.c b/fs/stack.c index 5b5388250e29..a54e33ed10f1 100644 --- a/fs/stack.c +++ b/fs/stack.c | |||
@@ -44,7 +44,7 @@ void fsstack_copy_inode_size(struct inode *dst, struct inode *src) | |||
44 | * include/linux/fs.h). We don't necessarily hold i_mutex when this | 44 | * include/linux/fs.h). We don't necessarily hold i_mutex when this |
45 | * is called, so take i_lock for that case. | 45 | * is called, so take i_lock for that case. |
46 | * | 46 | * |
47 | * And if CONFIG_LBADF (on 32-bit), continue our effort to keep the | 47 | * And if CONFIG_LBDAF (on 32-bit), continue our effort to keep the |
48 | * two halves of i_blocks in sync despite SMP or PREEMPT: use i_lock | 48 | * two halves of i_blocks in sync despite SMP or PREEMPT: use i_lock |
49 | * for that case too, and do both at once by combining the tests. | 49 | * for that case too, and do both at once by combining the tests. |
50 | * | 50 | * |
diff --git a/fs/super.c b/fs/super.c index b9a214d2fe98..1b836107acee 100644 --- a/fs/super.c +++ b/fs/super.c | |||
@@ -175,7 +175,8 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags) | |||
175 | goto fail; | 175 | goto fail; |
176 | 176 | ||
177 | for (i = 0; i < SB_FREEZE_LEVELS; i++) { | 177 | for (i = 0; i < SB_FREEZE_LEVELS; i++) { |
178 | if (percpu_counter_init(&s->s_writers.counter[i], 0) < 0) | 178 | if (percpu_counter_init(&s->s_writers.counter[i], 0, |
179 | GFP_KERNEL) < 0) | ||
179 | goto fail; | 180 | goto fail; |
180 | lockdep_init_map(&s->s_writers.lock_map[i], sb_writers_name[i], | 181 | lockdep_init_map(&s->s_writers.lock_map[i], sb_writers_name[i], |
181 | &type->s_writers_key[i], 0); | 182 | &type->s_writers_key[i], 0); |
@@ -65,7 +65,7 @@ int sync_filesystem(struct super_block *sb) | |||
65 | return ret; | 65 | return ret; |
66 | return __sync_filesystem(sb, 1); | 66 | return __sync_filesystem(sb, 1); |
67 | } | 67 | } |
68 | EXPORT_SYMBOL_GPL(sync_filesystem); | 68 | EXPORT_SYMBOL(sync_filesystem); |
69 | 69 | ||
70 | static void sync_inodes_one_sb(struct super_block *sb, void *arg) | 70 | static void sync_inodes_one_sb(struct super_block *sb, void *arg) |
71 | { | 71 | { |
diff --git a/fs/timerfd.c b/fs/timerfd.c index 80c350216ea8..b46ffa94372a 100644 --- a/fs/timerfd.c +++ b/fs/timerfd.c | |||
@@ -333,8 +333,7 @@ static long timerfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg | |||
333 | spin_lock_irq(&ctx->wqh.lock); | 333 | spin_lock_irq(&ctx->wqh.lock); |
334 | if (!timerfd_canceled(ctx)) { | 334 | if (!timerfd_canceled(ctx)) { |
335 | ctx->ticks = ticks; | 335 | ctx->ticks = ticks; |
336 | if (ticks) | 336 | wake_up_locked(&ctx->wqh); |
337 | wake_up_locked(&ctx->wqh); | ||
338 | } else | 337 | } else |
339 | ret = -ECANCELED; | 338 | ret = -ECANCELED; |
340 | spin_unlock_irq(&ctx->wqh.lock); | 339 | spin_unlock_irq(&ctx->wqh.lock); |
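
The timerfd hunk drops the "if (ticks)" guard, so setting the tick count to zero through the ioctl also wakes anyone sleeping on the waitqueue; each waiter re-checks its own condition after the wakeup. The usual userspace analogue is broadcasting a condition variable on every state change while holding the lock that guards the state:

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
    static unsigned long long ticks;
    static unsigned gen;    /* bumped on every state change */

    /* publish a new tick count and always wake waiters: a reset to 0
     * is a state change someone may be blocked on */
    static void set_ticks(unsigned long long t)
    {
        pthread_mutex_lock(&lock);
        ticks = t;
        gen++;
        pthread_cond_broadcast(&cond);  /* no "if (t)" guard */
        pthread_mutex_unlock(&lock);
    }

    static void *waiter(void *arg)
    {
        (void)arg;
        pthread_mutex_lock(&lock);
        while (gen == 0)                /* sleep until the first change */
            pthread_cond_wait(&cond, &lock);
        printf("woke, ticks now %llu\n", ticks);
        pthread_mutex_unlock(&lock);
        return NULL;
    }

    int main(void)
    {
        pthread_t t;

        pthread_create(&t, NULL, waiter, NULL);
        set_ticks(0);                   /* still wakes the waiter */
        pthread_join(t, NULL);
        return 0;
    }
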
diff --git a/fs/udf/file.c b/fs/udf/file.c index 86c6743ec1fe..bb15771b92ae 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c | |||
@@ -223,11 +223,18 @@ out: | |||
223 | 223 | ||
224 | static int udf_release_file(struct inode *inode, struct file *filp) | 224 | static int udf_release_file(struct inode *inode, struct file *filp) |
225 | { | 225 | { |
226 | if (filp->f_mode & FMODE_WRITE) { | 226 | if (filp->f_mode & FMODE_WRITE && |
227 | atomic_read(&inode->i_writecount) > 1) { | ||
228 | /* | ||
229 | * Grab i_mutex to avoid races with writes changing i_size | ||
230 | * while we are running. | ||
231 | */ | ||
232 | mutex_lock(&inode->i_mutex); | ||
227 | down_write(&UDF_I(inode)->i_data_sem); | 233 | down_write(&UDF_I(inode)->i_data_sem); |
228 | udf_discard_prealloc(inode); | 234 | udf_discard_prealloc(inode); |
229 | udf_truncate_tail_extent(inode); | 235 | udf_truncate_tail_extent(inode); |
230 | up_write(&UDF_I(inode)->i_data_sem); | 236 | up_write(&UDF_I(inode)->i_data_sem); |
237 | mutex_unlock(&inode->i_mutex); | ||
231 | } | 238 | } |
232 | return 0; | 239 | return 0; |
233 | } | 240 | } |
diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c index 6eaf5edf1ea1..e77db621ec89 100644 --- a/fs/udf/ialloc.c +++ b/fs/udf/ialloc.c | |||
@@ -45,7 +45,7 @@ void udf_free_inode(struct inode *inode) | |||
45 | udf_free_blocks(sb, NULL, &UDF_I(inode)->i_location, 0, 1); | 45 | udf_free_blocks(sb, NULL, &UDF_I(inode)->i_location, 0, 1); |
46 | } | 46 | } |
47 | 47 | ||
48 | struct inode *udf_new_inode(struct inode *dir, umode_t mode, int *err) | 48 | struct inode *udf_new_inode(struct inode *dir, umode_t mode) |
49 | { | 49 | { |
50 | struct super_block *sb = dir->i_sb; | 50 | struct super_block *sb = dir->i_sb; |
51 | struct udf_sb_info *sbi = UDF_SB(sb); | 51 | struct udf_sb_info *sbi = UDF_SB(sb); |
@@ -55,14 +55,12 @@ struct inode *udf_new_inode(struct inode *dir, umode_t mode, int *err) | |||
55 | struct udf_inode_info *iinfo; | 55 | struct udf_inode_info *iinfo; |
56 | struct udf_inode_info *dinfo = UDF_I(dir); | 56 | struct udf_inode_info *dinfo = UDF_I(dir); |
57 | struct logicalVolIntegrityDescImpUse *lvidiu; | 57 | struct logicalVolIntegrityDescImpUse *lvidiu; |
58 | int err; | ||
58 | 59 | ||
59 | inode = new_inode(sb); | 60 | inode = new_inode(sb); |
60 | 61 | ||
61 | if (!inode) { | 62 | if (!inode) |
62 | *err = -ENOMEM; | 63 | return ERR_PTR(-ENOMEM); |
63 | return NULL; | ||
64 | } | ||
65 | *err = -ENOSPC; | ||
66 | 64 | ||
67 | iinfo = UDF_I(inode); | 65 | iinfo = UDF_I(inode); |
68 | if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_EXTENDED_FE)) { | 66 | if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_EXTENDED_FE)) { |
@@ -80,21 +78,22 @@ struct inode *udf_new_inode(struct inode *dir, umode_t mode, int *err) | |||
80 | } | 78 | } |
81 | if (!iinfo->i_ext.i_data) { | 79 | if (!iinfo->i_ext.i_data) { |
82 | iput(inode); | 80 | iput(inode); |
83 | *err = -ENOMEM; | 81 | return ERR_PTR(-ENOMEM); |
84 | return NULL; | ||
85 | } | 82 | } |
86 | 83 | ||
84 | err = -ENOSPC; | ||
87 | block = udf_new_block(dir->i_sb, NULL, | 85 | block = udf_new_block(dir->i_sb, NULL, |
88 | dinfo->i_location.partitionReferenceNum, | 86 | dinfo->i_location.partitionReferenceNum, |
89 | start, err); | 87 | start, &err); |
90 | if (*err) { | 88 | if (err) { |
91 | iput(inode); | 89 | iput(inode); |
92 | return NULL; | 90 | return ERR_PTR(err); |
93 | } | 91 | } |
94 | 92 | ||
95 | lvidiu = udf_sb_lvidiu(sb); | 93 | lvidiu = udf_sb_lvidiu(sb); |
96 | if (lvidiu) { | 94 | if (lvidiu) { |
97 | iinfo->i_unique = lvid_get_unique_id(sb); | 95 | iinfo->i_unique = lvid_get_unique_id(sb); |
96 | inode->i_generation = iinfo->i_unique; | ||
98 | mutex_lock(&sbi->s_alloc_mutex); | 97 | mutex_lock(&sbi->s_alloc_mutex); |
99 | if (S_ISDIR(mode)) | 98 | if (S_ISDIR(mode)) |
100 | le32_add_cpu(&lvidiu->numDirs, 1); | 99 | le32_add_cpu(&lvidiu->numDirs, 1); |
@@ -123,9 +122,12 @@ struct inode *udf_new_inode(struct inode *dir, umode_t mode, int *err) | |||
123 | iinfo->i_alloc_type = ICBTAG_FLAG_AD_LONG; | 122 | iinfo->i_alloc_type = ICBTAG_FLAG_AD_LONG; |
124 | inode->i_mtime = inode->i_atime = inode->i_ctime = | 123 | inode->i_mtime = inode->i_atime = inode->i_ctime = |
125 | iinfo->i_crtime = current_fs_time(inode->i_sb); | 124 | iinfo->i_crtime = current_fs_time(inode->i_sb); |
126 | insert_inode_hash(inode); | 125 | if (unlikely(insert_inode_locked(inode) < 0)) { |
126 | make_bad_inode(inode); | ||
127 | iput(inode); | ||
128 | return ERR_PTR(-EIO); | ||
129 | } | ||
127 | mark_inode_dirty(inode); | 130 | mark_inode_dirty(inode); |
128 | 131 | ||
129 | *err = 0; | ||
130 | return inode; | 132 | return inode; |
131 | } | 133 | } |
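udf_new_inode() now reports failure through the returned pointer itself rather than an int out-parameter, which is the standard kernel pointer-error encoding: ERR_PTR() stores a small negative errno in the pointer's top address range, IS_ERR() detects it, and PTR_ERR() recovers it. The round trip in miniature (both functions here are illustrative only):

```c
#include <linux/err.h>
#include <linux/fs.h>

static struct inode *try_get(struct inode *valid, int fail)
{
	if (fail)
		return ERR_PTR(-ENOSPC);	/* encode errno in the pointer */
	return valid;
}

static long use_it(struct inode *valid)
{
	struct inode *inode = try_get(valid, 1);

	if (IS_ERR(inode))
		return PTR_ERR(inode);		/* recovers -ENOSPC */
	return 0;
}
```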
diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 236cd48184c2..c9b4df5810d5 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c | |||
@@ -51,7 +51,6 @@ MODULE_LICENSE("GPL"); | |||
51 | 51 | ||
52 | static umode_t udf_convert_permissions(struct fileEntry *); | 52 | static umode_t udf_convert_permissions(struct fileEntry *); |
53 | static int udf_update_inode(struct inode *, int); | 53 | static int udf_update_inode(struct inode *, int); |
54 | static void udf_fill_inode(struct inode *, struct buffer_head *); | ||
55 | static int udf_sync_inode(struct inode *inode); | 54 | static int udf_sync_inode(struct inode *inode); |
56 | static int udf_alloc_i_data(struct inode *inode, size_t size); | 55 | static int udf_alloc_i_data(struct inode *inode, size_t size); |
57 | static sector_t inode_getblk(struct inode *, sector_t, int *, int *); | 56 | static sector_t inode_getblk(struct inode *, sector_t, int *, int *); |
@@ -1271,12 +1270,33 @@ update_time: | |||
1271 | return 0; | 1270 | return 0; |
1272 | } | 1271 | } |
1273 | 1272 | ||
1274 | static void __udf_read_inode(struct inode *inode) | 1273 | /* |
1274 | * Maximum length of linked list formed by ICB hierarchy. The chosen number is | ||
1275 | * arbitrary - just that we hopefully don't limit any real use of rewritten | ||
1276 | * inode on write-once media but avoid looping for too long on corrupted media. | ||
1277 | */ | ||
1278 | #define UDF_MAX_ICB_NESTING 1024 | ||
1279 | |||
1280 | static int udf_read_inode(struct inode *inode, bool hidden_inode) | ||
1275 | { | 1281 | { |
1276 | struct buffer_head *bh = NULL; | 1282 | struct buffer_head *bh = NULL; |
1277 | struct fileEntry *fe; | 1283 | struct fileEntry *fe; |
1284 | struct extendedFileEntry *efe; | ||
1278 | uint16_t ident; | 1285 | uint16_t ident; |
1279 | struct udf_inode_info *iinfo = UDF_I(inode); | 1286 | struct udf_inode_info *iinfo = UDF_I(inode); |
1287 | struct udf_sb_info *sbi = UDF_SB(inode->i_sb); | ||
1288 | struct kernel_lb_addr *iloc = &iinfo->i_location; | ||
1289 | unsigned int link_count; | ||
1290 | unsigned int indirections = 0; | ||
1291 | int ret = -EIO; | ||
1292 | |||
1293 | reread: | ||
1294 | if (iloc->logicalBlockNum >= | ||
1295 | sbi->s_partmaps[iloc->partitionReferenceNum].s_partition_len) { | ||
1296 | udf_debug("block=%d, partition=%d out of range\n", | ||
1297 | iloc->logicalBlockNum, iloc->partitionReferenceNum); | ||
1298 | return -EIO; | ||
1299 | } | ||
1280 | 1300 | ||
1281 | /* | 1301 | /* |
1282 | * Set defaults, but the inode is still incomplete! | 1302 | * Set defaults, but the inode is still incomplete! |
@@ -1290,78 +1310,54 @@ static void __udf_read_inode(struct inode *inode) | |||
1290 | * i_nlink = 1 | 1310 | * i_nlink = 1 |
1291 | * i_op = NULL; | 1311 | * i_op = NULL; |
1292 | */ | 1312 | */ |
1293 | bh = udf_read_ptagged(inode->i_sb, &iinfo->i_location, 0, &ident); | 1313 | bh = udf_read_ptagged(inode->i_sb, iloc, 0, &ident); |
1294 | if (!bh) { | 1314 | if (!bh) { |
1295 | udf_err(inode->i_sb, "(ino %ld) failed !bh\n", inode->i_ino); | 1315 | udf_err(inode->i_sb, "(ino %ld) failed !bh\n", inode->i_ino); |
1296 | make_bad_inode(inode); | 1316 | return -EIO; |
1297 | return; | ||
1298 | } | 1317 | } |
1299 | 1318 | ||
1300 | if (ident != TAG_IDENT_FE && ident != TAG_IDENT_EFE && | 1319 | if (ident != TAG_IDENT_FE && ident != TAG_IDENT_EFE && |
1301 | ident != TAG_IDENT_USE) { | 1320 | ident != TAG_IDENT_USE) { |
1302 | udf_err(inode->i_sb, "(ino %ld) failed ident=%d\n", | 1321 | udf_err(inode->i_sb, "(ino %ld) failed ident=%d\n", |
1303 | inode->i_ino, ident); | 1322 | inode->i_ino, ident); |
1304 | brelse(bh); | 1323 | goto out; |
1305 | make_bad_inode(inode); | ||
1306 | return; | ||
1307 | } | 1324 | } |
1308 | 1325 | ||
1309 | fe = (struct fileEntry *)bh->b_data; | 1326 | fe = (struct fileEntry *)bh->b_data; |
1327 | efe = (struct extendedFileEntry *)bh->b_data; | ||
1310 | 1328 | ||
1311 | if (fe->icbTag.strategyType == cpu_to_le16(4096)) { | 1329 | if (fe->icbTag.strategyType == cpu_to_le16(4096)) { |
1312 | struct buffer_head *ibh; | 1330 | struct buffer_head *ibh; |
1313 | 1331 | ||
1314 | ibh = udf_read_ptagged(inode->i_sb, &iinfo->i_location, 1, | 1332 | ibh = udf_read_ptagged(inode->i_sb, iloc, 1, &ident); |
1315 | &ident); | ||
1316 | if (ident == TAG_IDENT_IE && ibh) { | 1333 | if (ident == TAG_IDENT_IE && ibh) { |
1317 | struct buffer_head *nbh = NULL; | ||
1318 | struct kernel_lb_addr loc; | 1334 | struct kernel_lb_addr loc; |
1319 | struct indirectEntry *ie; | 1335 | struct indirectEntry *ie; |
1320 | 1336 | ||
1321 | ie = (struct indirectEntry *)ibh->b_data; | 1337 | ie = (struct indirectEntry *)ibh->b_data; |
1322 | loc = lelb_to_cpu(ie->indirectICB.extLocation); | 1338 | loc = lelb_to_cpu(ie->indirectICB.extLocation); |
1323 | 1339 | ||
1324 | if (ie->indirectICB.extLength && | 1340 | if (ie->indirectICB.extLength) { |
1325 | (nbh = udf_read_ptagged(inode->i_sb, &loc, 0, | 1341 | brelse(ibh); |
1326 | &ident))) { | 1342 | memcpy(&iinfo->i_location, &loc, |
1327 | if (ident == TAG_IDENT_FE || | 1343 | sizeof(struct kernel_lb_addr)); |
1328 | ident == TAG_IDENT_EFE) { | 1344 | if (++indirections > UDF_MAX_ICB_NESTING) { |
1329 | memcpy(&iinfo->i_location, | 1345 | udf_err(inode->i_sb, |
1330 | &loc, | 1346 | "too many ICBs in ICB hierarchy" |
1331 | sizeof(struct kernel_lb_addr)); | 1347 | " (max %d supported)\n", |
1332 | brelse(bh); | 1348 | UDF_MAX_ICB_NESTING); |
1333 | brelse(ibh); | 1349 | goto out; |
1334 | brelse(nbh); | ||
1335 | __udf_read_inode(inode); | ||
1336 | return; | ||
1337 | } | 1350 | } |
1338 | brelse(nbh); | 1351 | brelse(bh); |
1352 | goto reread; | ||
1339 | } | 1353 | } |
1340 | } | 1354 | } |
1341 | brelse(ibh); | 1355 | brelse(ibh); |
1342 | } else if (fe->icbTag.strategyType != cpu_to_le16(4)) { | 1356 | } else if (fe->icbTag.strategyType != cpu_to_le16(4)) { |
1343 | udf_err(inode->i_sb, "unsupported strategy type: %d\n", | 1357 | udf_err(inode->i_sb, "unsupported strategy type: %d\n", |
1344 | le16_to_cpu(fe->icbTag.strategyType)); | 1358 | le16_to_cpu(fe->icbTag.strategyType)); |
1345 | brelse(bh); | 1359 | goto out; |
1346 | make_bad_inode(inode); | ||
1347 | return; | ||
1348 | } | 1360 | } |
1349 | udf_fill_inode(inode, bh); | ||
1350 | |||
1351 | brelse(bh); | ||
1352 | } | ||
1353 | |||
1354 | static void udf_fill_inode(struct inode *inode, struct buffer_head *bh) | ||
1355 | { | ||
1356 | struct fileEntry *fe; | ||
1357 | struct extendedFileEntry *efe; | ||
1358 | struct udf_sb_info *sbi = UDF_SB(inode->i_sb); | ||
1359 | struct udf_inode_info *iinfo = UDF_I(inode); | ||
1360 | unsigned int link_count; | ||
1361 | |||
1362 | fe = (struct fileEntry *)bh->b_data; | ||
1363 | efe = (struct extendedFileEntry *)bh->b_data; | ||
1364 | |||
1365 | if (fe->icbTag.strategyType == cpu_to_le16(4)) | 1361 | if (fe->icbTag.strategyType == cpu_to_le16(4)) |
1366 | iinfo->i_strat4096 = 0; | 1362 | iinfo->i_strat4096 = 0; |
1367 | else /* if (fe->icbTag.strategyType == cpu_to_le16(4096)) */ | 1363 | else /* if (fe->icbTag.strategyType == cpu_to_le16(4096)) */ |
@@ -1378,11 +1374,10 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh) | |||
1378 | if (fe->descTag.tagIdent == cpu_to_le16(TAG_IDENT_EFE)) { | 1374 | if (fe->descTag.tagIdent == cpu_to_le16(TAG_IDENT_EFE)) { |
1379 | iinfo->i_efe = 1; | 1375 | iinfo->i_efe = 1; |
1380 | iinfo->i_use = 0; | 1376 | iinfo->i_use = 0; |
1381 | if (udf_alloc_i_data(inode, inode->i_sb->s_blocksize - | 1377 | ret = udf_alloc_i_data(inode, inode->i_sb->s_blocksize - |
1382 | sizeof(struct extendedFileEntry))) { | 1378 | sizeof(struct extendedFileEntry)); |
1383 | make_bad_inode(inode); | 1379 | if (ret) |
1384 | return; | 1380 | goto out; |
1385 | } | ||
1386 | memcpy(iinfo->i_ext.i_data, | 1381 | memcpy(iinfo->i_ext.i_data, |
1387 | bh->b_data + sizeof(struct extendedFileEntry), | 1382 | bh->b_data + sizeof(struct extendedFileEntry), |
1388 | inode->i_sb->s_blocksize - | 1383 | inode->i_sb->s_blocksize - |
@@ -1390,11 +1385,10 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh) | |||
1390 | } else if (fe->descTag.tagIdent == cpu_to_le16(TAG_IDENT_FE)) { | 1385 | } else if (fe->descTag.tagIdent == cpu_to_le16(TAG_IDENT_FE)) { |
1391 | iinfo->i_efe = 0; | 1386 | iinfo->i_efe = 0; |
1392 | iinfo->i_use = 0; | 1387 | iinfo->i_use = 0; |
1393 | if (udf_alloc_i_data(inode, inode->i_sb->s_blocksize - | 1388 | ret = udf_alloc_i_data(inode, inode->i_sb->s_blocksize - |
1394 | sizeof(struct fileEntry))) { | 1389 | sizeof(struct fileEntry)); |
1395 | make_bad_inode(inode); | 1390 | if (ret) |
1396 | return; | 1391 | goto out; |
1397 | } | ||
1398 | memcpy(iinfo->i_ext.i_data, | 1392 | memcpy(iinfo->i_ext.i_data, |
1399 | bh->b_data + sizeof(struct fileEntry), | 1393 | bh->b_data + sizeof(struct fileEntry), |
1400 | inode->i_sb->s_blocksize - sizeof(struct fileEntry)); | 1394 | inode->i_sb->s_blocksize - sizeof(struct fileEntry)); |
@@ -1404,18 +1398,18 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh) | |||
1404 | iinfo->i_lenAlloc = le32_to_cpu( | 1398 | iinfo->i_lenAlloc = le32_to_cpu( |
1405 | ((struct unallocSpaceEntry *)bh->b_data)-> | 1399 | ((struct unallocSpaceEntry *)bh->b_data)-> |
1406 | lengthAllocDescs); | 1400 | lengthAllocDescs); |
1407 | if (udf_alloc_i_data(inode, inode->i_sb->s_blocksize - | 1401 | ret = udf_alloc_i_data(inode, inode->i_sb->s_blocksize - |
1408 | sizeof(struct unallocSpaceEntry))) { | 1402 | sizeof(struct unallocSpaceEntry)); |
1409 | make_bad_inode(inode); | 1403 | if (ret) |
1410 | return; | 1404 | goto out; |
1411 | } | ||
1412 | memcpy(iinfo->i_ext.i_data, | 1405 | memcpy(iinfo->i_ext.i_data, |
1413 | bh->b_data + sizeof(struct unallocSpaceEntry), | 1406 | bh->b_data + sizeof(struct unallocSpaceEntry), |
1414 | inode->i_sb->s_blocksize - | 1407 | inode->i_sb->s_blocksize - |
1415 | sizeof(struct unallocSpaceEntry)); | 1408 | sizeof(struct unallocSpaceEntry)); |
1416 | return; | 1409 | return 0; |
1417 | } | 1410 | } |
1418 | 1411 | ||
1412 | ret = -EIO; | ||
1419 | read_lock(&sbi->s_cred_lock); | 1413 | read_lock(&sbi->s_cred_lock); |
1420 | i_uid_write(inode, le32_to_cpu(fe->uid)); | 1414 | i_uid_write(inode, le32_to_cpu(fe->uid)); |
1421 | if (!uid_valid(inode->i_uid) || | 1415 | if (!uid_valid(inode->i_uid) || |
@@ -1441,8 +1435,13 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh) | |||
1441 | read_unlock(&sbi->s_cred_lock); | 1435 | read_unlock(&sbi->s_cred_lock); |
1442 | 1436 | ||
1443 | link_count = le16_to_cpu(fe->fileLinkCount); | 1437 | link_count = le16_to_cpu(fe->fileLinkCount); |
1444 | if (!link_count) | 1438 | if (!link_count) { |
1439 | if (!hidden_inode) { | ||
1440 | ret = -ESTALE; | ||
1441 | goto out; | ||
1442 | } | ||
1445 | link_count = 1; | 1443 | link_count = 1; |
1444 | } | ||
1446 | set_nlink(inode, link_count); | 1445 | set_nlink(inode, link_count); |
1447 | 1446 | ||
1448 | inode->i_size = le64_to_cpu(fe->informationLength); | 1447 | inode->i_size = le64_to_cpu(fe->informationLength); |
@@ -1488,6 +1487,7 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh) | |||
1488 | iinfo->i_lenAlloc = le32_to_cpu(efe->lengthAllocDescs); | 1487 | iinfo->i_lenAlloc = le32_to_cpu(efe->lengthAllocDescs); |
1489 | iinfo->i_checkpoint = le32_to_cpu(efe->checkpoint); | 1488 | iinfo->i_checkpoint = le32_to_cpu(efe->checkpoint); |
1490 | } | 1489 | } |
1490 | inode->i_generation = iinfo->i_unique; | ||
1491 | 1491 | ||
1492 | switch (fe->icbTag.fileType) { | 1492 | switch (fe->icbTag.fileType) { |
1493 | case ICBTAG_FILE_TYPE_DIRECTORY: | 1493 | case ICBTAG_FILE_TYPE_DIRECTORY: |
@@ -1537,8 +1537,7 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh) | |||
1537 | default: | 1537 | default: |
1538 | udf_err(inode->i_sb, "(ino %ld) failed unknown file type=%d\n", | 1538 | udf_err(inode->i_sb, "(ino %ld) failed unknown file type=%d\n", |
1539 | inode->i_ino, fe->icbTag.fileType); | 1539 | inode->i_ino, fe->icbTag.fileType); |
1540 | make_bad_inode(inode); | 1540 | goto out; |
1541 | return; | ||
1542 | } | 1541 | } |
1543 | if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) { | 1542 | if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) { |
1544 | struct deviceSpec *dsea = | 1543 | struct deviceSpec *dsea = |
@@ -1549,8 +1548,12 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh) | |||
1549 | le32_to_cpu(dsea->minorDeviceIdent))); | 1548 | le32_to_cpu(dsea->minorDeviceIdent))); |
1550 | /* Developer ID ??? */ | 1549 | /* Developer ID ??? */ |
1551 | } else | 1550 | } else |
1552 | make_bad_inode(inode); | 1551 | goto out; |
1553 | } | 1552 | } |
1553 | ret = 0; | ||
1554 | out: | ||
1555 | brelse(bh); | ||
1556 | return ret; | ||
1554 | } | 1557 | } |
1555 | 1558 | ||
1556 | static int udf_alloc_i_data(struct inode *inode, size_t size) | 1559 | static int udf_alloc_i_data(struct inode *inode, size_t size) |
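The largest change above folds __udf_read_inode() and udf_fill_inode() into a single udf_read_inode() that returns an error code, and converts the tail recursion through indirect ICBs into a bounded loop. The control-flow skeleton, heavily condensed (validation and inode filling elided; the function name marks it as a sketch):

```c
static int udf_read_inode_skeleton(struct inode *inode, bool hidden_inode)
{
	unsigned int indirections = 0;
	int ret = -EIO;

reread:
	/* range-check UDF_I(inode)->i_location, read + validate the ICB */

	if (0 /* ICB is an indirect entry pointing at another block */) {
		/* copy the new location into UDF_I(inode)->i_location */
		if (++indirections > UDF_MAX_ICB_NESTING)
			goto out;	/* corrupted media: stop, don't spin */
		goto reread;		/* was: tail-recursive __udf_read_inode() */
	}

	/* fill the in-core inode; !link_count && !hidden_inode => -ESTALE */
	ret = 0;
out:
	return ret;
}
```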
@@ -1664,7 +1667,7 @@ static int udf_update_inode(struct inode *inode, int do_sync) | |||
1664 | FE_PERM_U_DELETE | FE_PERM_U_CHATTR)); | 1667 | FE_PERM_U_DELETE | FE_PERM_U_CHATTR)); |
1665 | fe->permissions = cpu_to_le32(udfperms); | 1668 | fe->permissions = cpu_to_le32(udfperms); |
1666 | 1669 | ||
1667 | if (S_ISDIR(inode->i_mode)) | 1670 | if (S_ISDIR(inode->i_mode) && inode->i_nlink > 0) |
1668 | fe->fileLinkCount = cpu_to_le16(inode->i_nlink - 1); | 1671 | fe->fileLinkCount = cpu_to_le16(inode->i_nlink - 1); |
1669 | else | 1672 | else |
1670 | fe->fileLinkCount = cpu_to_le16(inode->i_nlink); | 1673 | fe->fileLinkCount = cpu_to_le16(inode->i_nlink); |
@@ -1826,36 +1829,28 @@ out: | |||
1826 | return err; | 1829 | return err; |
1827 | } | 1830 | } |
1828 | 1831 | ||
1829 | struct inode *udf_iget(struct super_block *sb, struct kernel_lb_addr *ino) | 1832 | struct inode *__udf_iget(struct super_block *sb, struct kernel_lb_addr *ino, |
1833 | bool hidden_inode) | ||
1830 | { | 1834 | { |
1831 | unsigned long block = udf_get_lb_pblock(sb, ino, 0); | 1835 | unsigned long block = udf_get_lb_pblock(sb, ino, 0); |
1832 | struct inode *inode = iget_locked(sb, block); | 1836 | struct inode *inode = iget_locked(sb, block); |
1837 | int err; | ||
1833 | 1838 | ||
1834 | if (!inode) | 1839 | if (!inode) |
1835 | return NULL; | 1840 | return ERR_PTR(-ENOMEM); |
1836 | |||
1837 | if (inode->i_state & I_NEW) { | ||
1838 | memcpy(&UDF_I(inode)->i_location, ino, sizeof(struct kernel_lb_addr)); | ||
1839 | __udf_read_inode(inode); | ||
1840 | unlock_new_inode(inode); | ||
1841 | } | ||
1842 | 1841 | ||
1843 | if (is_bad_inode(inode)) | 1842 | if (!(inode->i_state & I_NEW)) |
1844 | goto out_iput; | 1843 | return inode; |
1845 | 1844 | ||
1846 | if (ino->logicalBlockNum >= UDF_SB(sb)-> | 1845 | memcpy(&UDF_I(inode)->i_location, ino, sizeof(struct kernel_lb_addr)); |
1847 | s_partmaps[ino->partitionReferenceNum].s_partition_len) { | 1846 | err = udf_read_inode(inode, hidden_inode); |
1848 | udf_debug("block=%d, partition=%d out of range\n", | 1847 | if (err < 0) { |
1849 | ino->logicalBlockNum, ino->partitionReferenceNum); | 1848 | iget_failed(inode); |
1850 | make_bad_inode(inode); | 1849 | return ERR_PTR(err); |
1851 | goto out_iput; | ||
1852 | } | 1850 | } |
1851 | unlock_new_inode(inode); | ||
1853 | 1852 | ||
1854 | return inode; | 1853 | return inode; |
1855 | |||
1856 | out_iput: | ||
1857 | iput(inode); | ||
1858 | return NULL; | ||
1859 | } | 1854 | } |
1860 | 1855 | ||
1861 | int udf_add_aext(struct inode *inode, struct extent_position *epos, | 1856 | int udf_add_aext(struct inode *inode, struct extent_position *epos, |
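With udf_read_inode() returning an error code, __udf_iget() can follow the standard iget_locked() protocol exactly. The diff interleaves old and new lines, so here is the new body in one piece (a restatement of the hunk, not new code):

```c
struct inode *__udf_iget(struct super_block *sb, struct kernel_lb_addr *ino,
			 bool hidden_inode)
{
	unsigned long block = udf_get_lb_pblock(sb, ino, 0);
	struct inode *inode = iget_locked(sb, block);
	int err;

	if (!inode)
		return ERR_PTR(-ENOMEM);
	if (!(inode->i_state & I_NEW))
		return inode;		/* cache hit: already initialized */

	memcpy(&UDF_I(inode)->i_location, ino, sizeof(struct kernel_lb_addr));
	err = udf_read_inode(inode, hidden_inode);
	if (err < 0) {
		iget_failed(inode);	/* marks bad, unlocks, drops ref */
		return ERR_PTR(err);
	}
	unlock_new_inode(inode);
	return inode;
}
```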
diff --git a/fs/udf/namei.c b/fs/udf/namei.c index 9737cba1357d..c12e260fd6c4 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c | |||
@@ -270,9 +270,8 @@ static struct dentry *udf_lookup(struct inode *dir, struct dentry *dentry, | |||
270 | NULL, 0), | 270 | NULL, 0), |
271 | }; | 271 | }; |
272 | inode = udf_iget(dir->i_sb, lb); | 272 | inode = udf_iget(dir->i_sb, lb); |
273 | if (!inode) { | 273 | if (IS_ERR(inode)) |
274 | return ERR_PTR(-EACCES); | 274 | return inode; |
275 | } | ||
276 | } else | 275 | } else |
277 | #endif /* UDF_RECOVERY */ | 276 | #endif /* UDF_RECOVERY */ |
278 | 277 | ||
@@ -285,9 +284,8 @@ static struct dentry *udf_lookup(struct inode *dir, struct dentry *dentry, | |||
285 | 284 | ||
286 | loc = lelb_to_cpu(cfi.icb.extLocation); | 285 | loc = lelb_to_cpu(cfi.icb.extLocation); |
287 | inode = udf_iget(dir->i_sb, &loc); | 286 | inode = udf_iget(dir->i_sb, &loc); |
288 | if (!inode) { | 287 | if (IS_ERR(inode)) |
289 | return ERR_PTR(-EACCES); | 288 | return ERR_CAST(inode); |
290 | } | ||
291 | } | 289 | } |
292 | 290 | ||
293 | return d_splice_alias(inode, dentry); | 291 | return d_splice_alias(inode, dentry); |
@@ -550,32 +548,18 @@ static int udf_delete_entry(struct inode *inode, struct fileIdentDesc *fi, | |||
550 | return udf_write_fi(inode, cfi, fi, fibh, NULL, NULL); | 548 | return udf_write_fi(inode, cfi, fi, fibh, NULL, NULL); |
551 | } | 549 | } |
552 | 550 | ||
553 | static int udf_create(struct inode *dir, struct dentry *dentry, umode_t mode, | 551 | static int udf_add_nondir(struct dentry *dentry, struct inode *inode) |
554 | bool excl) | ||
555 | { | 552 | { |
553 | struct udf_inode_info *iinfo = UDF_I(inode); | ||
554 | struct inode *dir = dentry->d_parent->d_inode; | ||
556 | struct udf_fileident_bh fibh; | 555 | struct udf_fileident_bh fibh; |
557 | struct inode *inode; | ||
558 | struct fileIdentDesc cfi, *fi; | 556 | struct fileIdentDesc cfi, *fi; |
559 | int err; | 557 | int err; |
560 | struct udf_inode_info *iinfo; | ||
561 | |||
562 | inode = udf_new_inode(dir, mode, &err); | ||
563 | if (!inode) { | ||
564 | return err; | ||
565 | } | ||
566 | |||
567 | iinfo = UDF_I(inode); | ||
568 | if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) | ||
569 | inode->i_data.a_ops = &udf_adinicb_aops; | ||
570 | else | ||
571 | inode->i_data.a_ops = &udf_aops; | ||
572 | inode->i_op = &udf_file_inode_operations; | ||
573 | inode->i_fop = &udf_file_operations; | ||
574 | mark_inode_dirty(inode); | ||
575 | 558 | ||
576 | fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err); | 559 | fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err); |
577 | if (!fi) { | 560 | if (unlikely(!fi)) { |
578 | inode_dec_link_count(inode); | 561 | inode_dec_link_count(inode); |
562 | unlock_new_inode(inode); | ||
579 | iput(inode); | 563 | iput(inode); |
580 | return err; | 564 | return err; |
581 | } | 565 | } |
@@ -589,23 +573,21 @@ static int udf_create(struct inode *dir, struct dentry *dentry, umode_t mode, | |||
589 | if (fibh.sbh != fibh.ebh) | 573 | if (fibh.sbh != fibh.ebh) |
590 | brelse(fibh.ebh); | 574 | brelse(fibh.ebh); |
591 | brelse(fibh.sbh); | 575 | brelse(fibh.sbh); |
576 | unlock_new_inode(inode); | ||
592 | d_instantiate(dentry, inode); | 577 | d_instantiate(dentry, inode); |
593 | 578 | ||
594 | return 0; | 579 | return 0; |
595 | } | 580 | } |
596 | 581 | ||
597 | static int udf_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) | 582 | static int udf_create(struct inode *dir, struct dentry *dentry, umode_t mode, |
583 | bool excl) | ||
598 | { | 584 | { |
599 | struct inode *inode; | 585 | struct inode *inode = udf_new_inode(dir, mode); |
600 | struct udf_inode_info *iinfo; | ||
601 | int err; | ||
602 | 586 | ||
603 | inode = udf_new_inode(dir, mode, &err); | 587 | if (IS_ERR(inode)) |
604 | if (!inode) | 588 | return PTR_ERR(inode); |
605 | return err; | ||
606 | 589 | ||
607 | iinfo = UDF_I(inode); | 590 | if (UDF_I(inode)->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) |
608 | if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) | ||
609 | inode->i_data.a_ops = &udf_adinicb_aops; | 591 | inode->i_data.a_ops = &udf_adinicb_aops; |
610 | else | 592 | else |
611 | inode->i_data.a_ops = &udf_aops; | 593 | inode->i_data.a_ops = &udf_aops; |
@@ -613,7 +595,25 @@ static int udf_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) | |||
613 | inode->i_fop = &udf_file_operations; | 595 | inode->i_fop = &udf_file_operations; |
614 | mark_inode_dirty(inode); | 596 | mark_inode_dirty(inode); |
615 | 597 | ||
598 | return udf_add_nondir(dentry, inode); | ||
599 | } | ||
600 | |||
601 | static int udf_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) | ||
602 | { | ||
603 | struct inode *inode = udf_new_inode(dir, mode); | ||
604 | |||
605 | if (IS_ERR(inode)) | ||
606 | return PTR_ERR(inode); | ||
607 | |||
608 | if (UDF_I(inode)->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) | ||
609 | inode->i_data.a_ops = &udf_adinicb_aops; | ||
610 | else | ||
611 | inode->i_data.a_ops = &udf_aops; | ||
612 | inode->i_op = &udf_file_inode_operations; | ||
613 | inode->i_fop = &udf_file_operations; | ||
614 | mark_inode_dirty(inode); | ||
616 | d_tmpfile(dentry, inode); | 615 | d_tmpfile(dentry, inode); |
616 | unlock_new_inode(inode); | ||
617 | return 0; | 617 | return 0; |
618 | } | 618 | } |
619 | 619 | ||
@@ -621,44 +621,16 @@ static int udf_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, | |||
621 | dev_t rdev) | 621 | dev_t rdev) |
622 | { | 622 | { |
623 | struct inode *inode; | 623 | struct inode *inode; |
624 | struct udf_fileident_bh fibh; | ||
625 | struct fileIdentDesc cfi, *fi; | ||
626 | int err; | ||
627 | struct udf_inode_info *iinfo; | ||
628 | 624 | ||
629 | if (!old_valid_dev(rdev)) | 625 | if (!old_valid_dev(rdev)) |
630 | return -EINVAL; | 626 | return -EINVAL; |
631 | 627 | ||
632 | err = -EIO; | 628 | inode = udf_new_inode(dir, mode); |
633 | inode = udf_new_inode(dir, mode, &err); | 629 | if (IS_ERR(inode)) |
634 | if (!inode) | 630 | return PTR_ERR(inode); |
635 | goto out; | ||
636 | 631 | ||
637 | iinfo = UDF_I(inode); | ||
638 | init_special_inode(inode, mode, rdev); | 632 | init_special_inode(inode, mode, rdev); |
639 | fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err); | 633 | return udf_add_nondir(dentry, inode); |
640 | if (!fi) { | ||
641 | inode_dec_link_count(inode); | ||
642 | iput(inode); | ||
643 | return err; | ||
644 | } | ||
645 | cfi.icb.extLength = cpu_to_le32(inode->i_sb->s_blocksize); | ||
646 | cfi.icb.extLocation = cpu_to_lelb(iinfo->i_location); | ||
647 | *(__le32 *)((struct allocDescImpUse *)cfi.icb.impUse)->impUse = | ||
648 | cpu_to_le32(iinfo->i_unique & 0x00000000FFFFFFFFUL); | ||
649 | udf_write_fi(dir, &cfi, fi, &fibh, NULL, NULL); | ||
650 | if (UDF_I(dir)->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) | ||
651 | mark_inode_dirty(dir); | ||
652 | mark_inode_dirty(inode); | ||
653 | |||
654 | if (fibh.sbh != fibh.ebh) | ||
655 | brelse(fibh.ebh); | ||
656 | brelse(fibh.sbh); | ||
657 | d_instantiate(dentry, inode); | ||
658 | err = 0; | ||
659 | |||
660 | out: | ||
661 | return err; | ||
662 | } | 634 | } |
663 | 635 | ||
664 | static int udf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) | 636 | static int udf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) |
@@ -670,10 +642,9 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) | |||
670 | struct udf_inode_info *dinfo = UDF_I(dir); | 642 | struct udf_inode_info *dinfo = UDF_I(dir); |
671 | struct udf_inode_info *iinfo; | 643 | struct udf_inode_info *iinfo; |
672 | 644 | ||
673 | err = -EIO; | 645 | inode = udf_new_inode(dir, S_IFDIR | mode); |
674 | inode = udf_new_inode(dir, S_IFDIR | mode, &err); | 646 | if (IS_ERR(inode)) |
675 | if (!inode) | 647 | return PTR_ERR(inode); |
676 | goto out; | ||
677 | 648 | ||
678 | iinfo = UDF_I(inode); | 649 | iinfo = UDF_I(inode); |
679 | inode->i_op = &udf_dir_inode_operations; | 650 | inode->i_op = &udf_dir_inode_operations; |
@@ -681,6 +652,7 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) | |||
681 | fi = udf_add_entry(inode, NULL, &fibh, &cfi, &err); | 652 | fi = udf_add_entry(inode, NULL, &fibh, &cfi, &err); |
682 | if (!fi) { | 653 | if (!fi) { |
683 | inode_dec_link_count(inode); | 654 | inode_dec_link_count(inode); |
655 | unlock_new_inode(inode); | ||
684 | iput(inode); | 656 | iput(inode); |
685 | goto out; | 657 | goto out; |
686 | } | 658 | } |
@@ -699,6 +671,7 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) | |||
699 | if (!fi) { | 671 | if (!fi) { |
700 | clear_nlink(inode); | 672 | clear_nlink(inode); |
701 | mark_inode_dirty(inode); | 673 | mark_inode_dirty(inode); |
674 | unlock_new_inode(inode); | ||
702 | iput(inode); | 675 | iput(inode); |
703 | goto out; | 676 | goto out; |
704 | } | 677 | } |
@@ -710,6 +683,7 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) | |||
710 | udf_write_fi(dir, &cfi, fi, &fibh, NULL, NULL); | 683 | udf_write_fi(dir, &cfi, fi, &fibh, NULL, NULL); |
711 | inc_nlink(dir); | 684 | inc_nlink(dir); |
712 | mark_inode_dirty(dir); | 685 | mark_inode_dirty(dir); |
686 | unlock_new_inode(inode); | ||
713 | d_instantiate(dentry, inode); | 687 | d_instantiate(dentry, inode); |
714 | if (fibh.sbh != fibh.ebh) | 688 | if (fibh.sbh != fibh.ebh) |
715 | brelse(fibh.ebh); | 689 | brelse(fibh.ebh); |
@@ -876,14 +850,11 @@ out: | |||
876 | static int udf_symlink(struct inode *dir, struct dentry *dentry, | 850 | static int udf_symlink(struct inode *dir, struct dentry *dentry, |
877 | const char *symname) | 851 | const char *symname) |
878 | { | 852 | { |
879 | struct inode *inode; | 853 | struct inode *inode = udf_new_inode(dir, S_IFLNK | S_IRWXUGO); |
880 | struct pathComponent *pc; | 854 | struct pathComponent *pc; |
881 | const char *compstart; | 855 | const char *compstart; |
882 | struct udf_fileident_bh fibh; | ||
883 | struct extent_position epos = {}; | 856 | struct extent_position epos = {}; |
884 | int eoffset, elen = 0; | 857 | int eoffset, elen = 0; |
885 | struct fileIdentDesc *fi; | ||
886 | struct fileIdentDesc cfi; | ||
887 | uint8_t *ea; | 858 | uint8_t *ea; |
888 | int err; | 859 | int err; |
889 | int block; | 860 | int block; |
@@ -892,9 +863,8 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry, | |||
892 | struct udf_inode_info *iinfo; | 863 | struct udf_inode_info *iinfo; |
893 | struct super_block *sb = dir->i_sb; | 864 | struct super_block *sb = dir->i_sb; |
894 | 865 | ||
895 | inode = udf_new_inode(dir, S_IFLNK | S_IRWXUGO, &err); | 866 | if (IS_ERR(inode)) |
896 | if (!inode) | 867 | return PTR_ERR(inode); |
897 | goto out; | ||
898 | 868 | ||
899 | iinfo = UDF_I(inode); | 869 | iinfo = UDF_I(inode); |
900 | down_write(&iinfo->i_data_sem); | 870 | down_write(&iinfo->i_data_sem); |
@@ -1012,24 +982,7 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry, | |||
1012 | mark_inode_dirty(inode); | 982 | mark_inode_dirty(inode); |
1013 | up_write(&iinfo->i_data_sem); | 983 | up_write(&iinfo->i_data_sem); |
1014 | 984 | ||
1015 | fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err); | 985 | err = udf_add_nondir(dentry, inode); |
1016 | if (!fi) | ||
1017 | goto out_no_entry; | ||
1018 | cfi.icb.extLength = cpu_to_le32(sb->s_blocksize); | ||
1019 | cfi.icb.extLocation = cpu_to_lelb(iinfo->i_location); | ||
1020 | if (UDF_SB(inode->i_sb)->s_lvid_bh) { | ||
1021 | *(__le32 *)((struct allocDescImpUse *)cfi.icb.impUse)->impUse = | ||
1022 | cpu_to_le32(lvid_get_unique_id(sb)); | ||
1023 | } | ||
1024 | udf_write_fi(dir, &cfi, fi, &fibh, NULL, NULL); | ||
1025 | if (UDF_I(dir)->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) | ||
1026 | mark_inode_dirty(dir); | ||
1027 | if (fibh.sbh != fibh.ebh) | ||
1028 | brelse(fibh.ebh); | ||
1029 | brelse(fibh.sbh); | ||
1030 | d_instantiate(dentry, inode); | ||
1031 | err = 0; | ||
1032 | |||
1033 | out: | 986 | out: |
1034 | kfree(name); | 987 | kfree(name); |
1035 | return err; | 988 | return err; |
@@ -1037,6 +990,7 @@ out: | |||
1037 | out_no_entry: | 990 | out_no_entry: |
1038 | up_write(&iinfo->i_data_sem); | 991 | up_write(&iinfo->i_data_sem); |
1039 | inode_dec_link_count(inode); | 992 | inode_dec_link_count(inode); |
993 | unlock_new_inode(inode); | ||
1040 | iput(inode); | 994 | iput(inode); |
1041 | goto out; | 995 | goto out; |
1042 | } | 996 | } |
@@ -1221,7 +1175,7 @@ static struct dentry *udf_get_parent(struct dentry *child) | |||
1221 | struct udf_fileident_bh fibh; | 1175 | struct udf_fileident_bh fibh; |
1222 | 1176 | ||
1223 | if (!udf_find_entry(child->d_inode, &dotdot, &fibh, &cfi)) | 1177 | if (!udf_find_entry(child->d_inode, &dotdot, &fibh, &cfi)) |
1224 | goto out_unlock; | 1178 | return ERR_PTR(-EACCES); |
1225 | 1179 | ||
1226 | if (fibh.sbh != fibh.ebh) | 1180 | if (fibh.sbh != fibh.ebh) |
1227 | brelse(fibh.ebh); | 1181 | brelse(fibh.ebh); |
@@ -1229,12 +1183,10 @@ static struct dentry *udf_get_parent(struct dentry *child) | |||
1229 | 1183 | ||
1230 | tloc = lelb_to_cpu(cfi.icb.extLocation); | 1184 | tloc = lelb_to_cpu(cfi.icb.extLocation); |
1231 | inode = udf_iget(child->d_inode->i_sb, &tloc); | 1185 | inode = udf_iget(child->d_inode->i_sb, &tloc); |
1232 | if (!inode) | 1186 | if (IS_ERR(inode)) |
1233 | goto out_unlock; | 1187 | return ERR_CAST(inode); |
1234 | 1188 | ||
1235 | return d_obtain_alias(inode); | 1189 | return d_obtain_alias(inode); |
1236 | out_unlock: | ||
1237 | return ERR_PTR(-EACCES); | ||
1238 | } | 1190 | } |
1239 | 1191 | ||
1240 | 1192 | ||
@@ -1251,8 +1203,8 @@ static struct dentry *udf_nfs_get_inode(struct super_block *sb, u32 block, | |||
1251 | loc.partitionReferenceNum = partref; | 1203 | loc.partitionReferenceNum = partref; |
1252 | inode = udf_iget(sb, &loc); | 1204 | inode = udf_iget(sb, &loc); |
1253 | 1205 | ||
1254 | if (inode == NULL) | 1206 | if (IS_ERR(inode)) |
1255 | return ERR_PTR(-ENOMEM); | 1207 | return ERR_CAST(inode); |
1256 | 1208 | ||
1257 | if (generation && inode->i_generation != generation) { | 1209 | if (generation && inode->i_generation != generation) { |
1258 | iput(inode); | 1210 | iput(inode); |
diff --git a/fs/udf/super.c b/fs/udf/super.c index 813da94d447b..e229315bbf7a 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c | |||
@@ -959,14 +959,16 @@ struct inode *udf_find_metadata_inode_efe(struct super_block *sb, | |||
959 | addr.logicalBlockNum = meta_file_loc; | 959 | addr.logicalBlockNum = meta_file_loc; |
960 | addr.partitionReferenceNum = partition_num; | 960 | addr.partitionReferenceNum = partition_num; |
961 | 961 | ||
962 | metadata_fe = udf_iget(sb, &addr); | 962 | metadata_fe = udf_iget_special(sb, &addr); |
963 | 963 | ||
964 | if (metadata_fe == NULL) | 964 | if (IS_ERR(metadata_fe)) { |
965 | udf_warn(sb, "metadata inode efe not found\n"); | 965 | udf_warn(sb, "metadata inode efe not found\n"); |
966 | else if (UDF_I(metadata_fe)->i_alloc_type != ICBTAG_FLAG_AD_SHORT) { | 966 | return metadata_fe; |
967 | } | ||
968 | if (UDF_I(metadata_fe)->i_alloc_type != ICBTAG_FLAG_AD_SHORT) { | ||
967 | udf_warn(sb, "metadata inode efe does not have short allocation descriptors!\n"); | 969 | udf_warn(sb, "metadata inode efe does not have short allocation descriptors!\n"); |
968 | iput(metadata_fe); | 970 | iput(metadata_fe); |
969 | metadata_fe = NULL; | 971 | return ERR_PTR(-EIO); |
970 | } | 972 | } |
971 | 973 | ||
972 | return metadata_fe; | 974 | return metadata_fe; |
@@ -978,6 +980,7 @@ static int udf_load_metadata_files(struct super_block *sb, int partition) | |||
978 | struct udf_part_map *map; | 980 | struct udf_part_map *map; |
979 | struct udf_meta_data *mdata; | 981 | struct udf_meta_data *mdata; |
980 | struct kernel_lb_addr addr; | 982 | struct kernel_lb_addr addr; |
983 | struct inode *fe; | ||
981 | 984 | ||
982 | map = &sbi->s_partmaps[partition]; | 985 | map = &sbi->s_partmaps[partition]; |
983 | mdata = &map->s_type_specific.s_metadata; | 986 | mdata = &map->s_type_specific.s_metadata; |
@@ -986,22 +989,24 @@ static int udf_load_metadata_files(struct super_block *sb, int partition) | |||
986 | udf_debug("Metadata file location: block = %d part = %d\n", | 989 | udf_debug("Metadata file location: block = %d part = %d\n", |
987 | mdata->s_meta_file_loc, map->s_partition_num); | 990 | mdata->s_meta_file_loc, map->s_partition_num); |
988 | 991 | ||
989 | mdata->s_metadata_fe = udf_find_metadata_inode_efe(sb, | 992 | fe = udf_find_metadata_inode_efe(sb, mdata->s_meta_file_loc, |
990 | mdata->s_meta_file_loc, map->s_partition_num); | 993 | map->s_partition_num); |
991 | 994 | if (IS_ERR(fe)) { | |
992 | if (mdata->s_metadata_fe == NULL) { | ||
993 | /* mirror file entry */ | 995 | /* mirror file entry */ |
994 | udf_debug("Mirror metadata file location: block = %d part = %d\n", | 996 | udf_debug("Mirror metadata file location: block = %d part = %d\n", |
995 | mdata->s_mirror_file_loc, map->s_partition_num); | 997 | mdata->s_mirror_file_loc, map->s_partition_num); |
996 | 998 | ||
997 | mdata->s_mirror_fe = udf_find_metadata_inode_efe(sb, | 999 | fe = udf_find_metadata_inode_efe(sb, mdata->s_mirror_file_loc, |
998 | mdata->s_mirror_file_loc, map->s_partition_num); | 1000 | map->s_partition_num); |
999 | 1001 | ||
1000 | if (mdata->s_mirror_fe == NULL) { | 1002 | if (IS_ERR(fe)) { |
1001 | udf_err(sb, "Both metadata and mirror metadata inode efe can not found\n"); | 1003 | udf_err(sb, "Both metadata and mirror metadata inode efe can not found\n"); |
1002 | return -EIO; | 1004 | return PTR_ERR(fe); |
1003 | } | 1005 | } |
1004 | } | 1006 | mdata->s_mirror_fe = fe; |
1007 | } else | ||
1008 | mdata->s_metadata_fe = fe; | ||
1009 | |||
1005 | 1010 | ||
1006 | /* | 1011 | /* |
1007 | * bitmap file entry | 1012 | * bitmap file entry |
@@ -1015,15 +1020,16 @@ static int udf_load_metadata_files(struct super_block *sb, int partition) | |||
1015 | udf_debug("Bitmap file location: block = %d part = %d\n", | 1020 | udf_debug("Bitmap file location: block = %d part = %d\n", |
1016 | addr.logicalBlockNum, addr.partitionReferenceNum); | 1021 | addr.logicalBlockNum, addr.partitionReferenceNum); |
1017 | 1022 | ||
1018 | mdata->s_bitmap_fe = udf_iget(sb, &addr); | 1023 | fe = udf_iget_special(sb, &addr); |
1019 | if (mdata->s_bitmap_fe == NULL) { | 1024 | if (IS_ERR(fe)) { |
1020 | if (sb->s_flags & MS_RDONLY) | 1025 | if (sb->s_flags & MS_RDONLY) |
1021 | udf_warn(sb, "bitmap inode efe not found but it's ok since the disc is mounted read-only\n"); | 1026 | udf_warn(sb, "bitmap inode efe not found but it's ok since the disc is mounted read-only\n"); |
1022 | else { | 1027 | else { |
1023 | udf_err(sb, "bitmap inode efe not found and attempted read-write mount\n"); | 1028 | udf_err(sb, "bitmap inode efe not found and attempted read-write mount\n"); |
1024 | return -EIO; | 1029 | return PTR_ERR(fe); |
1025 | } | 1030 | } |
1026 | } | 1031 | } else |
1032 | mdata->s_bitmap_fe = fe; | ||
1027 | } | 1033 | } |
1028 | 1034 | ||
1029 | udf_debug("udf_load_metadata_files Ok\n"); | 1035 | udf_debug("udf_load_metadata_files Ok\n"); |
@@ -1111,13 +1117,15 @@ static int udf_fill_partdesc_info(struct super_block *sb, | |||
1111 | phd->unallocSpaceTable.extPosition), | 1117 | phd->unallocSpaceTable.extPosition), |
1112 | .partitionReferenceNum = p_index, | 1118 | .partitionReferenceNum = p_index, |
1113 | }; | 1119 | }; |
1120 | struct inode *inode; | ||
1114 | 1121 | ||
1115 | map->s_uspace.s_table = udf_iget(sb, &loc); | 1122 | inode = udf_iget_special(sb, &loc); |
1116 | if (!map->s_uspace.s_table) { | 1123 | if (IS_ERR(inode)) { |
1117 | udf_debug("cannot load unallocSpaceTable (part %d)\n", | 1124 | udf_debug("cannot load unallocSpaceTable (part %d)\n", |
1118 | p_index); | 1125 | p_index); |
1119 | return -EIO; | 1126 | return PTR_ERR(inode); |
1120 | } | 1127 | } |
1128 | map->s_uspace.s_table = inode; | ||
1121 | map->s_partition_flags |= UDF_PART_FLAG_UNALLOC_TABLE; | 1129 | map->s_partition_flags |= UDF_PART_FLAG_UNALLOC_TABLE; |
1122 | udf_debug("unallocSpaceTable (part %d) @ %ld\n", | 1130 | udf_debug("unallocSpaceTable (part %d) @ %ld\n", |
1123 | p_index, map->s_uspace.s_table->i_ino); | 1131 | p_index, map->s_uspace.s_table->i_ino); |
@@ -1144,14 +1152,15 @@ static int udf_fill_partdesc_info(struct super_block *sb, | |||
1144 | phd->freedSpaceTable.extPosition), | 1152 | phd->freedSpaceTable.extPosition), |
1145 | .partitionReferenceNum = p_index, | 1153 | .partitionReferenceNum = p_index, |
1146 | }; | 1154 | }; |
1155 | struct inode *inode; | ||
1147 | 1156 | ||
1148 | map->s_fspace.s_table = udf_iget(sb, &loc); | 1157 | inode = udf_iget_special(sb, &loc); |
1149 | if (!map->s_fspace.s_table) { | 1158 | if (IS_ERR(inode)) { |
1150 | udf_debug("cannot load freedSpaceTable (part %d)\n", | 1159 | udf_debug("cannot load freedSpaceTable (part %d)\n", |
1151 | p_index); | 1160 | p_index); |
1152 | return -EIO; | 1161 | return PTR_ERR(inode); |
1153 | } | 1162 | } |
1154 | 1163 | map->s_fspace.s_table = inode; | |
1155 | map->s_partition_flags |= UDF_PART_FLAG_FREED_TABLE; | 1164 | map->s_partition_flags |= UDF_PART_FLAG_FREED_TABLE; |
1156 | udf_debug("freedSpaceTable (part %d) @ %ld\n", | 1165 | udf_debug("freedSpaceTable (part %d) @ %ld\n", |
1157 | p_index, map->s_fspace.s_table->i_ino); | 1166 | p_index, map->s_fspace.s_table->i_ino); |
@@ -1178,6 +1187,7 @@ static void udf_find_vat_block(struct super_block *sb, int p_index, | |||
1178 | struct udf_part_map *map = &sbi->s_partmaps[p_index]; | 1187 | struct udf_part_map *map = &sbi->s_partmaps[p_index]; |
1179 | sector_t vat_block; | 1188 | sector_t vat_block; |
1180 | struct kernel_lb_addr ino; | 1189 | struct kernel_lb_addr ino; |
1190 | struct inode *inode; | ||
1181 | 1191 | ||
1182 | /* | 1192 | /* |
1183 | * VAT file entry is in the last recorded block. Some broken disks have | 1193 | * VAT file entry is in the last recorded block. Some broken disks have |
@@ -1186,10 +1196,13 @@ static void udf_find_vat_block(struct super_block *sb, int p_index, | |||
1186 | ino.partitionReferenceNum = type1_index; | 1196 | ino.partitionReferenceNum = type1_index; |
1187 | for (vat_block = start_block; | 1197 | for (vat_block = start_block; |
1188 | vat_block >= map->s_partition_root && | 1198 | vat_block >= map->s_partition_root && |
1189 | vat_block >= start_block - 3 && | 1199 | vat_block >= start_block - 3; vat_block--) { |
1190 | !sbi->s_vat_inode; vat_block--) { | ||
1191 | ino.logicalBlockNum = vat_block - map->s_partition_root; | 1200 | ino.logicalBlockNum = vat_block - map->s_partition_root; |
1192 | sbi->s_vat_inode = udf_iget(sb, &ino); | 1201 | inode = udf_iget_special(sb, &ino); |
1202 | if (!IS_ERR(inode)) { | ||
1203 | sbi->s_vat_inode = inode; | ||
1204 | break; | ||
1205 | } | ||
1193 | } | 1206 | } |
1194 | } | 1207 | } |
1195 | 1208 | ||
@@ -2205,10 +2218,10 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent) | |||
2205 | /* assign inodes by physical block number */ | 2218 | /* assign inodes by physical block number */ |
2206 | /* perhaps it's not extensible enough, but for now ... */ | 2219 | /* perhaps it's not extensible enough, but for now ... */ |
2207 | inode = udf_iget(sb, &rootdir); | 2220 | inode = udf_iget(sb, &rootdir); |
2208 | if (!inode) { | 2221 | if (IS_ERR(inode)) { |
2209 | udf_err(sb, "Error in udf_iget, block=%d, partition=%d\n", | 2222 | udf_err(sb, "Error in udf_iget, block=%d, partition=%d\n", |
2210 | rootdir.logicalBlockNum, rootdir.partitionReferenceNum); | 2223 | rootdir.logicalBlockNum, rootdir.partitionReferenceNum); |
2211 | ret = -EIO; | 2224 | ret = PTR_ERR(inode); |
2212 | goto error_out; | 2225 | goto error_out; |
2213 | } | 2226 | } |
2214 | 2227 | ||
diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h index be7dabbbcb49..1cc3c993ebd0 100644 --- a/fs/udf/udfdecl.h +++ b/fs/udf/udfdecl.h | |||
@@ -138,12 +138,22 @@ extern int udf_write_fi(struct inode *inode, struct fileIdentDesc *, | |||
138 | /* file.c */ | 138 | /* file.c */ |
139 | extern long udf_ioctl(struct file *, unsigned int, unsigned long); | 139 | extern long udf_ioctl(struct file *, unsigned int, unsigned long); |
140 | /* inode.c */ | 140 | /* inode.c */ |
141 | extern struct inode *udf_iget(struct super_block *, struct kernel_lb_addr *); | 141 | extern struct inode *__udf_iget(struct super_block *, struct kernel_lb_addr *, |
142 | bool hidden_inode); | ||
143 | static inline struct inode *udf_iget_special(struct super_block *sb, | ||
144 | struct kernel_lb_addr *ino) | ||
145 | { | ||
146 | return __udf_iget(sb, ino, true); | ||
147 | } | ||
148 | static inline struct inode *udf_iget(struct super_block *sb, | ||
149 | struct kernel_lb_addr *ino) | ||
150 | { | ||
151 | return __udf_iget(sb, ino, false); | ||
152 | } | ||
142 | extern int udf_expand_file_adinicb(struct inode *); | 153 | extern int udf_expand_file_adinicb(struct inode *); |
143 | extern struct buffer_head *udf_expand_dir_adinicb(struct inode *, int *, int *); | 154 | extern struct buffer_head *udf_expand_dir_adinicb(struct inode *, int *, int *); |
144 | extern struct buffer_head *udf_bread(struct inode *, int, int, int *); | 155 | extern struct buffer_head *udf_bread(struct inode *, int, int, int *); |
145 | extern int udf_setsize(struct inode *, loff_t); | 156 | extern int udf_setsize(struct inode *, loff_t); |
146 | extern void udf_read_inode(struct inode *); | ||
147 | extern void udf_evict_inode(struct inode *); | 157 | extern void udf_evict_inode(struct inode *); |
148 | extern int udf_write_inode(struct inode *, struct writeback_control *wbc); | 158 | extern int udf_write_inode(struct inode *, struct writeback_control *wbc); |
149 | extern long udf_block_map(struct inode *, sector_t); | 159 | extern long udf_block_map(struct inode *, sector_t); |
@@ -209,7 +219,7 @@ extern int udf_CS0toUTF8(struct ustr *, const struct ustr *); | |||
209 | 219 | ||
210 | /* ialloc.c */ | 220 | /* ialloc.c */ |
211 | extern void udf_free_inode(struct inode *); | 221 | extern void udf_free_inode(struct inode *); |
212 | extern struct inode *udf_new_inode(struct inode *, umode_t, int *); | 222 | extern struct inode *udf_new_inode(struct inode *, umode_t); |
213 | 223 | ||
214 | /* truncate.c */ | 224 | /* truncate.c */ |
215 | extern void udf_truncate_tail_extent(struct inode *); | 225 | extern void udf_truncate_tail_extent(struct inode *); |
diff --git a/fs/udf/udftime.c b/fs/udf/udftime.c index 1f11483eba6a..77c331f1a770 100644 --- a/fs/udf/udftime.c +++ b/fs/udf/udftime.c | |||
@@ -81,8 +81,6 @@ static time_t year_seconds[MAX_YEAR_SECONDS] = { | |||
81 | /*2038*/ SPY(68, 17, 0) | 81 | /*2038*/ SPY(68, 17, 0) |
82 | }; | 82 | }; |
83 | 83 | ||
84 | extern struct timezone sys_tz; | ||
85 | |||
86 | #define SECS_PER_HOUR (60 * 60) | 84 | #define SECS_PER_HOUR (60 * 60) |
87 | #define SECS_PER_DAY (SECS_PER_HOUR * 24) | 85 | #define SECS_PER_DAY (SECS_PER_HOUR * 24) |
88 | 86 | ||
diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c index a9cc75ffa925..7caa01652888 100644 --- a/fs/ufs/ialloc.c +++ b/fs/ufs/ialloc.c | |||
@@ -298,7 +298,10 @@ cg_found: | |||
298 | ufsi->i_oeftflag = 0; | 298 | ufsi->i_oeftflag = 0; |
299 | ufsi->i_dir_start_lookup = 0; | 299 | ufsi->i_dir_start_lookup = 0; |
300 | memset(&ufsi->i_u1, 0, sizeof(ufsi->i_u1)); | 300 | memset(&ufsi->i_u1, 0, sizeof(ufsi->i_u1)); |
301 | insert_inode_hash(inode); | 301 | if (insert_inode_locked(inode) < 0) { |
302 | err = -EIO; | ||
303 | goto failed; | ||
304 | } | ||
302 | mark_inode_dirty(inode); | 305 | mark_inode_dirty(inode); |
303 | 306 | ||
304 | if (uspi->fs_magic == UFS2_MAGIC) { | 307 | if (uspi->fs_magic == UFS2_MAGIC) { |
@@ -337,6 +340,7 @@ cg_found: | |||
337 | fail_remove_inode: | 340 | fail_remove_inode: |
338 | unlock_ufs(sb); | 341 | unlock_ufs(sb); |
339 | clear_nlink(inode); | 342 | clear_nlink(inode); |
343 | unlock_new_inode(inode); | ||
340 | iput(inode); | 344 | iput(inode); |
341 | UFSD("EXIT (FAILED): err %d\n", err); | 345 | UFSD("EXIT (FAILED): err %d\n", err); |
342 | return ERR_PTR(err); | 346 | return ERR_PTR(err); |
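The same insert_inode_locked() conversion lands in UFS. Unlike insert_inode_hash(), insert_inode_locked() refuses to hash a second in-core inode for a number that is already live (returning -EBUSY, which this path treats as corruption of the allocator bitmap), and it leaves the inode in I_NEW state so concurrent lookups block until setup finishes. The shape of the allocation tail, restated:

```c
	if (insert_inode_locked(inode) < 0) {
		err = -EIO;	/* duplicate ino: the on-disk bitmap is bad */
		goto failed;
	}
	mark_inode_dirty(inode);
	/* any later failure: clear_nlink(), unlock_new_inode(), iput() */
```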
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index 7c580c97990e..be7d42c7d938 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c | |||
@@ -902,9 +902,6 @@ void ufs_evict_inode(struct inode * inode) | |||
902 | invalidate_inode_buffers(inode); | 902 | invalidate_inode_buffers(inode); |
903 | clear_inode(inode); | 903 | clear_inode(inode); |
904 | 904 | ||
905 | if (want_delete) { | 905 | if (want_delete) |
906 | lock_ufs(inode->i_sb); | 906 | ufs_free_inode(inode); |
907 | ufs_free_inode (inode); | ||
908 | unlock_ufs(inode->i_sb); | ||
909 | } | ||
910 | } | 907 | } |
diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c index 90d74b8f8eba..fd65deb4b5f0 100644 --- a/fs/ufs/namei.c +++ b/fs/ufs/namei.c | |||
@@ -38,10 +38,12 @@ static inline int ufs_add_nondir(struct dentry *dentry, struct inode *inode) | |||
38 | { | 38 | { |
39 | int err = ufs_add_link(dentry, inode); | 39 | int err = ufs_add_link(dentry, inode); |
40 | if (!err) { | 40 | if (!err) { |
41 | unlock_new_inode(inode); | ||
41 | d_instantiate(dentry, inode); | 42 | d_instantiate(dentry, inode); |
42 | return 0; | 43 | return 0; |
43 | } | 44 | } |
44 | inode_dec_link_count(inode); | 45 | inode_dec_link_count(inode); |
46 | unlock_new_inode(inode); | ||
45 | iput(inode); | 47 | iput(inode); |
46 | return err; | 48 | return err; |
47 | } | 49 | } |
@@ -126,12 +128,12 @@ static int ufs_symlink (struct inode * dir, struct dentry * dentry, | |||
126 | if (l > sb->s_blocksize) | 128 | if (l > sb->s_blocksize) |
127 | goto out_notlocked; | 129 | goto out_notlocked; |
128 | 130 | ||
129 | lock_ufs(dir->i_sb); | ||
130 | inode = ufs_new_inode(dir, S_IFLNK | S_IRWXUGO); | 131 | inode = ufs_new_inode(dir, S_IFLNK | S_IRWXUGO); |
131 | err = PTR_ERR(inode); | 132 | err = PTR_ERR(inode); |
132 | if (IS_ERR(inode)) | 133 | if (IS_ERR(inode)) |
133 | goto out; | 134 | goto out_notlocked; |
134 | 135 | ||
136 | lock_ufs(dir->i_sb); | ||
135 | if (l > UFS_SB(sb)->s_uspi->s_maxsymlinklen) { | 137 | if (l > UFS_SB(sb)->s_uspi->s_maxsymlinklen) { |
136 | /* slow symlink */ | 138 | /* slow symlink */ |
137 | inode->i_op = &ufs_symlink_inode_operations; | 139 | inode->i_op = &ufs_symlink_inode_operations; |
@@ -155,6 +157,7 @@ out_notlocked: | |||
155 | 157 | ||
156 | out_fail: | 158 | out_fail: |
157 | inode_dec_link_count(inode); | 159 | inode_dec_link_count(inode); |
160 | unlock_new_inode(inode); | ||
158 | iput(inode); | 161 | iput(inode); |
159 | goto out; | 162 | goto out; |
160 | } | 163 | } |
@@ -181,13 +184,9 @@ static int ufs_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode) | |||
181 | struct inode * inode; | 184 | struct inode * inode; |
182 | int err; | 185 | int err; |
183 | 186 | ||
184 | lock_ufs(dir->i_sb); | ||
185 | inode_inc_link_count(dir); | ||
186 | |||
187 | inode = ufs_new_inode(dir, S_IFDIR|mode); | 187 | inode = ufs_new_inode(dir, S_IFDIR|mode); |
188 | err = PTR_ERR(inode); | ||
189 | if (IS_ERR(inode)) | 188 | if (IS_ERR(inode)) |
190 | goto out_dir; | 189 | return PTR_ERR(inode); |
191 | 190 | ||
192 | inode->i_op = &ufs_dir_inode_operations; | 191 | inode->i_op = &ufs_dir_inode_operations; |
193 | inode->i_fop = &ufs_dir_operations; | 192 | inode->i_fop = &ufs_dir_operations; |
@@ -195,6 +194,9 @@ static int ufs_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode) | |||
195 | 194 | ||
196 | inode_inc_link_count(inode); | 195 | inode_inc_link_count(inode); |
197 | 196 | ||
197 | lock_ufs(dir->i_sb); | ||
198 | inode_inc_link_count(dir); | ||
199 | |||
198 | err = ufs_make_empty(inode, dir); | 200 | err = ufs_make_empty(inode, dir); |
199 | if (err) | 201 | if (err) |
200 | goto out_fail; | 202 | goto out_fail; |
@@ -211,8 +213,8 @@ out: | |||
211 | out_fail: | 213 | out_fail: |
212 | inode_dec_link_count(inode); | 214 | inode_dec_link_count(inode); |
213 | inode_dec_link_count(inode); | 215 | inode_dec_link_count(inode); |
216 | unlock_new_inode(inode); | ||
214 | iput (inode); | 217 | iput (inode); |
215 | out_dir: | ||
216 | inode_dec_link_count(dir); | 218 | inode_dec_link_count(dir); |
217 | unlock_ufs(dir->i_sb); | 219 | unlock_ufs(dir->i_sb); |
218 | goto out; | 220 | goto out; |
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index de2d26d32844..86df952d3e24 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c | |||
@@ -5424,7 +5424,7 @@ xfs_bmap_shift_extents( | |||
5424 | struct xfs_bmap_free *flist, | 5424 | struct xfs_bmap_free *flist, |
5425 | int num_exts) | 5425 | int num_exts) |
5426 | { | 5426 | { |
5427 | struct xfs_btree_cur *cur; | 5427 | struct xfs_btree_cur *cur = NULL; |
5428 | struct xfs_bmbt_rec_host *gotp; | 5428 | struct xfs_bmbt_rec_host *gotp; |
5429 | struct xfs_bmbt_irec got; | 5429 | struct xfs_bmbt_irec got; |
5430 | struct xfs_bmbt_irec left; | 5430 | struct xfs_bmbt_irec left; |
@@ -5435,7 +5435,7 @@ xfs_bmap_shift_extents( | |||
5435 | int error = 0; | 5435 | int error = 0; |
5436 | int i; | 5436 | int i; |
5437 | int whichfork = XFS_DATA_FORK; | 5437 | int whichfork = XFS_DATA_FORK; |
5438 | int logflags; | 5438 | int logflags = 0; |
5439 | xfs_filblks_t blockcount = 0; | 5439 | xfs_filblks_t blockcount = 0; |
5440 | int total_extents; | 5440 | int total_extents; |
5441 | 5441 | ||
@@ -5478,16 +5478,11 @@ xfs_bmap_shift_extents( | |||
5478 | } | 5478 | } |
5479 | } | 5479 | } |
5480 | 5480 | ||
5481 | /* We are going to change core inode */ | ||
5482 | logflags = XFS_ILOG_CORE; | ||
5483 | if (ifp->if_flags & XFS_IFBROOT) { | 5481 | if (ifp->if_flags & XFS_IFBROOT) { |
5484 | cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork); | 5482 | cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork); |
5485 | cur->bc_private.b.firstblock = *firstblock; | 5483 | cur->bc_private.b.firstblock = *firstblock; |
5486 | cur->bc_private.b.flist = flist; | 5484 | cur->bc_private.b.flist = flist; |
5487 | cur->bc_private.b.flags = 0; | 5485 | cur->bc_private.b.flags = 0; |
5488 | } else { | ||
5489 | cur = NULL; | ||
5490 | logflags |= XFS_ILOG_DEXT; | ||
5491 | } | 5486 | } |
5492 | 5487 | ||
5493 | /* | 5488 | /* |
@@ -5545,11 +5540,14 @@ xfs_bmap_shift_extents( | |||
5545 | blockcount = left.br_blockcount + | 5540 | blockcount = left.br_blockcount + |
5546 | got.br_blockcount; | 5541 | got.br_blockcount; |
5547 | xfs_iext_remove(ip, *current_ext, 1, 0); | 5542 | xfs_iext_remove(ip, *current_ext, 1, 0); |
5543 | logflags |= XFS_ILOG_CORE; | ||
5548 | if (cur) { | 5544 | if (cur) { |
5549 | error = xfs_btree_delete(cur, &i); | 5545 | error = xfs_btree_delete(cur, &i); |
5550 | if (error) | 5546 | if (error) |
5551 | goto del_cursor; | 5547 | goto del_cursor; |
5552 | XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor); | 5548 | XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor); |
5549 | } else { | ||
5550 | logflags |= XFS_ILOG_DEXT; | ||
5553 | } | 5551 | } |
5554 | XFS_IFORK_NEXT_SET(ip, whichfork, | 5552 | XFS_IFORK_NEXT_SET(ip, whichfork, |
5555 | XFS_IFORK_NEXTENTS(ip, whichfork) - 1); | 5553 | XFS_IFORK_NEXTENTS(ip, whichfork) - 1); |
@@ -5575,6 +5573,7 @@ xfs_bmap_shift_extents( | |||
5575 | got.br_startoff = startoff; | 5573 | got.br_startoff = startoff; |
5576 | } | 5574 | } |
5577 | 5575 | ||
5576 | logflags |= XFS_ILOG_CORE; | ||
5578 | if (cur) { | 5577 | if (cur) { |
5579 | error = xfs_bmbt_update(cur, got.br_startoff, | 5578 | error = xfs_bmbt_update(cur, got.br_startoff, |
5580 | got.br_startblock, | 5579 | got.br_startblock, |
@@ -5582,6 +5581,8 @@ xfs_bmap_shift_extents( | |||
5582 | got.br_state); | 5581 | got.br_state); |
5583 | if (error) | 5582 | if (error) |
5584 | goto del_cursor; | 5583 | goto del_cursor; |
5584 | } else { | ||
5585 | logflags |= XFS_ILOG_DEXT; | ||
5585 | } | 5586 | } |
5586 | 5587 | ||
5587 | (*current_ext)++; | 5588 | (*current_ext)++; |
@@ -5597,6 +5598,7 @@ del_cursor: | |||
5597 | xfs_btree_del_cursor(cur, | 5598 | xfs_btree_del_cursor(cur, |
5598 | error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); | 5599 | error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); |
5599 | 5600 | ||
5600 | xfs_trans_log_inode(tp, ip, logflags); | 5601 | if (logflags) |
5602 | xfs_trans_log_inode(tp, ip, logflags); | ||
5601 | return error; | 5603 | return error; |
5602 | } | 5604 | } |
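All of the xfs_bmap_shift_extents() hunks above implement one rule: accumulate logflags only at the points where the inode core or the in-inode extent fork is actually modified, and call xfs_trans_log_inode() only if anything accumulated. That keeps early error exits, taken before any modification, from dirtying a clean inode in the transaction. The skeleton, condensed (the `modified` condition stands in for the real per-extent merge/move work; tp and ip are the function's transaction and inode arguments):

```c
	struct xfs_btree_cur	*cur = NULL;
	int			logflags = 0;
	int			error = 0;

	/* ... per-extent loop: when an extent is merged or moved ... */
	if (1 /* modified */) {
		logflags |= XFS_ILOG_CORE;
		if (!cur)		/* extents live in the inode fork */
			logflags |= XFS_ILOG_DEXT;
	}

del_cursor:
	if (cur)
		xfs_btree_del_cursor(cur,
				error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
	if (logflags)
		xfs_trans_log_inode(tp, ip, logflags);
	return error;
```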
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 11e9b4caa54f..b984647c24db 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c | |||
@@ -1753,11 +1753,72 @@ xfs_vm_readpages( | |||
1753 | return mpage_readpages(mapping, pages, nr_pages, xfs_get_blocks); | 1753 | return mpage_readpages(mapping, pages, nr_pages, xfs_get_blocks); |
1754 | } | 1754 | } |
1755 | 1755 | ||
1756 | /* | ||
1757 | * This is basically a copy of __set_page_dirty_buffers() with one | ||
1758 | * small tweak: buffers beyond EOF do not get marked dirty. If we mark them | ||
1759 | * dirty, we'll never be able to clean them because we don't write buffers | ||
1760 | * beyond EOF, and that means we can't invalidate pages that span EOF | ||
1761 | * that have been marked dirty. Further, the dirty state can leak into | ||
1762 | * the file interior if the file is extended, resulting in all sorts of | ||
1763 | * bad things happening as the state does not match the underlying data. | ||
1764 | * | ||
1765 | * XXX: this really indicates that bufferheads in XFS need to die. Warts like | ||
1766 | * this only exist because of bufferheads and how the generic code manages them. | ||
1767 | */ | ||
1768 | STATIC int | ||
1769 | xfs_vm_set_page_dirty( | ||
1770 | struct page *page) | ||
1771 | { | ||
1772 | struct address_space *mapping = page->mapping; | ||
1773 | struct inode *inode = mapping->host; | ||
1774 | loff_t end_offset; | ||
1775 | loff_t offset; | ||
1776 | int newly_dirty; | ||
1777 | |||
1778 | if (unlikely(!mapping)) | ||
1779 | return !TestSetPageDirty(page); | ||
1780 | |||
1781 | end_offset = i_size_read(inode); | ||
1782 | offset = page_offset(page); | ||
1783 | |||
1784 | spin_lock(&mapping->private_lock); | ||
1785 | if (page_has_buffers(page)) { | ||
1786 | struct buffer_head *head = page_buffers(page); | ||
1787 | struct buffer_head *bh = head; | ||
1788 | |||
1789 | do { | ||
1790 | if (offset < end_offset) | ||
1791 | set_buffer_dirty(bh); | ||
1792 | bh = bh->b_this_page; | ||
1793 | offset += 1 << inode->i_blkbits; | ||
1794 | } while (bh != head); | ||
1795 | } | ||
1796 | newly_dirty = !TestSetPageDirty(page); | ||
1797 | spin_unlock(&mapping->private_lock); | ||
1798 | |||
1799 | if (newly_dirty) { | ||
1800 | /* sigh - __set_page_dirty() is static, so copy it here, too */ | ||
1801 | unsigned long flags; | ||
1802 | |||
1803 | spin_lock_irqsave(&mapping->tree_lock, flags); | ||
1804 | if (page->mapping) { /* Race with truncate? */ | ||
1805 | WARN_ON_ONCE(!PageUptodate(page)); | ||
1806 | account_page_dirtied(page, mapping); | ||
1807 | radix_tree_tag_set(&mapping->page_tree, | ||
1808 | page_index(page), PAGECACHE_TAG_DIRTY); | ||
1809 | } | ||
1810 | spin_unlock_irqrestore(&mapping->tree_lock, flags); | ||
1811 | __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); | ||
1812 | } | ||
1813 | return newly_dirty; | ||
1814 | } | ||
1815 | |||
1756 | const struct address_space_operations xfs_address_space_operations = { | 1816 | const struct address_space_operations xfs_address_space_operations = { |
1757 | .readpage = xfs_vm_readpage, | 1817 | .readpage = xfs_vm_readpage, |
1758 | .readpages = xfs_vm_readpages, | 1818 | .readpages = xfs_vm_readpages, |
1759 | .writepage = xfs_vm_writepage, | 1819 | .writepage = xfs_vm_writepage, |
1760 | .writepages = xfs_vm_writepages, | 1820 | .writepages = xfs_vm_writepages, |
1821 | .set_page_dirty = xfs_vm_set_page_dirty, | ||
1761 | .releasepage = xfs_vm_releasepage, | 1822 | .releasepage = xfs_vm_releasepage, |
1762 | .invalidatepage = xfs_vm_invalidatepage, | 1823 | .invalidatepage = xfs_vm_invalidatepage, |
1763 | .write_begin = xfs_vm_write_begin, | 1824 | .write_begin = xfs_vm_write_begin, |
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 2f1e30d39a35..1707980f9a4b 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c | |||
@@ -1470,6 +1470,26 @@ xfs_collapse_file_space( | |||
1470 | start_fsb = XFS_B_TO_FSB(mp, offset + len); | 1470 | start_fsb = XFS_B_TO_FSB(mp, offset + len); |
1471 | shift_fsb = XFS_B_TO_FSB(mp, len); | 1471 | shift_fsb = XFS_B_TO_FSB(mp, len); |
1472 | 1472 | ||
1473 | /* | ||
1474 | * Writeback the entire file and force remove any post-eof blocks. The | ||
1475 | * writeback prevents changes to the extent list via concurrent | ||
1476 | * writeback and the eofblocks trim prevents the extent shift algorithm | ||
1477 | * from running into a post-eof delalloc extent. | ||
1478 | * | ||
1479 | * XXX: This is a temporary fix until the extent shift loop below is | ||
1480 | * converted to use offsets and lookups within the ILOCK rather than | ||
1481 | * carrying around the index into the extent list for the next | ||
1482 | * iteration. | ||
1483 | */ | ||
1484 | error = filemap_write_and_wait(VFS_I(ip)->i_mapping); | ||
1485 | if (error) | ||
1486 | return error; | ||
1487 | if (xfs_can_free_eofblocks(ip, true)) { | ||
1488 | error = xfs_free_eofblocks(mp, ip, false); | ||
1489 | if (error) | ||
1490 | return error; | ||
1491 | } | ||
1492 | |||
1473 | error = xfs_free_file_space(ip, offset, len); | 1493 | error = xfs_free_file_space(ip, offset, len); |
1474 | if (error) | 1494 | if (error) |
1475 | return error; | 1495 | return error; |
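The xfs_collapse_file_space() hunk imposes a strict ordering before the collapse proper: flush all dirty pagecache first (so concurrent writeback cannot change the extent list mid-walk), then trim speculative post-EOF preallocation (so the shift algorithm never meets a post-EOF delalloc extent). A userspace sketch of that ordering follows; every function in it is a hypothetical stand-in for the VFS/XFS calls in the diff (filemap_write_and_wait, xfs_free_eofblocks, xfs_free_file_space, xfs_bmap_shift_extents).

/*
 * Sketch only: the error-checked ordering used by the hunk above,
 * with stand-in functions rather than the real kernel API.
 */
#include <stdio.h>

static int write_and_wait(void)	{ puts("flush dirty pagecache"); return 0; }
static int can_free_eofblocks(void)	{ return 1; }
static int free_eofblocks(void)	{ puts("trim post-EOF blocks"); return 0; }
static int free_file_space(void)	{ puts("punch hole"); return 0; }
static int shift_extents_left(void)	{ puts("shift extents"); return 0; }

static int collapse_range(void)
{
	int error;

	/* order matters: flush before trimming, trim before shifting */
	error = write_and_wait();
	if (error)
		return error;
	if (can_free_eofblocks()) {
		error = free_eofblocks();
		if (error)
			return error;
	}
	error = free_file_space();
	if (error)
		return error;
	return shift_extents_left();
}

int main(void)
{
	return collapse_range();
}

As the XXX comment in the hunk notes, this is a stopgap: once the shift loop does its lookups by offset under the ILOCK instead of carrying an extent-list index across iterations, the up-front flush and trim should no longer be required.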
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 076b1708d134..de5368c803f9 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c | |||
@@ -291,12 +291,22 @@ xfs_file_read_iter( | |||
291 | if (inode->i_mapping->nrpages) { | 291 | if (inode->i_mapping->nrpages) { |
292 | ret = filemap_write_and_wait_range( | 292 | ret = filemap_write_and_wait_range( |
293 | VFS_I(ip)->i_mapping, | 293 | VFS_I(ip)->i_mapping, |
294 | pos, -1); | 294 | pos, pos + size - 1); |
295 | if (ret) { | 295 | if (ret) { |
296 | xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL); | 296 | xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL); |
297 | return ret; | 297 | return ret; |
298 | } | 298 | } |
299 | truncate_pagecache_range(VFS_I(ip), pos, -1); | 299 | |
300 | /* | ||
301 | * Invalidate whole pages. This can return an error if | ||
302 | * we fail to invalidate a page, but this should never | ||
303 | * happen on XFS. Warn if it does fail. | ||
304 | */ | ||
305 | ret = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping, | ||
306 | pos >> PAGE_CACHE_SHIFT, | ||
307 | (pos + size - 1) >> PAGE_CACHE_SHIFT); | ||
308 | WARN_ON_ONCE(ret); | ||
309 | ret = 0; | ||
300 | } | 310 | } |
301 | xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL); | 311 | xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL); |
302 | } | 312 | } |
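This hunk (and the matching one in xfs_file_dio_aio_write() below) replaces the old whole-file truncate_pagecache_range(..., pos, -1) with a flush and invalidation scoped to the exact byte range of the direct I/O, using invalidate_inode_pages2_range() on inclusive page indices. The sketch below shows just the byte-range-to-page-index arithmetic; PAGE_CACHE_SHIFT is assumed to be 12 (4k pages) for the sample output.

/*
 * Sketch of the range arithmetic above: the byte range
 * [pos, pos + size - 1] becomes an inclusive page-index range
 * for invalidate_inode_pages2_range().
 */
#include <stdio.h>

#define PAGE_CACHE_SHIFT 12	/* assumed: 4k pages */

int main(void)
{
	unsigned long long pos = 5000, size = 10000;
	unsigned long first = pos >> PAGE_CACHE_SHIFT;
	unsigned long last = (pos + size - 1) >> PAGE_CACHE_SHIFT;

	/*
	 * Bytes 5000..14999 span pages 1..3; all three pages are
	 * dropped whole, which is why the preceding
	 * filemap_write_and_wait_range() must cover the same range.
	 */
	printf("invalidate pages %lu..%lu\n", first, last);
	return 0;
}

Invalidation works on whole pages, so partial pages at either end of the range are discarded too; the prior ranged writeback guarantees nothing dirty is lost, and the WARN_ON_ONCE in the hunk flags the invalidation failure case that should not occur on XFS.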
@@ -632,10 +642,19 @@ xfs_file_dio_aio_write( | |||
632 | 642 | ||
633 | if (mapping->nrpages) { | 643 | if (mapping->nrpages) { |
634 | ret = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, | 644 | ret = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, |
635 | pos, -1); | 645 | pos, pos + count - 1); |
636 | if (ret) | 646 | if (ret) |
637 | goto out; | 647 | goto out; |
638 | truncate_pagecache_range(VFS_I(ip), pos, -1); | 648 | /* |
649 | * Invalidate whole pages. This can return an error if | ||
650 | * we fail to invalidate a page, but this should never | ||
651 | * happen on XFS. Warn if it does fail. | ||
652 | */ | ||
653 | ret = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping, | ||
654 | pos >> PAGE_CACHE_SHIFT, | ||
655 | (pos + count - 1) >> PAGE_CACHE_SHIFT); | ||
656 | WARN_ON_ONCE(ret); | ||
657 | ret = 0; | ||
639 | } | 658 | } |
640 | 659 | ||
641 | /* | 660 | /* |