Diffstat (limited to 'fs')
171 files changed, 9130 insertions, 3632 deletions
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index 099c7712631c..fb9ffcb43277 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -78,7 +78,6 @@ enum p9_cache_modes {
  * @cache: cache mode of type &p9_cache_modes
  * @cachetag: the tag of the cache associated with this session
  * @fscache: session cookie associated with FS-Cache
- * @options: copy of options string given by user
  * @uname: string user name to mount hierarchy as
  * @aname: mount specifier for remote hierarchy
  * @maxdata: maximum data to be sent/recvd per protocol message
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index be35d05a4d0e..e9e04376c52c 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -231,9 +231,7 @@ static int v9fs_launder_page(struct page *page)
 /**
  * v9fs_direct_IO - 9P address space operation for direct I/O
  * @iocb: target I/O control block
- * @iov: array of vectors that define I/O buffer
  * @pos: offset in file to begin the operation
- * @nr_segs: size of iovec array
  *
  * The presence of v9fs_direct_IO() in the address space ops vector
  * allowes open() O_DIRECT flags which would have failed otherwise.
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 2a9dd37dc426..1ef16bd8280b 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -151,7 +151,7 @@ static int v9fs_file_do_lock(struct file *filp, int cmd, struct file_lock *fl)
 {
        struct p9_flock flock;
        struct p9_fid *fid;
-       uint8_t status;
+       uint8_t status = P9_LOCK_ERROR;
        int res = 0;
        unsigned char fl_type;
 
@@ -196,7 +196,7 @@ static int v9fs_file_do_lock(struct file *filp, int cmd, struct file_lock *fl)
        for (;;) {
                res = p9_client_lock_dotl(fid, &flock, &status);
                if (res < 0)
-                       break;
+                       goto out_unlock;
 
                if (status != P9_LOCK_BLOCKED)
                        break;
@@ -214,14 +214,16 @@ static int v9fs_file_do_lock(struct file *filp, int cmd, struct file_lock *fl)
        case P9_LOCK_BLOCKED:
                res = -EAGAIN;
                break;
+       default:
+               WARN_ONCE(1, "unknown lock status code: %d\n", status);
+               /* fallthough */
        case P9_LOCK_ERROR:
        case P9_LOCK_GRACE:
                res = -ENOLCK;
                break;
-       default:
-               BUG();
        }
 
+out_unlock:
        /*
         * incase server returned error for lock request, revert
         * it locally
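
The vfs_file.c hunks make two defensive changes: status now starts as P9_LOCK_ERROR, so the new "goto out_unlock" taken when the server call itself fails lands in the existing error handling with a sane status, and an unrecognized status code from the server now triggers WARN_ONCE() and falls through to the -ENOLCK cases instead of calling BUG(), which would crash the kernel on bad remote input. A minimal userspace sketch of the same pattern follows; the names are illustrative stand-ins, not the real 9p client API:

#include <stdio.h>

enum lock_status { LOCK_SUCCESS, LOCK_BLOCKED, LOCK_ERROR, LOCK_GRACE };

/* Map a server-provided status to an errno-style result without
 * trusting the server to stay inside the known enum range. */
static int map_lock_status(int status)
{
        int res;

        switch (status) {
        case LOCK_SUCCESS:
                res = 0;
                break;
        case LOCK_BLOCKED:
                res = -11;              /* -EAGAIN */
                break;
        default:
                /* warn, then degrade gracefully instead of crashing */
                fprintf(stderr, "unknown lock status code: %d\n", status);
                /* fall through */
        case LOCK_ERROR:
        case LOCK_GRACE:
                res = -37;              /* -ENOLCK */
                break;
        }
        return res;
}

int main(void)
{
        printf("%d\n", map_lock_status(42));    /* unknown -> -ENOLCK, not a crash */
        return 0;
}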
diff --git a/fs/Kconfig b/fs/Kconfig
index ec35851e5b71..011f43365d7b 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -32,6 +32,7 @@ source "fs/gfs2/Kconfig"
 source "fs/ocfs2/Kconfig"
 source "fs/btrfs/Kconfig"
 source "fs/nilfs2/Kconfig"
+source "fs/f2fs/Kconfig"
 
 config FS_DAX
        bool "Direct Access (DAX) support"
@@ -217,7 +218,6 @@ source "fs/pstore/Kconfig"
 source "fs/sysv/Kconfig"
 source "fs/ufs/Kconfig"
 source "fs/exofs/Kconfig"
-source "fs/f2fs/Kconfig"
 
 endif # MISC_FILESYSTEMS
 
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index 4dabeb893b7c..df9932b00d08 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -87,7 +87,7 @@ BTRFS_WORK_HELPER(scrubwrc_helper);
 BTRFS_WORK_HELPER(scrubnc_helper);
 
 static struct __btrfs_workqueue *
-__btrfs_alloc_workqueue(const char *name, int flags, int max_active,
+__btrfs_alloc_workqueue(const char *name, unsigned int flags, int max_active,
                         int thresh)
 {
        struct __btrfs_workqueue *ret = kzalloc(sizeof(*ret), GFP_NOFS);
@@ -132,7 +132,7 @@ static inline void
 __btrfs_destroy_workqueue(struct __btrfs_workqueue *wq);
 
 struct btrfs_workqueue *btrfs_alloc_workqueue(const char *name,
-                                             int flags,
+                                             unsigned int flags,
                                              int max_active,
                                              int thresh)
 {
diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h
index e386c29ef1f6..ec2ee477f8ba 100644
--- a/fs/btrfs/async-thread.h
+++ b/fs/btrfs/async-thread.h
@@ -66,7 +66,7 @@ BTRFS_WORK_HELPER_PROTO(scrubwrc_helper);
 BTRFS_WORK_HELPER_PROTO(scrubnc_helper);
 
 struct btrfs_workqueue *btrfs_alloc_workqueue(const char *name,
-                                             int flags,
+                                             unsigned int flags,
                                              int max_active,
                                              int thresh);
 void btrfs_init_work(struct btrfs_work *work, btrfs_work_func_t helper,
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index f55721ff9385..9de772ee0031 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -1206,7 +1206,7 @@ int btrfs_check_shared(struct btrfs_trans_handle *trans,
        struct ulist *roots = NULL;
        struct ulist_iterator uiter;
        struct ulist_node *node;
-       struct seq_list elem = {};
+       struct seq_list elem = SEQ_LIST_INIT(elem);
        int ret = 0;
 
        tmp = ulist_alloc(GFP_NOFS);
@@ -1610,7 +1610,7 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
        struct ulist *roots = NULL;
        struct ulist_node *ref_node = NULL;
        struct ulist_node *root_node = NULL;
-       struct seq_list tree_mod_seq_elem = {};
+       struct seq_list tree_mod_seq_elem = SEQ_LIST_INIT(tree_mod_seq_elem);
        struct ulist_iterator ref_uiter;
        struct ulist_iterator root_uiter;
 
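
The SEQ_LIST_INIT() conversion matters because "= {}" zero-initializes the embedded struct list_head, leaving next/prev as NULL rather than pointing at the node itself, and the kernel's list helpers (list_empty(), list_del(), ...) assume a self-pointing empty head. The macro itself is added in the ctree.h hunk further down this page and expands to LIST_HEAD_INIT. A standalone sketch of the difference, with minimal stand-ins for the kernel's list primitives:

#include <stdio.h>

struct list_head { struct list_head *next, *prev; };
#define LIST_HEAD_INIT(name) { &(name), &(name) }

/* Same test the kernel uses; NULL pointers make it lie (and
 * would make list_del() dereference NULL). */
static int list_empty(const struct list_head *head)
{
        return head->next == head;
}

struct seq_list {
        struct list_head list;
        unsigned long long seq;
};
#define SEQ_LIST_INIT(name) { .list = LIST_HEAD_INIT((name).list), .seq = 0 }

int main(void)
{
        struct seq_list zeroed = { { 0, 0 }, 0 };       /* what "= {}" produced */
        struct seq_list good = SEQ_LIST_INIT(good);     /* self-pointing, valid empty list */

        printf("zeroed empty? %d\n", list_empty(&zeroed.list)); /* 0: wrong */
        printf("init'd empty? %d\n", list_empty(&good.list));   /* 1: correct */
        return 0;
}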
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index de5e4f2adfea..0ef5cc13fae2 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -66,7 +66,11 @@ struct btrfs_inode {
         */
        struct btrfs_key location;
 
-       /* Lock for counters */
+       /*
+        * Lock for counters and all fields used to determine if the inode is in
+        * the log or not (last_trans, last_sub_trans, last_log_commit,
+        * logged_trans).
+        */
        spinlock_t lock;
 
        /* the extent_tree has caches of all the extent mappings to disk */
@@ -250,6 +254,9 @@ static inline bool btrfs_is_free_space_inode(struct inode *inode)
 
 static inline int btrfs_inode_in_log(struct inode *inode, u64 generation)
 {
+       int ret = 0;
+
+       spin_lock(&BTRFS_I(inode)->lock);
        if (BTRFS_I(inode)->logged_trans == generation &&
            BTRFS_I(inode)->last_sub_trans <=
            BTRFS_I(inode)->last_log_commit &&
@@ -263,9 +270,10 @@ static inline int btrfs_inode_in_log(struct inode *inode, u64 generation)
                 */
                smp_mb();
                if (list_empty(&BTRFS_I(inode)->extent_tree.modified_extents))
-                       return 1;
+                       ret = 1;
        }
-       return 0;
+       spin_unlock(&BTRFS_I(inode)->lock);
+       return ret;
 }
 
 #define BTRFS_DIO_ORIG_BIO_SUBMITTED 0x1
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
index d897ef803b3b..ce7dec88f4b8 100644
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c
@@ -2990,8 +2990,8 @@ static void __btrfsic_submit_bio(int rw, struct bio *bio)
                       (unsigned long long)bio->bi_iter.bi_sector,
                       dev_bytenr, bio->bi_bdev);
 
-               mapped_datav = kmalloc(sizeof(*mapped_datav) * bio->bi_vcnt,
-                                      GFP_NOFS);
+               mapped_datav = kmalloc_array(bio->bi_vcnt,
+                                            sizeof(*mapped_datav), GFP_NOFS);
                if (!mapped_datav)
                        goto leave;
                cur_bytenr = dev_bytenr;
@@ -3241,8 +3241,5 @@ void btrfsic_unmount(struct btrfs_root *root,
 
        mutex_unlock(&btrfsic_mutex);
 
-       if (is_vmalloc_addr(state))
-               vfree(state);
-       else
-               kfree(state);
+       kvfree(state);
 }
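
The second hunk replaces the open-coded "which allocator did this come from" dance with kvfree(), the generic helper for buffers that may have come from either kmalloc() or vmalloc() (here, state is allocated with a kmalloc-then-fallback-to-vmalloc pattern). The helper, provided by mm/util.c, is essentially the code being deleted:

/* roughly the kernel's kvfree() (mm/util.c): is_vmalloc_addr()
 * tests whether the pointer lies in the vmalloc address range,
 * which determines the matching free routine */
void kvfree(const void *addr)
{
        if (is_vmalloc_addr(addr))
                vfree(addr);
        else
                kfree(addr);
}

Centralizing this removes a class of copy-paste bugs where only one of the two branches gets updated.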
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index e9df8862012c..ce62324c78e7 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -622,7 +622,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
        cb->orig_bio = bio;
 
        nr_pages = DIV_ROUND_UP(compressed_len, PAGE_CACHE_SIZE);
-       cb->compressed_pages = kzalloc(sizeof(struct page *) * nr_pages,
+       cb->compressed_pages = kcalloc(nr_pages, sizeof(struct page *),
                                       GFP_NOFS);
        if (!cb->compressed_pages)
                goto fail1;
@@ -750,7 +750,7 @@ static int comp_num_workspace[BTRFS_COMPRESS_TYPES];
 static atomic_t comp_alloc_workspace[BTRFS_COMPRESS_TYPES];
 static wait_queue_head_t comp_workspace_wait[BTRFS_COMPRESS_TYPES];
 
-static struct btrfs_compress_op *btrfs_compress_op[] = {
+static const struct btrfs_compress_op * const btrfs_compress_op[] = {
        &btrfs_zlib_compress,
        &btrfs_lzo_compress,
 };
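
The kzalloc(n * size) -> kcalloc(n, size) and kmalloc(n * size) -> kmalloc_array(n, size) conversions in this series are not cosmetic: the two-argument forms check that n * size does not overflow size_t and return NULL instead of silently allocating a short buffer that later code would overrun. A runnable userspace sketch of the check (xcalloc is an illustrative analog, not the kernel function; the overflow test mirrors the one in the kernel's slab.h):

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Refuse n * size when the multiplication would wrap, the way
 * kcalloc()/kmalloc_array() do, instead of allocating a
 * too-short buffer. */
static void *xcalloc(size_t n, size_t size)
{
        if (size != 0 && n > SIZE_MAX / size)
                return NULL;
        return calloc(n, size);
}

int main(void)
{
        void *p = xcalloc(SIZE_MAX / 4, 16);    /* would wrap: rejected */

        printf("overflowing request -> %p\n", p);
        free(p);                                /* free(NULL) is a no-op */
        return 0;
}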
diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h
index d181f70caae0..13a4dc0436c9 100644
--- a/fs/btrfs/compression.h
+++ b/fs/btrfs/compression.h
@@ -77,7 +77,7 @@ struct btrfs_compress_op {
                          size_t srclen, size_t destlen);
 };
 
-extern struct btrfs_compress_op btrfs_zlib_compress;
-extern struct btrfs_compress_op btrfs_lzo_compress;
+extern const struct btrfs_compress_op btrfs_zlib_compress;
+extern const struct btrfs_compress_op btrfs_lzo_compress;
 
 #endif
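
In "const struct btrfs_compress_op * const btrfs_compress_op[]" the two consts do different jobs: the first makes the pointed-to ops tables read-only, the second makes the array slots themselves unwritable, so the whole dispatch table can typically be placed in read-only data. A small standalone sketch of the idea (names are stand-ins for the btrfs types):

#include <stdio.h>

struct compress_op {
        const char *name;
        int (*compress)(const void *src, void *dst);
};

static const struct compress_op zlib_op = { "zlib", 0 };
static const struct compress_op lzo_op  = { "lzo",  0 };

/* neither the table slots nor the ops they point to can be
 * modified at runtime */
static const struct compress_op * const ops[] = { &zlib_op, &lzo_op };

int main(void)
{
        printf("%s\n", ops[0]->name);
        /* ops[0] = &lzo_op;   would not compile: slot is const */
        /* zlib_op.name = 0;   would not compile: target is const */
        return 0;
}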
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 6d67f32e648d..0f11ebc92f02 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -578,7 +578,7 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info,
        if (!tree_mod_need_log(fs_info, eb))
                return 0;
 
-       tm_list = kzalloc(nr_items * sizeof(struct tree_mod_elem *), flags);
+       tm_list = kcalloc(nr_items, sizeof(struct tree_mod_elem *), flags);
        if (!tm_list)
                return -ENOMEM;
 
@@ -677,7 +677,7 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
 
        if (log_removal && btrfs_header_level(old_root) > 0) {
                nritems = btrfs_header_nritems(old_root);
-               tm_list = kzalloc(nritems * sizeof(struct tree_mod_elem *),
+               tm_list = kcalloc(nritems, sizeof(struct tree_mod_elem *),
                                  flags);
                if (!tm_list) {
                        ret = -ENOMEM;
@@ -814,7 +814,7 @@ tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst,
        if (btrfs_header_level(dst) == 0 && btrfs_header_level(src) == 0)
                return 0;
 
-       tm_list = kzalloc(nr_items * 2 * sizeof(struct tree_mod_elem *),
+       tm_list = kcalloc(nr_items * 2, sizeof(struct tree_mod_elem *),
                          GFP_NOFS);
        if (!tm_list)
                return -ENOMEM;
@@ -905,8 +905,7 @@ tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb)
                return 0;
 
        nritems = btrfs_header_nritems(eb);
-       tm_list = kzalloc(nritems * sizeof(struct tree_mod_elem *),
-                         GFP_NOFS);
+       tm_list = kcalloc(nritems, sizeof(struct tree_mod_elem *), GFP_NOFS);
        if (!tm_list)
                return -ENOMEM;
 
@@ -1073,7 +1072,7 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
                        ret = btrfs_dec_ref(trans, root, buf, 1);
                        BUG_ON(ret); /* -ENOMEM */
                }
-               clean_tree_block(trans, root, buf);
+               clean_tree_block(trans, root->fs_info, buf);
                *last_ref = 1;
        }
        return 0;
@@ -1678,7 +1677,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
                        continue;
                }
 
-               cur = btrfs_find_tree_block(root, blocknr);
+               cur = btrfs_find_tree_block(root->fs_info, blocknr);
                if (cur)
                        uptodate = btrfs_buffer_uptodate(cur, gen, 0);
                else
@@ -1943,7 +1942,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
 
                path->locks[level] = 0;
                path->nodes[level] = NULL;
-               clean_tree_block(trans, root, mid);
+               clean_tree_block(trans, root->fs_info, mid);
                btrfs_tree_unlock(mid);
                /* once for the path */
                free_extent_buffer(mid);
@@ -1997,7 +1996,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
                if (wret < 0 && wret != -ENOSPC)
                        ret = wret;
                if (btrfs_header_nritems(right) == 0) {
-                       clean_tree_block(trans, root, right);
+                       clean_tree_block(trans, root->fs_info, right);
                        btrfs_tree_unlock(right);
                        del_ptr(root, path, level + 1, pslot + 1);
                        root_sub_used(root, right->len);
@@ -2041,7 +2040,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
                BUG_ON(wret == 1);
        }
        if (btrfs_header_nritems(mid) == 0) {
-               clean_tree_block(trans, root, mid);
+               clean_tree_block(trans, root->fs_info, mid);
                btrfs_tree_unlock(mid);
                del_ptr(root, path, level + 1, pslot);
                root_sub_used(root, mid->len);
@@ -2259,7 +2258,7 @@ static void reada_for_search(struct btrfs_root *root,
 
        search = btrfs_node_blockptr(node, slot);
        blocksize = root->nodesize;
-       eb = btrfs_find_tree_block(root, search);
+       eb = btrfs_find_tree_block(root->fs_info, search);
        if (eb) {
                free_extent_buffer(eb);
                return;
@@ -2319,7 +2318,7 @@ static noinline void reada_for_balance(struct btrfs_root *root,
        if (slot > 0) {
                block1 = btrfs_node_blockptr(parent, slot - 1);
                gen = btrfs_node_ptr_generation(parent, slot - 1);
-               eb = btrfs_find_tree_block(root, block1);
+               eb = btrfs_find_tree_block(root->fs_info, block1);
                /*
                 * if we get -eagain from btrfs_buffer_uptodate, we
                 * don't want to return eagain here. That will loop
@@ -2332,7 +2331,7 @@ static noinline void reada_for_balance(struct btrfs_root *root,
        if (slot + 1 < nritems) {
                block2 = btrfs_node_blockptr(parent, slot + 1);
                gen = btrfs_node_ptr_generation(parent, slot + 1);
-               eb = btrfs_find_tree_block(root, block2);
+               eb = btrfs_find_tree_block(root->fs_info, block2);
                if (eb && btrfs_buffer_uptodate(eb, gen, 1) != 0)
                        block2 = 0;
                free_extent_buffer(eb);
@@ -2450,7 +2449,7 @@ read_block_for_search(struct btrfs_trans_handle *trans,
        blocknr = btrfs_node_blockptr(b, slot);
        gen = btrfs_node_ptr_generation(b, slot);
 
-       tmp = btrfs_find_tree_block(root, blocknr);
+       tmp = btrfs_find_tree_block(root->fs_info, blocknr);
        if (tmp) {
                /* first we do an atomic uptodate check */
                if (btrfs_buffer_uptodate(tmp, gen, 1) > 0) {
@@ -3126,7 +3125,8 @@ again:
  * higher levels
  *
  */
-static void fixup_low_keys(struct btrfs_root *root, struct btrfs_path *path,
+static void fixup_low_keys(struct btrfs_fs_info *fs_info,
+                          struct btrfs_path *path,
                           struct btrfs_disk_key *key, int level)
 {
        int i;
@@ -3137,7 +3137,7 @@ static void fixup_low_keys(struct btrfs_root *root, struct btrfs_path *path,
                if (!path->nodes[i])
                        break;
                t = path->nodes[i];
-               tree_mod_log_set_node_key(root->fs_info, t, tslot, 1);
+               tree_mod_log_set_node_key(fs_info, t, tslot, 1);
                btrfs_set_node_key(t, key, tslot);
                btrfs_mark_buffer_dirty(path->nodes[i]);
                if (tslot != 0)
@@ -3151,7 +3151,8 @@ static void fixup_low_keys(struct btrfs_root *root, struct btrfs_path *path,
  * This function isn't completely safe. It's the caller's responsibility
  * that the new key won't break the order
  */
-void btrfs_set_item_key_safe(struct btrfs_root *root, struct btrfs_path *path,
+void btrfs_set_item_key_safe(struct btrfs_fs_info *fs_info,
+                            struct btrfs_path *path,
                             struct btrfs_key *new_key)
 {
        struct btrfs_disk_key disk_key;
@@ -3173,7 +3174,7 @@ void btrfs_set_item_key_safe(struct btrfs_root *root, struct btrfs_path *path,
        btrfs_set_item_key(eb, &disk_key, slot);
        btrfs_mark_buffer_dirty(eb);
        if (slot == 0)
-               fixup_low_keys(root, path, &disk_key, 1);
+               fixup_low_keys(fs_info, path, &disk_key, 1);
 }
 
 /*
@@ -3692,7 +3693,7 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
        if (left_nritems)
                btrfs_mark_buffer_dirty(left);
        else
-               clean_tree_block(trans, root, left);
+               clean_tree_block(trans, root->fs_info, left);
 
        btrfs_mark_buffer_dirty(right);
 
@@ -3704,7 +3705,7 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
        if (path->slots[0] >= left_nritems) {
                path->slots[0] -= left_nritems;
                if (btrfs_header_nritems(path->nodes[0]) == 0)
-                       clean_tree_block(trans, root, path->nodes[0]);
+                       clean_tree_block(trans, root->fs_info, path->nodes[0]);
                btrfs_tree_unlock(path->nodes[0]);
                free_extent_buffer(path->nodes[0]);
                path->nodes[0] = right;
@@ -3928,10 +3929,10 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
        if (right_nritems)
                btrfs_mark_buffer_dirty(right);
        else
-               clean_tree_block(trans, root, right);
+               clean_tree_block(trans, root->fs_info, right);
 
        btrfs_item_key(right, &disk_key, 0);
-       fixup_low_keys(root, path, &disk_key, 1);
+       fixup_low_keys(root->fs_info, path, &disk_key, 1);
 
        /* then fixup the leaf pointer in the path */
        if (path->slots[0] < push_items) {
@@ -4168,6 +4169,7 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
        int mid;
        int slot;
        struct extent_buffer *right;
+       struct btrfs_fs_info *fs_info = root->fs_info;
        int ret = 0;
        int wret;
        int split;
@@ -4271,10 +4273,10 @@ again:
        btrfs_set_header_backref_rev(right, BTRFS_MIXED_BACKREF_REV);
        btrfs_set_header_owner(right, root->root_key.objectid);
        btrfs_set_header_level(right, 0);
-       write_extent_buffer(right, root->fs_info->fsid,
+       write_extent_buffer(right, fs_info->fsid,
                            btrfs_header_fsid(), BTRFS_FSID_SIZE);
 
-       write_extent_buffer(right, root->fs_info->chunk_tree_uuid,
+       write_extent_buffer(right, fs_info->chunk_tree_uuid,
                            btrfs_header_chunk_tree_uuid(right),
                            BTRFS_UUID_SIZE);
 
@@ -4297,7 +4299,7 @@ again:
                path->nodes[0] = right;
                path->slots[0] = 0;
                if (path->slots[1] == 0)
-                       fixup_low_keys(root, path, &disk_key, 1);
+                       fixup_low_keys(fs_info, path, &disk_key, 1);
        }
        btrfs_mark_buffer_dirty(right);
        return ret;
@@ -4615,7 +4617,7 @@ void btrfs_truncate_item(struct btrfs_root *root, struct btrfs_path *path,
                        btrfs_set_disk_key_offset(&disk_key, offset + size_diff);
                        btrfs_set_item_key(leaf, &disk_key, slot);
                        if (slot == 0)
-                               fixup_low_keys(root, path, &disk_key, 1);
+                               fixup_low_keys(root->fs_info, path, &disk_key, 1);
        }
 
        item = btrfs_item_nr(slot);
@@ -4716,7 +4718,7 @@ void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
 
        if (path->slots[0] == 0) {
                btrfs_cpu_key_to_disk(&disk_key, cpu_key);
-               fixup_low_keys(root, path, &disk_key, 1);
+               fixup_low_keys(root->fs_info, path, &disk_key, 1);
        }
        btrfs_unlock_up_safe(path, 1);
 
@@ -4888,7 +4890,7 @@ static void del_ptr(struct btrfs_root *root, struct btrfs_path *path,
                struct btrfs_disk_key disk_key;
 
                btrfs_node_key(parent, &disk_key, 0);
-               fixup_low_keys(root, path, &disk_key, level + 1);
+               fixup_low_keys(root->fs_info, path, &disk_key, level + 1);
        }
        btrfs_mark_buffer_dirty(parent);
 }
@@ -4981,7 +4983,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
                        btrfs_set_header_level(leaf, 0);
                } else {
                        btrfs_set_path_blocking(path);
-                       clean_tree_block(trans, root, leaf);
+                       clean_tree_block(trans, root->fs_info, leaf);
                        btrfs_del_leaf(trans, root, path, leaf);
                }
        } else {
@@ -4990,7 +4992,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
                        struct btrfs_disk_key disk_key;
 
                        btrfs_item_key(leaf, &disk_key, 0);
-                       fixup_low_keys(root, path, &disk_key, 1);
+                       fixup_low_keys(root->fs_info, path, &disk_key, 1);
                }
 
                /* delete the leaf if it is mostly empty */
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index f9c89cae39ee..6f364e1d8d3d 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1061,6 +1061,12 @@ struct btrfs_block_group_item {
        __le64 flags;
 } __attribute__ ((__packed__));
 
+#define BTRFS_QGROUP_LEVEL_SHIFT 48
+static inline u64 btrfs_qgroup_level(u64 qgroupid)
+{
+       return qgroupid >> BTRFS_QGROUP_LEVEL_SHIFT;
+}
+
 /*
  * is subvolume quota turned on?
  */
@@ -1256,6 +1262,20 @@ struct btrfs_caching_control {
        atomic_t count;
 };
 
+struct btrfs_io_ctl {
+       void *cur, *orig;
+       struct page *page;
+       struct page **pages;
+       struct btrfs_root *root;
+       struct inode *inode;
+       unsigned long size;
+       int index;
+       int num_pages;
+       int entries;
+       int bitmaps;
+       unsigned check_crcs:1;
+};
+
 struct btrfs_block_group_cache {
        struct btrfs_key key;
        struct btrfs_block_group_item item;
@@ -1321,6 +1341,9 @@ struct btrfs_block_group_cache {
 
        /* For dirty block groups */
        struct list_head dirty_list;
+       struct list_head io_list;
+
+       struct btrfs_io_ctl io_ctl;
 };
 
 /* delayed seq elem */
@@ -1329,6 +1352,8 @@ struct seq_list {
        u64 seq;
 };
 
+#define SEQ_LIST_INIT(name)    { .list = LIST_HEAD_INIT((name).list), .seq = 0 }
+
 enum btrfs_orphan_cleanup_state {
        ORPHAN_CLEANUP_STARTED  = 1,
        ORPHAN_CLEANUP_DONE     = 2,
@@ -1472,6 +1497,12 @@ struct btrfs_fs_info {
        struct mutex chunk_mutex;
        struct mutex volume_mutex;
 
+       /*
+        * this is taken to make sure we don't set block groups ro after
+        * the free space cache has been allocated on them
+        */
+       struct mutex ro_block_group_mutex;
+
        /* this is used during read/modify/write to make sure
         * no two ios are trying to mod the same stripe at the same
         * time
@@ -1513,6 +1544,7 @@ struct btrfs_fs_info {
 
        spinlock_t delayed_iput_lock;
        struct list_head delayed_iputs;
+       struct rw_semaphore delayed_iput_sem;
 
        /* this protects tree_mod_seq_list */
        spinlock_t tree_mod_seq_lock;
@@ -3295,6 +3327,9 @@ static inline gfp_t btrfs_alloc_write_mask(struct address_space *mapping)
 }
 
 /* extent-tree.c */
+
+u64 btrfs_csum_bytes_to_leaves(struct btrfs_root *root, u64 csum_bytes);
+
 static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_root *root,
                                                 unsigned num_items)
 {
@@ -3385,6 +3420,8 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
                         u64 bytenr, u64 num_bytes, u64 parent,
                         u64 root_objectid, u64 owner, u64 offset, int no_quota);
 
+int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans,
+                                  struct btrfs_root *root);
 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
                                   struct btrfs_root *root);
 int btrfs_setup_space_cache(struct btrfs_trans_handle *trans,
@@ -3417,7 +3454,7 @@ enum btrfs_reserve_flush_enum {
        BTRFS_RESERVE_FLUSH_ALL,
 };
 
-int btrfs_check_data_free_space(struct inode *inode, u64 bytes);
+int btrfs_check_data_free_space(struct inode *inode, u64 bytes, u64 write_bytes);
 void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes);
 void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
                                  struct btrfs_root *root);
@@ -3440,6 +3477,7 @@ struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root,
                                              unsigned short type);
 void btrfs_free_block_rsv(struct btrfs_root *root,
                          struct btrfs_block_rsv *rsv);
+void __btrfs_free_block_rsv(struct btrfs_block_rsv *rsv);
 int btrfs_block_rsv_add(struct btrfs_root *root,
                        struct btrfs_block_rsv *block_rsv, u64 num_bytes,
                        enum btrfs_reserve_flush_enum flush);
@@ -3486,7 +3524,8 @@ int btrfs_previous_item(struct btrfs_root *root,
                        int type);
 int btrfs_previous_extent_item(struct btrfs_root *root,
                        struct btrfs_path *path, u64 min_objectid);
-void btrfs_set_item_key_safe(struct btrfs_root *root, struct btrfs_path *path,
+void btrfs_set_item_key_safe(struct btrfs_fs_info *fs_info,
+                            struct btrfs_path *path,
                             struct btrfs_key *new_key);
 struct extent_buffer *btrfs_root_node(struct btrfs_root *root);
 struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root);
@@ -4180,7 +4219,8 @@ int btree_readahead_hook(struct btrfs_root *root, struct extent_buffer *eb,
 static inline int is_fstree(u64 rootid)
 {
        if (rootid == BTRFS_FS_TREE_OBJECTID ||
-           (s64)rootid >= (s64)BTRFS_FIRST_FREE_OBJECTID)
+           ((s64)rootid >= (s64)BTRFS_FIRST_FREE_OBJECTID &&
+            !btrfs_qgroup_level(rootid)))
                return 1;
        return 0;
 }
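
The new btrfs_qgroup_level() helper reflects how qgroup IDs are encoded: a hierarchy level in the upper 16 bits (bits 48-63) and the subvolume/qgroup number in the lower 48, which is why extracting the level is a plain shift and why is_fstree() now rejects IDs with a non-zero level (those can never be fs trees). A runnable sketch of the encoding; qgroupid_make is a hypothetical helper for illustration, not a kernel API:

#include <stdint.h>
#include <stdio.h>
#include <inttypes.h>

#define BTRFS_QGROUP_LEVEL_SHIFT 48

/* mirrors the inline helper added in the ctree.h hunk above */
static uint64_t btrfs_qgroup_level(uint64_t qgroupid)
{
        return qgroupid >> BTRFS_QGROUP_LEVEL_SHIFT;
}

/* compose "level/id" the way btrfs-progs notation (e.g. "1/100") reads */
static uint64_t qgroupid_make(uint16_t level, uint64_t id)
{
        return ((uint64_t)level << BTRFS_QGROUP_LEVEL_SHIFT) | id;
}

int main(void)
{
        uint64_t q = qgroupid_make(1, 100);     /* qgroup "1/100" */

        printf("level=%" PRIu64 " id=%" PRIu64 "\n",
               btrfs_qgroup_level(q),
               q & ((1ULL << BTRFS_QGROUP_LEVEL_SHIFT) - 1));
        return 0;
}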
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 82f0c7c95474..cde698a07d21 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -1383,7 +1383,7 @@ out:
 
 
 static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root,
-                                    struct btrfs_root *root, int nr)
+                                    struct btrfs_fs_info *fs_info, int nr)
 {
        struct btrfs_async_delayed_work *async_work;
 
@@ -1399,7 +1399,7 @@ static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root,
                        btrfs_async_run_delayed_root, NULL, NULL);
        async_work->nr = nr;
 
-       btrfs_queue_work(root->fs_info->delayed_workers, &async_work->work);
+       btrfs_queue_work(fs_info->delayed_workers, &async_work->work);
        return 0;
 }
 
@@ -1426,6 +1426,7 @@ static int could_end_wait(struct btrfs_delayed_root *delayed_root, int seq)
 void btrfs_balance_delayed_items(struct btrfs_root *root)
 {
        struct btrfs_delayed_root *delayed_root;
+       struct btrfs_fs_info *fs_info = root->fs_info;
 
        delayed_root = btrfs_get_delayed_root(root);
 
@@ -1438,7 +1439,7 @@ void btrfs_balance_delayed_items(struct btrfs_root *root)
 
                seq = atomic_read(&delayed_root->items_seq);
 
-               ret = btrfs_wq_run_delayed_node(delayed_root, root, 0);
+               ret = btrfs_wq_run_delayed_node(delayed_root, fs_info, 0);
                if (ret)
                        return;
 
@@ -1447,7 +1448,7 @@ void btrfs_balance_delayed_items(struct btrfs_root *root)
                        return;
        }
 
-       btrfs_wq_run_delayed_node(delayed_root, root, BTRFS_DELAYED_BATCH);
+       btrfs_wq_run_delayed_node(delayed_root, fs_info, BTRFS_DELAYED_BATCH);
 }
 
 /* Will return 0 or -ENOMEM */
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index 6d16bea94e1c..8f8ed7d20bac 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -489,11 +489,13 @@ update_existing_ref(struct btrfs_trans_handle *trans,
  * existing and update must have the same bytenr
  */
 static noinline void
-update_existing_head_ref(struct btrfs_delayed_ref_node *existing,
+update_existing_head_ref(struct btrfs_delayed_ref_root *delayed_refs,
+                        struct btrfs_delayed_ref_node *existing,
                         struct btrfs_delayed_ref_node *update)
 {
        struct btrfs_delayed_ref_head *existing_ref;
        struct btrfs_delayed_ref_head *ref;
+       int old_ref_mod;
 
        existing_ref = btrfs_delayed_node_to_head(existing);
        ref = btrfs_delayed_node_to_head(update);
@@ -541,7 +543,20 @@ update_existing_head_ref(struct btrfs_delayed_ref_node *existing,
         * only need the lock for this case cause we could be processing it
         * currently, for refs we just added we know we're a-ok.
         */
+       old_ref_mod = existing_ref->total_ref_mod;
        existing->ref_mod += update->ref_mod;
+       existing_ref->total_ref_mod += update->ref_mod;
+
+       /*
+        * If we are going to from a positive ref mod to a negative or vice
+        * versa we need to make sure to adjust pending_csums accordingly.
+        */
+       if (existing_ref->is_data) {
+               if (existing_ref->total_ref_mod >= 0 && old_ref_mod < 0)
+                       delayed_refs->pending_csums -= existing->num_bytes;
+               if (existing_ref->total_ref_mod < 0 && old_ref_mod >= 0)
+                       delayed_refs->pending_csums += existing->num_bytes;
+       }
        spin_unlock(&existing_ref->lock);
 }
 
@@ -605,6 +620,7 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
        head_ref->is_data = is_data;
        head_ref->ref_root = RB_ROOT;
        head_ref->processing = 0;
+       head_ref->total_ref_mod = count_mod;
 
        spin_lock_init(&head_ref->lock);
        mutex_init(&head_ref->mutex);
@@ -614,7 +630,7 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
        existing = htree_insert(&delayed_refs->href_root,
                                &head_ref->href_node);
        if (existing) {
-               update_existing_head_ref(&existing->node, ref);
+               update_existing_head_ref(delayed_refs, &existing->node, ref);
                /*
                 * we've updated the existing ref, free the newly
                 * allocated ref
@@ -622,6 +638,8 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
                kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
                head_ref = existing;
        } else {
+               if (is_data && count_mod < 0)
+                       delayed_refs->pending_csums += num_bytes;
                delayed_refs->num_heads++;
                delayed_refs->num_heads_ready++;
                atomic_inc(&delayed_refs->num_entries);
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index a764e2340d48..5eb0892396d0 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -88,6 +88,14 @@ struct btrfs_delayed_ref_head {
        struct rb_node href_node;
 
        struct btrfs_delayed_extent_op *extent_op;
+
+       /*
+        * This is used to track the final ref_mod from all the refs associated
+        * with this head ref, this is not adjusted as delayed refs are run,
+        * this is meant to track if we need to do the csum accounting or not.
+        */
+       int total_ref_mod;
+
        /*
         * when a new extent is allocated, it is just reserved in memory
         * The actual extent isn't inserted into the extent allocation tree
@@ -138,6 +146,8 @@ struct btrfs_delayed_ref_root {
        /* total number of head nodes ready for processing */
        unsigned long num_heads_ready;
 
+       u64 pending_csums;
+
        /*
         * set when the tree is flushing before a transaction commit,
         * used by the throttling code to decide if new updates need
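
Taken together, the delayed-ref.c and delayed-ref.h hunks add accounting for checksum work: pending_csums counts bytes of data extents whose net delayed ref_mod is negative, since freeing a data extent also means deleting its checksum items, and the sign-crossing checks keep the counter consistent when a later ref flips a head from net-add to net-free or back. A toy, runnable model of just that bookkeeping rule (the struct and names are simplified stand-ins):

#include <stdio.h>

struct toy_head {
        int total_ref_mod;
        int is_data;
        unsigned long long num_bytes;
};

static unsigned long long pending_csums;

/* pending_csums counts bytes for data heads whose net ref_mod is
 * negative: a pending free implies pending csum deletion */
static void apply_update(struct toy_head *head, int ref_mod)
{
        int old_ref_mod = head->total_ref_mod;

        head->total_ref_mod += ref_mod;
        if (head->is_data) {
                if (head->total_ref_mod >= 0 && old_ref_mod < 0)
                        pending_csums -= head->num_bytes;       /* net free cancelled */
                if (head->total_ref_mod < 0 && old_ref_mod >= 0)
                        pending_csums += head->num_bytes;       /* now a net free */
        }
}

int main(void)
{
        struct toy_head h = { 1, 1, 4096 };

        apply_update(&h, -2);   /* 1 -> -1: crosses zero going down */
        printf("pending_csums = %llu\n", pending_csums);        /* 4096 */
        apply_update(&h, 1);    /* -1 -> 0: pending free cancelled */
        printf("pending_csums = %llu\n", pending_csums);        /* 0 */
        return 0;
}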
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index 5ec03d999c37..0573848c7333 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -670,8 +670,8 @@ void btrfs_dev_replace_status(struct btrfs_fs_info *fs_info,
        case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED:
        case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED:
                srcdev = dev_replace->srcdev;
-               args->status.progress_1000 = div64_u64(dev_replace->cursor_left,
-                       div64_u64(btrfs_device_get_total_bytes(srcdev), 1000));
+               args->status.progress_1000 = div_u64(dev_replace->cursor_left,
+                       div_u64(btrfs_device_get_total_bytes(srcdev), 1000));
                break;
        }
        btrfs_dev_replace_unlock(dev_replace);
@@ -806,7 +806,7 @@ static int btrfs_dev_replace_kthread(void *data)
        btrfs_dev_replace_status(fs_info, status_args);
        progress = status_args->status.progress_1000;
        kfree(status_args);
-       do_div(progress, 10);
+       progress = div_u64(progress, 10);
        printk_in_rcu(KERN_INFO
                "BTRFS: continuing dev_replace from %s (devid %llu) to %s @%u%%\n",
                dev_replace->srcdev->missing ? "<missing disk>" :
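
The division helpers exist because the kernel cannot use a plain u64 '/' on 32-bit architectures (it would pull in a libgcc routine). do_div(n, base) divides n in place with a 32-bit divisor and returns the remainder, div_u64(a, b) returns the quotient for a 32-bit divisor, and div64_u64(a, b) handles a full 64-bit divisor; when the divisor is known to fit in 32 bits, as with 1000 and 10 here, div_u64 is the cheaper and clearer choice. A userspace sketch of the progress arithmetic in the first hunk, with div_u64 as a stand-in for the kernel helper and made-up device sizes:

#include <stdint.h>
#include <stdio.h>
#include <inttypes.h>

/* userspace stand-in for the kernel's div_u64(u64, u32) */
static uint64_t div_u64(uint64_t dividend, uint32_t divisor)
{
        return dividend / divisor;
}

int main(void)
{
        uint64_t total_bytes = 500ULL * 1024 * 1024 * 1024;     /* 500 GiB source device */
        uint64_t cursor_left = 125ULL * 1024 * 1024 * 1024;     /* bytes already copied */

        /* same shape as the hunk: permille = copied / (total / 1000) */
        uint64_t progress_1000 = div_u64(cursor_left, div_u64(total_bytes, 1000));

        printf("progress: %" PRIu64 "/1000 (%" PRIu64 "%%)\n",
               progress_1000, div_u64(progress_1000, 10));      /* 250/1000 (25%) */
        return 0;
}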
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 639f2663ed3f..2ef9a4b72d06 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
@@ -54,7 +54,7 @@ | |||
54 | #include <asm/cpufeature.h> | 54 | #include <asm/cpufeature.h> |
55 | #endif | 55 | #endif |
56 | 56 | ||
57 | static struct extent_io_ops btree_extent_io_ops; | 57 | static const struct extent_io_ops btree_extent_io_ops; |
58 | static void end_workqueue_fn(struct btrfs_work *work); | 58 | static void end_workqueue_fn(struct btrfs_work *work); |
59 | static void free_fs_root(struct btrfs_root *root); | 59 | static void free_fs_root(struct btrfs_root *root); |
60 | static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, | 60 | static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, |
@@ -274,10 +274,11 @@ void btrfs_csum_final(u32 crc, char *result) | |||
274 | * compute the csum for a btree block, and either verify it or write it | 274 | * compute the csum for a btree block, and either verify it or write it |
275 | * into the csum field of the block. | 275 | * into the csum field of the block. |
276 | */ | 276 | */ |
277 | static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, | 277 | static int csum_tree_block(struct btrfs_fs_info *fs_info, |
278 | struct extent_buffer *buf, | ||
278 | int verify) | 279 | int verify) |
279 | { | 280 | { |
280 | u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy); | 281 | u16 csum_size = btrfs_super_csum_size(fs_info->super_copy); |
281 | char *result = NULL; | 282 | char *result = NULL; |
282 | unsigned long len; | 283 | unsigned long len; |
283 | unsigned long cur_len; | 284 | unsigned long cur_len; |
@@ -302,7 +303,7 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, | |||
302 | offset += cur_len; | 303 | offset += cur_len; |
303 | } | 304 | } |
304 | if (csum_size > sizeof(inline_result)) { | 305 | if (csum_size > sizeof(inline_result)) { |
305 | result = kzalloc(csum_size * sizeof(char), GFP_NOFS); | 306 | result = kzalloc(csum_size, GFP_NOFS); |
306 | if (!result) | 307 | if (!result) |
307 | return 1; | 308 | return 1; |
308 | } else { | 309 | } else { |
@@ -321,7 +322,7 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, | |||
321 | printk_ratelimited(KERN_WARNING | 322 | printk_ratelimited(KERN_WARNING |
322 | "BTRFS: %s checksum verify failed on %llu wanted %X found %X " | 323 | "BTRFS: %s checksum verify failed on %llu wanted %X found %X " |
323 | "level %d\n", | 324 | "level %d\n", |
324 | root->fs_info->sb->s_id, buf->start, | 325 | fs_info->sb->s_id, buf->start, |
325 | val, found, btrfs_header_level(buf)); | 326 | val, found, btrfs_header_level(buf)); |
326 | if (result != (char *)&inline_result) | 327 | if (result != (char *)&inline_result) |
327 | kfree(result); | 328 | kfree(result); |
@@ -418,12 +419,6 @@ static int btrfs_check_super_csum(char *raw_disk_sb) | |||
418 | 419 | ||
419 | if (memcmp(raw_disk_sb, result, csum_size)) | 420 | if (memcmp(raw_disk_sb, result, csum_size)) |
420 | ret = 1; | 421 | ret = 1; |
421 | |||
422 | if (ret && btrfs_super_generation(disk_sb) < 10) { | ||
423 | printk(KERN_WARNING | ||
424 | "BTRFS: super block crcs don't match, older mkfs detected\n"); | ||
425 | ret = 0; | ||
426 | } | ||
427 | } | 422 | } |
428 | 423 | ||
429 | if (csum_type >= ARRAY_SIZE(btrfs_csum_sizes)) { | 424 | if (csum_type >= ARRAY_SIZE(btrfs_csum_sizes)) { |
@@ -501,7 +496,7 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root, | |||
501 | * we only fill in the checksum field in the first page of a multi-page block | 496 | * we only fill in the checksum field in the first page of a multi-page block |
502 | */ | 497 | */ |
503 | 498 | ||
504 | static int csum_dirty_buffer(struct btrfs_root *root, struct page *page) | 499 | static int csum_dirty_buffer(struct btrfs_fs_info *fs_info, struct page *page) |
505 | { | 500 | { |
506 | u64 start = page_offset(page); | 501 | u64 start = page_offset(page); |
507 | u64 found_start; | 502 | u64 found_start; |
@@ -513,14 +508,14 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page) | |||
513 | found_start = btrfs_header_bytenr(eb); | 508 | found_start = btrfs_header_bytenr(eb); |
514 | if (WARN_ON(found_start != start || !PageUptodate(page))) | 509 | if (WARN_ON(found_start != start || !PageUptodate(page))) |
515 | return 0; | 510 | return 0; |
516 | csum_tree_block(root, eb, 0); | 511 | csum_tree_block(fs_info, eb, 0); |
517 | return 0; | 512 | return 0; |
518 | } | 513 | } |
519 | 514 | ||
520 | static int check_tree_block_fsid(struct btrfs_root *root, | 515 | static int check_tree_block_fsid(struct btrfs_fs_info *fs_info, |
521 | struct extent_buffer *eb) | 516 | struct extent_buffer *eb) |
522 | { | 517 | { |
523 | struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; | 518 | struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; |
524 | u8 fsid[BTRFS_UUID_SIZE]; | 519 | u8 fsid[BTRFS_UUID_SIZE]; |
525 | int ret = 1; | 520 | int ret = 1; |
526 | 521 | ||
@@ -640,7 +635,7 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, | |||
640 | ret = -EIO; | 635 | ret = -EIO; |
641 | goto err; | 636 | goto err; |
642 | } | 637 | } |
643 | if (check_tree_block_fsid(root, eb)) { | 638 | if (check_tree_block_fsid(root->fs_info, eb)) { |
644 | printk_ratelimited(KERN_ERR "BTRFS (device %s): bad fsid on block %llu\n", | 639 | printk_ratelimited(KERN_ERR "BTRFS (device %s): bad fsid on block %llu\n", |
645 | eb->fs_info->sb->s_id, eb->start); | 640 | eb->fs_info->sb->s_id, eb->start); |
646 | ret = -EIO; | 641 | ret = -EIO; |
@@ -657,7 +652,7 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, | |||
657 | btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb), | 652 | btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb), |
658 | eb, found_level); | 653 | eb, found_level); |
659 | 654 | ||
660 | ret = csum_tree_block(root, eb, 1); | 655 | ret = csum_tree_block(root->fs_info, eb, 1); |
661 | if (ret) { | 656 | if (ret) { |
662 | ret = -EIO; | 657 | ret = -EIO; |
663 | goto err; | 658 | goto err; |
@@ -882,7 +877,7 @@ static int btree_csum_one_bio(struct bio *bio) | |||
882 | 877 | ||
883 | bio_for_each_segment_all(bvec, bio, i) { | 878 | bio_for_each_segment_all(bvec, bio, i) { |
884 | root = BTRFS_I(bvec->bv_page->mapping->host)->root; | 879 | root = BTRFS_I(bvec->bv_page->mapping->host)->root; |
885 | ret = csum_dirty_buffer(root, bvec->bv_page); | 880 | ret = csum_dirty_buffer(root->fs_info, bvec->bv_page); |
886 | if (ret) | 881 | if (ret) |
887 | break; | 882 | break; |
888 | } | 883 | } |
@@ -1119,10 +1114,10 @@ int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr, | |||
1119 | return 0; | 1114 | return 0; |
1120 | } | 1115 | } |
1121 | 1116 | ||
1122 | struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, | 1117 | struct extent_buffer *btrfs_find_tree_block(struct btrfs_fs_info *fs_info, |
1123 | u64 bytenr) | 1118 | u64 bytenr) |
1124 | { | 1119 | { |
1125 | return find_extent_buffer(root->fs_info, bytenr); | 1120 | return find_extent_buffer(fs_info, bytenr); |
1126 | } | 1121 | } |
1127 | 1122 | ||
1128 | struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, | 1123 | struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, |
@@ -1165,11 +1160,10 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, | |||
1165 | 1160 | ||
1166 | } | 1161 | } |
1167 | 1162 | ||
1168 | void clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, | 1163 | void clean_tree_block(struct btrfs_trans_handle *trans, |
1164 | struct btrfs_fs_info *fs_info, | ||
1169 | struct extent_buffer *buf) | 1165 | struct extent_buffer *buf) |
1170 | { | 1166 | { |
1171 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
1172 | |||
1173 | if (btrfs_header_generation(buf) == | 1167 | if (btrfs_header_generation(buf) == |
1174 | fs_info->running_transaction->transid) { | 1168 | fs_info->running_transaction->transid) { |
1175 | btrfs_assert_tree_locked(buf); | 1169 | btrfs_assert_tree_locked(buf); |
@@ -2146,6 +2140,267 @@ void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info) | |||
2146 | } | 2140 | } |
2147 | } | 2141 | } |
2148 | 2142 | ||
2143 | static void btrfs_init_scrub(struct btrfs_fs_info *fs_info) | ||
2144 | { | ||
2145 | mutex_init(&fs_info->scrub_lock); | ||
2146 | atomic_set(&fs_info->scrubs_running, 0); | ||
2147 | atomic_set(&fs_info->scrub_pause_req, 0); | ||
2148 | atomic_set(&fs_info->scrubs_paused, 0); | ||
2149 | atomic_set(&fs_info->scrub_cancel_req, 0); | ||
2150 | init_waitqueue_head(&fs_info->scrub_pause_wait); | ||
2151 | fs_info->scrub_workers_refcnt = 0; | ||
2152 | } | ||
2153 | |||
2154 | static void btrfs_init_balance(struct btrfs_fs_info *fs_info) | ||
2155 | { | ||
2156 | spin_lock_init(&fs_info->balance_lock); | ||
2157 | mutex_init(&fs_info->balance_mutex); | ||
2158 | atomic_set(&fs_info->balance_running, 0); | ||
2159 | atomic_set(&fs_info->balance_pause_req, 0); | ||
2160 | atomic_set(&fs_info->balance_cancel_req, 0); | ||
2161 | fs_info->balance_ctl = NULL; | ||
2162 | init_waitqueue_head(&fs_info->balance_wait_q); | ||
2163 | } | ||
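btrfs_init_scrub() and btrfs_init_balance() lift straight-line initialization out of open_ctree(); their bodies are verbatim moves of the inline code deleted further down. One subtle relocation: init_waitqueue_head(&fs_info->replace_wait) used to sit in the scrub block but now lives in btrfs_init_dev_replace_locks() below, where it logically belongs. After the refactor the call sites in open_ctree() collapse to single lines, as the later hunks in this diff show:

    /* in open_ctree(), each call replaces a block of inline setup */
    btrfs_init_scrub(fs_info);
    btrfs_init_balance(fs_info);
    btrfs_init_dev_replace_locks(fs_info);
    btrfs_init_qgroup(fs_info);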
2164 | |||
2165 | static void btrfs_init_btree_inode(struct btrfs_fs_info *fs_info, | ||
2166 | struct btrfs_root *tree_root) | ||
2167 | { | ||
2168 | fs_info->btree_inode->i_ino = BTRFS_BTREE_INODE_OBJECTID; | ||
2169 | set_nlink(fs_info->btree_inode, 1); | ||
2170 | /* | ||
2171 | * we set the i_size on the btree inode to the max possible offset. | ||
2172 | * the real end of the address space is determined by all of | ||
2173 | * the devices in the system | ||
2174 | */ | ||
2175 | fs_info->btree_inode->i_size = OFFSET_MAX; | ||
2176 | fs_info->btree_inode->i_mapping->a_ops = &btree_aops; | ||
2177 | |||
2178 | RB_CLEAR_NODE(&BTRFS_I(fs_info->btree_inode)->rb_node); | ||
2179 | extent_io_tree_init(&BTRFS_I(fs_info->btree_inode)->io_tree, | ||
2180 | fs_info->btree_inode->i_mapping); | ||
2181 | BTRFS_I(fs_info->btree_inode)->io_tree.track_uptodate = 0; | ||
2182 | extent_map_tree_init(&BTRFS_I(fs_info->btree_inode)->extent_tree); | ||
2183 | |||
2184 | BTRFS_I(fs_info->btree_inode)->io_tree.ops = &btree_extent_io_ops; | ||
2185 | |||
2186 | BTRFS_I(fs_info->btree_inode)->root = tree_root; | ||
2187 | memset(&BTRFS_I(fs_info->btree_inode)->location, 0, | ||
2188 | sizeof(struct btrfs_key)); | ||
2189 | set_bit(BTRFS_INODE_DUMMY, | ||
2190 | &BTRFS_I(fs_info->btree_inode)->runtime_flags); | ||
2191 | btrfs_insert_inode_hash(fs_info->btree_inode); | ||
2192 | } | ||
2193 | |||
2194 | static void btrfs_init_dev_replace_locks(struct btrfs_fs_info *fs_info) | ||
2195 | { | ||
2196 | fs_info->dev_replace.lock_owner = 0; | ||
2197 | atomic_set(&fs_info->dev_replace.nesting_level, 0); | ||
2198 | mutex_init(&fs_info->dev_replace.lock_finishing_cancel_unmount); | ||
2199 | mutex_init(&fs_info->dev_replace.lock_management_lock); | ||
2200 | mutex_init(&fs_info->dev_replace.lock); | ||
2201 | init_waitqueue_head(&fs_info->replace_wait); | ||
2202 | } | ||
2203 | |||
2204 | static void btrfs_init_qgroup(struct btrfs_fs_info *fs_info) | ||
2205 | { | ||
2206 | spin_lock_init(&fs_info->qgroup_lock); | ||
2207 | mutex_init(&fs_info->qgroup_ioctl_lock); | ||
2208 | fs_info->qgroup_tree = RB_ROOT; | ||
2209 | fs_info->qgroup_op_tree = RB_ROOT; | ||
2210 | INIT_LIST_HEAD(&fs_info->dirty_qgroups); | ||
2211 | fs_info->qgroup_seq = 1; | ||
2212 | fs_info->quota_enabled = 0; | ||
2213 | fs_info->pending_quota_state = 0; | ||
2214 | fs_info->qgroup_ulist = NULL; | ||
2215 | mutex_init(&fs_info->qgroup_rescan_lock); | ||
2216 | } | ||
2217 | |||
2218 | static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info, | ||
2219 | struct btrfs_fs_devices *fs_devices) | ||
2220 | { | ||
2221 | int max_active = fs_info->thread_pool_size; | ||
2222 | unsigned int flags = WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_UNBOUND; | ||
2223 | |||
2224 | fs_info->workers = | ||
2225 | btrfs_alloc_workqueue("worker", flags | WQ_HIGHPRI, | ||
2226 | max_active, 16); | ||
2227 | |||
2228 | fs_info->delalloc_workers = | ||
2229 | btrfs_alloc_workqueue("delalloc", flags, max_active, 2); | ||
2230 | |||
2231 | fs_info->flush_workers = | ||
2232 | btrfs_alloc_workqueue("flush_delalloc", flags, max_active, 0); | ||
2233 | |||
2234 | fs_info->caching_workers = | ||
2235 | btrfs_alloc_workqueue("cache", flags, max_active, 0); | ||
2236 | |||
2237 | /* | ||
2238 | * a higher idle thresh on the submit workers makes it much more | ||
2239 | * likely that bios will be sent down in a sane order to the | ||
2240 | * devices | ||
2241 | */ | ||
2242 | fs_info->submit_workers = | ||
2243 | btrfs_alloc_workqueue("submit", flags, | ||
2244 | min_t(u64, fs_devices->num_devices, | ||
2245 | max_active), 64); | ||
2246 | |||
2247 | fs_info->fixup_workers = | ||
2248 | btrfs_alloc_workqueue("fixup", flags, 1, 0); | ||
2249 | |||
2250 | /* | ||
2251 | * endios are largely parallel and should have a very | ||
2252 | * low idle thresh | ||
2253 | */ | ||
2254 | fs_info->endio_workers = | ||
2255 | btrfs_alloc_workqueue("endio", flags, max_active, 4); | ||
2256 | fs_info->endio_meta_workers = | ||
2257 | btrfs_alloc_workqueue("endio-meta", flags, max_active, 4); | ||
2258 | fs_info->endio_meta_write_workers = | ||
2259 | btrfs_alloc_workqueue("endio-meta-write", flags, max_active, 2); | ||
2260 | fs_info->endio_raid56_workers = | ||
2261 | btrfs_alloc_workqueue("endio-raid56", flags, max_active, 4); | ||
2262 | fs_info->endio_repair_workers = | ||
2263 | btrfs_alloc_workqueue("endio-repair", flags, 1, 0); | ||
2264 | fs_info->rmw_workers = | ||
2265 | btrfs_alloc_workqueue("rmw", flags, max_active, 2); | ||
2266 | fs_info->endio_write_workers = | ||
2267 | btrfs_alloc_workqueue("endio-write", flags, max_active, 2); | ||
2268 | fs_info->endio_freespace_worker = | ||
2269 | btrfs_alloc_workqueue("freespace-write", flags, max_active, 0); | ||
2270 | fs_info->delayed_workers = | ||
2271 | btrfs_alloc_workqueue("delayed-meta", flags, max_active, 0); | ||
2272 | fs_info->readahead_workers = | ||
2273 | btrfs_alloc_workqueue("readahead", flags, max_active, 2); | ||
2274 | fs_info->qgroup_rescan_workers = | ||
2275 | btrfs_alloc_workqueue("qgroup-rescan", flags, 1, 0); | ||
2276 | fs_info->extent_workers = | ||
2277 | btrfs_alloc_workqueue("extent-refs", flags, | ||
2278 | min_t(u64, fs_devices->num_devices, | ||
2279 | max_active), 8); | ||
2280 | |||
2281 | if (!(fs_info->workers && fs_info->delalloc_workers && | ||
2282 | fs_info->submit_workers && fs_info->flush_workers && | ||
2283 | fs_info->endio_workers && fs_info->endio_meta_workers && | ||
2284 | fs_info->endio_meta_write_workers && | ||
2285 | fs_info->endio_repair_workers && | ||
2286 | fs_info->endio_write_workers && fs_info->endio_raid56_workers && | ||
2287 | fs_info->endio_freespace_worker && fs_info->rmw_workers && | ||
2288 | fs_info->caching_workers && fs_info->readahead_workers && | ||
2289 | fs_info->fixup_workers && fs_info->delayed_workers && | ||
2290 | fs_info->extent_workers && | ||
2291 | fs_info->qgroup_rescan_workers)) { | ||
2292 | return -ENOMEM; | ||
2293 | } | ||
2294 | |||
2295 | return 0; | ||
2296 | } | ||
2297 | |||
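btrfs_alloc_workqueue() takes a queue name, WQ_* flags, a max_active concurrency limit and an idle threshold; the higher thresholds (16 for the main workers, 64 for submit) keep those queues from spawning threads too eagerly. The helper deliberately allocates every queue before checking any of them, so a single -ENOMEM return covers all failure cases and the caller's fail_sb_buffer path tears down whatever did get allocated. A minimal sketch of the same allocate-then-validate pattern, with hypothetical queue names:

    struct btrfs_workqueue *a, *b;
    unsigned int flags = WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_UNBOUND;

    a = btrfs_alloc_workqueue("demo-a", flags | WQ_HIGHPRI, max_active, 16);
    b = btrfs_alloc_workqueue("demo-b", flags, max_active, 0);
    if (!(a && b))
            return -ENOMEM;  /* caller is expected to free the survivors */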
2298 | static int btrfs_replay_log(struct btrfs_fs_info *fs_info, | ||
2299 | struct btrfs_fs_devices *fs_devices) | ||
2300 | { | ||
2301 | int ret; | ||
2302 | struct btrfs_root *tree_root = fs_info->tree_root; | ||
2303 | struct btrfs_root *log_tree_root; | ||
2304 | struct btrfs_super_block *disk_super = fs_info->super_copy; | ||
2305 | u64 bytenr = btrfs_super_log_root(disk_super); | ||
2306 | |||
2307 | if (fs_devices->rw_devices == 0) { | ||
2308 | printk(KERN_WARNING "BTRFS: log replay required " | ||
2309 | "on RO media\n"); | ||
2310 | return -EIO; | ||
2311 | } | ||
2312 | |||
2313 | log_tree_root = btrfs_alloc_root(fs_info); | ||
2314 | if (!log_tree_root) | ||
2315 | return -ENOMEM; | ||
2316 | |||
2317 | __setup_root(tree_root->nodesize, tree_root->sectorsize, | ||
2318 | tree_root->stripesize, log_tree_root, fs_info, | ||
2319 | BTRFS_TREE_LOG_OBJECTID); | ||
2320 | |||
2321 | log_tree_root->node = read_tree_block(tree_root, bytenr, | ||
2322 | fs_info->generation + 1); | ||
2323 | if (!log_tree_root->node || | ||
2324 | !extent_buffer_uptodate(log_tree_root->node)) { | ||
2325 | printk(KERN_ERR "BTRFS: failed to read log tree\n"); | ||
2326 | free_extent_buffer(log_tree_root->node); | ||
2327 | kfree(log_tree_root); | ||
2328 | return -EIO; | ||
2329 | } | ||
2330 | /* returns with log_tree_root freed on success */ | ||
2331 | ret = btrfs_recover_log_trees(log_tree_root); | ||
2332 | if (ret) { | ||
2333 | btrfs_error(tree_root->fs_info, ret, | ||
2334 | "Failed to recover log tree"); | ||
2335 | free_extent_buffer(log_tree_root->node); | ||
2336 | kfree(log_tree_root); | ||
2337 | return ret; | ||
2338 | } | ||
2339 | |||
2340 | if (fs_info->sb->s_flags & MS_RDONLY) { | ||
2341 | ret = btrfs_commit_super(tree_root); | ||
2342 | if (ret) | ||
2343 | return ret; | ||
2344 | } | ||
2345 | |||
2346 | return 0; | ||
2347 | } | ||
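btrfs_replay_log() folds the inline log-replay block out of open_ctree() (deleted in a later hunk). The read happens at generation + 1 because the log tree is written after the last committed transaction and so carries the next transid; that is an inference from the surrounding code rather than something this diff states. Usage, exactly as the later open_ctree() hunk shows:

    /* do not make disk changes in broken FS */
    if (btrfs_super_log_root(disk_super) != 0) {
            ret = btrfs_replay_log(fs_info, fs_devices);
            if (ret) {
                    err = ret;
                    goto fail_qgroup;
            }
    }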
2348 | |||
2349 | static int btrfs_read_roots(struct btrfs_fs_info *fs_info, | ||
2350 | struct btrfs_root *tree_root) | ||
2351 | { | ||
2352 | struct btrfs_root *root; | ||
2353 | struct btrfs_key location; | ||
2354 | int ret; | ||
2355 | |||
2356 | location.objectid = BTRFS_EXTENT_TREE_OBJECTID; | ||
2357 | location.type = BTRFS_ROOT_ITEM_KEY; | ||
2358 | location.offset = 0; | ||
2359 | |||
2360 | root = btrfs_read_tree_root(tree_root, &location); | ||
2361 | if (IS_ERR(root)) | ||
2362 | return PTR_ERR(root); | ||
2363 | set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state); | ||
2364 | fs_info->extent_root = root; | ||
2365 | |||
2366 | location.objectid = BTRFS_DEV_TREE_OBJECTID; | ||
2367 | root = btrfs_read_tree_root(tree_root, &location); | ||
2368 | if (IS_ERR(root)) | ||
2369 | return PTR_ERR(root); | ||
2370 | set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state); | ||
2371 | fs_info->dev_root = root; | ||
2372 | btrfs_init_devices_late(fs_info); | ||
2373 | |||
2374 | location.objectid = BTRFS_CSUM_TREE_OBJECTID; | ||
2375 | root = btrfs_read_tree_root(tree_root, &location); | ||
2376 | if (IS_ERR(root)) | ||
2377 | return PTR_ERR(root); | ||
2378 | set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state); | ||
2379 | fs_info->csum_root = root; | ||
2380 | |||
2381 | location.objectid = BTRFS_QUOTA_TREE_OBJECTID; | ||
2382 | root = btrfs_read_tree_root(tree_root, &location); | ||
2383 | if (!IS_ERR(root)) { | ||
2384 | set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state); | ||
2385 | fs_info->quota_enabled = 1; | ||
2386 | fs_info->pending_quota_state = 1; | ||
2387 | fs_info->quota_root = root; | ||
2388 | } | ||
2389 | |||
2390 | location.objectid = BTRFS_UUID_TREE_OBJECTID; | ||
2391 | root = btrfs_read_tree_root(tree_root, &location); | ||
2392 | if (IS_ERR(root)) { | ||
2393 | ret = PTR_ERR(root); | ||
2394 | if (ret != -ENOENT) | ||
2395 | return ret; | ||
2396 | } else { | ||
2397 | set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state); | ||
2398 | fs_info->uuid_root = root; | ||
2399 | } | ||
2400 | |||
2401 | return 0; | ||
2402 | } | ||
2403 | |||
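btrfs_read_roots() gathers the per-tree lookups: every tree hangs off a root item in the tree root, keyed by (objectid, BTRFS_ROOT_ITEM_KEY, 0). The quota and UUID trees are optional, which is why their lookup failures are tolerated; it also lets the caller later test fs_info->uuid_root directly instead of the old create_uuid_tree flag. The lookup pattern for any one tree, in isolation:

    struct btrfs_key location = {
            .objectid = BTRFS_CSUM_TREE_OBJECTID,  /* any root-item objectid */
            .type     = BTRFS_ROOT_ITEM_KEY,
            .offset   = 0,
    };
    struct btrfs_root *root = btrfs_read_tree_root(tree_root, &location);

    if (IS_ERR(root))
            return PTR_ERR(root);
    set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);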
2149 | int open_ctree(struct super_block *sb, | 2404 | int open_ctree(struct super_block *sb, |
2150 | struct btrfs_fs_devices *fs_devices, | 2405 | struct btrfs_fs_devices *fs_devices, |
2151 | char *options) | 2406 | char *options) |
@@ -2160,21 +2415,12 @@ int open_ctree(struct super_block *sb, | |||
2160 | struct btrfs_super_block *disk_super; | 2415 | struct btrfs_super_block *disk_super; |
2161 | struct btrfs_fs_info *fs_info = btrfs_sb(sb); | 2416 | struct btrfs_fs_info *fs_info = btrfs_sb(sb); |
2162 | struct btrfs_root *tree_root; | 2417 | struct btrfs_root *tree_root; |
2163 | struct btrfs_root *extent_root; | ||
2164 | struct btrfs_root *csum_root; | ||
2165 | struct btrfs_root *chunk_root; | 2418 | struct btrfs_root *chunk_root; |
2166 | struct btrfs_root *dev_root; | ||
2167 | struct btrfs_root *quota_root; | ||
2168 | struct btrfs_root *uuid_root; | ||
2169 | struct btrfs_root *log_tree_root; | ||
2170 | int ret; | 2419 | int ret; |
2171 | int err = -EINVAL; | 2420 | int err = -EINVAL; |
2172 | int num_backups_tried = 0; | 2421 | int num_backups_tried = 0; |
2173 | int backup_index = 0; | 2422 | int backup_index = 0; |
2174 | int max_active; | 2423 | int max_active; |
2175 | int flags = WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_UNBOUND; | ||
2176 | bool create_uuid_tree; | ||
2177 | bool check_uuid_tree; | ||
2178 | 2424 | ||
2179 | tree_root = fs_info->tree_root = btrfs_alloc_root(fs_info); | 2425 | tree_root = fs_info->tree_root = btrfs_alloc_root(fs_info); |
2180 | chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info); | 2426 | chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info); |
@@ -2241,11 +2487,12 @@ int open_ctree(struct super_block *sb, | |||
2241 | spin_lock_init(&fs_info->qgroup_op_lock); | 2487 | spin_lock_init(&fs_info->qgroup_op_lock); |
2242 | spin_lock_init(&fs_info->buffer_lock); | 2488 | spin_lock_init(&fs_info->buffer_lock); |
2243 | spin_lock_init(&fs_info->unused_bgs_lock); | 2489 | spin_lock_init(&fs_info->unused_bgs_lock); |
2244 | mutex_init(&fs_info->unused_bg_unpin_mutex); | ||
2245 | rwlock_init(&fs_info->tree_mod_log_lock); | 2490 | rwlock_init(&fs_info->tree_mod_log_lock); |
2491 | mutex_init(&fs_info->unused_bg_unpin_mutex); | ||
2246 | mutex_init(&fs_info->reloc_mutex); | 2492 | mutex_init(&fs_info->reloc_mutex); |
2247 | mutex_init(&fs_info->delalloc_root_mutex); | 2493 | mutex_init(&fs_info->delalloc_root_mutex); |
2248 | seqlock_init(&fs_info->profiles_lock); | 2494 | seqlock_init(&fs_info->profiles_lock); |
2495 | init_rwsem(&fs_info->delayed_iput_sem); | ||
2249 | 2496 | ||
2250 | init_completion(&fs_info->kobj_unregister); | 2497 | init_completion(&fs_info->kobj_unregister); |
2251 | INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots); | 2498 | INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots); |
@@ -2276,7 +2523,7 @@ int open_ctree(struct super_block *sb, | |||
2276 | fs_info->free_chunk_space = 0; | 2523 | fs_info->free_chunk_space = 0; |
2277 | fs_info->tree_mod_log = RB_ROOT; | 2524 | fs_info->tree_mod_log = RB_ROOT; |
2278 | fs_info->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL; | 2525 | fs_info->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL; |
2279 | fs_info->avg_delayed_ref_runtime = div64_u64(NSEC_PER_SEC, 64); | 2526 | fs_info->avg_delayed_ref_runtime = NSEC_PER_SEC >> 6; /* div by 64 */ |
2280 | /* readahead state */ | 2527 | /* readahead state */ |
2281 | INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_WAIT); | 2528 | INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_WAIT); |
2282 | spin_lock_init(&fs_info->reada_lock); | 2529 | spin_lock_init(&fs_info->reada_lock); |
@@ -2294,55 +2541,18 @@ int open_ctree(struct super_block *sb, | |||
2294 | } | 2541 | } |
2295 | btrfs_init_delayed_root(fs_info->delayed_root); | 2542 | btrfs_init_delayed_root(fs_info->delayed_root); |
2296 | 2543 | ||
2297 | mutex_init(&fs_info->scrub_lock); | 2544 | btrfs_init_scrub(fs_info); |
2298 | atomic_set(&fs_info->scrubs_running, 0); | ||
2299 | atomic_set(&fs_info->scrub_pause_req, 0); | ||
2300 | atomic_set(&fs_info->scrubs_paused, 0); | ||
2301 | atomic_set(&fs_info->scrub_cancel_req, 0); | ||
2302 | init_waitqueue_head(&fs_info->replace_wait); | ||
2303 | init_waitqueue_head(&fs_info->scrub_pause_wait); | ||
2304 | fs_info->scrub_workers_refcnt = 0; | ||
2305 | #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY | 2545 | #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY |
2306 | fs_info->check_integrity_print_mask = 0; | 2546 | fs_info->check_integrity_print_mask = 0; |
2307 | #endif | 2547 | #endif |
2308 | 2548 | btrfs_init_balance(fs_info); | |
2309 | spin_lock_init(&fs_info->balance_lock); | ||
2310 | mutex_init(&fs_info->balance_mutex); | ||
2311 | atomic_set(&fs_info->balance_running, 0); | ||
2312 | atomic_set(&fs_info->balance_pause_req, 0); | ||
2313 | atomic_set(&fs_info->balance_cancel_req, 0); | ||
2314 | fs_info->balance_ctl = NULL; | ||
2315 | init_waitqueue_head(&fs_info->balance_wait_q); | ||
2316 | btrfs_init_async_reclaim_work(&fs_info->async_reclaim_work); | 2549 | btrfs_init_async_reclaim_work(&fs_info->async_reclaim_work); |
2317 | 2550 | ||
2318 | sb->s_blocksize = 4096; | 2551 | sb->s_blocksize = 4096; |
2319 | sb->s_blocksize_bits = blksize_bits(4096); | 2552 | sb->s_blocksize_bits = blksize_bits(4096); |
2320 | sb->s_bdi = &fs_info->bdi; | 2553 | sb->s_bdi = &fs_info->bdi; |
2321 | 2554 | ||
2322 | fs_info->btree_inode->i_ino = BTRFS_BTREE_INODE_OBJECTID; | 2555 | btrfs_init_btree_inode(fs_info, tree_root); |
2323 | set_nlink(fs_info->btree_inode, 1); | ||
2324 | /* | ||
2325 | * we set the i_size on the btree inode to the max possible int. | ||
2326 | * the real end of the address space is determined by all of | ||
2327 | * the devices in the system | ||
2328 | */ | ||
2329 | fs_info->btree_inode->i_size = OFFSET_MAX; | ||
2330 | fs_info->btree_inode->i_mapping->a_ops = &btree_aops; | ||
2331 | |||
2332 | RB_CLEAR_NODE(&BTRFS_I(fs_info->btree_inode)->rb_node); | ||
2333 | extent_io_tree_init(&BTRFS_I(fs_info->btree_inode)->io_tree, | ||
2334 | fs_info->btree_inode->i_mapping); | ||
2335 | BTRFS_I(fs_info->btree_inode)->io_tree.track_uptodate = 0; | ||
2336 | extent_map_tree_init(&BTRFS_I(fs_info->btree_inode)->extent_tree); | ||
2337 | |||
2338 | BTRFS_I(fs_info->btree_inode)->io_tree.ops = &btree_extent_io_ops; | ||
2339 | |||
2340 | BTRFS_I(fs_info->btree_inode)->root = tree_root; | ||
2341 | memset(&BTRFS_I(fs_info->btree_inode)->location, 0, | ||
2342 | sizeof(struct btrfs_key)); | ||
2343 | set_bit(BTRFS_INODE_DUMMY, | ||
2344 | &BTRFS_I(fs_info->btree_inode)->runtime_flags); | ||
2345 | btrfs_insert_inode_hash(fs_info->btree_inode); | ||
2346 | 2556 | ||
2347 | spin_lock_init(&fs_info->block_group_cache_lock); | 2557 | spin_lock_init(&fs_info->block_group_cache_lock); |
2348 | fs_info->block_group_cache_tree = RB_ROOT; | 2558 | fs_info->block_group_cache_tree = RB_ROOT; |
@@ -2363,26 +2573,14 @@ int open_ctree(struct super_block *sb, | |||
2363 | mutex_init(&fs_info->transaction_kthread_mutex); | 2573 | mutex_init(&fs_info->transaction_kthread_mutex); |
2364 | mutex_init(&fs_info->cleaner_mutex); | 2574 | mutex_init(&fs_info->cleaner_mutex); |
2365 | mutex_init(&fs_info->volume_mutex); | 2575 | mutex_init(&fs_info->volume_mutex); |
2576 | mutex_init(&fs_info->ro_block_group_mutex); | ||
2366 | init_rwsem(&fs_info->commit_root_sem); | 2577 | init_rwsem(&fs_info->commit_root_sem); |
2367 | init_rwsem(&fs_info->cleanup_work_sem); | 2578 | init_rwsem(&fs_info->cleanup_work_sem); |
2368 | init_rwsem(&fs_info->subvol_sem); | 2579 | init_rwsem(&fs_info->subvol_sem); |
2369 | sema_init(&fs_info->uuid_tree_rescan_sem, 1); | 2580 | sema_init(&fs_info->uuid_tree_rescan_sem, 1); |
2370 | fs_info->dev_replace.lock_owner = 0; | ||
2371 | atomic_set(&fs_info->dev_replace.nesting_level, 0); | ||
2372 | mutex_init(&fs_info->dev_replace.lock_finishing_cancel_unmount); | ||
2373 | mutex_init(&fs_info->dev_replace.lock_management_lock); | ||
2374 | mutex_init(&fs_info->dev_replace.lock); | ||
2375 | 2581 | ||
2376 | spin_lock_init(&fs_info->qgroup_lock); | 2582 | btrfs_init_dev_replace_locks(fs_info); |
2377 | mutex_init(&fs_info->qgroup_ioctl_lock); | 2583 | btrfs_init_qgroup(fs_info); |
2378 | fs_info->qgroup_tree = RB_ROOT; | ||
2379 | fs_info->qgroup_op_tree = RB_ROOT; | ||
2380 | INIT_LIST_HEAD(&fs_info->dirty_qgroups); | ||
2381 | fs_info->qgroup_seq = 1; | ||
2382 | fs_info->quota_enabled = 0; | ||
2383 | fs_info->pending_quota_state = 0; | ||
2384 | fs_info->qgroup_ulist = NULL; | ||
2385 | mutex_init(&fs_info->qgroup_rescan_lock); | ||
2386 | 2584 | ||
2387 | btrfs_init_free_cluster(&fs_info->meta_alloc_cluster); | 2585 | btrfs_init_free_cluster(&fs_info->meta_alloc_cluster); |
2388 | btrfs_init_free_cluster(&fs_info->data_alloc_cluster); | 2586 | btrfs_init_free_cluster(&fs_info->data_alloc_cluster); |
@@ -2554,75 +2752,9 @@ int open_ctree(struct super_block *sb, | |||
2554 | 2752 | ||
2555 | max_active = fs_info->thread_pool_size; | 2753 | max_active = fs_info->thread_pool_size; |
2556 | 2754 | ||
2557 | fs_info->workers = | 2755 | ret = btrfs_init_workqueues(fs_info, fs_devices); |
2558 | btrfs_alloc_workqueue("worker", flags | WQ_HIGHPRI, | 2756 | if (ret) { |
2559 | max_active, 16); | 2757 | err = ret; |
2560 | |||
2561 | fs_info->delalloc_workers = | ||
2562 | btrfs_alloc_workqueue("delalloc", flags, max_active, 2); | ||
2563 | |||
2564 | fs_info->flush_workers = | ||
2565 | btrfs_alloc_workqueue("flush_delalloc", flags, max_active, 0); | ||
2566 | |||
2567 | fs_info->caching_workers = | ||
2568 | btrfs_alloc_workqueue("cache", flags, max_active, 0); | ||
2569 | |||
2570 | /* | ||
2571 | * a higher idle thresh on the submit workers makes it much more | ||
2572 | * likely that bios will be send down in a sane order to the | ||
2573 | * devices | ||
2574 | */ | ||
2575 | fs_info->submit_workers = | ||
2576 | btrfs_alloc_workqueue("submit", flags, | ||
2577 | min_t(u64, fs_devices->num_devices, | ||
2578 | max_active), 64); | ||
2579 | |||
2580 | fs_info->fixup_workers = | ||
2581 | btrfs_alloc_workqueue("fixup", flags, 1, 0); | ||
2582 | |||
2583 | /* | ||
2584 | * endios are largely parallel and should have a very | ||
2585 | * low idle thresh | ||
2586 | */ | ||
2587 | fs_info->endio_workers = | ||
2588 | btrfs_alloc_workqueue("endio", flags, max_active, 4); | ||
2589 | fs_info->endio_meta_workers = | ||
2590 | btrfs_alloc_workqueue("endio-meta", flags, max_active, 4); | ||
2591 | fs_info->endio_meta_write_workers = | ||
2592 | btrfs_alloc_workqueue("endio-meta-write", flags, max_active, 2); | ||
2593 | fs_info->endio_raid56_workers = | ||
2594 | btrfs_alloc_workqueue("endio-raid56", flags, max_active, 4); | ||
2595 | fs_info->endio_repair_workers = | ||
2596 | btrfs_alloc_workqueue("endio-repair", flags, 1, 0); | ||
2597 | fs_info->rmw_workers = | ||
2598 | btrfs_alloc_workqueue("rmw", flags, max_active, 2); | ||
2599 | fs_info->endio_write_workers = | ||
2600 | btrfs_alloc_workqueue("endio-write", flags, max_active, 2); | ||
2601 | fs_info->endio_freespace_worker = | ||
2602 | btrfs_alloc_workqueue("freespace-write", flags, max_active, 0); | ||
2603 | fs_info->delayed_workers = | ||
2604 | btrfs_alloc_workqueue("delayed-meta", flags, max_active, 0); | ||
2605 | fs_info->readahead_workers = | ||
2606 | btrfs_alloc_workqueue("readahead", flags, max_active, 2); | ||
2607 | fs_info->qgroup_rescan_workers = | ||
2608 | btrfs_alloc_workqueue("qgroup-rescan", flags, 1, 0); | ||
2609 | fs_info->extent_workers = | ||
2610 | btrfs_alloc_workqueue("extent-refs", flags, | ||
2611 | min_t(u64, fs_devices->num_devices, | ||
2612 | max_active), 8); | ||
2613 | |||
2614 | if (!(fs_info->workers && fs_info->delalloc_workers && | ||
2615 | fs_info->submit_workers && fs_info->flush_workers && | ||
2616 | fs_info->endio_workers && fs_info->endio_meta_workers && | ||
2617 | fs_info->endio_meta_write_workers && | ||
2618 | fs_info->endio_repair_workers && | ||
2619 | fs_info->endio_write_workers && fs_info->endio_raid56_workers && | ||
2620 | fs_info->endio_freespace_worker && fs_info->rmw_workers && | ||
2621 | fs_info->caching_workers && fs_info->readahead_workers && | ||
2622 | fs_info->fixup_workers && fs_info->delayed_workers && | ||
2623 | fs_info->extent_workers && | ||
2624 | fs_info->qgroup_rescan_workers)) { | ||
2625 | err = -ENOMEM; | ||
2626 | goto fail_sb_buffer; | 2758 | goto fail_sb_buffer; |
2627 | } | 2759 | } |
2628 | 2760 | ||
@@ -2688,7 +2820,7 @@ int open_ctree(struct super_block *sb, | |||
2688 | * keep the device that is marked to be the target device for the | 2820 | * keep the device that is marked to be the target device for the |
2689 | * dev_replace procedure | 2821 | * dev_replace procedure |
2690 | */ | 2822 | */ |
2691 | btrfs_close_extra_devices(fs_info, fs_devices, 0); | 2823 | btrfs_close_extra_devices(fs_devices, 0); |
2692 | 2824 | ||
2693 | if (!fs_devices->latest_bdev) { | 2825 | if (!fs_devices->latest_bdev) { |
2694 | printk(KERN_ERR "BTRFS: failed to read devices on %s\n", | 2826 | printk(KERN_ERR "BTRFS: failed to read devices on %s\n", |
@@ -2714,61 +2846,9 @@ retry_root_backup: | |||
2714 | tree_root->commit_root = btrfs_root_node(tree_root); | 2846 | tree_root->commit_root = btrfs_root_node(tree_root); |
2715 | btrfs_set_root_refs(&tree_root->root_item, 1); | 2847 | btrfs_set_root_refs(&tree_root->root_item, 1); |
2716 | 2848 | ||
2717 | location.objectid = BTRFS_EXTENT_TREE_OBJECTID; | 2849 | ret = btrfs_read_roots(fs_info, tree_root); |
2718 | location.type = BTRFS_ROOT_ITEM_KEY; | 2850 | if (ret) |
2719 | location.offset = 0; | ||
2720 | |||
2721 | extent_root = btrfs_read_tree_root(tree_root, &location); | ||
2722 | if (IS_ERR(extent_root)) { | ||
2723 | ret = PTR_ERR(extent_root); | ||
2724 | goto recovery_tree_root; | ||
2725 | } | ||
2726 | set_bit(BTRFS_ROOT_TRACK_DIRTY, &extent_root->state); | ||
2727 | fs_info->extent_root = extent_root; | ||
2728 | |||
2729 | location.objectid = BTRFS_DEV_TREE_OBJECTID; | ||
2730 | dev_root = btrfs_read_tree_root(tree_root, &location); | ||
2731 | if (IS_ERR(dev_root)) { | ||
2732 | ret = PTR_ERR(dev_root); | ||
2733 | goto recovery_tree_root; | ||
2734 | } | ||
2735 | set_bit(BTRFS_ROOT_TRACK_DIRTY, &dev_root->state); | ||
2736 | fs_info->dev_root = dev_root; | ||
2737 | btrfs_init_devices_late(fs_info); | ||
2738 | |||
2739 | location.objectid = BTRFS_CSUM_TREE_OBJECTID; | ||
2740 | csum_root = btrfs_read_tree_root(tree_root, &location); | ||
2741 | if (IS_ERR(csum_root)) { | ||
2742 | ret = PTR_ERR(csum_root); | ||
2743 | goto recovery_tree_root; | 2851 | goto recovery_tree_root; |
2744 | } | ||
2745 | set_bit(BTRFS_ROOT_TRACK_DIRTY, &csum_root->state); | ||
2746 | fs_info->csum_root = csum_root; | ||
2747 | |||
2748 | location.objectid = BTRFS_QUOTA_TREE_OBJECTID; | ||
2749 | quota_root = btrfs_read_tree_root(tree_root, &location); | ||
2750 | if (!IS_ERR(quota_root)) { | ||
2751 | set_bit(BTRFS_ROOT_TRACK_DIRTY, "a_root->state); | ||
2752 | fs_info->quota_enabled = 1; | ||
2753 | fs_info->pending_quota_state = 1; | ||
2754 | fs_info->quota_root = quota_root; | ||
2755 | } | ||
2756 | |||
2757 | location.objectid = BTRFS_UUID_TREE_OBJECTID; | ||
2758 | uuid_root = btrfs_read_tree_root(tree_root, &location); | ||
2759 | if (IS_ERR(uuid_root)) { | ||
2760 | ret = PTR_ERR(uuid_root); | ||
2761 | if (ret != -ENOENT) | ||
2762 | goto recovery_tree_root; | ||
2763 | create_uuid_tree = true; | ||
2764 | check_uuid_tree = false; | ||
2765 | } else { | ||
2766 | set_bit(BTRFS_ROOT_TRACK_DIRTY, &uuid_root->state); | ||
2767 | fs_info->uuid_root = uuid_root; | ||
2768 | create_uuid_tree = false; | ||
2769 | check_uuid_tree = | ||
2770 | generation != btrfs_super_uuid_tree_generation(disk_super); | ||
2771 | } | ||
2772 | 2852 | ||
2773 | fs_info->generation = generation; | 2853 | fs_info->generation = generation; |
2774 | fs_info->last_trans_committed = generation; | 2854 | fs_info->last_trans_committed = generation; |
@@ -2792,7 +2872,7 @@ retry_root_backup: | |||
2792 | goto fail_block_groups; | 2872 | goto fail_block_groups; |
2793 | } | 2873 | } |
2794 | 2874 | ||
2795 | btrfs_close_extra_devices(fs_info, fs_devices, 1); | 2875 | btrfs_close_extra_devices(fs_devices, 1); |
2796 | 2876 | ||
2797 | ret = btrfs_sysfs_add_one(fs_info); | 2877 | ret = btrfs_sysfs_add_one(fs_info); |
2798 | if (ret) { | 2878 | if (ret) { |
@@ -2806,7 +2886,7 @@ retry_root_backup: | |||
2806 | goto fail_sysfs; | 2886 | goto fail_sysfs; |
2807 | } | 2887 | } |
2808 | 2888 | ||
2809 | ret = btrfs_read_block_groups(extent_root); | 2889 | ret = btrfs_read_block_groups(fs_info->extent_root); |
2810 | if (ret) { | 2890 | if (ret) { |
2811 | printk(KERN_ERR "BTRFS: Failed to read block groups: %d\n", ret); | 2891 | printk(KERN_ERR "BTRFS: Failed to read block groups: %d\n", ret); |
2812 | goto fail_sysfs; | 2892 | goto fail_sysfs; |
@@ -2864,48 +2944,11 @@ retry_root_backup: | |||
2864 | 2944 | ||
2865 | /* do not make disk changes in broken FS */ | 2945 | /* do not make disk changes in broken FS */ |
2866 | if (btrfs_super_log_root(disk_super) != 0) { | 2946 | if (btrfs_super_log_root(disk_super) != 0) { |
2867 | u64 bytenr = btrfs_super_log_root(disk_super); | 2947 | ret = btrfs_replay_log(fs_info, fs_devices); |
2868 | |||
2869 | if (fs_devices->rw_devices == 0) { | ||
2870 | printk(KERN_WARNING "BTRFS: log replay required " | ||
2871 | "on RO media\n"); | ||
2872 | err = -EIO; | ||
2873 | goto fail_qgroup; | ||
2874 | } | ||
2875 | |||
2876 | log_tree_root = btrfs_alloc_root(fs_info); | ||
2877 | if (!log_tree_root) { | ||
2878 | err = -ENOMEM; | ||
2879 | goto fail_qgroup; | ||
2880 | } | ||
2881 | |||
2882 | __setup_root(nodesize, sectorsize, stripesize, | ||
2883 | log_tree_root, fs_info, BTRFS_TREE_LOG_OBJECTID); | ||
2884 | |||
2885 | log_tree_root->node = read_tree_block(tree_root, bytenr, | ||
2886 | generation + 1); | ||
2887 | if (!log_tree_root->node || | ||
2888 | !extent_buffer_uptodate(log_tree_root->node)) { | ||
2889 | printk(KERN_ERR "BTRFS: failed to read log tree\n"); | ||
2890 | free_extent_buffer(log_tree_root->node); | ||
2891 | kfree(log_tree_root); | ||
2892 | goto fail_qgroup; | ||
2893 | } | ||
2894 | /* returns with log_tree_root freed on success */ | ||
2895 | ret = btrfs_recover_log_trees(log_tree_root); | ||
2896 | if (ret) { | 2948 | if (ret) { |
2897 | btrfs_error(tree_root->fs_info, ret, | 2949 | err = ret; |
2898 | "Failed to recover log tree"); | ||
2899 | free_extent_buffer(log_tree_root->node); | ||
2900 | kfree(log_tree_root); | ||
2901 | goto fail_qgroup; | 2950 | goto fail_qgroup; |
2902 | } | 2951 | } |
2903 | |||
2904 | if (sb->s_flags & MS_RDONLY) { | ||
2905 | ret = btrfs_commit_super(tree_root); | ||
2906 | if (ret) | ||
2907 | goto fail_qgroup; | ||
2908 | } | ||
2909 | } | 2952 | } |
2910 | 2953 | ||
2911 | ret = btrfs_find_orphan_roots(tree_root); | 2954 | ret = btrfs_find_orphan_roots(tree_root); |
@@ -2966,7 +3009,7 @@ retry_root_backup: | |||
2966 | 3009 | ||
2967 | btrfs_qgroup_rescan_resume(fs_info); | 3010 | btrfs_qgroup_rescan_resume(fs_info); |
2968 | 3011 | ||
2969 | if (create_uuid_tree) { | 3012 | if (!fs_info->uuid_root) { |
2970 | pr_info("BTRFS: creating UUID tree\n"); | 3013 | pr_info("BTRFS: creating UUID tree\n"); |
2971 | ret = btrfs_create_uuid_tree(fs_info); | 3014 | ret = btrfs_create_uuid_tree(fs_info); |
2972 | if (ret) { | 3015 | if (ret) { |
@@ -2975,8 +3018,9 @@ retry_root_backup: | |||
2975 | close_ctree(tree_root); | 3018 | close_ctree(tree_root); |
2976 | return ret; | 3019 | return ret; |
2977 | } | 3020 | } |
2978 | } else if (check_uuid_tree || | 3021 | } else if (btrfs_test_opt(tree_root, RESCAN_UUID_TREE) || |
2979 | btrfs_test_opt(tree_root, RESCAN_UUID_TREE)) { | 3022 | fs_info->generation != |
3023 | btrfs_super_uuid_tree_generation(disk_super)) { | ||
2980 | pr_info("BTRFS: checking UUID tree\n"); | 3024 | pr_info("BTRFS: checking UUID tree\n"); |
2981 | ret = btrfs_check_uuid_tree(fs_info); | 3025 | ret = btrfs_check_uuid_tree(fs_info); |
2982 | if (ret) { | 3026 | if (ret) { |
@@ -3668,7 +3712,7 @@ void close_ctree(struct btrfs_root *root) | |||
3668 | if (!(fs_info->sb->s_flags & MS_RDONLY)) { | 3712 | if (!(fs_info->sb->s_flags & MS_RDONLY)) { |
3669 | ret = btrfs_commit_super(root); | 3713 | ret = btrfs_commit_super(root); |
3670 | if (ret) | 3714 | if (ret) |
3671 | btrfs_err(root->fs_info, "commit super ret %d", ret); | 3715 | btrfs_err(fs_info, "commit super ret %d", ret); |
3672 | } | 3716 | } |
3673 | 3717 | ||
3674 | if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) | 3718 | if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) |
@@ -3680,10 +3724,10 @@ void close_ctree(struct btrfs_root *root) | |||
3680 | fs_info->closing = 2; | 3724 | fs_info->closing = 2; |
3681 | smp_mb(); | 3725 | smp_mb(); |
3682 | 3726 | ||
3683 | btrfs_free_qgroup_config(root->fs_info); | 3727 | btrfs_free_qgroup_config(fs_info); |
3684 | 3728 | ||
3685 | if (percpu_counter_sum(&fs_info->delalloc_bytes)) { | 3729 | if (percpu_counter_sum(&fs_info->delalloc_bytes)) { |
3686 | btrfs_info(root->fs_info, "at unmount delalloc count %lld", | 3730 | btrfs_info(fs_info, "at unmount delalloc count %lld", |
3687 | percpu_counter_sum(&fs_info->delalloc_bytes)); | 3731 | percpu_counter_sum(&fs_info->delalloc_bytes)); |
3688 | } | 3732 | } |
3689 | 3733 | ||
@@ -3723,7 +3767,7 @@ void close_ctree(struct btrfs_root *root) | |||
3723 | 3767 | ||
3724 | btrfs_free_stripe_hash_table(fs_info); | 3768 | btrfs_free_stripe_hash_table(fs_info); |
3725 | 3769 | ||
3726 | btrfs_free_block_rsv(root, root->orphan_block_rsv); | 3770 | __btrfs_free_block_rsv(root->orphan_block_rsv); |
3727 | root->orphan_block_rsv = NULL; | 3771 | root->orphan_block_rsv = NULL; |
3728 | 3772 | ||
3729 | lock_chunks(root); | 3773 | lock_chunks(root); |
@@ -4134,7 +4178,7 @@ static int btrfs_destroy_marked_extents(struct btrfs_root *root, | |||
4134 | 4178 | ||
4135 | clear_extent_bits(dirty_pages, start, end, mark, GFP_NOFS); | 4179 | clear_extent_bits(dirty_pages, start, end, mark, GFP_NOFS); |
4136 | while (start <= end) { | 4180 | while (start <= end) { |
4137 | eb = btrfs_find_tree_block(root, start); | 4181 | eb = btrfs_find_tree_block(root->fs_info, start); |
4138 | start += root->nodesize; | 4182 | start += root->nodesize; |
4139 | if (!eb) | 4183 | if (!eb) |
4140 | continue; | 4184 | continue; |
@@ -4285,7 +4329,7 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root) | |||
4285 | return 0; | 4329 | return 0; |
4286 | } | 4330 | } |
4287 | 4331 | ||
4288 | static struct extent_io_ops btree_extent_io_ops = { | 4332 | static const struct extent_io_ops btree_extent_io_ops = { |
4289 | .readpage_end_io_hook = btree_readpage_end_io_hook, | 4333 | .readpage_end_io_hook = btree_readpage_end_io_hook, |
4290 | .readpage_io_failed_hook = btree_io_failed_hook, | 4334 | .readpage_io_failed_hook = btree_io_failed_hook, |
4291 | .submit_bio_hook = btree_submit_bio_hook, | 4335 | .submit_bio_hook = btree_submit_bio_hook, |
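Constifying btree_extent_io_ops moves the hook table into read-only memory, so a stray write through a dangling pointer faults instead of silently rebinding an I/O callback. The pattern, reduced to a standalone example (demo_ops is hypothetical, not a btrfs type):

    struct demo_ops {
            int (*readpage_end_io_hook)(void *arg);
    };

    static int demo_end_io(void *arg)
    {
            return 0;
    }

    /* const: the compiler places this table in .rodata */
    static const struct demo_ops demo_io_ops = {
            .readpage_end_io_hook = demo_end_io,
    };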
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 27d44c0fd236..d4cbfeeeedd4 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h | |||
@@ -52,7 +52,7 @@ int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr, | |||
52 | struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, | 52 | struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, |
53 | u64 bytenr); | 53 | u64 bytenr); |
54 | void clean_tree_block(struct btrfs_trans_handle *trans, | 54 | void clean_tree_block(struct btrfs_trans_handle *trans, |
55 | struct btrfs_root *root, struct extent_buffer *buf); | 55 | struct btrfs_fs_info *fs_info, struct extent_buffer *buf); |
56 | int open_ctree(struct super_block *sb, | 56 | int open_ctree(struct super_block *sb, |
57 | struct btrfs_fs_devices *fs_devices, | 57 | struct btrfs_fs_devices *fs_devices, |
58 | char *options); | 58 | char *options); |
@@ -61,7 +61,7 @@ int write_ctree_super(struct btrfs_trans_handle *trans, | |||
61 | struct btrfs_root *root, int max_mirrors); | 61 | struct btrfs_root *root, int max_mirrors); |
62 | struct buffer_head *btrfs_read_dev_super(struct block_device *bdev); | 62 | struct buffer_head *btrfs_read_dev_super(struct block_device *bdev); |
63 | int btrfs_commit_super(struct btrfs_root *root); | 63 | int btrfs_commit_super(struct btrfs_root *root); |
64 | struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, | 64 | struct extent_buffer *btrfs_find_tree_block(struct btrfs_fs_info *fs_info, |
65 | u64 bytenr); | 65 | u64 bytenr); |
66 | struct btrfs_root *btrfs_read_fs_root(struct btrfs_root *tree_root, | 66 | struct btrfs_root *btrfs_read_fs_root(struct btrfs_root *tree_root, |
67 | struct btrfs_key *location); | 67 | struct btrfs_key *location); |
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 8b353ad02f03..1eef4ee01d1a 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -2538,6 +2538,12 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, | |||
2538 | * list before we release it. | 2538 | * list before we release it. |
2539 | */ | 2539 | */ |
2540 | if (btrfs_delayed_ref_is_head(ref)) { | 2540 | if (btrfs_delayed_ref_is_head(ref)) { |
2541 | if (locked_ref->is_data && | ||
2542 | locked_ref->total_ref_mod < 0) { | ||
2543 | spin_lock(&delayed_refs->lock); | ||
2544 | delayed_refs->pending_csums -= ref->num_bytes; | ||
2545 | spin_unlock(&delayed_refs->lock); | ||
2546 | } | ||
2541 | btrfs_delayed_ref_unlock(locked_ref); | 2547 | btrfs_delayed_ref_unlock(locked_ref); |
2542 | locked_ref = NULL; | 2548 | locked_ref = NULL; |
2543 | } | 2549 | } |
@@ -2561,8 +2567,7 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, | |||
2561 | */ | 2567 | */ |
2562 | spin_lock(&delayed_refs->lock); | 2568 | spin_lock(&delayed_refs->lock); |
2563 | avg = fs_info->avg_delayed_ref_runtime * 3 + runtime; | 2569 | avg = fs_info->avg_delayed_ref_runtime * 3 + runtime; |
2564 | avg = div64_u64(avg, 4); | 2570 | fs_info->avg_delayed_ref_runtime = avg >> 2; /* div by 4 */ |
2565 | fs_info->avg_delayed_ref_runtime = avg; | ||
2566 | spin_unlock(&delayed_refs->lock); | 2571 | spin_unlock(&delayed_refs->lock); |
2567 | } | 2572 | } |
2568 | return 0; | 2573 | return 0; |
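Both halves of this change swap div64_u64() for a shift where the divisor is a power of two (the seed value earlier in the diff becomes NSEC_PER_SEC >> 6, the update here becomes avg >> 2), avoiding the 64-bit division helpers on 32-bit builds. The update itself keeps a 3/4-weighted exponential moving average of delayed-ref runtime. A self-contained sketch with made-up sample values:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint64_t avg = 1000000000ULL >> 6;  /* seed: NSEC_PER_SEC / 64 */
            uint64_t runtime[] = { 900000, 1200000, 800000 };  /* ns, invented */

            for (int i = 0; i < 3; i++) {
                    /* avg = (3 * avg + sample) / 4, division done as a shift */
                    avg = (avg * 3 + runtime[i]) >> 2;
                    printf("avg after sample %d: %llu ns\n", i,
                           (unsigned long long)avg);
            }
            return 0;
    }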
@@ -2624,7 +2629,26 @@ static inline u64 heads_to_leaves(struct btrfs_root *root, u64 heads) | |||
2624 | * We don't ever fill up leaves all the way so multiply by 2 just to be | 2629 | * We don't ever fill up leaves all the way so multiply by 2 just to be |
2625 | * closer to what we're really going to want to use. | 2630 | * closer to what we're really going to want to use. |
2626 | */ | 2631 | */ |
2627 | return div64_u64(num_bytes, BTRFS_LEAF_DATA_SIZE(root)); | 2632 | return div_u64(num_bytes, BTRFS_LEAF_DATA_SIZE(root)); |
2633 | } | ||
2634 | |||
2635 | /* | ||
2636 | * Takes the number of bytes to be checksummed and figures out how many leaves it | ||
2637 | * would require to store the csums for that many bytes. | ||
2638 | */ | ||
2639 | u64 btrfs_csum_bytes_to_leaves(struct btrfs_root *root, u64 csum_bytes) | ||
2640 | { | ||
2641 | u64 csum_size; | ||
2642 | u64 num_csums_per_leaf; | ||
2643 | u64 num_csums; | ||
2644 | |||
2645 | csum_size = BTRFS_LEAF_DATA_SIZE(root) - sizeof(struct btrfs_item); | ||
2646 | num_csums_per_leaf = div64_u64(csum_size, | ||
2647 | (u64)btrfs_super_csum_size(root->fs_info->super_copy)); | ||
2648 | num_csums = div64_u64(csum_bytes, root->sectorsize); | ||
2649 | num_csums += num_csums_per_leaf - 1; | ||
2650 | num_csums = div64_u64(num_csums, num_csums_per_leaf); | ||
2651 | return num_csums; | ||
2628 | } | 2652 | } |
2629 | 2653 | ||
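btrfs_csum_bytes_to_leaves() is two ceiling divisions: data bytes to per-sector checksums, then checksums to leaves, using the usual (n + per_leaf - 1) / per_leaf rounding. A worked standalone version; the geometry constants below are assumptions for illustration, not values taken from this diff:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint64_t leaf_data  = 16283;  /* assumed BTRFS_LEAF_DATA_SIZE */
            uint64_t item_hdr   = 25;     /* assumed sizeof(struct btrfs_item) */
            uint64_t csum_size  = 4;      /* CRC32C */
            uint64_t sectorsize = 4096;

            uint64_t per_leaf   = (leaf_data - item_hdr) / csum_size;
            uint64_t csum_bytes = 1ULL << 30;             /* 1 GiB of data */
            uint64_t num_csums  = csum_bytes / sectorsize;
            uint64_t leaves     = (num_csums + per_leaf - 1) / per_leaf;

            /* 262144 csums packed 4064 per leaf -> 65 leaves */
            printf("%llu csums -> %llu leaves\n",
                   (unsigned long long)num_csums, (unsigned long long)leaves);
            return 0;
    }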
2630 | int btrfs_check_space_for_delayed_refs(struct btrfs_trans_handle *trans, | 2654 | int btrfs_check_space_for_delayed_refs(struct btrfs_trans_handle *trans, |
@@ -2632,7 +2656,9 @@ int btrfs_check_space_for_delayed_refs(struct btrfs_trans_handle *trans, | |||
2632 | { | 2656 | { |
2633 | struct btrfs_block_rsv *global_rsv; | 2657 | struct btrfs_block_rsv *global_rsv; |
2634 | u64 num_heads = trans->transaction->delayed_refs.num_heads_ready; | 2658 | u64 num_heads = trans->transaction->delayed_refs.num_heads_ready; |
2635 | u64 num_bytes; | 2659 | u64 csum_bytes = trans->transaction->delayed_refs.pending_csums; |
2660 | u64 num_dirty_bgs = trans->transaction->num_dirty_bgs; | ||
2661 | u64 num_bytes, num_dirty_bgs_bytes; | ||
2636 | int ret = 0; | 2662 | int ret = 0; |
2637 | 2663 | ||
2638 | num_bytes = btrfs_calc_trans_metadata_size(root, 1); | 2664 | num_bytes = btrfs_calc_trans_metadata_size(root, 1); |
@@ -2640,17 +2666,22 @@ int btrfs_check_space_for_delayed_refs(struct btrfs_trans_handle *trans, | |||
2640 | if (num_heads > 1) | 2666 | if (num_heads > 1) |
2641 | num_bytes += (num_heads - 1) * root->nodesize; | 2667 | num_bytes += (num_heads - 1) * root->nodesize; |
2642 | num_bytes <<= 1; | 2668 | num_bytes <<= 1; |
2669 | num_bytes += btrfs_csum_bytes_to_leaves(root, csum_bytes) * root->nodesize; | ||
2670 | num_dirty_bgs_bytes = btrfs_calc_trans_metadata_size(root, | ||
2671 | num_dirty_bgs); | ||
2643 | global_rsv = &root->fs_info->global_block_rsv; | 2672 | global_rsv = &root->fs_info->global_block_rsv; |
2644 | 2673 | ||
2645 | /* | 2674 | /* |
2646 | * If we can't allocate any more chunks lets make sure we have _lots_ of | 2675 | * If we can't allocate any more chunks lets make sure we have _lots_ of |
2647 | * wiggle room since running delayed refs can create more delayed refs. | 2676 | * wiggle room since running delayed refs can create more delayed refs. |
2648 | */ | 2677 | */ |
2649 | if (global_rsv->space_info->full) | 2678 | if (global_rsv->space_info->full) { |
2679 | num_dirty_bgs_bytes <<= 1; | ||
2650 | num_bytes <<= 1; | 2680 | num_bytes <<= 1; |
2681 | } | ||
2651 | 2682 | ||
2652 | spin_lock(&global_rsv->lock); | 2683 | spin_lock(&global_rsv->lock); |
2653 | if (global_rsv->reserved <= num_bytes) | 2684 | if (global_rsv->reserved <= num_bytes + num_dirty_bgs_bytes) |
2654 | ret = 1; | 2685 | ret = 1; |
2655 | spin_unlock(&global_rsv->lock); | 2686 | spin_unlock(&global_rsv->lock); |
2656 | return ret; | 2687 | return ret; |
@@ -3193,7 +3224,7 @@ static int cache_save_setup(struct btrfs_block_group_cache *block_group, | |||
3193 | struct inode *inode = NULL; | 3224 | struct inode *inode = NULL; |
3194 | u64 alloc_hint = 0; | 3225 | u64 alloc_hint = 0; |
3195 | int dcs = BTRFS_DC_ERROR; | 3226 | int dcs = BTRFS_DC_ERROR; |
3196 | int num_pages = 0; | 3227 | u64 num_pages = 0; |
3197 | int retries = 0; | 3228 | int retries = 0; |
3198 | int ret = 0; | 3229 | int ret = 0; |
3199 | 3230 | ||
@@ -3267,7 +3298,7 @@ again: | |||
3267 | if (ret) | 3298 | if (ret) |
3268 | goto out_put; | 3299 | goto out_put; |
3269 | 3300 | ||
3270 | ret = btrfs_truncate_free_space_cache(root, trans, inode); | 3301 | ret = btrfs_truncate_free_space_cache(root, trans, NULL, inode); |
3271 | if (ret) | 3302 | if (ret) |
3272 | goto out_put; | 3303 | goto out_put; |
3273 | } | 3304 | } |
@@ -3293,14 +3324,14 @@ again: | |||
3293 | * taking up quite a bit since it's not folded into the other space | 3324 | * taking up quite a bit since it's not folded into the other space |
3294 | * cache. | 3325 | * cache. |
3295 | */ | 3326 | */ |
3296 | num_pages = (int)div64_u64(block_group->key.offset, 256 * 1024 * 1024); | 3327 | num_pages = div_u64(block_group->key.offset, 256 * 1024 * 1024); |
3297 | if (!num_pages) | 3328 | if (!num_pages) |
3298 | num_pages = 1; | 3329 | num_pages = 1; |
3299 | 3330 | ||
3300 | num_pages *= 16; | 3331 | num_pages *= 16; |
3301 | num_pages *= PAGE_CACHE_SIZE; | 3332 | num_pages *= PAGE_CACHE_SIZE; |
3302 | 3333 | ||
3303 | ret = btrfs_check_data_free_space(inode, num_pages); | 3334 | ret = btrfs_check_data_free_space(inode, num_pages, num_pages); |
3304 | if (ret) | 3335 | if (ret) |
3305 | goto out_put; | 3336 | goto out_put; |
3306 | 3337 | ||
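The sizing rule above reserves 16 pages of free-space cache per 256MiB of block group; switching num_pages from int to u64 (and div64_u64 with an (int) cast to a plain div_u64) removes truncation on a very large block group offset. The arithmetic, standalone (page size assumed 4KiB):

    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_CACHE_SIZE 4096ULL   /* assumed */

    int main(void)
    {
            uint64_t bg_size   = 1ULL << 30;                /* 1 GiB group */
            uint64_t num_pages = bg_size / (256ULL << 20);  /* per 256 MiB */

            if (!num_pages)
                    num_pages = 1;
            num_pages *= 16;

            /* 4 * 16 pages * 4 KiB = 256 KiB of cache space */
            printf("%llu bytes reserved\n",
                   (unsigned long long)(num_pages * PAGE_CACHE_SIZE));
            return 0;
    }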
@@ -3351,16 +3382,156 @@ int btrfs_setup_space_cache(struct btrfs_trans_handle *trans, | |||
3351 | return 0; | 3382 | return 0; |
3352 | } | 3383 | } |
3353 | 3384 | ||
3354 | int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, | 3385 | /* |
3386 | * transaction commit does final block group cache writeback during a | ||
3387 | * critical section where nothing is allowed to change the FS. This is | ||
3388 | * required in order for the cache to actually match the block group, | ||
3389 | * but can introduce a lot of latency into the commit. | ||
3390 | * | ||
3391 | * So, btrfs_start_dirty_block_groups is here to kick off block group | ||
3392 | * cache IO. There's a chance we'll have to redo some of it if the | ||
3393 | * block group changes again during the commit, but it greatly reduces | ||
3394 | * the commit latency by getting rid of the easy block groups while | ||
3395 | * we're still allowing others to join the commit. | ||
3396 | */ | ||
3397 | int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans, | ||
3355 | struct btrfs_root *root) | 3398 | struct btrfs_root *root) |
3356 | { | 3399 | { |
3357 | struct btrfs_block_group_cache *cache; | 3400 | struct btrfs_block_group_cache *cache; |
3358 | struct btrfs_transaction *cur_trans = trans->transaction; | 3401 | struct btrfs_transaction *cur_trans = trans->transaction; |
3359 | int ret = 0; | 3402 | int ret = 0; |
3360 | struct btrfs_path *path; | 3403 | int should_put; |
3404 | struct btrfs_path *path = NULL; | ||
3405 | LIST_HEAD(dirty); | ||
3406 | struct list_head *io = &cur_trans->io_bgs; | ||
3407 | int num_started = 0; | ||
3408 | int loops = 0; | ||
3409 | |||
3410 | spin_lock(&cur_trans->dirty_bgs_lock); | ||
3411 | if (!list_empty(&cur_trans->dirty_bgs)) { | ||
3412 | list_splice_init(&cur_trans->dirty_bgs, &dirty); | ||
3413 | } | ||
3414 | spin_unlock(&cur_trans->dirty_bgs_lock); | ||
3361 | 3415 | ||
3362 | if (list_empty(&cur_trans->dirty_bgs)) | 3416 | again: |
3417 | if (list_empty(&dirty)) { | ||
3418 | btrfs_free_path(path); | ||
3363 | return 0; | 3419 | return 0; |
3420 | } | ||
3421 | |||
3422 | /* | ||
3423 | * make sure all the block groups on our dirty list actually | ||
3424 | * exist | ||
3425 | */ | ||
3426 | btrfs_create_pending_block_groups(trans, root); | ||
3427 | |||
3428 | if (!path) { | ||
3429 | path = btrfs_alloc_path(); | ||
3430 | if (!path) | ||
3431 | return -ENOMEM; | ||
3432 | } | ||
3433 | |||
3434 | while (!list_empty(&dirty)) { | ||
3435 | cache = list_first_entry(&dirty, | ||
3436 | struct btrfs_block_group_cache, | ||
3437 | dirty_list); | ||
3438 | |||
3439 | /* | ||
3440 | * cache_write_mutex is here only to save us from balance | ||
3441 | * deleting this block group while we are writing out the | ||
3442 | * cache | ||
3443 | */ | ||
3444 | mutex_lock(&trans->transaction->cache_write_mutex); | ||
3445 | |||
3446 | /* | ||
3447 | * this can happen if something re-dirties a block | ||
3448 | * group that is already under IO. Just wait for it to | ||
3449 | * finish and then do it all again | ||
3450 | */ | ||
3451 | if (!list_empty(&cache->io_list)) { | ||
3452 | list_del_init(&cache->io_list); | ||
3453 | btrfs_wait_cache_io(root, trans, cache, | ||
3454 | &cache->io_ctl, path, | ||
3455 | cache->key.objectid); | ||
3456 | btrfs_put_block_group(cache); | ||
3457 | } | ||
3458 | |||
3459 | |||
3460 | /* | ||
3461 | * btrfs_wait_cache_io uses the cache->dirty_list to decide | ||
3462 | * if it should update the cache_state. Don't delete | ||
3463 | * until after we wait. | ||
3464 | * | ||
3465 | * Since we're not running in the commit critical section | ||
3466 | * we need the dirty_bgs_lock to protect from update_block_group | ||
3467 | */ | ||
3468 | spin_lock(&cur_trans->dirty_bgs_lock); | ||
3469 | list_del_init(&cache->dirty_list); | ||
3470 | spin_unlock(&cur_trans->dirty_bgs_lock); | ||
3471 | |||
3472 | should_put = 1; | ||
3473 | |||
3474 | cache_save_setup(cache, trans, path); | ||
3475 | |||
3476 | if (cache->disk_cache_state == BTRFS_DC_SETUP) { | ||
3477 | cache->io_ctl.inode = NULL; | ||
3478 | ret = btrfs_write_out_cache(root, trans, cache, path); | ||
3479 | if (ret == 0 && cache->io_ctl.inode) { | ||
3480 | num_started++; | ||
3481 | should_put = 0; | ||
3482 | |||
3483 | /* | ||
3484 | * the cache_write_mutex is protecting | ||
3485 | * the io_list | ||
3486 | */ | ||
3487 | list_add_tail(&cache->io_list, io); | ||
3488 | } else { | ||
3489 | /* | ||
3490 | * if we failed to write the cache, the | ||
3491 | * generation will be bad and life goes on | ||
3492 | */ | ||
3493 | ret = 0; | ||
3494 | } | ||
3495 | } | ||
3496 | if (!ret) | ||
3497 | ret = write_one_cache_group(trans, root, path, cache); | ||
3498 | mutex_unlock(&trans->transaction->cache_write_mutex); | ||
3499 | |||
3500 | /* if it's not on the io list, we need to put the block group */ | ||
3501 | if (should_put) | ||
3502 | btrfs_put_block_group(cache); | ||
3503 | |||
3504 | if (ret) | ||
3505 | break; | ||
3506 | } | ||
3507 | |||
3508 | /* | ||
3509 | * go through delayed refs for all the stuff we've just kicked off | ||
3510 | * and then loop back (just once) | ||
3511 | */ | ||
3512 | ret = btrfs_run_delayed_refs(trans, root, 0); | ||
3513 | if (!ret && loops == 0) { | ||
3514 | loops++; | ||
3515 | spin_lock(&cur_trans->dirty_bgs_lock); | ||
3516 | list_splice_init(&cur_trans->dirty_bgs, &dirty); | ||
3517 | spin_unlock(&cur_trans->dirty_bgs_lock); | ||
3518 | goto again; | ||
3519 | } | ||
3520 | |||
3521 | btrfs_free_path(path); | ||
3522 | return ret; | ||
3523 | } | ||
3524 | |||
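btrfs_start_dirty_block_groups() moves most block group cache writeback out of the commit critical section: it steals the whole dirty list under dirty_bgs_lock, writes each group (first waiting out any group still under IO), then runs delayed refs and loops exactly once more (the loops counter) to pick up groups those refs re-dirtied. The steal-the-list idiom it is built on, in isolation:

    LIST_HEAD(dirty);

    spin_lock(&cur_trans->dirty_bgs_lock);
    /* take every entry in one shot; the source list is left empty */
    list_splice_init(&cur_trans->dirty_bgs, &dirty);
    spin_unlock(&cur_trans->dirty_bgs_lock);

    /* 'dirty' is now private and can be walked without the lock */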
3525 | int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, | ||
3526 | struct btrfs_root *root) | ||
3527 | { | ||
3528 | struct btrfs_block_group_cache *cache; | ||
3529 | struct btrfs_transaction *cur_trans = trans->transaction; | ||
3530 | int ret = 0; | ||
3531 | int should_put; | ||
3532 | struct btrfs_path *path; | ||
3533 | struct list_head *io = &cur_trans->io_bgs; | ||
3534 | int num_started = 0; | ||
3364 | 3535 | ||
3365 | path = btrfs_alloc_path(); | 3536 | path = btrfs_alloc_path(); |
3366 | if (!path) | 3537 | if (!path) |
@@ -3376,16 +3547,61 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, | |||
3376 | cache = list_first_entry(&cur_trans->dirty_bgs, | 3547 | cache = list_first_entry(&cur_trans->dirty_bgs, |
3377 | struct btrfs_block_group_cache, | 3548 | struct btrfs_block_group_cache, |
3378 | dirty_list); | 3549 | dirty_list); |
3550 | |||
3551 | /* | ||
3552 | * this can happen if cache_save_setup re-dirties a block | ||
3553 | * group that is already under IO. Just wait for it to | ||
3554 | * finish and then do it all again | ||
3555 | */ | ||
3556 | if (!list_empty(&cache->io_list)) { | ||
3557 | list_del_init(&cache->io_list); | ||
3558 | btrfs_wait_cache_io(root, trans, cache, | ||
3559 | &cache->io_ctl, path, | ||
3560 | cache->key.objectid); | ||
3561 | btrfs_put_block_group(cache); | ||
3562 | } | ||
3563 | |||
3564 | /* | ||
3565 | * don't remove from the dirty list until after we've waited | ||
3566 | * on any pending IO | ||
3567 | */ | ||
3379 | list_del_init(&cache->dirty_list); | 3568 | list_del_init(&cache->dirty_list); |
3380 | if (cache->disk_cache_state == BTRFS_DC_CLEAR) | 3569 | should_put = 1; |
3381 | cache_save_setup(cache, trans, path); | 3570 | |
3571 | cache_save_setup(cache, trans, path); | ||
3572 | |||
3382 | if (!ret) | 3573 | if (!ret) |
3383 | ret = btrfs_run_delayed_refs(trans, root, | 3574 | ret = btrfs_run_delayed_refs(trans, root, (unsigned long) -1); |
3384 | (unsigned long) -1); | 3575 | |
3385 | if (!ret && cache->disk_cache_state == BTRFS_DC_SETUP) | 3576 | if (!ret && cache->disk_cache_state == BTRFS_DC_SETUP) { |
3386 | btrfs_write_out_cache(root, trans, cache, path); | 3577 | cache->io_ctl.inode = NULL; |
3578 | ret = btrfs_write_out_cache(root, trans, cache, path); | ||
3579 | if (ret == 0 && cache->io_ctl.inode) { | ||
3580 | num_started++; | ||
3581 | should_put = 0; | ||
3582 | list_add_tail(&cache->io_list, io); | ||
3583 | } else { | ||
3584 | /* | ||
3585 | * if we failed to write the cache, the | ||
3586 | * generation will be bad and life goes on | ||
3587 | */ | ||
3588 | ret = 0; | ||
3589 | } | ||
3590 | } | ||
3387 | if (!ret) | 3591 | if (!ret) |
3388 | ret = write_one_cache_group(trans, root, path, cache); | 3592 | ret = write_one_cache_group(trans, root, path, cache); |
3593 | |||
3594 | /* if it's not on the io list, we need to put the block group */ | ||
3595 | if (should_put) | ||
3596 | btrfs_put_block_group(cache); | ||
3597 | } | ||
3598 | |||
3599 | while (!list_empty(io)) { | ||
3600 | cache = list_first_entry(io, struct btrfs_block_group_cache, | ||
3601 | io_list); | ||
3602 | list_del_init(&cache->io_list); | ||
3603 | btrfs_wait_cache_io(root, trans, cache, | ||
3604 | &cache->io_ctl, path, cache->key.objectid); | ||
3389 | btrfs_put_block_group(cache); | 3605 | btrfs_put_block_group(cache); |
3390 | } | 3606 | } |
3391 | 3607 | ||
@@ -3635,19 +3851,21 @@ u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data) | |||
3635 | * This will check the space that the inode allocates from to make sure we have | 3851 | * This will check the space that the inode allocates from to make sure we have |
3636 | * enough space for bytes. | 3852 | * enough space for bytes. |
3637 | */ | 3853 | */ |
3638 | int btrfs_check_data_free_space(struct inode *inode, u64 bytes) | 3854 | int btrfs_check_data_free_space(struct inode *inode, u64 bytes, u64 write_bytes) |
3639 | { | 3855 | { |
3640 | struct btrfs_space_info *data_sinfo; | 3856 | struct btrfs_space_info *data_sinfo; |
3641 | struct btrfs_root *root = BTRFS_I(inode)->root; | 3857 | struct btrfs_root *root = BTRFS_I(inode)->root; |
3642 | struct btrfs_fs_info *fs_info = root->fs_info; | 3858 | struct btrfs_fs_info *fs_info = root->fs_info; |
3643 | u64 used; | 3859 | u64 used; |
3644 | int ret = 0, committed = 0, alloc_chunk = 1; | 3860 | int ret = 0; |
3861 | int need_commit = 2; | ||
3862 | int have_pinned_space; | ||
3645 | 3863 | ||
3646 | /* make sure bytes are sectorsize aligned */ | 3864 | /* make sure bytes are sectorsize aligned */ |
3647 | bytes = ALIGN(bytes, root->sectorsize); | 3865 | bytes = ALIGN(bytes, root->sectorsize); |
3648 | 3866 | ||
3649 | if (btrfs_is_free_space_inode(inode)) { | 3867 | if (btrfs_is_free_space_inode(inode)) { |
3650 | committed = 1; | 3868 | need_commit = 0; |
3651 | ASSERT(current->journal_info); | 3869 | ASSERT(current->journal_info); |
3652 | } | 3870 | } |
3653 | 3871 | ||
@@ -3669,7 +3887,7 @@ again: | |||
3669 | * if we don't have enough free bytes in this space then we need | 3887 | * if we don't have enough free bytes in this space then we need |
3670 | * to alloc a new chunk. | 3888 | * to alloc a new chunk. |
3671 | */ | 3889 | */ |
3672 | if (!data_sinfo->full && alloc_chunk) { | 3890 | if (!data_sinfo->full) { |
3673 | u64 alloc_target; | 3891 | u64 alloc_target; |
3674 | 3892 | ||
3675 | data_sinfo->force_alloc = CHUNK_ALLOC_FORCE; | 3893 | data_sinfo->force_alloc = CHUNK_ALLOC_FORCE; |
@@ -3697,8 +3915,10 @@ alloc: | |||
3697 | if (ret < 0) { | 3915 | if (ret < 0) { |
3698 | if (ret != -ENOSPC) | 3916 | if (ret != -ENOSPC) |
3699 | return ret; | 3917 | return ret; |
3700 | else | 3918 | else { |
3919 | have_pinned_space = 1; | ||
3701 | goto commit_trans; | 3920 | goto commit_trans; |
3921 | } | ||
3702 | } | 3922 | } |
3703 | 3923 | ||
3704 | if (!data_sinfo) | 3924 | if (!data_sinfo) |
@@ -3709,26 +3929,39 @@ alloc: | |||
3709 | 3929 | ||
3710 | /* | 3930 | /* |
3711 | * If we don't have enough pinned space to deal with this | 3931 | * If we don't have enough pinned space to deal with this |
3712 | * allocation don't bother committing the transaction. | 3932 | * allocation, and no chunk was removed in the current transaction, |
3933 | * don't bother committing the transaction. | ||
3713 | */ | 3934 | */ |
3714 | if (percpu_counter_compare(&data_sinfo->total_bytes_pinned, | 3935 | have_pinned_space = percpu_counter_compare( |
3715 | bytes) < 0) | 3936 | &data_sinfo->total_bytes_pinned, |
3716 | committed = 1; | 3937 | used + bytes - data_sinfo->total_bytes); |
3717 | spin_unlock(&data_sinfo->lock); | 3938 | spin_unlock(&data_sinfo->lock); |
3718 | 3939 | ||
3719 | /* commit the current transaction and try again */ | 3940 | /* commit the current transaction and try again */ |
3720 | commit_trans: | 3941 | commit_trans: |
3721 | if (!committed && | 3942 | if (need_commit && |
3722 | !atomic_read(&root->fs_info->open_ioctl_trans)) { | 3943 | !atomic_read(&root->fs_info->open_ioctl_trans)) { |
3723 | committed = 1; | 3944 | need_commit--; |
3724 | 3945 | ||
3725 | trans = btrfs_join_transaction(root); | 3946 | trans = btrfs_join_transaction(root); |
3726 | if (IS_ERR(trans)) | 3947 | if (IS_ERR(trans)) |
3727 | return PTR_ERR(trans); | 3948 | return PTR_ERR(trans); |
3728 | ret = btrfs_commit_transaction(trans, root); | 3949 | if (have_pinned_space >= 0 || |
3729 | if (ret) | 3950 | trans->transaction->have_free_bgs || |
3730 | return ret; | 3951 | need_commit > 0) { |
3731 | goto again; | 3952 | ret = btrfs_commit_transaction(trans, root); |
3953 | if (ret) | ||
3954 | return ret; | ||
3955 | /* | ||
3956 | * make sure that all running delayed iputs are | ||
3957 | * done | ||
3958 | */ | ||
3959 | down_write(&root->fs_info->delayed_iput_sem); | ||
3960 | up_write(&root->fs_info->delayed_iput_sem); | ||
3961 | goto again; | ||
3962 | } else { | ||
3963 | btrfs_end_transaction(trans, root); | ||
3964 | } | ||
3732 | } | 3965 | } |
3733 | 3966 | ||
3734 | trace_btrfs_space_reservation(root->fs_info, | 3967 | trace_btrfs_space_reservation(root->fs_info, |
@@ -3736,12 +3969,16 @@ commit_trans: | |||
3736 | data_sinfo->flags, bytes, 1); | 3969 | data_sinfo->flags, bytes, 1); |
3737 | return -ENOSPC; | 3970 | return -ENOSPC; |
3738 | } | 3971 | } |
3972 | ret = btrfs_qgroup_reserve(root, write_bytes); | ||
3973 | if (ret) | ||
3974 | goto out; | ||
3739 | data_sinfo->bytes_may_use += bytes; | 3975 | data_sinfo->bytes_may_use += bytes; |
3740 | trace_btrfs_space_reservation(root->fs_info, "space_info", | 3976 | trace_btrfs_space_reservation(root->fs_info, "space_info", |
3741 | data_sinfo->flags, bytes, 1); | 3977 | data_sinfo->flags, bytes, 1); |
3978 | out: | ||
3742 | spin_unlock(&data_sinfo->lock); | 3979 | spin_unlock(&data_sinfo->lock); |
3743 | 3980 | ||
3744 | return 0; | 3981 | return ret; |
3745 | } | 3982 | } |
3746 | 3983 | ||
3747 | /* | 3984 | /* |
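
The reworked ENOSPC path above boils down to one predicate: after a failed data reservation, is committing the running transaction likely to free anything? It is worth it when pinned space could cover the shortfall, when a block group was freed in this transaction, or when a forced retry is still owed; the down_write/up_write pair on delayed_iput_sem is only a barrier that waits for in-flight delayed iputs to finish before retrying. A minimal userspace sketch of that decision, with names mirroring the hunk but none of them kernel APIs:

    #include <stdbool.h>
    #include <stdio.h>

    struct enospc_state {
        long long pinned;      /* bytes a commit would return to the pool */
        long long shortfall;   /* used + bytes - total_bytes */
        bool have_free_bgs;    /* a block group was freed this transaction */
        int need_commit;       /* forced retries still allowed */
    };

    static bool worth_committing(const struct enospc_state *s)
    {
        /* mirror of the hunk: pinned space may cover us, a chunk was
         * removed, or we still owe a forced retry */
        return s->pinned >= s->shortfall || s->have_free_bgs ||
               s->need_commit > 0;
    }

    int main(void)
    {
        struct enospc_state s = { .pinned = 1 << 20, .shortfall = 4096,
                                  .have_free_bgs = false, .need_commit = 1 };
        printf("commit? %s\n", worth_committing(&s) ? "yes" : "no");
        return 0;
    }
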
@@ -4298,8 +4535,13 @@ out: | |||
4298 | static inline int need_do_async_reclaim(struct btrfs_space_info *space_info, | 4535 | static inline int need_do_async_reclaim(struct btrfs_space_info *space_info, |
4299 | struct btrfs_fs_info *fs_info, u64 used) | 4536 | struct btrfs_fs_info *fs_info, u64 used) |
4300 | { | 4537 | { |
4301 | return (used >= div_factor_fine(space_info->total_bytes, 98) && | 4538 | u64 thresh = div_factor_fine(space_info->total_bytes, 98); |
4302 | !btrfs_fs_closing(fs_info) && | 4539 | |
4540 | /* If we're just plain full then async reclaim just slows us down. */ | ||
4541 | if (space_info->bytes_used >= thresh) | ||
4542 | return 0; | ||
4543 | |||
4544 | return (used >= thresh && !btrfs_fs_closing(fs_info) && | ||
4303 | !test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state)); | 4545 | !test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state)); |
4304 | } | 4546 | } |
4305 | 4547 | ||
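
The early return above distinguishes "nearly full" (flushing reservations may help) from "plain full" (bytes actually allocated already sit above the 98% line, so reclaim cannot win anything back). A toy model, assuming div_factor_fine(x, 98) computes x * 98 / 100:

    #include <stdbool.h>
    #include <stdint.h>

    static bool need_async_reclaim(uint64_t total, uint64_t bytes_used,
                                   uint64_t used, bool closing, bool remounting)
    {
        uint64_t thresh = total * 98 / 100;   /* assumed div_factor_fine() */

        if (bytes_used >= thresh)   /* plain full: reclaim just slows us down */
            return false;
        return used >= thresh && !closing && !remounting;
    }
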
@@ -4354,10 +4596,7 @@ static void btrfs_async_reclaim_metadata_space(struct work_struct *work) | |||
4354 | if (!btrfs_need_do_async_reclaim(space_info, fs_info, | 4596 | if (!btrfs_need_do_async_reclaim(space_info, fs_info, |
4355 | flush_state)) | 4597 | flush_state)) |
4356 | return; | 4598 | return; |
4357 | } while (flush_state <= COMMIT_TRANS); | 4599 | } while (flush_state < COMMIT_TRANS); |
4358 | |||
4359 | if (btrfs_need_do_async_reclaim(space_info, fs_info, flush_state)) | ||
4360 | queue_work(system_unbound_wq, work); | ||
4361 | } | 4600 | } |
4362 | 4601 | ||
4363 | void btrfs_init_async_reclaim_work(struct work_struct *work) | 4602 | void btrfs_init_async_reclaim_work(struct work_struct *work) |
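
With the loop bound changed from <= to <, the worker walks the flush states but leaves COMMIT_TRANS to direct callers, and it no longer requeues itself afterwards. A toy walk of that shape (the state names are illustrative, not the kernel's exact list):

    #include <stdio.h>

    enum flush_state { FLUSH_DELAYED_ITEMS = 1, FLUSH_DELALLOC, ALLOC_CHUNK,
                       COMMIT_TRANS };

    int main(void)
    {
        for (int state = FLUSH_DELAYED_ITEMS; state < COMMIT_TRANS; state++)
            printf("async flush step %d\n", state);
        /* COMMIT_TRANS is intentionally never reached from the worker */
        return 0;
    }
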
@@ -4700,6 +4939,11 @@ void btrfs_free_block_rsv(struct btrfs_root *root, | |||
4700 | kfree(rsv); | 4939 | kfree(rsv); |
4701 | } | 4940 | } |
4702 | 4941 | ||
4942 | void __btrfs_free_block_rsv(struct btrfs_block_rsv *rsv) | ||
4943 | { | ||
4944 | kfree(rsv); | ||
4945 | } | ||
4946 | |||
4703 | int btrfs_block_rsv_add(struct btrfs_root *root, | 4947 | int btrfs_block_rsv_add(struct btrfs_root *root, |
4704 | struct btrfs_block_rsv *block_rsv, u64 num_bytes, | 4948 | struct btrfs_block_rsv *block_rsv, u64 num_bytes, |
4705 | enum btrfs_reserve_flush_enum flush) | 4949 | enum btrfs_reserve_flush_enum flush) |
@@ -4812,10 +5056,10 @@ static u64 calc_global_metadata_size(struct btrfs_fs_info *fs_info) | |||
4812 | 5056 | ||
4813 | num_bytes = (data_used >> fs_info->sb->s_blocksize_bits) * | 5057 | num_bytes = (data_used >> fs_info->sb->s_blocksize_bits) * |
4814 | csum_size * 2; | 5058 | csum_size * 2; |
4815 | num_bytes += div64_u64(data_used + meta_used, 50); | 5059 | num_bytes += div_u64(data_used + meta_used, 50); |
4816 | 5060 | ||
4817 | if (num_bytes * 3 > meta_used) | 5061 | if (num_bytes * 3 > meta_used) |
4818 | num_bytes = div64_u64(meta_used, 3); | 5062 | num_bytes = div_u64(meta_used, 3); |
4819 | 5063 | ||
4820 | return ALIGN(num_bytes, fs_info->extent_root->nodesize << 10); | 5064 | return ALIGN(num_bytes, fs_info->extent_root->nodesize << 10); |
4821 | } | 5065 | } |
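
Spelled out, the sizing above reserves room for two copies of the csum items covering current data, adds roughly 2% of everything, caps the result at a third of the metadata in use, and aligns up. A worked userspace rendering with made-up inputs (div_u64 here is plain integer division):

    #include <inttypes.h>
    #include <stdio.h>

    #define ALIGN_UP(x, a) (((x) + (a) - 1) / (a) * (a))

    int main(void)
    {
        uint64_t data_used = 10ULL << 30, meta_used = 1ULL << 30;
        uint64_t csum_size = 4, blocksize_bits = 12, nodesize = 16384;

        uint64_t num_bytes = (data_used >> blocksize_bits) * csum_size * 2;
        num_bytes += (data_used + meta_used) / 50;     /* ~2% of everything */
        if (num_bytes * 3 > meta_used)
            num_bytes = meta_used / 3;                 /* cap at a third */
        printf("global reserve: %" PRIu64 " bytes\n",
               ALIGN_UP(num_bytes, nodesize << 10));
        return 0;
    }
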
@@ -4998,8 +5242,6 @@ void btrfs_subvolume_release_metadata(struct btrfs_root *root, | |||
4998 | u64 qgroup_reserved) | 5242 | u64 qgroup_reserved) |
4999 | { | 5243 | { |
5000 | btrfs_block_rsv_release(root, rsv, (u64)-1); | 5244 | btrfs_block_rsv_release(root, rsv, (u64)-1); |
5001 | if (qgroup_reserved) | ||
5002 | btrfs_qgroup_free(root, qgroup_reserved); | ||
5003 | } | 5245 | } |
5004 | 5246 | ||
5005 | /** | 5247 | /** |
@@ -5066,30 +5308,18 @@ static u64 calc_csum_metadata_size(struct inode *inode, u64 num_bytes, | |||
5066 | int reserve) | 5308 | int reserve) |
5067 | { | 5309 | { |
5068 | struct btrfs_root *root = BTRFS_I(inode)->root; | 5310 | struct btrfs_root *root = BTRFS_I(inode)->root; |
5069 | u64 csum_size; | 5311 | u64 old_csums, num_csums; |
5070 | int num_csums_per_leaf; | ||
5071 | int num_csums; | ||
5072 | int old_csums; | ||
5073 | 5312 | ||
5074 | if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM && | 5313 | if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM && |
5075 | BTRFS_I(inode)->csum_bytes == 0) | 5314 | BTRFS_I(inode)->csum_bytes == 0) |
5076 | return 0; | 5315 | return 0; |
5077 | 5316 | ||
5078 | old_csums = (int)div64_u64(BTRFS_I(inode)->csum_bytes, root->sectorsize); | 5317 | old_csums = btrfs_csum_bytes_to_leaves(root, BTRFS_I(inode)->csum_bytes); |
5079 | if (reserve) | 5318 | if (reserve) |
5080 | BTRFS_I(inode)->csum_bytes += num_bytes; | 5319 | BTRFS_I(inode)->csum_bytes += num_bytes; |
5081 | else | 5320 | else |
5082 | BTRFS_I(inode)->csum_bytes -= num_bytes; | 5321 | BTRFS_I(inode)->csum_bytes -= num_bytes; |
5083 | csum_size = BTRFS_LEAF_DATA_SIZE(root) - sizeof(struct btrfs_item); | 5322 | num_csums = btrfs_csum_bytes_to_leaves(root, BTRFS_I(inode)->csum_bytes); |
5084 | num_csums_per_leaf = (int)div64_u64(csum_size, | ||
5085 | sizeof(struct btrfs_csum_item) + | ||
5086 | sizeof(struct btrfs_disk_key)); | ||
5087 | num_csums = (int)div64_u64(BTRFS_I(inode)->csum_bytes, root->sectorsize); | ||
5088 | num_csums = num_csums + num_csums_per_leaf - 1; | ||
5089 | num_csums = num_csums / num_csums_per_leaf; | ||
5090 | |||
5091 | old_csums = old_csums + num_csums_per_leaf - 1; | ||
5092 | old_csums = old_csums / num_csums_per_leaf; | ||
5093 | 5323 | ||
5094 | /* No change, no need to reserve more */ | 5324 | /* No change, no need to reserve more */ |
5095 | if (old_csums == num_csums) | 5325 | if (old_csums == num_csums) |
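
The deleted open-coded block computed the same round-up that btrfs_csum_bytes_to_leaves() now encapsulates: bytes covered by checksums, divided into csum items, divided into leaves, rounded up at the leaf boundary. A sketch of the presumed helper (the per-leaf capacity is a parameter here, not read from the tree):

    #include <stdint.h>

    static uint64_t csum_bytes_to_leaves(uint64_t csum_bytes, uint64_t sectorsize,
                                         uint64_t csums_per_leaf)
    {
        uint64_t num_csums = csum_bytes / sectorsize;

        /* round up: a partially filled leaf still costs a whole leaf */
        return (num_csums + csums_per_leaf - 1) / csums_per_leaf;
    }
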
@@ -5163,8 +5393,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) | |||
5163 | spin_unlock(&BTRFS_I(inode)->lock); | 5393 | spin_unlock(&BTRFS_I(inode)->lock); |
5164 | 5394 | ||
5165 | if (root->fs_info->quota_enabled) { | 5395 | if (root->fs_info->quota_enabled) { |
5166 | ret = btrfs_qgroup_reserve(root, num_bytes + | 5396 | ret = btrfs_qgroup_reserve(root, nr_extents * root->nodesize); |
5167 | nr_extents * root->nodesize); | ||
5168 | if (ret) | 5397 | if (ret) |
5169 | goto out_fail; | 5398 | goto out_fail; |
5170 | } | 5399 | } |
@@ -5172,8 +5401,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) | |||
5172 | ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush); | 5401 | ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush); |
5173 | if (unlikely(ret)) { | 5402 | if (unlikely(ret)) { |
5174 | if (root->fs_info->quota_enabled) | 5403 | if (root->fs_info->quota_enabled) |
5175 | btrfs_qgroup_free(root, num_bytes + | 5404 | btrfs_qgroup_free(root, nr_extents * root->nodesize); |
5176 | nr_extents * root->nodesize); | ||
5177 | goto out_fail; | 5405 | goto out_fail; |
5178 | } | 5406 | } |
5179 | 5407 | ||
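
Both hunks make the qgroup charge symmetric and metadata-only: reserve and free now cover just the tree blocks a delalloc reservation can create, leaving the data bytes to the new charge added in btrfs_check_data_free_space(). The arithmetic, in isolation:

    static unsigned long long qgroup_meta_bytes(unsigned long long nr_extents,
                                                unsigned long long nodesize)
    {
        /* was: num_bytes + nr_extents * nodesize */
        return nr_extents * nodesize;
    }
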
@@ -5290,10 +5518,6 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes) | |||
5290 | 5518 | ||
5291 | trace_btrfs_space_reservation(root->fs_info, "delalloc", | 5519 | trace_btrfs_space_reservation(root->fs_info, "delalloc", |
5292 | btrfs_ino(inode), to_free, 0); | 5520 | btrfs_ino(inode), to_free, 0); |
5293 | if (root->fs_info->quota_enabled) { | ||
5294 | btrfs_qgroup_free(root, num_bytes + | ||
5295 | dropped * root->nodesize); | ||
5296 | } | ||
5297 | 5521 | ||
5298 | btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv, | 5522 | btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv, |
5299 | to_free); | 5523 | to_free); |
@@ -5318,7 +5542,7 @@ int btrfs_delalloc_reserve_space(struct inode *inode, u64 num_bytes) | |||
5318 | { | 5542 | { |
5319 | int ret; | 5543 | int ret; |
5320 | 5544 | ||
5321 | ret = btrfs_check_data_free_space(inode, num_bytes); | 5545 | ret = btrfs_check_data_free_space(inode, num_bytes, num_bytes); |
5322 | if (ret) | 5546 | if (ret) |
5323 | return ret; | 5547 | return ret; |
5324 | 5548 | ||
@@ -5390,14 +5614,6 @@ static int update_block_group(struct btrfs_trans_handle *trans, | |||
5390 | if (!alloc && cache->cached == BTRFS_CACHE_NO) | 5614 | if (!alloc && cache->cached == BTRFS_CACHE_NO) |
5391 | cache_block_group(cache, 1); | 5615 | cache_block_group(cache, 1); |
5392 | 5616 | ||
5393 | spin_lock(&trans->transaction->dirty_bgs_lock); | ||
5394 | if (list_empty(&cache->dirty_list)) { | ||
5395 | list_add_tail(&cache->dirty_list, | ||
5396 | &trans->transaction->dirty_bgs); | ||
5397 | btrfs_get_block_group(cache); | ||
5398 | } | ||
5399 | spin_unlock(&trans->transaction->dirty_bgs_lock); | ||
5400 | |||
5401 | byte_in_group = bytenr - cache->key.objectid; | 5617 | byte_in_group = bytenr - cache->key.objectid; |
5402 | WARN_ON(byte_in_group > cache->key.offset); | 5618 | WARN_ON(byte_in_group > cache->key.offset); |
5403 | 5619 | ||
@@ -5446,6 +5662,16 @@ static int update_block_group(struct btrfs_trans_handle *trans, | |||
5446 | spin_unlock(&info->unused_bgs_lock); | 5662 | spin_unlock(&info->unused_bgs_lock); |
5447 | } | 5663 | } |
5448 | } | 5664 | } |
5665 | |||
5666 | spin_lock(&trans->transaction->dirty_bgs_lock); | ||
5667 | if (list_empty(&cache->dirty_list)) { | ||
5668 | list_add_tail(&cache->dirty_list, | ||
5669 | &trans->transaction->dirty_bgs); | ||
5670 | trans->transaction->num_dirty_bgs++; | ||
5671 | btrfs_get_block_group(cache); | ||
5672 | } | ||
5673 | spin_unlock(&trans->transaction->dirty_bgs_lock); | ||
5674 | |||
5449 | btrfs_put_block_group(cache); | 5675 | btrfs_put_block_group(cache); |
5450 | total -= num_bytes; | 5676 | total -= num_bytes; |
5451 | bytenr += num_bytes; | 5677 | bytenr += num_bytes; |
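
Moving the dirty-list insertion below the accounting keeps the add-once idiom intact: list membership owns exactly one block-group reference, and num_dirty_bgs counts each group once no matter how many extents dirtied it. A toy model, with a flag standing in for list_empty() under dirty_bgs_lock:

    #include <stdbool.h>

    struct bg { bool on_dirty_list; int refs; };

    static void mark_bg_dirty(struct bg *bg, unsigned long *num_dirty_bgs)
    {
        /* in the kernel this runs under dirty_bgs_lock */
        if (!bg->on_dirty_list) {
            bg->on_dirty_list = true;
            (*num_dirty_bgs)++;
            bg->refs++;          /* the reference held by the list */
        }
    }
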
@@ -6956,15 +7182,15 @@ static int __btrfs_free_reserved_extent(struct btrfs_root *root, | |||
6956 | return -ENOSPC; | 7182 | return -ENOSPC; |
6957 | } | 7183 | } |
6958 | 7184 | ||
6959 | if (btrfs_test_opt(root, DISCARD)) | ||
6960 | ret = btrfs_discard_extent(root, start, len, NULL); | ||
6961 | |||
6962 | if (pin) | 7185 | if (pin) |
6963 | pin_down_extent(root, cache, start, len, 1); | 7186 | pin_down_extent(root, cache, start, len, 1); |
6964 | else { | 7187 | else { |
7188 | if (btrfs_test_opt(root, DISCARD)) | ||
7189 | ret = btrfs_discard_extent(root, start, len, NULL); | ||
6965 | btrfs_add_free_space(cache, start, len); | 7190 | btrfs_add_free_space(cache, start, len); |
6966 | btrfs_update_reserved_bytes(cache, len, RESERVE_FREE, delalloc); | 7191 | btrfs_update_reserved_bytes(cache, len, RESERVE_FREE, delalloc); |
6967 | } | 7192 | } |
7193 | |||
6968 | btrfs_put_block_group(cache); | 7194 | btrfs_put_block_group(cache); |
6969 | 7195 | ||
6970 | trace_btrfs_reserved_extent_free(root, start, len); | 7196 | trace_btrfs_reserved_extent_free(root, start, len); |
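
The reordering matters because pinned extents may still be read until the transaction commits; discarding them early could hand back zeroes. A stub-driven sketch of the resulting order (the printf calls stand in for the btrfs helpers, nothing here is kernel API):

    #include <stdbool.h>
    #include <stdio.h>

    static void free_reserved(bool pin, bool discard_opt)
    {
        if (pin) {
            printf("pin extent (no discard)\n");    /* data must survive */
        } else {
            if (discard_opt)
                printf("discard extent\n");         /* truly free: safe now */
            printf("return extent to free space\n");
        }
    }

    int main(void)
    {
        free_reserved(true, true);   /* pinned: keep contents until commit */
        free_reserved(false, true);  /* freed: discard before reuse */
        return 0;
    }
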
@@ -7095,9 +7321,9 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, | |||
7095 | ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path, | 7321 | ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path, |
7096 | ins, size); | 7322 | ins, size); |
7097 | if (ret) { | 7323 | if (ret) { |
7324 | btrfs_free_path(path); | ||
7098 | btrfs_free_and_pin_reserved_extent(root, ins->objectid, | 7325 | btrfs_free_and_pin_reserved_extent(root, ins->objectid, |
7099 | root->nodesize); | 7326 | root->nodesize); |
7100 | btrfs_free_path(path); | ||
7101 | return ret; | 7327 | return ret; |
7102 | } | 7328 | } |
7103 | 7329 | ||
@@ -7217,7 +7443,7 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root, | |||
7217 | btrfs_set_header_generation(buf, trans->transid); | 7443 | btrfs_set_header_generation(buf, trans->transid); |
7218 | btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level); | 7444 | btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level); |
7219 | btrfs_tree_lock(buf); | 7445 | btrfs_tree_lock(buf); |
7220 | clean_tree_block(trans, root, buf); | 7446 | clean_tree_block(trans, root->fs_info, buf); |
7221 | clear_bit(EXTENT_BUFFER_STALE, &buf->bflags); | 7447 | clear_bit(EXTENT_BUFFER_STALE, &buf->bflags); |
7222 | 7448 | ||
7223 | btrfs_set_lock_blocking(buf); | 7449 | btrfs_set_lock_blocking(buf); |
@@ -7815,7 +8041,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans, | |||
7815 | bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]); | 8041 | bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]); |
7816 | blocksize = root->nodesize; | 8042 | blocksize = root->nodesize; |
7817 | 8043 | ||
7818 | next = btrfs_find_tree_block(root, bytenr); | 8044 | next = btrfs_find_tree_block(root->fs_info, bytenr); |
7819 | if (!next) { | 8045 | if (!next) { |
7820 | next = btrfs_find_create_tree_block(root, bytenr); | 8046 | next = btrfs_find_create_tree_block(root, bytenr); |
7821 | if (!next) | 8047 | if (!next) |
@@ -8016,7 +8242,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, | |||
8016 | btrfs_set_lock_blocking(eb); | 8242 | btrfs_set_lock_blocking(eb); |
8017 | path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING; | 8243 | path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING; |
8018 | } | 8244 | } |
8019 | clean_tree_block(trans, root, eb); | 8245 | clean_tree_block(trans, root->fs_info, eb); |
8020 | } | 8246 | } |
8021 | 8247 | ||
8022 | if (eb == root->node) { | 8248 | if (eb == root->node) { |
@@ -8533,10 +8759,30 @@ int btrfs_set_block_group_ro(struct btrfs_root *root, | |||
8533 | 8759 | ||
8534 | BUG_ON(cache->ro); | 8760 | BUG_ON(cache->ro); |
8535 | 8761 | ||
8762 | again: | ||
8536 | trans = btrfs_join_transaction(root); | 8763 | trans = btrfs_join_transaction(root); |
8537 | if (IS_ERR(trans)) | 8764 | if (IS_ERR(trans)) |
8538 | return PTR_ERR(trans); | 8765 | return PTR_ERR(trans); |
8539 | 8766 | ||
8767 | /* | ||
8768 | * we're not allowed to set block groups readonly after the dirty | ||
8769 | * block groups cache has started writing. If it already started, | ||
8770 | * back off and let this transaction commit | ||
8771 | */ | ||
8772 | mutex_lock(&root->fs_info->ro_block_group_mutex); | ||
8773 | if (trans->transaction->dirty_bg_run) { | ||
8774 | u64 transid = trans->transid; | ||
8775 | |||
8776 | mutex_unlock(&root->fs_info->ro_block_group_mutex); | ||
8777 | btrfs_end_transaction(trans, root); | ||
8778 | |||
8779 | ret = btrfs_wait_for_commit(root, transid); | ||
8780 | if (ret) | ||
8781 | return ret; | ||
8782 | goto again; | ||
8783 | } | ||
8784 | |||
8785 | |||
8540 | ret = set_block_group_ro(cache, 0); | 8786 | ret = set_block_group_ro(cache, 0); |
8541 | if (!ret) | 8787 | if (!ret) |
8542 | goto out; | 8788 | goto out; |
@@ -8551,6 +8797,7 @@ out: | |||
8551 | alloc_flags = update_block_group_flags(root, cache->flags); | 8797 | alloc_flags = update_block_group_flags(root, cache->flags); |
8552 | check_system_chunk(trans, root, alloc_flags); | 8798 | check_system_chunk(trans, root, alloc_flags); |
8553 | } | 8799 | } |
8800 | mutex_unlock(&root->fs_info->ro_block_group_mutex); | ||
8554 | 8801 | ||
8555 | btrfs_end_transaction(trans, root); | 8802 | btrfs_end_transaction(trans, root); |
8556 | return ret; | 8803 | return ret; |
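
The retry loop above reads as: join a transaction, and if the dirty block-group writeout already began inside it, wait for that very transaction to commit and start over. A userspace model where the callbacks stand in for btrfs_join_transaction(), btrfs_wait_for_commit() and set_block_group_ro(); the ro_block_group_mutex is elided:

    #include <stdbool.h>

    struct txn { bool dirty_bg_run; unsigned long transid; };

    static int set_bg_readonly(struct txn *(*join)(void),
                               int (*wait_commit)(unsigned long transid),
                               int (*make_ro)(void))
    {
        for (;;) {
            struct txn *t = join();

            if (!t->dirty_bg_run)        /* cache writeout not started yet */
                return make_ro();        /* safe to flip the group read-only */

            /* too late for this transaction: wait it out, then retry */
            int ret = wait_commit(t->transid);
            if (ret)
                return ret;
        }
    }
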
@@ -8720,7 +8967,7 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) | |||
8720 | min_free <<= 1; | 8967 | min_free <<= 1; |
8721 | } else if (index == BTRFS_RAID_RAID0) { | 8968 | } else if (index == BTRFS_RAID_RAID0) { |
8722 | dev_min = fs_devices->rw_devices; | 8969 | dev_min = fs_devices->rw_devices; |
8723 | do_div(min_free, dev_min); | 8970 | min_free = div64_u64(min_free, dev_min); |
8724 | } | 8971 | } |
8725 | 8972 | ||
8726 | /* We need to do this so that we can look at pending chunks */ | 8973 | /* We need to do this so that we can look at pending chunks */ |
@@ -8992,6 +9239,7 @@ btrfs_create_block_group_cache(struct btrfs_root *root, u64 start, u64 size) | |||
8992 | INIT_LIST_HEAD(&cache->bg_list); | 9239 | INIT_LIST_HEAD(&cache->bg_list); |
8993 | INIT_LIST_HEAD(&cache->ro_list); | 9240 | INIT_LIST_HEAD(&cache->ro_list); |
8994 | INIT_LIST_HEAD(&cache->dirty_list); | 9241 | INIT_LIST_HEAD(&cache->dirty_list); |
9242 | INIT_LIST_HEAD(&cache->io_list); | ||
8995 | btrfs_init_free_space_ctl(cache); | 9243 | btrfs_init_free_space_ctl(cache); |
8996 | atomic_set(&cache->trimming, 0); | 9244 | atomic_set(&cache->trimming, 0); |
8997 | 9245 | ||
@@ -9355,7 +9603,38 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | |||
9355 | goto out; | 9603 | goto out; |
9356 | } | 9604 | } |
9357 | 9605 | ||
9606 | /* | ||
9607 | * get the inode first so any iput calls done for the io_list | ||
9608 | * aren't the final iput (no unlinks allowed now) | ||
9609 | */ | ||
9358 | inode = lookup_free_space_inode(tree_root, block_group, path); | 9610 | inode = lookup_free_space_inode(tree_root, block_group, path); |
9611 | |||
9612 | mutex_lock(&trans->transaction->cache_write_mutex); | ||
9613 | /* | ||
9614 | * make sure our free space cache IO is done before removing the | ||
9615 | * free space inode | ||
9616 | */ | ||
9617 | spin_lock(&trans->transaction->dirty_bgs_lock); | ||
9618 | if (!list_empty(&block_group->io_list)) { | ||
9619 | list_del_init(&block_group->io_list); | ||
9620 | |||
9621 | WARN_ON(!IS_ERR(inode) && inode != block_group->io_ctl.inode); | ||
9622 | |||
9623 | spin_unlock(&trans->transaction->dirty_bgs_lock); | ||
9624 | btrfs_wait_cache_io(root, trans, block_group, | ||
9625 | &block_group->io_ctl, path, | ||
9626 | block_group->key.objectid); | ||
9627 | btrfs_put_block_group(block_group); | ||
9628 | spin_lock(&trans->transaction->dirty_bgs_lock); | ||
9629 | } | ||
9630 | |||
9631 | if (!list_empty(&block_group->dirty_list)) { | ||
9632 | list_del_init(&block_group->dirty_list); | ||
9633 | btrfs_put_block_group(block_group); | ||
9634 | } | ||
9635 | spin_unlock(&trans->transaction->dirty_bgs_lock); | ||
9636 | mutex_unlock(&trans->transaction->cache_write_mutex); | ||
9637 | |||
9359 | if (!IS_ERR(inode)) { | 9638 | if (!IS_ERR(inode)) { |
9360 | ret = btrfs_orphan_add(trans, inode); | 9639 | ret = btrfs_orphan_add(trans, inode); |
9361 | if (ret) { | 9640 | if (ret) { |
@@ -9448,18 +9727,29 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | |||
9448 | 9727 | ||
9449 | spin_lock(&trans->transaction->dirty_bgs_lock); | 9728 | spin_lock(&trans->transaction->dirty_bgs_lock); |
9450 | if (!list_empty(&block_group->dirty_list)) { | 9729 | if (!list_empty(&block_group->dirty_list)) { |
9451 | list_del_init(&block_group->dirty_list); | 9730 | WARN_ON(1); |
9452 | btrfs_put_block_group(block_group); | 9731 | } |
9732 | if (!list_empty(&block_group->io_list)) { | ||
9733 | WARN_ON(1); | ||
9453 | } | 9734 | } |
9454 | spin_unlock(&trans->transaction->dirty_bgs_lock); | 9735 | spin_unlock(&trans->transaction->dirty_bgs_lock); |
9455 | |||
9456 | btrfs_remove_free_space_cache(block_group); | 9736 | btrfs_remove_free_space_cache(block_group); |
9457 | 9737 | ||
9458 | spin_lock(&block_group->space_info->lock); | 9738 | spin_lock(&block_group->space_info->lock); |
9459 | list_del_init(&block_group->ro_list); | 9739 | list_del_init(&block_group->ro_list); |
9740 | |||
9741 | if (btrfs_test_opt(root, ENOSPC_DEBUG)) { | ||
9742 | WARN_ON(block_group->space_info->total_bytes | ||
9743 | < block_group->key.offset); | ||
9744 | WARN_ON(block_group->space_info->bytes_readonly | ||
9745 | < block_group->key.offset); | ||
9746 | WARN_ON(block_group->space_info->disk_total | ||
9747 | < block_group->key.offset * factor); | ||
9748 | } | ||
9460 | block_group->space_info->total_bytes -= block_group->key.offset; | 9749 | block_group->space_info->total_bytes -= block_group->key.offset; |
9461 | block_group->space_info->bytes_readonly -= block_group->key.offset; | 9750 | block_group->space_info->bytes_readonly -= block_group->key.offset; |
9462 | block_group->space_info->disk_total -= block_group->key.offset * factor; | 9751 | block_group->space_info->disk_total -= block_group->key.offset * factor; |
9752 | |||
9463 | spin_unlock(&block_group->space_info->lock); | 9753 | spin_unlock(&block_group->space_info->lock); |
9464 | 9754 | ||
9465 | memcpy(&key, &block_group->key, sizeof(key)); | 9755 | memcpy(&key, &block_group->key, sizeof(key)); |
@@ -9647,8 +9937,18 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) | |||
9647 | mutex_unlock(&fs_info->unused_bg_unpin_mutex); | 9937 | mutex_unlock(&fs_info->unused_bg_unpin_mutex); |
9648 | 9938 | ||
9649 | /* Reset pinned so btrfs_put_block_group doesn't complain */ | 9939 | /* Reset pinned so btrfs_put_block_group doesn't complain */ |
9940 | spin_lock(&space_info->lock); | ||
9941 | spin_lock(&block_group->lock); | ||
9942 | |||
9943 | space_info->bytes_pinned -= block_group->pinned; | ||
9944 | space_info->bytes_readonly += block_group->pinned; | ||
9945 | percpu_counter_add(&space_info->total_bytes_pinned, | ||
9946 | -block_group->pinned); | ||
9650 | block_group->pinned = 0; | 9947 | block_group->pinned = 0; |
9651 | 9948 | ||
9949 | spin_unlock(&block_group->lock); | ||
9950 | spin_unlock(&space_info->lock); | ||
9951 | |||
9652 | /* | 9952 | /* |
9653 | * Btrfs_remove_chunk will abort the transaction if things go | 9953 | * Btrfs_remove_chunk will abort the transaction if things go |
9654 | * horribly wrong. | 9954 | * horribly wrong. |
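
The added accounting keeps space_info consistent while zeroing the group's pinned counter: the bytes move from pinned to readonly under both locks, and the per-cpu total follows, so the totals never go transiently negative. Reduced to its arithmetic:

    struct space_info { long long bytes_pinned, bytes_readonly; };

    static void unpin_unused_bg(struct space_info *si, long long *bg_pinned)
    {
        /* kernel order: space_info->lock, then block_group->lock */
        si->bytes_pinned   -= *bg_pinned;
        si->bytes_readonly += *bg_pinned;
        *bg_pinned = 0;
    }
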
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index d688cfe5d496..782f3bc4651d 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
@@ -4514,8 +4514,11 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | |||
4514 | } | 4514 | } |
4515 | ret = fiemap_fill_next_extent(fieinfo, em_start, disko, | 4515 | ret = fiemap_fill_next_extent(fieinfo, em_start, disko, |
4516 | em_len, flags); | 4516 | em_len, flags); |
4517 | if (ret) | 4517 | if (ret) { |
4518 | if (ret == 1) | ||
4519 | ret = 0; | ||
4518 | goto out_free; | 4520 | goto out_free; |
4521 | } | ||
4519 | } | 4522 | } |
4520 | out_free: | 4523 | out_free: |
4521 | free_extent_map(em); | 4524 | free_extent_map(em); |
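
fiemap_fill_next_extent() returns 1 when the user's extent buffer is full, which is a normal stop condition rather than a failure; the hunk converts it to 0 before bailing out. A self-contained model of that contract:

    #include <stdio.h>

    /* Toy producer mimicking fiemap_fill_next_extent()'s return values:
     * 0 = extent stored, 1 = destination full (stop, success), <0 = error. */
    static int fill_next_extent(int *slots_left)
    {
        if (*slots_left == 0)
            return 1;
        (*slots_left)--;
        return 0;
    }

    int main(void)
    {
        int slots = 3, ret = 0;

        for (int i = 0; i < 10; i++) {
            ret = fill_next_extent(&slots);
            if (ret) {
                if (ret == 1)
                    ret = 0;        /* ran out of room: still success */
                break;
            }
        }
        printf("ret=%d\n", ret);    /* prints ret=0 */
        return 0;
    }
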
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 695b0ccfb755..c668f36898d3 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h | |||
@@ -97,7 +97,7 @@ struct extent_io_tree { | |||
97 | u64 dirty_bytes; | 97 | u64 dirty_bytes; |
98 | int track_uptodate; | 98 | int track_uptodate; |
99 | spinlock_t lock; | 99 | spinlock_t lock; |
100 | struct extent_io_ops *ops; | 100 | const struct extent_io_ops *ops; |
101 | }; | 101 | }; |
102 | 102 | ||
103 | struct extent_state { | 103 | struct extent_state { |
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 84a2d1868271..58ece6558430 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c | |||
@@ -185,8 +185,8 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root, | |||
185 | nblocks = bio->bi_iter.bi_size >> inode->i_sb->s_blocksize_bits; | 185 | nblocks = bio->bi_iter.bi_size >> inode->i_sb->s_blocksize_bits; |
186 | if (!dst) { | 186 | if (!dst) { |
187 | if (nblocks * csum_size > BTRFS_BIO_INLINE_CSUM_SIZE) { | 187 | if (nblocks * csum_size > BTRFS_BIO_INLINE_CSUM_SIZE) { |
188 | btrfs_bio->csum_allocated = kmalloc(nblocks * csum_size, | 188 | btrfs_bio->csum_allocated = kmalloc_array(nblocks, |
189 | GFP_NOFS); | 189 | csum_size, GFP_NOFS); |
190 | if (!btrfs_bio->csum_allocated) { | 190 | if (!btrfs_bio->csum_allocated) { |
191 | btrfs_free_path(path); | 191 | btrfs_free_path(path); |
192 | return -ENOMEM; | 192 | return -ENOMEM; |
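
kmalloc_array() exists because the open-coded nblocks * csum_size multiplication can wrap and silently under-allocate. A userspace equivalent of the overflow guard it performs:

    #include <stdint.h>
    #include <stdlib.h>

    static void *alloc_array(size_t n, size_t size)
    {
        if (size != 0 && n > SIZE_MAX / size)
            return NULL;            /* n * size would overflow */
        return malloc(n * size);
    }
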
@@ -553,7 +553,7 @@ static noinline void truncate_one_csum(struct btrfs_root *root, | |||
553 | btrfs_truncate_item(root, path, new_size, 0); | 553 | btrfs_truncate_item(root, path, new_size, 0); |
554 | 554 | ||
555 | key->offset = end_byte; | 555 | key->offset = end_byte; |
556 | btrfs_set_item_key_safe(root, path, key); | 556 | btrfs_set_item_key_safe(root->fs_info, path, key); |
557 | } else { | 557 | } else { |
558 | BUG(); | 558 | BUG(); |
559 | } | 559 | } |
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index faa7d390841b..467620a3b1f9 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
@@ -273,11 +273,7 @@ void btrfs_cleanup_defrag_inodes(struct btrfs_fs_info *fs_info) | |||
273 | defrag = rb_entry(node, struct inode_defrag, rb_node); | 273 | defrag = rb_entry(node, struct inode_defrag, rb_node); |
274 | kmem_cache_free(btrfs_inode_defrag_cachep, defrag); | 274 | kmem_cache_free(btrfs_inode_defrag_cachep, defrag); |
275 | 275 | ||
276 | if (need_resched()) { | 276 | cond_resched_lock(&fs_info->defrag_inodes_lock); |
277 | spin_unlock(&fs_info->defrag_inodes_lock); | ||
278 | cond_resched(); | ||
279 | spin_lock(&fs_info->defrag_inodes_lock); | ||
280 | } | ||
281 | 277 | ||
282 | node = rb_first(&fs_info->defrag_inodes); | 278 | node = rb_first(&fs_info->defrag_inodes); |
283 | } | 279 | } |
@@ -868,7 +864,7 @@ next_slot: | |||
868 | 864 | ||
869 | memcpy(&new_key, &key, sizeof(new_key)); | 865 | memcpy(&new_key, &key, sizeof(new_key)); |
870 | new_key.offset = end; | 866 | new_key.offset = end; |
871 | btrfs_set_item_key_safe(root, path, &new_key); | 867 | btrfs_set_item_key_safe(root->fs_info, path, &new_key); |
872 | 868 | ||
873 | extent_offset += end - key.offset; | 869 | extent_offset += end - key.offset; |
874 | btrfs_set_file_extent_offset(leaf, fi, extent_offset); | 870 | btrfs_set_file_extent_offset(leaf, fi, extent_offset); |
@@ -1126,7 +1122,7 @@ again: | |||
1126 | ino, bytenr, orig_offset, | 1122 | ino, bytenr, orig_offset, |
1127 | &other_start, &other_end)) { | 1123 | &other_start, &other_end)) { |
1128 | new_key.offset = end; | 1124 | new_key.offset = end; |
1129 | btrfs_set_item_key_safe(root, path, &new_key); | 1125 | btrfs_set_item_key_safe(root->fs_info, path, &new_key); |
1130 | fi = btrfs_item_ptr(leaf, path->slots[0], | 1126 | fi = btrfs_item_ptr(leaf, path->slots[0], |
1131 | struct btrfs_file_extent_item); | 1127 | struct btrfs_file_extent_item); |
1132 | btrfs_set_file_extent_generation(leaf, fi, | 1128 | btrfs_set_file_extent_generation(leaf, fi, |
@@ -1160,7 +1156,7 @@ again: | |||
1160 | trans->transid); | 1156 | trans->transid); |
1161 | path->slots[0]++; | 1157 | path->slots[0]++; |
1162 | new_key.offset = start; | 1158 | new_key.offset = start; |
1163 | btrfs_set_item_key_safe(root, path, &new_key); | 1159 | btrfs_set_item_key_safe(root->fs_info, path, &new_key); |
1164 | 1160 | ||
1165 | fi = btrfs_item_ptr(leaf, path->slots[0], | 1161 | fi = btrfs_item_ptr(leaf, path->slots[0], |
1166 | struct btrfs_file_extent_item); | 1162 | struct btrfs_file_extent_item); |
@@ -1485,7 +1481,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, | |||
1485 | PAGE_CACHE_SIZE / (sizeof(struct page *))); | 1481 | PAGE_CACHE_SIZE / (sizeof(struct page *))); |
1486 | nrptrs = min(nrptrs, current->nr_dirtied_pause - current->nr_dirtied); | 1482 | nrptrs = min(nrptrs, current->nr_dirtied_pause - current->nr_dirtied); |
1487 | nrptrs = max(nrptrs, 8); | 1483 | nrptrs = max(nrptrs, 8); |
1488 | pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL); | 1484 | pages = kmalloc_array(nrptrs, sizeof(struct page *), GFP_KERNEL); |
1489 | if (!pages) | 1485 | if (!pages) |
1490 | return -ENOMEM; | 1486 | return -ENOMEM; |
1491 | 1487 | ||
@@ -1514,7 +1510,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, | |||
1514 | } | 1510 | } |
1515 | 1511 | ||
1516 | reserve_bytes = num_pages << PAGE_CACHE_SHIFT; | 1512 | reserve_bytes = num_pages << PAGE_CACHE_SHIFT; |
1517 | ret = btrfs_check_data_free_space(inode, reserve_bytes); | 1513 | ret = btrfs_check_data_free_space(inode, reserve_bytes, write_bytes); |
1518 | if (ret == -ENOSPC && | 1514 | if (ret == -ENOSPC && |
1519 | (BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW | | 1515 | (BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW | |
1520 | BTRFS_INODE_PREALLOC))) { | 1516 | BTRFS_INODE_PREALLOC))) { |
@@ -1635,8 +1631,8 @@ again: | |||
1635 | btrfs_end_write_no_snapshoting(root); | 1631 | btrfs_end_write_no_snapshoting(root); |
1636 | 1632 | ||
1637 | if (only_release_metadata && copied > 0) { | 1633 | if (only_release_metadata && copied > 0) { |
1638 | u64 lockstart = round_down(pos, root->sectorsize); | 1634 | lockstart = round_down(pos, root->sectorsize); |
1639 | u64 lockend = lockstart + | 1635 | lockend = lockstart + |
1640 | (dirty_pages << PAGE_CACHE_SHIFT) - 1; | 1636 | (dirty_pages << PAGE_CACHE_SHIFT) - 1; |
1641 | 1637 | ||
1642 | set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, | 1638 | set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, |
@@ -1809,7 +1805,9 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb, | |||
1809 | * otherwise subsequent syncs to a file that's been synced in this | 1805 | * otherwise subsequent syncs to a file that's been synced in this |
1810 | * transaction will appear to have already occurred. | 1806 |
1811 | */ | 1807 | */ |
1808 | spin_lock(&BTRFS_I(inode)->lock); | ||
1812 | BTRFS_I(inode)->last_sub_trans = root->log_transid; | 1809 | BTRFS_I(inode)->last_sub_trans = root->log_transid; |
1810 | spin_unlock(&BTRFS_I(inode)->lock); | ||
1813 | if (num_written > 0) { | 1811 | if (num_written > 0) { |
1814 | err = generic_write_sync(file, pos, num_written); | 1812 | err = generic_write_sync(file, pos, num_written); |
1815 | if (err < 0) | 1813 | if (err < 0) |
@@ -2162,7 +2160,7 @@ static int fill_holes(struct btrfs_trans_handle *trans, struct inode *inode, | |||
2162 | u64 num_bytes; | 2160 | u64 num_bytes; |
2163 | 2161 | ||
2164 | key.offset = offset; | 2162 | key.offset = offset; |
2165 | btrfs_set_item_key_safe(root, path, &key); | 2163 | btrfs_set_item_key_safe(root->fs_info, path, &key); |
2166 | fi = btrfs_item_ptr(leaf, path->slots[0], | 2164 | fi = btrfs_item_ptr(leaf, path->slots[0], |
2167 | struct btrfs_file_extent_item); | 2165 | struct btrfs_file_extent_item); |
2168 | num_bytes = btrfs_file_extent_num_bytes(leaf, fi) + end - | 2166 | num_bytes = btrfs_file_extent_num_bytes(leaf, fi) + end - |
@@ -2545,7 +2543,6 @@ static long btrfs_fallocate(struct file *file, int mode, | |||
2545 | { | 2543 | { |
2546 | struct inode *inode = file_inode(file); | 2544 | struct inode *inode = file_inode(file); |
2547 | struct extent_state *cached_state = NULL; | 2545 | struct extent_state *cached_state = NULL; |
2548 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
2549 | u64 cur_offset; | 2546 | u64 cur_offset; |
2550 | u64 last_byte; | 2547 | u64 last_byte; |
2551 | u64 alloc_start; | 2548 | u64 alloc_start; |
@@ -2570,14 +2567,9 @@ static long btrfs_fallocate(struct file *file, int mode, | |||
2570 | * Make sure we have enough space before we do the | 2567 | * Make sure we have enough space before we do the |
2571 | * allocation. | 2568 | * allocation. |
2572 | */ | 2569 | */ |
2573 | ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start); | 2570 | ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start, alloc_end - alloc_start); |
2574 | if (ret) | 2571 | if (ret) |
2575 | return ret; | 2572 | return ret; |
2576 | if (root->fs_info->quota_enabled) { | ||
2577 | ret = btrfs_qgroup_reserve(root, alloc_end - alloc_start); | ||
2578 | if (ret) | ||
2579 | goto out_reserve_fail; | ||
2580 | } | ||
2581 | 2573 | ||
2582 | mutex_lock(&inode->i_mutex); | 2574 | mutex_lock(&inode->i_mutex); |
2583 | ret = inode_newsize_ok(inode, alloc_end); | 2575 | ret = inode_newsize_ok(inode, alloc_end); |
@@ -2667,23 +2659,35 @@ static long btrfs_fallocate(struct file *file, int mode, | |||
2667 | 1 << inode->i_blkbits, | 2659 | 1 << inode->i_blkbits, |
2668 | offset + len, | 2660 | offset + len, |
2669 | &alloc_hint); | 2661 | &alloc_hint); |
2670 | |||
2671 | if (ret < 0) { | ||
2672 | free_extent_map(em); | ||
2673 | break; | ||
2674 | } | ||
2675 | } else if (actual_end > inode->i_size && | 2662 | } else if (actual_end > inode->i_size && |
2676 | !(mode & FALLOC_FL_KEEP_SIZE)) { | 2663 | !(mode & FALLOC_FL_KEEP_SIZE)) { |
2664 | struct btrfs_trans_handle *trans; | ||
2665 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
2666 | |||
2677 | /* | 2667 | /* |
2678 | * We didn't need to allocate any more space, but we | 2668 | * We didn't need to allocate any more space, but we |
2679 | * still extended the size of the file so we need to | 2669 | * still extended the size of the file so we need to |
2680 | * update i_size. | 2670 | * update i_size and the inode item. |
2681 | */ | 2671 | */ |
2682 | inode->i_ctime = CURRENT_TIME; | 2672 | trans = btrfs_start_transaction(root, 1); |
2683 | i_size_write(inode, actual_end); | 2673 | if (IS_ERR(trans)) { |
2684 | btrfs_ordered_update_i_size(inode, actual_end, NULL); | 2674 | ret = PTR_ERR(trans); |
2675 | } else { | ||
2676 | inode->i_ctime = CURRENT_TIME; | ||
2677 | i_size_write(inode, actual_end); | ||
2678 | btrfs_ordered_update_i_size(inode, actual_end, | ||
2679 | NULL); | ||
2680 | ret = btrfs_update_inode(trans, root, inode); | ||
2681 | if (ret) | ||
2682 | btrfs_end_transaction(trans, root); | ||
2683 | else | ||
2684 | ret = btrfs_end_transaction(trans, | ||
2685 | root); | ||
2686 | } | ||
2685 | } | 2687 | } |
2686 | free_extent_map(em); | 2688 | free_extent_map(em); |
2689 | if (ret < 0) | ||
2690 | break; | ||
2687 | 2691 | ||
2688 | cur_offset = last_byte; | 2692 | cur_offset = last_byte; |
2689 | if (cur_offset >= alloc_end) { | 2693 | if (cur_offset >= alloc_end) { |
@@ -2695,9 +2699,6 @@ static long btrfs_fallocate(struct file *file, int mode, | |||
2695 | &cached_state, GFP_NOFS); | 2699 | &cached_state, GFP_NOFS); |
2696 | out: | 2700 | out: |
2697 | mutex_unlock(&inode->i_mutex); | 2701 | mutex_unlock(&inode->i_mutex); |
2698 | if (root->fs_info->quota_enabled) | ||
2699 | btrfs_qgroup_free(root, alloc_end - alloc_start); | ||
2700 | out_reserve_fail: | ||
2701 | /* Let go of our reservation. */ | 2702 | /* Let go of our reservation. */ |
2702 | btrfs_free_reserved_data_space(inode, alloc_end - alloc_start); | 2703 | btrfs_free_reserved_data_space(inode, alloc_end - alloc_start); |
2703 | return ret; | 2704 | return ret; |
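
The transaction added above carries a small error-plumbing subtlety: when btrfs_update_inode() fails, its error must survive btrfs_end_transaction(); otherwise the end-transaction result becomes the return value. The pattern in isolation, with callbacks standing in for the two btrfs calls:

    static int update_and_end(int (*update)(void), int (*end)(void))
    {
        int ret = update();

        if (ret)
            end();          /* already failing: discard end()'s result */
        else
            ret = end();    /* success so far: end() decides the outcome */
        return ret;
    }
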
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index a71978578fa7..253cb74b0e27 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c | |||
@@ -85,7 +85,8 @@ static struct inode *__lookup_free_space_inode(struct btrfs_root *root, | |||
85 | } | 85 | } |
86 | 86 | ||
87 | mapping_set_gfp_mask(inode->i_mapping, | 87 | mapping_set_gfp_mask(inode->i_mapping, |
88 | mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS); | 88 | mapping_gfp_mask(inode->i_mapping) & |
89 | ~(GFP_NOFS & ~__GFP_HIGHMEM)); | ||
89 | 90 | ||
90 | return inode; | 91 | return inode; |
91 | } | 92 | } |
@@ -170,13 +171,13 @@ static int __create_free_space_inode(struct btrfs_root *root, | |||
170 | key.objectid = BTRFS_FREE_SPACE_OBJECTID; | 171 | key.objectid = BTRFS_FREE_SPACE_OBJECTID; |
171 | key.offset = offset; | 172 | key.offset = offset; |
172 | key.type = 0; | 173 | key.type = 0; |
173 | |||
174 | ret = btrfs_insert_empty_item(trans, root, path, &key, | 174 | ret = btrfs_insert_empty_item(trans, root, path, &key, |
175 | sizeof(struct btrfs_free_space_header)); | 175 | sizeof(struct btrfs_free_space_header)); |
176 | if (ret < 0) { | 176 | if (ret < 0) { |
177 | btrfs_release_path(path); | 177 | btrfs_release_path(path); |
178 | return ret; | 178 | return ret; |
179 | } | 179 | } |
180 | |||
180 | leaf = path->nodes[0]; | 181 | leaf = path->nodes[0]; |
181 | header = btrfs_item_ptr(leaf, path->slots[0], | 182 | header = btrfs_item_ptr(leaf, path->slots[0], |
182 | struct btrfs_free_space_header); | 183 | struct btrfs_free_space_header); |
@@ -225,9 +226,37 @@ int btrfs_check_trunc_cache_free_space(struct btrfs_root *root, | |||
225 | 226 | ||
226 | int btrfs_truncate_free_space_cache(struct btrfs_root *root, | 227 | int btrfs_truncate_free_space_cache(struct btrfs_root *root, |
227 | struct btrfs_trans_handle *trans, | 228 | struct btrfs_trans_handle *trans, |
229 | struct btrfs_block_group_cache *block_group, | ||
228 | struct inode *inode) | 230 | struct inode *inode) |
229 | { | 231 | { |
230 | int ret = 0; | 232 | int ret = 0; |
233 | struct btrfs_path *path = btrfs_alloc_path(); | ||
234 | |||
235 | if (!path) { | ||
236 | ret = -ENOMEM; | ||
237 | goto fail; | ||
238 | } | ||
239 | |||
240 | if (block_group) { | ||
241 | mutex_lock(&trans->transaction->cache_write_mutex); | ||
242 | if (!list_empty(&block_group->io_list)) { | ||
243 | list_del_init(&block_group->io_list); | ||
244 | |||
245 | btrfs_wait_cache_io(root, trans, block_group, | ||
246 | &block_group->io_ctl, path, | ||
247 | block_group->key.objectid); | ||
248 | btrfs_put_block_group(block_group); | ||
249 | } | ||
250 | |||
251 | /* | ||
252 | * now that we've truncated the cache away, it's no longer | ||
253 | * setup or written | ||
254 | */ | ||
255 | spin_lock(&block_group->lock); | ||
256 | block_group->disk_cache_state = BTRFS_DC_CLEAR; | ||
257 | spin_unlock(&block_group->lock); | ||
258 | } | ||
259 | btrfs_free_path(path); | ||
231 | 260 | ||
232 | btrfs_i_size_write(inode, 0); | 261 | btrfs_i_size_write(inode, 0); |
233 | truncate_pagecache(inode, 0); | 262 | truncate_pagecache(inode, 0); |
@@ -235,15 +264,23 @@ int btrfs_truncate_free_space_cache(struct btrfs_root *root, | |||
235 | /* | 264 | /* |
236 | * We don't need an orphan item because truncating the free space cache | 265 | * We don't need an orphan item because truncating the free space cache |
237 | * will never be split across transactions. | 266 | * will never be split across transactions. |
267 | * We don't need to check for -EAGAIN because we're a free space | ||
268 | * cache inode | ||
238 | */ | 269 | */ |
239 | ret = btrfs_truncate_inode_items(trans, root, inode, | 270 | ret = btrfs_truncate_inode_items(trans, root, inode, |
240 | 0, BTRFS_EXTENT_DATA_KEY); | 271 | 0, BTRFS_EXTENT_DATA_KEY); |
241 | if (ret) { | 272 | if (ret) { |
273 | mutex_unlock(&trans->transaction->cache_write_mutex); | ||
242 | btrfs_abort_transaction(trans, root, ret); | 274 | btrfs_abort_transaction(trans, root, ret); |
243 | return ret; | 275 | return ret; |
244 | } | 276 | } |
245 | 277 | ||
246 | ret = btrfs_update_inode(trans, root, inode); | 278 | ret = btrfs_update_inode(trans, root, inode); |
279 | |||
280 | if (block_group) | ||
281 | mutex_unlock(&trans->transaction->cache_write_mutex); | ||
282 | |||
283 | fail: | ||
247 | if (ret) | 284 | if (ret) |
248 | btrfs_abort_transaction(trans, root, ret); | 285 | btrfs_abort_transaction(trans, root, ret); |
249 | 286 | ||
@@ -269,18 +306,7 @@ static int readahead_cache(struct inode *inode) | |||
269 | return 0; | 306 | return 0; |
270 | } | 307 | } |
271 | 308 | ||
272 | struct io_ctl { | 309 | static int io_ctl_init(struct btrfs_io_ctl *io_ctl, struct inode *inode, |
273 | void *cur, *orig; | ||
274 | struct page *page; | ||
275 | struct page **pages; | ||
276 | struct btrfs_root *root; | ||
277 | unsigned long size; | ||
278 | int index; | ||
279 | int num_pages; | ||
280 | unsigned check_crcs:1; | ||
281 | }; | ||
282 | |||
283 | static int io_ctl_init(struct io_ctl *io_ctl, struct inode *inode, | ||
284 | struct btrfs_root *root, int write) | 310 | struct btrfs_root *root, int write) |
285 | { | 311 | { |
286 | int num_pages; | 312 | int num_pages; |
@@ -296,45 +322,46 @@ static int io_ctl_init(struct io_ctl *io_ctl, struct inode *inode, | |||
296 | (num_pages * sizeof(u32)) >= PAGE_CACHE_SIZE) | 322 | (num_pages * sizeof(u32)) >= PAGE_CACHE_SIZE) |
297 | return -ENOSPC; | 323 | return -ENOSPC; |
298 | 324 | ||
299 | memset(io_ctl, 0, sizeof(struct io_ctl)); | 325 | memset(io_ctl, 0, sizeof(struct btrfs_io_ctl)); |
300 | 326 | ||
301 | io_ctl->pages = kzalloc(sizeof(struct page *) * num_pages, GFP_NOFS); | 327 | io_ctl->pages = kcalloc(num_pages, sizeof(struct page *), GFP_NOFS); |
302 | if (!io_ctl->pages) | 328 | if (!io_ctl->pages) |
303 | return -ENOMEM; | 329 | return -ENOMEM; |
304 | 330 | ||
305 | io_ctl->num_pages = num_pages; | 331 | io_ctl->num_pages = num_pages; |
306 | io_ctl->root = root; | 332 | io_ctl->root = root; |
307 | io_ctl->check_crcs = check_crcs; | 333 | io_ctl->check_crcs = check_crcs; |
334 | io_ctl->inode = inode; | ||
308 | 335 | ||
309 | return 0; | 336 | return 0; |
310 | } | 337 | } |
311 | 338 | ||
312 | static void io_ctl_free(struct io_ctl *io_ctl) | 339 | static void io_ctl_free(struct btrfs_io_ctl *io_ctl) |
313 | { | 340 | { |
314 | kfree(io_ctl->pages); | 341 | kfree(io_ctl->pages); |
342 | io_ctl->pages = NULL; | ||
315 | } | 343 | } |
316 | 344 | ||
317 | static void io_ctl_unmap_page(struct io_ctl *io_ctl) | 345 | static void io_ctl_unmap_page(struct btrfs_io_ctl *io_ctl) |
318 | { | 346 | { |
319 | if (io_ctl->cur) { | 347 | if (io_ctl->cur) { |
320 | kunmap(io_ctl->page); | ||
321 | io_ctl->cur = NULL; | 348 | io_ctl->cur = NULL; |
322 | io_ctl->orig = NULL; | 349 | io_ctl->orig = NULL; |
323 | } | 350 | } |
324 | } | 351 | } |
325 | 352 | ||
326 | static void io_ctl_map_page(struct io_ctl *io_ctl, int clear) | 353 | static void io_ctl_map_page(struct btrfs_io_ctl *io_ctl, int clear) |
327 | { | 354 | { |
328 | ASSERT(io_ctl->index < io_ctl->num_pages); | 355 | ASSERT(io_ctl->index < io_ctl->num_pages); |
329 | io_ctl->page = io_ctl->pages[io_ctl->index++]; | 356 | io_ctl->page = io_ctl->pages[io_ctl->index++]; |
330 | io_ctl->cur = kmap(io_ctl->page); | 357 | io_ctl->cur = page_address(io_ctl->page); |
331 | io_ctl->orig = io_ctl->cur; | 358 | io_ctl->orig = io_ctl->cur; |
332 | io_ctl->size = PAGE_CACHE_SIZE; | 359 | io_ctl->size = PAGE_CACHE_SIZE; |
333 | if (clear) | 360 | if (clear) |
334 | memset(io_ctl->cur, 0, PAGE_CACHE_SIZE); | 361 | memset(io_ctl->cur, 0, PAGE_CACHE_SIZE); |
335 | } | 362 | } |
336 | 363 | ||
337 | static void io_ctl_drop_pages(struct io_ctl *io_ctl) | 364 | static void io_ctl_drop_pages(struct btrfs_io_ctl *io_ctl) |
338 | { | 365 | { |
339 | int i; | 366 | int i; |
340 | 367 | ||
@@ -349,7 +376,7 @@ static void io_ctl_drop_pages(struct io_ctl *io_ctl) | |||
349 | } | 376 | } |
350 | } | 377 | } |
351 | 378 | ||
352 | static int io_ctl_prepare_pages(struct io_ctl *io_ctl, struct inode *inode, | 379 | static int io_ctl_prepare_pages(struct btrfs_io_ctl *io_ctl, struct inode *inode, |
353 | int uptodate) | 380 | int uptodate) |
354 | { | 381 | { |
355 | struct page *page; | 382 | struct page *page; |
@@ -383,7 +410,7 @@ static int io_ctl_prepare_pages(struct io_ctl *io_ctl, struct inode *inode, | |||
383 | return 0; | 410 | return 0; |
384 | } | 411 | } |
385 | 412 | ||
386 | static void io_ctl_set_generation(struct io_ctl *io_ctl, u64 generation) | 413 | static void io_ctl_set_generation(struct btrfs_io_ctl *io_ctl, u64 generation) |
387 | { | 414 | { |
388 | __le64 *val; | 415 | __le64 *val; |
389 | 416 | ||
@@ -406,7 +433,7 @@ static void io_ctl_set_generation(struct io_ctl *io_ctl, u64 generation) | |||
406 | io_ctl->cur += sizeof(u64); | 433 | io_ctl->cur += sizeof(u64); |
407 | } | 434 | } |
408 | 435 | ||
409 | static int io_ctl_check_generation(struct io_ctl *io_ctl, u64 generation) | 436 | static int io_ctl_check_generation(struct btrfs_io_ctl *io_ctl, u64 generation) |
410 | { | 437 | { |
411 | __le64 *gen; | 438 | __le64 *gen; |
412 | 439 | ||
@@ -435,7 +462,7 @@ static int io_ctl_check_generation(struct io_ctl *io_ctl, u64 generation) | |||
435 | return 0; | 462 | return 0; |
436 | } | 463 | } |
437 | 464 | ||
438 | static void io_ctl_set_crc(struct io_ctl *io_ctl, int index) | 465 | static void io_ctl_set_crc(struct btrfs_io_ctl *io_ctl, int index) |
439 | { | 466 | { |
440 | u32 *tmp; | 467 | u32 *tmp; |
441 | u32 crc = ~(u32)0; | 468 | u32 crc = ~(u32)0; |
@@ -453,13 +480,12 @@ static void io_ctl_set_crc(struct io_ctl *io_ctl, int index) | |||
453 | PAGE_CACHE_SIZE - offset); | 480 | PAGE_CACHE_SIZE - offset); |
454 | btrfs_csum_final(crc, (char *)&crc); | 481 | btrfs_csum_final(crc, (char *)&crc); |
455 | io_ctl_unmap_page(io_ctl); | 482 | io_ctl_unmap_page(io_ctl); |
456 | tmp = kmap(io_ctl->pages[0]); | 483 | tmp = page_address(io_ctl->pages[0]); |
457 | tmp += index; | 484 | tmp += index; |
458 | *tmp = crc; | 485 | *tmp = crc; |
459 | kunmap(io_ctl->pages[0]); | ||
460 | } | 486 | } |
461 | 487 | ||
462 | static int io_ctl_check_crc(struct io_ctl *io_ctl, int index) | 488 | static int io_ctl_check_crc(struct btrfs_io_ctl *io_ctl, int index) |
463 | { | 489 | { |
464 | u32 *tmp, val; | 490 | u32 *tmp, val; |
465 | u32 crc = ~(u32)0; | 491 | u32 crc = ~(u32)0; |
@@ -473,10 +499,9 @@ static int io_ctl_check_crc(struct io_ctl *io_ctl, int index) | |||
473 | if (index == 0) | 499 | if (index == 0) |
474 | offset = sizeof(u32) * io_ctl->num_pages; | 500 | offset = sizeof(u32) * io_ctl->num_pages; |
475 | 501 | ||
476 | tmp = kmap(io_ctl->pages[0]); | 502 | tmp = page_address(io_ctl->pages[0]); |
477 | tmp += index; | 503 | tmp += index; |
478 | val = *tmp; | 504 | val = *tmp; |
479 | kunmap(io_ctl->pages[0]); | ||
480 | 505 | ||
481 | io_ctl_map_page(io_ctl, 0); | 506 | io_ctl_map_page(io_ctl, 0); |
482 | crc = btrfs_csum_data(io_ctl->orig + offset, crc, | 507 | crc = btrfs_csum_data(io_ctl->orig + offset, crc, |
@@ -492,7 +517,7 @@ static int io_ctl_check_crc(struct io_ctl *io_ctl, int index) | |||
492 | return 0; | 517 | return 0; |
493 | } | 518 | } |
494 | 519 | ||
495 | static int io_ctl_add_entry(struct io_ctl *io_ctl, u64 offset, u64 bytes, | 520 | static int io_ctl_add_entry(struct btrfs_io_ctl *io_ctl, u64 offset, u64 bytes, |
496 | void *bitmap) | 521 | void *bitmap) |
497 | { | 522 | { |
498 | struct btrfs_free_space_entry *entry; | 523 | struct btrfs_free_space_entry *entry; |
@@ -522,7 +547,7 @@ static int io_ctl_add_entry(struct io_ctl *io_ctl, u64 offset, u64 bytes, | |||
522 | return 0; | 547 | return 0; |
523 | } | 548 | } |
524 | 549 | ||
525 | static int io_ctl_add_bitmap(struct io_ctl *io_ctl, void *bitmap) | 550 | static int io_ctl_add_bitmap(struct btrfs_io_ctl *io_ctl, void *bitmap) |
526 | { | 551 | { |
527 | if (!io_ctl->cur) | 552 | if (!io_ctl->cur) |
528 | return -ENOSPC; | 553 | return -ENOSPC; |
@@ -545,7 +570,7 @@ static int io_ctl_add_bitmap(struct io_ctl *io_ctl, void *bitmap) | |||
545 | return 0; | 570 | return 0; |
546 | } | 571 | } |
547 | 572 | ||
548 | static void io_ctl_zero_remaining_pages(struct io_ctl *io_ctl) | 573 | static void io_ctl_zero_remaining_pages(struct btrfs_io_ctl *io_ctl) |
549 | { | 574 | { |
550 | /* | 575 | /* |
551 | * If we're not on the boundary we know we've modified the page and we | 576 | * If we're not on the boundary we know we've modified the page and we |
@@ -562,7 +587,7 @@ static void io_ctl_zero_remaining_pages(struct io_ctl *io_ctl) | |||
562 | } | 587 | } |
563 | } | 588 | } |
564 | 589 | ||
565 | static int io_ctl_read_entry(struct io_ctl *io_ctl, | 590 | static int io_ctl_read_entry(struct btrfs_io_ctl *io_ctl, |
566 | struct btrfs_free_space *entry, u8 *type) | 591 | struct btrfs_free_space *entry, u8 *type) |
567 | { | 592 | { |
568 | struct btrfs_free_space_entry *e; | 593 | struct btrfs_free_space_entry *e; |
@@ -589,7 +614,7 @@ static int io_ctl_read_entry(struct io_ctl *io_ctl, | |||
589 | return 0; | 614 | return 0; |
590 | } | 615 | } |
591 | 616 | ||
592 | static int io_ctl_read_bitmap(struct io_ctl *io_ctl, | 617 | static int io_ctl_read_bitmap(struct btrfs_io_ctl *io_ctl, |
593 | struct btrfs_free_space *entry) | 618 | struct btrfs_free_space *entry) |
594 | { | 619 | { |
595 | int ret; | 620 | int ret; |
@@ -648,7 +673,7 @@ static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, | |||
648 | { | 673 | { |
649 | struct btrfs_free_space_header *header; | 674 | struct btrfs_free_space_header *header; |
650 | struct extent_buffer *leaf; | 675 | struct extent_buffer *leaf; |
651 | struct io_ctl io_ctl; | 676 | struct btrfs_io_ctl io_ctl; |
652 | struct btrfs_key key; | 677 | struct btrfs_key key; |
653 | struct btrfs_free_space *e, *n; | 678 | struct btrfs_free_space *e, *n; |
654 | LIST_HEAD(bitmaps); | 679 | LIST_HEAD(bitmaps); |
@@ -877,7 +902,7 @@ out: | |||
877 | } | 902 | } |
878 | 903 | ||
879 | static noinline_for_stack | 904 | static noinline_for_stack |
880 | int write_cache_extent_entries(struct io_ctl *io_ctl, | 905 | int write_cache_extent_entries(struct btrfs_io_ctl *io_ctl, |
881 | struct btrfs_free_space_ctl *ctl, | 906 | struct btrfs_free_space_ctl *ctl, |
882 | struct btrfs_block_group_cache *block_group, | 907 | struct btrfs_block_group_cache *block_group, |
883 | int *entries, int *bitmaps, | 908 | int *entries, int *bitmaps, |
@@ -885,6 +910,7 @@ int write_cache_extent_entries(struct io_ctl *io_ctl, | |||
885 | { | 910 | { |
886 | int ret; | 911 | int ret; |
887 | struct btrfs_free_cluster *cluster = NULL; | 912 | struct btrfs_free_cluster *cluster = NULL; |
913 | struct btrfs_free_cluster *cluster_locked = NULL; | ||
888 | struct rb_node *node = rb_first(&ctl->free_space_offset); | 914 | struct rb_node *node = rb_first(&ctl->free_space_offset); |
889 | struct btrfs_trim_range *trim_entry; | 915 | struct btrfs_trim_range *trim_entry; |
890 | 916 | ||
@@ -896,6 +922,8 @@ int write_cache_extent_entries(struct io_ctl *io_ctl, | |||
896 | } | 922 | } |
897 | 923 | ||
898 | if (!node && cluster) { | 924 | if (!node && cluster) { |
925 | cluster_locked = cluster; | ||
926 | spin_lock(&cluster_locked->lock); | ||
899 | node = rb_first(&cluster->root); | 927 | node = rb_first(&cluster->root); |
900 | cluster = NULL; | 928 | cluster = NULL; |
901 | } | 929 | } |
@@ -919,9 +947,15 @@ int write_cache_extent_entries(struct io_ctl *io_ctl, | |||
919 | node = rb_next(node); | 947 | node = rb_next(node); |
920 | if (!node && cluster) { | 948 | if (!node && cluster) { |
921 | node = rb_first(&cluster->root); | 949 | node = rb_first(&cluster->root); |
950 | cluster_locked = cluster; | ||
951 | spin_lock(&cluster_locked->lock); | ||
922 | cluster = NULL; | 952 | cluster = NULL; |
923 | } | 953 | } |
924 | } | 954 | } |
955 | if (cluster_locked) { | ||
956 | spin_unlock(&cluster_locked->lock); | ||
957 | cluster_locked = NULL; | ||
958 | } | ||
925 | 959 | ||
926 | /* | 960 | /* |
927 | * Make sure we don't miss any range that was removed from our rbtree | 961 | * Make sure we don't miss any range that was removed from our rbtree |
@@ -939,6 +973,8 @@ int write_cache_extent_entries(struct io_ctl *io_ctl, | |||
939 | 973 | ||
940 | return 0; | 974 | return 0; |
941 | fail: | 975 | fail: |
976 | if (cluster_locked) | ||
977 | spin_unlock(&cluster_locked->lock); | ||
942 | return -ENOSPC; | 978 | return -ENOSPC; |
943 | } | 979 | } |
944 | 980 | ||
@@ -1000,7 +1036,7 @@ fail: | |||
1000 | static noinline_for_stack int | 1036 | static noinline_for_stack int |
1001 | write_pinned_extent_entries(struct btrfs_root *root, | 1037 | write_pinned_extent_entries(struct btrfs_root *root, |
1002 | struct btrfs_block_group_cache *block_group, | 1038 | struct btrfs_block_group_cache *block_group, |
1003 | struct io_ctl *io_ctl, | 1039 | struct btrfs_io_ctl *io_ctl, |
1004 | int *entries) | 1040 | int *entries) |
1005 | { | 1041 | { |
1006 | u64 start, extent_start, extent_end, len; | 1042 | u64 start, extent_start, extent_end, len; |
@@ -1050,7 +1086,7 @@ write_pinned_extent_entries(struct btrfs_root *root, | |||
1050 | } | 1086 | } |
1051 | 1087 | ||
1052 | static noinline_for_stack int | 1088 | static noinline_for_stack int |
1053 | write_bitmap_entries(struct io_ctl *io_ctl, struct list_head *bitmap_list) | 1089 | write_bitmap_entries(struct btrfs_io_ctl *io_ctl, struct list_head *bitmap_list) |
1054 | { | 1090 | { |
1055 | struct list_head *pos, *n; | 1091 | struct list_head *pos, *n; |
1056 | int ret; | 1092 | int ret; |
@@ -1084,7 +1120,7 @@ static int flush_dirty_cache(struct inode *inode) | |||
1084 | 1120 | ||
1085 | static void noinline_for_stack | 1121 | static void noinline_for_stack |
1086 | cleanup_write_cache_enospc(struct inode *inode, | 1122 | cleanup_write_cache_enospc(struct inode *inode, |
1087 | struct io_ctl *io_ctl, | 1123 | struct btrfs_io_ctl *io_ctl, |
1088 | struct extent_state **cached_state, | 1124 | struct extent_state **cached_state, |
1089 | struct list_head *bitmap_list) | 1125 | struct list_head *bitmap_list) |
1090 | { | 1126 | { |
@@ -1101,6 +1137,70 @@ cleanup_write_cache_enospc(struct inode *inode, | |||
1101 | GFP_NOFS); | 1137 | GFP_NOFS); |
1102 | } | 1138 | } |
1103 | 1139 | ||
1140 | int btrfs_wait_cache_io(struct btrfs_root *root, | ||
1141 | struct btrfs_trans_handle *trans, | ||
1142 | struct btrfs_block_group_cache *block_group, | ||
1143 | struct btrfs_io_ctl *io_ctl, | ||
1144 | struct btrfs_path *path, u64 offset) | ||
1145 | { | ||
1146 | int ret; | ||
1147 | struct inode *inode = io_ctl->inode; | ||
1148 | |||
1149 | if (!inode) | ||
1150 | return 0; | ||
1151 | |||
1152 | root = root->fs_info->tree_root; | ||
1153 | |||
1154 | /* Flush the dirty pages in the cache file. */ | ||
1155 | ret = flush_dirty_cache(inode); | ||
1156 | if (ret) | ||
1157 | goto out; | ||
1158 | |||
1159 | /* Update the cache item to tell everyone this cache file is valid. */ | ||
1160 | ret = update_cache_item(trans, root, inode, path, offset, | ||
1161 | io_ctl->entries, io_ctl->bitmaps); | ||
1162 | out: | ||
1163 | io_ctl_free(io_ctl); | ||
1164 | if (ret) { | ||
1165 | invalidate_inode_pages2(inode->i_mapping); | ||
1166 | BTRFS_I(inode)->generation = 0; | ||
1167 | if (block_group) { | ||
1168 | #ifdef DEBUG | ||
1169 | btrfs_err(root->fs_info, | ||
1170 | "failed to write free space cache for block group %llu", | ||
1171 | block_group->key.objectid); | ||
1172 | #endif | ||
1173 | } | ||
1174 | } | ||
1175 | btrfs_update_inode(trans, root, inode); | ||
1176 | |||
1177 | if (block_group) { | ||
1178 | /* the dirty list is protected by the dirty_bgs_lock */ | ||
1179 | spin_lock(&trans->transaction->dirty_bgs_lock); | ||
1180 | |||
1181 | /* the disk_cache_state is protected by the block group lock */ | ||
1182 | spin_lock(&block_group->lock); | ||
1183 | |||
1184 | /* | ||
1185 | * only mark this as written if we didn't get put back on | ||
1186 | * the dirty list while waiting for IO. Otherwise our | ||
1187 | * cache state won't be right, and we won't get written again | ||
1188 | */ | ||
1189 | if (!ret && list_empty(&block_group->dirty_list)) | ||
1190 | block_group->disk_cache_state = BTRFS_DC_WRITTEN; | ||
1191 | else if (ret) | ||
1192 | block_group->disk_cache_state = BTRFS_DC_ERROR; | ||
1193 | |||
1194 | spin_unlock(&block_group->lock); | ||
1195 | spin_unlock(&trans->transaction->dirty_bgs_lock); | ||
1196 | io_ctl->inode = NULL; | ||
1197 | iput(inode); | ||
1198 | } | ||
1199 | |||
1200 | return ret; | ||
1201 | |||
1202 | } | ||
1203 | |||
1104 | /** | 1204 | /** |
1105 | * __btrfs_write_out_cache - write out cached info to an inode | 1205 | * __btrfs_write_out_cache - write out cached info to an inode |
1106 | * @root - the root the inode belongs to | 1206 | * @root - the root the inode belongs to |
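
The disk_cache_state decision at the end of btrfs_wait_cache_io() is worth isolating: a group is only marked WRITTEN when the IO succeeded and nothing re-dirtied it while the write was in flight; a re-dirtied group keeps whatever state the re-dirtier set so it gets written again. As a pure function (the enum values are stand-ins for the BTRFS_DC_* states):

    enum dc_state { DC_WRITTEN, DC_ERROR, DC_UNCHANGED };

    static enum dc_state cache_state_after_io(int ret, int still_on_dirty_list)
    {
        if (!ret && !still_on_dirty_list)
            return DC_WRITTEN;      /* clean write, nobody re-dirtied us */
        if (ret)
            return DC_ERROR;        /* the cache file is not trustworthy */
        return DC_UNCHANGED;        /* re-dirtied: leave it for the next pass */
    }
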
@@ -1117,20 +1217,22 @@ cleanup_write_cache_enospc(struct inode *inode, | |||
1117 | static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | 1217 | static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, |
1118 | struct btrfs_free_space_ctl *ctl, | 1218 | struct btrfs_free_space_ctl *ctl, |
1119 | struct btrfs_block_group_cache *block_group, | 1219 | struct btrfs_block_group_cache *block_group, |
1220 | struct btrfs_io_ctl *io_ctl, | ||
1120 | struct btrfs_trans_handle *trans, | 1221 | struct btrfs_trans_handle *trans, |
1121 | struct btrfs_path *path, u64 offset) | 1222 | struct btrfs_path *path, u64 offset) |
1122 | { | 1223 | { |
1123 | struct extent_state *cached_state = NULL; | 1224 | struct extent_state *cached_state = NULL; |
1124 | struct io_ctl io_ctl; | ||
1125 | LIST_HEAD(bitmap_list); | 1225 | LIST_HEAD(bitmap_list); |
1126 | int entries = 0; | 1226 | int entries = 0; |
1127 | int bitmaps = 0; | 1227 | int bitmaps = 0; |
1128 | int ret; | 1228 | int ret; |
1229 | int must_iput = 0; | ||
1129 | 1230 | ||
1130 | if (!i_size_read(inode)) | 1231 | if (!i_size_read(inode)) |
1131 | return -1; | 1232 | return -1; |
1132 | 1233 | ||
1133 | ret = io_ctl_init(&io_ctl, inode, root, 1); | 1234 | WARN_ON(io_ctl->pages); |
1235 | ret = io_ctl_init(io_ctl, inode, root, 1); | ||
1134 | if (ret) | 1236 | if (ret) |
1135 | return -1; | 1237 | return -1; |
1136 | 1238 | ||
@@ -1143,24 +1245,27 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
1143 | up_write(&block_group->data_rwsem); | 1245 | up_write(&block_group->data_rwsem); |
1144 | BTRFS_I(inode)->generation = 0; | 1246 | BTRFS_I(inode)->generation = 0; |
1145 | ret = 0; | 1247 | ret = 0; |
1248 | must_iput = 1; | ||
1146 | goto out; | 1249 | goto out; |
1147 | } | 1250 | } |
1148 | spin_unlock(&block_group->lock); | 1251 | spin_unlock(&block_group->lock); |
1149 | } | 1252 | } |
1150 | 1253 | ||
1151 | /* Lock all pages first so we can lock the extent safely. */ | 1254 | /* Lock all pages first so we can lock the extent safely. */ |
1152 | io_ctl_prepare_pages(&io_ctl, inode, 0); | 1255 | io_ctl_prepare_pages(io_ctl, inode, 0); |
1153 | 1256 | ||
1154 | lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1, | 1257 | lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1, |
1155 | 0, &cached_state); | 1258 | 0, &cached_state); |
1156 | 1259 | ||
1157 | io_ctl_set_generation(&io_ctl, trans->transid); | 1260 | io_ctl_set_generation(io_ctl, trans->transid); |
1158 | 1261 | ||
1159 | mutex_lock(&ctl->cache_writeout_mutex); | 1262 | mutex_lock(&ctl->cache_writeout_mutex); |
1160 | /* Write out the extent entries in the free space cache */ | 1263 | /* Write out the extent entries in the free space cache */ |
1161 | ret = write_cache_extent_entries(&io_ctl, ctl, | 1264 | spin_lock(&ctl->tree_lock); |
1265 | ret = write_cache_extent_entries(io_ctl, ctl, | ||
1162 | block_group, &entries, &bitmaps, | 1266 | block_group, &entries, &bitmaps, |
1163 | &bitmap_list); | 1267 | &bitmap_list); |
1268 | spin_unlock(&ctl->tree_lock); | ||
1164 | if (ret) { | 1269 | if (ret) { |
1165 | mutex_unlock(&ctl->cache_writeout_mutex); | 1270 | mutex_unlock(&ctl->cache_writeout_mutex); |
1166 | goto out_nospc; | 1271 | goto out_nospc; |
@@ -1170,8 +1275,11 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
1170 | * Some spaces that are freed in the current transaction are pinned, | 1275 | * Some spaces that are freed in the current transaction are pinned, |
1171 | * they will be added into free space cache after the transaction is | 1276 | * they will be added into free space cache after the transaction is |
1172 | * committed, we shouldn't lose them. | 1277 | * committed, we shouldn't lose them. |
1278 | * | ||
1279 | * If this changes while we are working we'll get added back to | ||
1280 | * the dirty list and redo it. No locking needed | ||
1173 | */ | 1281 | */ |
1174 | ret = write_pinned_extent_entries(root, block_group, &io_ctl, &entries); | 1282 | ret = write_pinned_extent_entries(root, block_group, io_ctl, &entries); |
1175 | if (ret) { | 1283 | if (ret) { |
1176 | mutex_unlock(&ctl->cache_writeout_mutex); | 1284 | mutex_unlock(&ctl->cache_writeout_mutex); |
1177 | goto out_nospc; | 1285 | goto out_nospc; |
@@ -1182,16 +1290,18 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
1182 | * locked while doing it because a concurrent trim can be manipulating | 1290 | * locked while doing it because a concurrent trim can be manipulating |
1183 | * or freeing the bitmap. | 1291 | * or freeing the bitmap. |
1184 | */ | 1292 | */ |
1185 | ret = write_bitmap_entries(&io_ctl, &bitmap_list); | 1293 | spin_lock(&ctl->tree_lock); |
1294 | ret = write_bitmap_entries(io_ctl, &bitmap_list); | ||
1295 | spin_unlock(&ctl->tree_lock); | ||
1186 | mutex_unlock(&ctl->cache_writeout_mutex); | 1296 | mutex_unlock(&ctl->cache_writeout_mutex); |
1187 | if (ret) | 1297 | if (ret) |
1188 | goto out_nospc; | 1298 | goto out_nospc; |
1189 | 1299 | ||
1190 | /* Zero out the rest of the pages just to make sure */ | 1300 | /* Zero out the rest of the pages just to make sure */ |
1191 | io_ctl_zero_remaining_pages(&io_ctl); | 1301 | io_ctl_zero_remaining_pages(io_ctl); |
1192 | 1302 | ||
1193 | /* Everything is written out, now we dirty the pages in the file. */ | 1303 | /* Everything is written out, now we dirty the pages in the file. */ |
1194 | ret = btrfs_dirty_pages(root, inode, io_ctl.pages, io_ctl.num_pages, | 1304 | ret = btrfs_dirty_pages(root, inode, io_ctl->pages, io_ctl->num_pages, |
1195 | 0, i_size_read(inode), &cached_state); | 1305 | 0, i_size_read(inode), &cached_state); |
1196 | if (ret) | 1306 | if (ret) |
1197 | goto out_nospc; | 1307 | goto out_nospc; |
@@ -1202,30 +1312,39 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
1202 | * Release the pages and unlock the extent, we will flush | 1312 | * Release the pages and unlock the extent, we will flush |
1203 | * them out later | 1313 | * them out later |
1204 | */ | 1314 | */ |
1205 | io_ctl_drop_pages(&io_ctl); | 1315 | io_ctl_drop_pages(io_ctl); |
1206 | 1316 | ||
1207 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, | 1317 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, |
1208 | i_size_read(inode) - 1, &cached_state, GFP_NOFS); | 1318 | i_size_read(inode) - 1, &cached_state, GFP_NOFS); |
1209 | 1319 | ||
1210 | /* Flush the dirty pages in the cache file. */ | 1320 | /* |
1211 | ret = flush_dirty_cache(inode); | 1321 | * at this point the pages are under IO and we're happy.
1322 | * The caller is responsible for waiting on them and updating | ||
1323 | * the cache and the inode | ||
1324 | */ | ||
1325 | io_ctl->entries = entries; | ||
1326 | io_ctl->bitmaps = bitmaps; | ||
1327 | |||
1328 | ret = btrfs_fdatawrite_range(inode, 0, (u64)-1); | ||
1212 | if (ret) | 1329 | if (ret) |
1213 | goto out; | 1330 | goto out; |
1214 | 1331 | ||
1215 | /* Update the cache item to tell everyone this cache file is valid. */ | 1332 | return 0; |
1216 | ret = update_cache_item(trans, root, inode, path, offset, | 1333 | |
1217 | entries, bitmaps); | ||
1218 | out: | 1334 | out: |
1219 | io_ctl_free(&io_ctl); | 1335 | io_ctl->inode = NULL; |
1336 | io_ctl_free(io_ctl); | ||
1220 | if (ret) { | 1337 | if (ret) { |
1221 | invalidate_inode_pages2(inode->i_mapping); | 1338 | invalidate_inode_pages2(inode->i_mapping); |
1222 | BTRFS_I(inode)->generation = 0; | 1339 | BTRFS_I(inode)->generation = 0; |
1223 | } | 1340 | } |
1224 | btrfs_update_inode(trans, root, inode); | 1341 | btrfs_update_inode(trans, root, inode); |
1342 | if (must_iput) | ||
1343 | iput(inode); | ||
1225 | return ret; | 1344 | return ret; |
1226 | 1345 | ||
1227 | out_nospc: | 1346 | out_nospc: |
1228 | cleanup_write_cache_enospc(inode, &io_ctl, &cached_state, &bitmap_list); | 1347 | cleanup_write_cache_enospc(inode, io_ctl, &cached_state, &bitmap_list); |
1229 | 1348 | ||
1230 | if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA)) | 1349 | if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA)) |
1231 | up_write(&block_group->data_rwsem); | 1350 | up_write(&block_group->data_rwsem); |
@@ -1241,7 +1360,6 @@ int btrfs_write_out_cache(struct btrfs_root *root, | |||
1241 | struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; | 1360 | struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; |
1242 | struct inode *inode; | 1361 | struct inode *inode; |
1243 | int ret = 0; | 1362 | int ret = 0; |
1244 | enum btrfs_disk_cache_state dcs = BTRFS_DC_WRITTEN; | ||
1245 | 1363 | ||
1246 | root = root->fs_info->tree_root; | 1364 | root = root->fs_info->tree_root; |
1247 | 1365 | ||
@@ -1250,34 +1368,34 @@ int btrfs_write_out_cache(struct btrfs_root *root, | |||
1250 | spin_unlock(&block_group->lock); | 1368 | spin_unlock(&block_group->lock); |
1251 | return 0; | 1369 | return 0; |
1252 | } | 1370 | } |
1253 | |||
1254 | if (block_group->delalloc_bytes) { | ||
1255 | block_group->disk_cache_state = BTRFS_DC_WRITTEN; | ||
1256 | spin_unlock(&block_group->lock); | ||
1257 | return 0; | ||
1258 | } | ||
1259 | spin_unlock(&block_group->lock); | 1371 | spin_unlock(&block_group->lock); |
1260 | 1372 | ||
1261 | inode = lookup_free_space_inode(root, block_group, path); | 1373 | inode = lookup_free_space_inode(root, block_group, path); |
1262 | if (IS_ERR(inode)) | 1374 | if (IS_ERR(inode)) |
1263 | return 0; | 1375 | return 0; |
1264 | 1376 | ||
1265 | ret = __btrfs_write_out_cache(root, inode, ctl, block_group, trans, | 1377 | ret = __btrfs_write_out_cache(root, inode, ctl, block_group, |
1378 | &block_group->io_ctl, trans, | ||
1266 | path, block_group->key.objectid); | 1379 | path, block_group->key.objectid); |
1267 | if (ret) { | 1380 | if (ret) { |
1268 | dcs = BTRFS_DC_ERROR; | ||
1269 | ret = 0; | ||
1270 | #ifdef DEBUG | 1381 | #ifdef DEBUG |
1271 | btrfs_err(root->fs_info, | 1382 | btrfs_err(root->fs_info, |
1272 | "failed to write free space cache for block group %llu", | 1383 | "failed to write free space cache for block group %llu", |
1273 | block_group->key.objectid); | 1384 | block_group->key.objectid); |
1274 | #endif | 1385 | #endif |
1386 | spin_lock(&block_group->lock); | ||
1387 | block_group->disk_cache_state = BTRFS_DC_ERROR; | ||
1388 | spin_unlock(&block_group->lock); | ||
1389 | |||
1390 | block_group->io_ctl.inode = NULL; | ||
1391 | iput(inode); | ||
1275 | } | 1392 | } |
1276 | 1393 | ||
1277 | spin_lock(&block_group->lock); | 1394 | /* |
1278 | block_group->disk_cache_state = dcs; | 1395 | * if ret == 0 the caller is expected to call btrfs_wait_cache_io |
1279 | spin_unlock(&block_group->lock); | 1396 | * to wait for IO and put the inode |
1280 | iput(inode); | 1397 | */ |
1398 | |||
1281 | return ret; | 1399 | return ret; |
1282 | } | 1400 | } |
1283 | 1401 | ||
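The writeout path is now split in two: __btrfs_write_out_cache() only starts the IO and parks its state in the caller-supplied btrfs_io_ctl, while btrfs_wait_cache_io() later waits for the pages and updates the cache item. A minimal sketch of how a commit path might drive the pair, built only from the signatures in this diff — the helper name and structure are illustrative assumptions, not the actual caller:

    /* sketch: two-phase free space cache writeout for one block group */
    static int write_one_cache(struct btrfs_trans_handle *trans,
                               struct btrfs_root *root,
                               struct btrfs_block_group_cache *cache,
                               struct btrfs_path *path)
    {
            int ret;

            /* phase 1: lock pages, write the entries, start the IO */
            ret = btrfs_write_out_cache(root, trans, cache, path);
            if (ret)
                    return ret;     /* DC_ERROR was set, inode already put */

            /* phase 2: wait for the IO, then update the cache item */
            return btrfs_wait_cache_io(root, trans, cache, &cache->io_ctl,
                                       path, cache->key.objectid);
    }
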
@@ -1298,11 +1416,11 @@ static inline u64 offset_to_bitmap(struct btrfs_free_space_ctl *ctl, | |||
1298 | u64 offset) | 1416 | u64 offset) |
1299 | { | 1417 | { |
1300 | u64 bitmap_start; | 1418 | u64 bitmap_start; |
1301 | u64 bytes_per_bitmap; | 1419 | u32 bytes_per_bitmap; |
1302 | 1420 | ||
1303 | bytes_per_bitmap = BITS_PER_BITMAP * ctl->unit; | 1421 | bytes_per_bitmap = BITS_PER_BITMAP * ctl->unit; |
1304 | bitmap_start = offset - ctl->start; | 1422 | bitmap_start = offset - ctl->start; |
1305 | bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap); | 1423 | bitmap_start = div_u64(bitmap_start, bytes_per_bitmap); |
1306 | bitmap_start *= bytes_per_bitmap; | 1424 | bitmap_start *= bytes_per_bitmap; |
1307 | bitmap_start += ctl->start; | 1425 | bitmap_start += ctl->start; |
1308 | 1426 | ||
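Several conversions in this area swap div64_u64() for div_u64(): on 32-bit kernels a bare / on a u64 does not link, so the helpers in <linux/math64.h> do the division, and the cheaper one applies whenever the divisor fits in 32 bits (as bytes_per_bitmap now does). A quick refresher, values made up:

    #include <linux/math64.h>           /* do_div() itself is <asm/div64.h> */

    u64 n = 10ULL << 32;
    u32 d = 4096;

    u64 q1 = div_u64(n, d);             /* 64-by-32 divide: the cheap path */
    u64 q2 = div64_u64(n, (u64)d);      /* 64-by-64 divide: heavier on 32-bit */
    u32 r  = do_div(n, d);              /* macro: n becomes the quotient,
                                           the remainder is returned */
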
@@ -1521,10 +1639,10 @@ static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl) | |||
1521 | u64 bitmap_bytes; | 1639 | u64 bitmap_bytes; |
1522 | u64 extent_bytes; | 1640 | u64 extent_bytes; |
1523 | u64 size = block_group->key.offset; | 1641 | u64 size = block_group->key.offset; |
1524 | u64 bytes_per_bg = BITS_PER_BITMAP * ctl->unit; | 1642 | u32 bytes_per_bg = BITS_PER_BITMAP * ctl->unit; |
1525 | int max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg); | 1643 | u32 max_bitmaps = div_u64(size + bytes_per_bg - 1, bytes_per_bg); |
1526 | 1644 | ||
1527 | max_bitmaps = max(max_bitmaps, 1); | 1645 | max_bitmaps = max_t(u32, max_bitmaps, 1); |
1528 | 1646 | ||
1529 | ASSERT(ctl->total_bitmaps <= max_bitmaps); | 1647 | ASSERT(ctl->total_bitmaps <= max_bitmaps); |
1530 | 1648 | ||
@@ -1537,7 +1655,7 @@ static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl) | |||
1537 | max_bytes = MAX_CACHE_BYTES_PER_GIG; | 1655 | max_bytes = MAX_CACHE_BYTES_PER_GIG; |
1538 | else | 1656 | else |
1539 | max_bytes = MAX_CACHE_BYTES_PER_GIG * | 1657 | max_bytes = MAX_CACHE_BYTES_PER_GIG * |
1540 | div64_u64(size, 1024 * 1024 * 1024); | 1658 | div_u64(size, 1024 * 1024 * 1024); |
1541 | 1659 | ||
1542 | /* | 1660 | /* |
1543 | * we want to account for 1 more bitmap than what we have so we can make | 1661 | * we want to account for 1 more bitmap than what we have so we can make |
@@ -1552,14 +1670,14 @@ static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl) | |||
1552 | } | 1670 | } |
1553 | 1671 | ||
1554 | /* | 1672 | /* |
1555 | * we want the extent entry threshold to always be at most 1/2 the maxw | 1673 | * we want the extent entry threshold to always be at most 1/2 the max |
1556 | * bytes we can have, or whatever is less than that. | 1674 | * bytes we can have, or whatever is less than that. |
1557 | */ | 1675 | */ |
1558 | extent_bytes = max_bytes - bitmap_bytes; | 1676 | extent_bytes = max_bytes - bitmap_bytes; |
1559 | extent_bytes = min_t(u64, extent_bytes, div64_u64(max_bytes, 2)); | 1677 | extent_bytes = min_t(u64, extent_bytes, max_bytes >> 1); |
1560 | 1678 | ||
1561 | ctl->extents_thresh = | 1679 | ctl->extents_thresh = |
1562 | div64_u64(extent_bytes, (sizeof(struct btrfs_free_space))); | 1680 | div_u64(extent_bytes, sizeof(struct btrfs_free_space)); |
1563 | } | 1681 | } |
1564 | 1682 | ||
1565 | static inline void __bitmap_clear_bits(struct btrfs_free_space_ctl *ctl, | 1683 | static inline void __bitmap_clear_bits(struct btrfs_free_space_ctl *ctl, |
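div_u64(size + bytes_per_bg - 1, bytes_per_bg) above is a round-up division; DIV_ROUND_UP() would state the intent more directly but expands to a bare /, which 32-bit builds cannot apply to the u64 size — hence the spelled-out helper. A sketch with a hypothetical 4K unit:

    u64 size = 1ULL << 34;                      /* e.g. a 16GiB block group */
    u32 bytes_per_bg = BITS_PER_BITMAP * 4096;  /* fits in u32 */

    /* same result as DIV_ROUND_UP(size, bytes_per_bg), but 32-bit safe */
    u32 max_bitmaps = div_u64(size + bytes_per_bg - 1, bytes_per_bg);
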
@@ -1673,7 +1791,7 @@ find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes, | |||
1673 | */ | 1791 | */ |
1674 | if (*bytes >= align) { | 1792 | if (*bytes >= align) { |
1675 | tmp = entry->offset - ctl->start + align - 1; | 1793 | tmp = entry->offset - ctl->start + align - 1; |
1676 | do_div(tmp, align); | 1794 | tmp = div64_u64(tmp, align); |
1677 | tmp = tmp * align + ctl->start; | 1795 | tmp = tmp * align + ctl->start; |
1678 | align_off = tmp - entry->offset; | 1796 | align_off = tmp - entry->offset; |
1679 | } else { | 1797 | } else { |
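The hunk above rounds an entry's offset up to the next align boundary measured from ctl->start; do_div() had to go because it modifies its argument in place and takes a 32-bit divisor, while align is a u64 here. The arithmetic pulled out as a standalone sketch (the helper name is hypothetical):

    /* smallest offset >= off of the form start + k * align */
    static u64 round_up_from(u64 off, u64 start, u64 align)
    {
            u64 tmp = off - start + align - 1;

            tmp = div64_u64(tmp, align);    /* u64 divisor -> div64_u64 */
            return tmp * align + start;
    }
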
@@ -2402,11 +2520,8 @@ static void __btrfs_remove_free_space_cache_locked( | |||
2402 | } else { | 2520 | } else { |
2403 | free_bitmap(ctl, info); | 2521 | free_bitmap(ctl, info); |
2404 | } | 2522 | } |
2405 | if (need_resched()) { | 2523 | |
2406 | spin_unlock(&ctl->tree_lock); | 2524 | cond_resched_lock(&ctl->tree_lock); |
2407 | cond_resched(); | ||
2408 | spin_lock(&ctl->tree_lock); | ||
2409 | } | ||
2410 | } | 2525 | } |
2411 | } | 2526 | } |
2412 | 2527 | ||
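cond_resched_lock() folds the removed unlock/resched/relock dance into one call, and also drops the lock when another CPU is spinning on it. Roughly, as a sketch of the semantics rather than the scheduler's exact code:

    /* approximate expansion of cond_resched_lock(&ctl->tree_lock) */
    if (need_resched() || spin_is_contended(&ctl->tree_lock)) {
            spin_unlock(&ctl->tree_lock);
            cond_resched();
            spin_lock(&ctl->tree_lock);
    }
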
@@ -2431,11 +2546,8 @@ void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group) | |||
2431 | 2546 | ||
2432 | WARN_ON(cluster->block_group != block_group); | 2547 | WARN_ON(cluster->block_group != block_group); |
2433 | __btrfs_return_cluster_to_free_space(block_group, cluster); | 2548 | __btrfs_return_cluster_to_free_space(block_group, cluster); |
2434 | if (need_resched()) { | 2549 | |
2435 | spin_unlock(&ctl->tree_lock); | 2550 | cond_resched_lock(&ctl->tree_lock); |
2436 | cond_resched(); | ||
2437 | spin_lock(&ctl->tree_lock); | ||
2438 | } | ||
2439 | } | 2551 | } |
2440 | __btrfs_remove_free_space_cache_locked(ctl); | 2552 | __btrfs_remove_free_space_cache_locked(ctl); |
2441 | spin_unlock(&ctl->tree_lock); | 2553 | spin_unlock(&ctl->tree_lock); |
@@ -3346,11 +3458,14 @@ int btrfs_write_out_ino_cache(struct btrfs_root *root, | |||
3346 | { | 3458 | { |
3347 | struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; | 3459 | struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; |
3348 | int ret; | 3460 | int ret; |
3461 | struct btrfs_io_ctl io_ctl; | ||
3349 | 3462 | ||
3350 | if (!btrfs_test_opt(root, INODE_MAP_CACHE)) | 3463 | if (!btrfs_test_opt(root, INODE_MAP_CACHE)) |
3351 | return 0; | 3464 | return 0; |
3352 | 3465 | ||
3353 | ret = __btrfs_write_out_cache(root, inode, ctl, NULL, trans, path, 0); | 3466 | ret = __btrfs_write_out_cache(root, inode, ctl, NULL, &io_ctl, |
3467 | trans, path, 0) || | ||
3468 | btrfs_wait_cache_io(root, trans, NULL, &io_ctl, path, 0); | ||
3354 | if (ret) { | 3469 | if (ret) { |
3355 | btrfs_delalloc_release_metadata(inode, inode->i_size); | 3470 | btrfs_delalloc_release_metadata(inode, inode->i_size); |
3356 | #ifdef DEBUG | 3471 | #ifdef DEBUG |
diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h index 88b2238a0aed..a16a029ad3b1 100644 --- a/fs/btrfs/free-space-cache.h +++ b/fs/btrfs/free-space-cache.h | |||
@@ -48,6 +48,8 @@ struct btrfs_free_space_op { | |||
48 | struct btrfs_free_space *info); | 48 | struct btrfs_free_space *info); |
49 | }; | 49 | }; |
50 | 50 | ||
51 | struct btrfs_io_ctl; | ||
52 | |||
51 | struct inode *lookup_free_space_inode(struct btrfs_root *root, | 53 | struct inode *lookup_free_space_inode(struct btrfs_root *root, |
52 | struct btrfs_block_group_cache | 54 | struct btrfs_block_group_cache |
53 | *block_group, struct btrfs_path *path); | 55 | *block_group, struct btrfs_path *path); |
@@ -60,14 +62,19 @@ int btrfs_check_trunc_cache_free_space(struct btrfs_root *root, | |||
60 | struct btrfs_block_rsv *rsv); | 62 | struct btrfs_block_rsv *rsv); |
61 | int btrfs_truncate_free_space_cache(struct btrfs_root *root, | 63 | int btrfs_truncate_free_space_cache(struct btrfs_root *root, |
62 | struct btrfs_trans_handle *trans, | 64 | struct btrfs_trans_handle *trans, |
65 | struct btrfs_block_group_cache *block_group, | ||
63 | struct inode *inode); | 66 | struct inode *inode); |
64 | int load_free_space_cache(struct btrfs_fs_info *fs_info, | 67 | int load_free_space_cache(struct btrfs_fs_info *fs_info, |
65 | struct btrfs_block_group_cache *block_group); | 68 | struct btrfs_block_group_cache *block_group); |
69 | int btrfs_wait_cache_io(struct btrfs_root *root, | ||
70 | struct btrfs_trans_handle *trans, | ||
71 | struct btrfs_block_group_cache *block_group, | ||
72 | struct btrfs_io_ctl *io_ctl, | ||
73 | struct btrfs_path *path, u64 offset); | ||
66 | int btrfs_write_out_cache(struct btrfs_root *root, | 74 | int btrfs_write_out_cache(struct btrfs_root *root, |
67 | struct btrfs_trans_handle *trans, | 75 | struct btrfs_trans_handle *trans, |
68 | struct btrfs_block_group_cache *block_group, | 76 | struct btrfs_block_group_cache *block_group, |
69 | struct btrfs_path *path); | 77 | struct btrfs_path *path); |
70 | |||
71 | struct inode *lookup_free_ino_inode(struct btrfs_root *root, | 78 | struct inode *lookup_free_ino_inode(struct btrfs_root *root, |
72 | struct btrfs_path *path); | 79 | struct btrfs_path *path); |
73 | int create_free_ino_inode(struct btrfs_root *root, | 80 | int create_free_ino_inode(struct btrfs_root *root, |
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index 74faea3a516e..f6a596d5a637 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c | |||
@@ -456,7 +456,7 @@ again: | |||
456 | } | 456 | } |
457 | 457 | ||
458 | if (i_size_read(inode) > 0) { | 458 | if (i_size_read(inode) > 0) { |
459 | ret = btrfs_truncate_free_space_cache(root, trans, inode); | 459 | ret = btrfs_truncate_free_space_cache(root, trans, NULL, inode); |
460 | if (ret) { | 460 | if (ret) { |
461 | if (ret != -ENOSPC) | 461 | if (ret != -ENOSPC) |
462 | btrfs_abort_transaction(trans, root, ret); | 462 | btrfs_abort_transaction(trans, root, ret); |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 43192e10cc43..56f00a25c003 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -59,6 +59,7 @@ | |||
59 | #include "backref.h" | 59 | #include "backref.h" |
60 | #include "hash.h" | 60 | #include "hash.h" |
61 | #include "props.h" | 61 | #include "props.h" |
62 | #include "qgroup.h" | ||
62 | 63 | ||
63 | struct btrfs_iget_args { | 64 | struct btrfs_iget_args { |
64 | struct btrfs_key *location; | 65 | struct btrfs_key *location; |
@@ -470,7 +471,7 @@ again: | |||
470 | */ | 471 | */ |
471 | if (inode_need_compress(inode)) { | 472 | if (inode_need_compress(inode)) { |
472 | WARN_ON(pages); | 473 | WARN_ON(pages); |
473 | pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS); | 474 | pages = kcalloc(nr_pages, sizeof(struct page *), GFP_NOFS); |
474 | if (!pages) { | 475 | if (!pages) { |
475 | /* just bail out to the uncompressed code */ | 476 | /* just bail out to the uncompressed code */ |
476 | goto cont; | 477 | goto cont; |
@@ -752,7 +753,6 @@ retry: | |||
752 | } | 753 | } |
753 | goto out_free; | 754 | goto out_free; |
754 | } | 755 | } |
755 | |||
756 | /* | 756 | /* |
757 | * here we're doing allocation and writeback of the | 757 | * here we're doing allocation and writeback of the |
758 | * compressed pages | 758 | * compressed pages |
@@ -3110,6 +3110,8 @@ void btrfs_run_delayed_iputs(struct btrfs_root *root) | |||
3110 | if (empty) | 3110 | if (empty) |
3111 | return; | 3111 | return; |
3112 | 3112 | ||
3113 | down_read(&fs_info->delayed_iput_sem); | ||
3114 | |||
3113 | spin_lock(&fs_info->delayed_iput_lock); | 3115 | spin_lock(&fs_info->delayed_iput_lock); |
3114 | list_splice_init(&fs_info->delayed_iputs, &list); | 3116 | list_splice_init(&fs_info->delayed_iputs, &list); |
3115 | spin_unlock(&fs_info->delayed_iput_lock); | 3117 | spin_unlock(&fs_info->delayed_iput_lock); |
@@ -3120,6 +3122,8 @@ void btrfs_run_delayed_iputs(struct btrfs_root *root) | |||
3120 | iput(delayed->inode); | 3122 | iput(delayed->inode); |
3121 | kfree(delayed); | 3123 | kfree(delayed); |
3122 | } | 3124 | } |
3125 | |||
3126 | up_read(&root->fs_info->delayed_iput_sem); | ||
3123 | } | 3127 | } |
3124 | 3128 | ||
3125 | /* | 3129 | /* |
@@ -4162,6 +4166,21 @@ out: | |||
4162 | return err; | 4166 | return err; |
4163 | } | 4167 | } |
4164 | 4168 | ||
4169 | static int truncate_space_check(struct btrfs_trans_handle *trans, | ||
4170 | struct btrfs_root *root, | ||
4171 | u64 bytes_deleted) | ||
4172 | { | ||
4173 | int ret; | ||
4174 | |||
4175 | bytes_deleted = btrfs_csum_bytes_to_leaves(root, bytes_deleted); | ||
4176 | ret = btrfs_block_rsv_add(root, &root->fs_info->trans_block_rsv, | ||
4177 | bytes_deleted, BTRFS_RESERVE_NO_FLUSH); | ||
4178 | if (!ret) | ||
4179 | trans->bytes_reserved += bytes_deleted; | ||
4180 | return ret; | ||
4181 | |||
4182 | } | ||
4183 | |||
4165 | /* | 4184 | /* |
4166 | * this can truncate away extent items, csum items and directory items. | 4185 | * this can truncate away extent items, csum items and directory items. |
4167 | * It starts at a high offset and removes keys until it can't find | 4186 | * It starts at a high offset and removes keys until it can't find |
@@ -4197,9 +4216,21 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, | |||
4197 | int ret; | 4216 | int ret; |
4198 | int err = 0; | 4217 | int err = 0; |
4199 | u64 ino = btrfs_ino(inode); | 4218 | u64 ino = btrfs_ino(inode); |
4219 | u64 bytes_deleted = 0; | ||
4220 | bool be_nice = 0; | ||
4221 | bool should_throttle = 0; | ||
4222 | bool should_end = 0; | ||
4200 | 4223 | ||
4201 | BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY); | 4224 | BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY); |
4202 | 4225 | ||
4226 | /* | ||
4227 | * for non-free space inodes and ref cows, we want to back off from | ||
4228 | * time to time | ||
4229 | */ | ||
4230 | if (!btrfs_is_free_space_inode(inode) && | ||
4231 | test_bit(BTRFS_ROOT_REF_COWS, &root->state)) | ||
4232 | be_nice = 1; | ||
4233 | |||
4203 | path = btrfs_alloc_path(); | 4234 | path = btrfs_alloc_path(); |
4204 | if (!path) | 4235 | if (!path) |
4205 | return -ENOMEM; | 4236 | return -ENOMEM; |
@@ -4229,6 +4260,19 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, | |||
4229 | key.type = (u8)-1; | 4260 | key.type = (u8)-1; |
4230 | 4261 | ||
4231 | search_again: | 4262 | search_again: |
4263 | /* | ||
4264 | * with a 16K leaf size and 128MB extents, you can actually queue | ||
4265 | * up a huge file in a single leaf. Most of the time that | ||
4266 | * bytes_deleted is > 0, it will be huge by the time we get here | ||
4267 | */ | ||
4268 | if (be_nice && bytes_deleted > 32 * 1024 * 1024) { | ||
4269 | if (btrfs_should_end_transaction(trans, root)) { | ||
4270 | err = -EAGAIN; | ||
4271 | goto error; | ||
4272 | } | ||
4273 | } | ||
4274 | |||
4275 | |||
4232 | path->leave_spinning = 1; | 4276 | path->leave_spinning = 1; |
4233 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); | 4277 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); |
4234 | if (ret < 0) { | 4278 | if (ret < 0) { |
@@ -4371,22 +4415,39 @@ delete: | |||
4371 | } else { | 4415 | } else { |
4372 | break; | 4416 | break; |
4373 | } | 4417 | } |
4418 | should_throttle = 0; | ||
4419 | |||
4374 | if (found_extent && | 4420 | if (found_extent && |
4375 | (test_bit(BTRFS_ROOT_REF_COWS, &root->state) || | 4421 | (test_bit(BTRFS_ROOT_REF_COWS, &root->state) || |
4376 | root == root->fs_info->tree_root)) { | 4422 | root == root->fs_info->tree_root)) { |
4377 | btrfs_set_path_blocking(path); | 4423 | btrfs_set_path_blocking(path); |
4424 | bytes_deleted += extent_num_bytes; | ||
4378 | ret = btrfs_free_extent(trans, root, extent_start, | 4425 | ret = btrfs_free_extent(trans, root, extent_start, |
4379 | extent_num_bytes, 0, | 4426 | extent_num_bytes, 0, |
4380 | btrfs_header_owner(leaf), | 4427 | btrfs_header_owner(leaf), |
4381 | ino, extent_offset, 0); | 4428 | ino, extent_offset, 0); |
4382 | BUG_ON(ret); | 4429 | BUG_ON(ret); |
4430 | if (btrfs_should_throttle_delayed_refs(trans, root)) | ||
4431 | btrfs_async_run_delayed_refs(root, | ||
4432 | trans->delayed_ref_updates * 2, 0); | ||
4433 | if (be_nice) { | ||
4434 | if (truncate_space_check(trans, root, | ||
4435 | extent_num_bytes)) { | ||
4436 | should_end = 1; | ||
4437 | } | ||
4438 | if (btrfs_should_throttle_delayed_refs(trans, | ||
4439 | root)) { | ||
4440 | should_throttle = 1; | ||
4441 | } | ||
4442 | } | ||
4383 | } | 4443 | } |
4384 | 4444 | ||
4385 | if (found_type == BTRFS_INODE_ITEM_KEY) | 4445 | if (found_type == BTRFS_INODE_ITEM_KEY) |
4386 | break; | 4446 | break; |
4387 | 4447 | ||
4388 | if (path->slots[0] == 0 || | 4448 | if (path->slots[0] == 0 || |
4389 | path->slots[0] != pending_del_slot) { | 4449 | path->slots[0] != pending_del_slot || |
4450 | should_throttle || should_end) { | ||
4390 | if (pending_del_nr) { | 4451 | if (pending_del_nr) { |
4391 | ret = btrfs_del_items(trans, root, path, | 4452 | ret = btrfs_del_items(trans, root, path, |
4392 | pending_del_slot, | 4453 | pending_del_slot, |
@@ -4399,6 +4460,23 @@ delete: | |||
4399 | pending_del_nr = 0; | 4460 | pending_del_nr = 0; |
4400 | } | 4461 | } |
4401 | btrfs_release_path(path); | 4462 | btrfs_release_path(path); |
4463 | if (should_throttle) { | ||
4464 | unsigned long updates = trans->delayed_ref_updates; | ||
4465 | if (updates) { | ||
4466 | trans->delayed_ref_updates = 0; | ||
4467 | ret = btrfs_run_delayed_refs(trans, root, updates * 2); | ||
4468 | if (ret && !err) | ||
4469 | err = ret; | ||
4470 | } | ||
4471 | } | ||
4472 | /* | ||
4473 | * if we failed to refill our space rsv, bail out | ||
4474 | * and let the transaction restart | ||
4475 | */ | ||
4476 | if (should_end) { | ||
4477 | err = -EAGAIN; | ||
4478 | goto error; | ||
4479 | } | ||
4402 | goto search_again; | 4480 | goto search_again; |
4403 | } else { | 4481 | } else { |
4404 | path->slots[0]--; | 4482 | path->slots[0]--; |
@@ -4415,7 +4493,18 @@ error: | |||
4415 | if (last_size != (u64)-1 && | 4493 | if (last_size != (u64)-1 && |
4416 | root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) | 4494 | root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) |
4417 | btrfs_ordered_update_i_size(inode, last_size, NULL); | 4495 | btrfs_ordered_update_i_size(inode, last_size, NULL); |
4496 | |||
4418 | btrfs_free_path(path); | 4497 | btrfs_free_path(path); |
4498 | |||
4499 | if (be_nice && bytes_deleted > 32 * 1024 * 1024) { | ||
4500 | unsigned long updates = trans->delayed_ref_updates; | ||
4501 | if (updates) { | ||
4502 | trans->delayed_ref_updates = 0; | ||
4503 | ret = btrfs_run_delayed_refs(trans, root, updates * 2); | ||
4504 | if (ret && !err) | ||
4505 | err = ret; | ||
4506 | } | ||
4507 | } | ||
4419 | return err; | 4508 | return err; |
4420 | } | 4509 | } |
4421 | 4510 | ||
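The truncate loop now throttles itself: it tracks bytes_deleted, asks btrfs_should_throttle_delayed_refs() whether the queued ref updates have become expensive, and drains them in a batch sized to roughly twice what this handle queued. The recurring pattern, condensed:

    /* sketch: flush this transaction's queued delayed refs in a batch */
    unsigned long updates = trans->delayed_ref_updates;

    if (updates) {
            trans->delayed_ref_updates = 0;
            ret = btrfs_run_delayed_refs(trans, root, updates * 2);
            if (ret && !err)
                    err = ret;      /* keep the first error we saw */
    }
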
@@ -4924,6 +5013,7 @@ void btrfs_evict_inode(struct inode *inode) | |||
4924 | struct btrfs_trans_handle *trans; | 5013 | struct btrfs_trans_handle *trans; |
4925 | struct btrfs_root *root = BTRFS_I(inode)->root; | 5014 | struct btrfs_root *root = BTRFS_I(inode)->root; |
4926 | struct btrfs_block_rsv *rsv, *global_rsv; | 5015 | struct btrfs_block_rsv *rsv, *global_rsv; |
5016 | int steal_from_global = 0; | ||
4927 | u64 min_size = btrfs_calc_trunc_metadata_size(root, 1); | 5017 | u64 min_size = btrfs_calc_trunc_metadata_size(root, 1); |
4928 | int ret; | 5018 | int ret; |
4929 | 5019 | ||
@@ -4991,9 +5081,20 @@ void btrfs_evict_inode(struct inode *inode) | |||
4991 | * hard as possible to get this to work. | 5081 | * hard as possible to get this to work. |
4992 | */ | 5082 | */ |
4993 | if (ret) | 5083 | if (ret) |
4994 | ret = btrfs_block_rsv_migrate(global_rsv, rsv, min_size); | 5084 | steal_from_global++; |
5085 | else | ||
5086 | steal_from_global = 0; | ||
5087 | ret = 0; | ||
4995 | 5088 | ||
4996 | if (ret) { | 5089 | /* |
5090 | * steal_from_global == 0: we reserved stuff, hooray! | ||
5091 | * steal_from_global == 1: we didn't reserve stuff, boo! | ||
5092 | * steal_from_global == 2: we've committed, still not a lot of | ||
5093 | * room but maybe we'll have room in the global reserve this | ||
5094 | * time. | ||
5095 | * steal_from_global == 3: abandon all hope! | ||
5096 | */ | ||
5097 | if (steal_from_global > 2) { | ||
4997 | btrfs_warn(root->fs_info, | 5098 | btrfs_warn(root->fs_info, |
4998 | "Could not get space for a delete, will truncate on mount %d", | 5099 | "Could not get space for a delete, will truncate on mount %d", |
4999 | ret); | 5100 | ret); |
@@ -5009,10 +5110,40 @@ void btrfs_evict_inode(struct inode *inode) | |||
5009 | goto no_delete; | 5110 | goto no_delete; |
5010 | } | 5111 | } |
5011 | 5112 | ||
5113 | /* | ||
5114 | * We can't just steal from the global reserve; we need to make | ||
5115 | * sure there is room to do it. If not, we need to commit and try | ||
5116 | * again. | ||
5117 | */ | ||
5118 | if (steal_from_global) { | ||
5119 | if (!btrfs_check_space_for_delayed_refs(trans, root)) | ||
5120 | ret = btrfs_block_rsv_migrate(global_rsv, rsv, | ||
5121 | min_size); | ||
5122 | else | ||
5123 | ret = -ENOSPC; | ||
5124 | } | ||
5125 | |||
5126 | /* | ||
5127 | * Couldn't steal from the global reserve, we have too much | ||
5128 | * pending stuff built up, commit the transaction and try it | ||
5129 | * again. | ||
5130 | */ | ||
5131 | if (ret) { | ||
5132 | ret = btrfs_commit_transaction(trans, root); | ||
5133 | if (ret) { | ||
5134 | btrfs_orphan_del(NULL, inode); | ||
5135 | btrfs_free_block_rsv(root, rsv); | ||
5136 | goto no_delete; | ||
5137 | } | ||
5138 | continue; | ||
5139 | } else { | ||
5140 | steal_from_global = 0; | ||
5141 | } | ||
5142 | |||
5012 | trans->block_rsv = rsv; | 5143 | trans->block_rsv = rsv; |
5013 | 5144 | ||
5014 | ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0); | 5145 | ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0); |
5015 | if (ret != -ENOSPC) | 5146 | if (ret != -ENOSPC && ret != -EAGAIN) |
5016 | break; | 5147 | break; |
5017 | 5148 | ||
5018 | trans->block_rsv = &root->fs_info->trans_block_rsv; | 5149 | trans->block_rsv = &root->fs_info->trans_block_rsv; |
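Tying the eviction changes together: -EAGAIN from the throttled truncate is now handled like -ENOSPC, i.e. refill the reservation (possibly stealing from the global reserve, possibly committing) and go around again. The loop, condensed to a sketch:

    /* sketch of the retry loop in btrfs_evict_inode() */
    while (1) {
            trans->block_rsv = rsv;
            ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0);
            if (ret != -ENOSPC && ret != -EAGAIN)
                    break;          /* finished, or a hard error */

            trans->block_rsv = &root->fs_info->trans_block_rsv;
            /* ... end the handle, maybe commit, re-reserve, retry ... */
    }
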
@@ -8581,7 +8712,7 @@ static int btrfs_truncate(struct inode *inode) | |||
8581 | ret = btrfs_truncate_inode_items(trans, root, inode, | 8712 | ret = btrfs_truncate_inode_items(trans, root, inode, |
8582 | inode->i_size, | 8713 | inode->i_size, |
8583 | BTRFS_EXTENT_DATA_KEY); | 8714 | BTRFS_EXTENT_DATA_KEY); |
8584 | if (ret != -ENOSPC) { | 8715 | if (ret != -ENOSPC && ret != -EAGAIN) { |
8585 | err = ret; | 8716 | err = ret; |
8586 | break; | 8717 | break; |
8587 | } | 8718 | } |
@@ -9451,6 +9582,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode, | |||
9451 | btrfs_end_transaction(trans, root); | 9582 | btrfs_end_transaction(trans, root); |
9452 | break; | 9583 | break; |
9453 | } | 9584 | } |
9585 | |||
9454 | btrfs_drop_extent_cache(inode, cur_offset, | 9586 | btrfs_drop_extent_cache(inode, cur_offset, |
9455 | cur_offset + ins.offset -1, 0); | 9587 | cur_offset + ins.offset -1, 0); |
9456 | 9588 | ||
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 74609b931ba5..ca5d968f4c37 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
@@ -456,6 +456,13 @@ static noinline int create_subvol(struct inode *dir, | |||
456 | if (ret) | 456 | if (ret) |
457 | return ret; | 457 | return ret; |
458 | 458 | ||
459 | /* | ||
460 | * Don't create a subvolume whose level is not zero, or qgroup will be | ||
461 | * screwed up since it assumes the subvolume qgroup's level to be 0. | ||
462 | */ | ||
463 | if (btrfs_qgroup_level(objectid)) | ||
464 | return -ENOSPC; | ||
465 | |||
459 | btrfs_init_block_rsv(&block_rsv, BTRFS_BLOCK_RSV_TEMP); | 466 | btrfs_init_block_rsv(&block_rsv, BTRFS_BLOCK_RSV_TEMP); |
460 | /* | 467 | /* |
461 | * The same as the snapshot creation, please see the comment | 468 | * The same as the snapshot creation, please see the comment |
@@ -1564,7 +1571,7 @@ static noinline int btrfs_ioctl_resize(struct file *file, | |||
1564 | goto out_free; | 1571 | goto out_free; |
1565 | } | 1572 | } |
1566 | 1573 | ||
1567 | do_div(new_size, root->sectorsize); | 1574 | new_size = div_u64(new_size, root->sectorsize); |
1568 | new_size *= root->sectorsize; | 1575 | new_size *= root->sectorsize; |
1569 | 1576 | ||
1570 | printk_in_rcu(KERN_INFO "BTRFS: new size for %s is %llu\n", | 1577 | printk_in_rcu(KERN_INFO "BTRFS: new size for %s is %llu\n", |
@@ -2897,6 +2904,9 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 len, | |||
2897 | if (src == dst) | 2904 | if (src == dst) |
2898 | return -EINVAL; | 2905 | return -EINVAL; |
2899 | 2906 | ||
2907 | if (len == 0) | ||
2908 | return 0; | ||
2909 | |||
2900 | btrfs_double_lock(src, loff, dst, dst_loff, len); | 2910 | btrfs_double_lock(src, loff, dst, dst_loff, len); |
2901 | 2911 | ||
2902 | ret = extent_same_check_offsets(src, loff, len); | 2912 | ret = extent_same_check_offsets(src, loff, len); |
@@ -3039,7 +3049,7 @@ out: | |||
3039 | static int check_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, | 3049 | static int check_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, |
3040 | u64 disko) | 3050 | u64 disko) |
3041 | { | 3051 | { |
3042 | struct seq_list tree_mod_seq_elem = {}; | 3052 | struct seq_list tree_mod_seq_elem = SEQ_LIST_INIT(tree_mod_seq_elem); |
3043 | struct ulist *roots; | 3053 | struct ulist *roots; |
3044 | struct ulist_iterator uiter; | 3054 | struct ulist_iterator uiter; |
3045 | struct ulist_node *root_node = NULL; | 3055 | struct ulist_node *root_node = NULL; |
@@ -3202,6 +3212,8 @@ static int btrfs_clone(struct inode *src, struct inode *inode, | |||
3202 | key.offset = off; | 3212 | key.offset = off; |
3203 | 3213 | ||
3204 | while (1) { | 3214 | while (1) { |
3215 | u64 next_key_min_offset = key.offset + 1; | ||
3216 | |||
3205 | /* | 3217 | /* |
3206 | * note the key will change type as we walk through the | 3218 | * note the key will change type as we walk through the |
3207 | * tree. | 3219 | * tree. |
@@ -3282,7 +3294,7 @@ process_slot: | |||
3282 | } else if (key.offset >= off + len) { | 3294 | } else if (key.offset >= off + len) { |
3283 | break; | 3295 | break; |
3284 | } | 3296 | } |
3285 | | 3297 | next_key_min_offset = key.offset + datal;
3286 | size = btrfs_item_size_nr(leaf, slot); | 3298 | size = btrfs_item_size_nr(leaf, slot); |
3287 | read_extent_buffer(leaf, buf, | 3299 | read_extent_buffer(leaf, buf, |
3288 | btrfs_item_ptr_offset(leaf, slot), | 3300 | btrfs_item_ptr_offset(leaf, slot), |
@@ -3497,7 +3509,7 @@ process_slot: | |||
3497 | break; | 3509 | break; |
3498 | } | 3510 | } |
3499 | btrfs_release_path(path); | 3511 | btrfs_release_path(path); |
3500 | key.offset++; | 3512 | key.offset = next_key_min_offset; |
3501 | } | 3513 | } |
3502 | ret = 0; | 3514 | ret = 0; |
3503 | 3515 | ||
@@ -3626,6 +3638,11 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
3626 | if (off + len == src->i_size) | 3638 | if (off + len == src->i_size) |
3627 | len = ALIGN(src->i_size, bs) - off; | 3639 | len = ALIGN(src->i_size, bs) - off; |
3628 | 3640 | ||
3641 | if (len == 0) { | ||
3642 | ret = 0; | ||
3643 | goto out_unlock; | ||
3644 | } | ||
3645 | |||
3629 | /* verify the end result is block aligned */ | 3646 | /* verify the end result is block aligned */ |
3630 | if (!IS_ALIGNED(off, bs) || !IS_ALIGNED(off + len, bs) || | 3647 | if (!IS_ALIGNED(off, bs) || !IS_ALIGNED(off + len, bs) || |
3631 | !IS_ALIGNED(destoff, bs)) | 3648 | !IS_ALIGNED(destoff, bs)) |
@@ -4624,6 +4641,11 @@ static long btrfs_ioctl_qgroup_assign(struct file *file, void __user *arg) | |||
4624 | sa->src, sa->dst); | 4641 | sa->src, sa->dst); |
4625 | } | 4642 | } |
4626 | 4643 | ||
4644 | /* update qgroup status and info */ | ||
4645 | err = btrfs_run_qgroups(trans, root->fs_info); | ||
4646 | if (err < 0) | ||
4647 | btrfs_error(root->fs_info, err, | ||
4648 | "failed to update qgroup status and info\n"); | ||
4627 | err = btrfs_end_transaction(trans, root); | 4649 | err = btrfs_end_transaction(trans, root); |
4628 | if (err && !ret) | 4650 | if (err && !ret) |
4629 | ret = err; | 4651 | ret = err; |
@@ -4669,8 +4691,7 @@ static long btrfs_ioctl_qgroup_create(struct file *file, void __user *arg) | |||
4669 | 4691 | ||
4670 | /* FIXME: check if the IDs really exist */ | 4692 | /* FIXME: check if the IDs really exist */ |
4671 | if (sa->create) { | 4693 | if (sa->create) { |
4672 | ret = btrfs_create_qgroup(trans, root->fs_info, sa->qgroupid, | 4694 | ret = btrfs_create_qgroup(trans, root->fs_info, sa->qgroupid); |
4673 | NULL); | ||
4674 | } else { | 4695 | } else { |
4675 | ret = btrfs_remove_qgroup(trans, root->fs_info, sa->qgroupid); | 4696 | ret = btrfs_remove_qgroup(trans, root->fs_info, sa->qgroupid); |
4676 | } | 4697 | } |
diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c index 617553cdb7d3..a2f051347731 100644 --- a/fs/btrfs/lzo.c +++ b/fs/btrfs/lzo.c | |||
@@ -434,7 +434,7 @@ out: | |||
434 | return ret; | 434 | return ret; |
435 | } | 435 | } |
436 | 436 | ||
437 | struct btrfs_compress_op btrfs_lzo_compress = { | 437 | const struct btrfs_compress_op btrfs_lzo_compress = { |
438 | .alloc_workspace = lzo_alloc_workspace, | 438 | .alloc_workspace = lzo_alloc_workspace, |
439 | .free_workspace = lzo_free_workspace, | 439 | .free_workspace = lzo_free_workspace, |
440 | .compress_pages = lzo_compress_pages, | 440 | .compress_pages = lzo_compress_pages, |
diff --git a/fs/btrfs/math.h b/fs/btrfs/math.h index b7816cefbd13..1b10a3cd1195 100644 --- a/fs/btrfs/math.h +++ b/fs/btrfs/math.h | |||
@@ -28,8 +28,7 @@ static inline u64 div_factor(u64 num, int factor) | |||
28 | if (factor == 10) | 28 | if (factor == 10) |
29 | return num; | 29 | return num; |
30 | num *= factor; | 30 | num *= factor; |
31 | do_div(num, 10); | 31 | return div_u64(num, 10); |
32 | return num; | ||
33 | } | 32 | } |
34 | 33 | ||
35 | static inline u64 div_factor_fine(u64 num, int factor) | 34 | static inline u64 div_factor_fine(u64 num, int factor) |
@@ -37,8 +36,7 @@ static inline u64 div_factor_fine(u64 num, int factor) | |||
37 | if (factor == 100) | 36 | if (factor == 100) |
38 | return num; | 37 | return num; |
39 | num *= factor; | 38 | num *= factor; |
40 | do_div(num, 100); | 39 | return div_u64(num, 100); |
41 | return num; | ||
42 | } | 40 | } |
43 | 41 | ||
44 | #endif | 42 | #endif |
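With the conversion, both helpers return the quotient directly instead of mutating num through do_div(). For reference, div_factor() scales in tenths and div_factor_fine() in hundredths:

    u64 a = div_factor(1000, 8);        /* 1000 * 8 / 10  == 800 */
    u64 b = div_factor_fine(1000, 95);  /* 1000 * 95 / 100 == 950 */
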
diff --git a/fs/btrfs/props.c b/fs/btrfs/props.c index 129b1dd28527..dca137b04095 100644 --- a/fs/btrfs/props.c +++ b/fs/btrfs/props.c | |||
@@ -425,3 +425,5 @@ static const char *prop_compression_extract(struct inode *inode) | |||
425 | 425 | ||
426 | return NULL; | 426 | return NULL; |
427 | } | 427 | } |
428 | |||
429 | |||
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 058c79eecbfb..3d6546581bb9 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c | |||
@@ -644,9 +644,8 @@ out: | |||
644 | } | 644 | } |
645 | 645 | ||
646 | static int update_qgroup_limit_item(struct btrfs_trans_handle *trans, | 646 | static int update_qgroup_limit_item(struct btrfs_trans_handle *trans, |
647 | struct btrfs_root *root, u64 qgroupid, | 647 | struct btrfs_root *root, |
648 | u64 flags, u64 max_rfer, u64 max_excl, | 648 | struct btrfs_qgroup *qgroup) |
649 | u64 rsv_rfer, u64 rsv_excl) | ||
650 | { | 649 | { |
651 | struct btrfs_path *path; | 650 | struct btrfs_path *path; |
652 | struct btrfs_key key; | 651 | struct btrfs_key key; |
@@ -657,7 +656,7 @@ static int update_qgroup_limit_item(struct btrfs_trans_handle *trans, | |||
657 | 656 | ||
658 | key.objectid = 0; | 657 | key.objectid = 0; |
659 | key.type = BTRFS_QGROUP_LIMIT_KEY; | 658 | key.type = BTRFS_QGROUP_LIMIT_KEY; |
660 | key.offset = qgroupid; | 659 | key.offset = qgroup->qgroupid; |
661 | 660 | ||
662 | path = btrfs_alloc_path(); | 661 | path = btrfs_alloc_path(); |
663 | if (!path) | 662 | if (!path) |
@@ -673,11 +672,11 @@ static int update_qgroup_limit_item(struct btrfs_trans_handle *trans, | |||
673 | l = path->nodes[0]; | 672 | l = path->nodes[0]; |
674 | slot = path->slots[0]; | 673 | slot = path->slots[0]; |
675 | qgroup_limit = btrfs_item_ptr(l, slot, struct btrfs_qgroup_limit_item); | 674 | qgroup_limit = btrfs_item_ptr(l, slot, struct btrfs_qgroup_limit_item); |
676 | btrfs_set_qgroup_limit_flags(l, qgroup_limit, flags); | 675 | btrfs_set_qgroup_limit_flags(l, qgroup_limit, qgroup->lim_flags); |
677 | btrfs_set_qgroup_limit_max_rfer(l, qgroup_limit, max_rfer); | 676 | btrfs_set_qgroup_limit_max_rfer(l, qgroup_limit, qgroup->max_rfer); |
678 | btrfs_set_qgroup_limit_max_excl(l, qgroup_limit, max_excl); | 677 | btrfs_set_qgroup_limit_max_excl(l, qgroup_limit, qgroup->max_excl); |
679 | btrfs_set_qgroup_limit_rsv_rfer(l, qgroup_limit, rsv_rfer); | 678 | btrfs_set_qgroup_limit_rsv_rfer(l, qgroup_limit, qgroup->rsv_rfer); |
680 | btrfs_set_qgroup_limit_rsv_excl(l, qgroup_limit, rsv_excl); | 679 | btrfs_set_qgroup_limit_rsv_excl(l, qgroup_limit, qgroup->rsv_excl); |
681 | 680 | ||
682 | btrfs_mark_buffer_dirty(l); | 681 | btrfs_mark_buffer_dirty(l); |
683 | 682 | ||
@@ -967,6 +966,7 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans, | |||
967 | fs_info->pending_quota_state = 0; | 966 | fs_info->pending_quota_state = 0; |
968 | quota_root = fs_info->quota_root; | 967 | quota_root = fs_info->quota_root; |
969 | fs_info->quota_root = NULL; | 968 | fs_info->quota_root = NULL; |
969 | fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON; | ||
970 | spin_unlock(&fs_info->qgroup_lock); | 970 | spin_unlock(&fs_info->qgroup_lock); |
971 | 971 | ||
972 | btrfs_free_qgroup_config(fs_info); | 972 | btrfs_free_qgroup_config(fs_info); |
@@ -982,7 +982,7 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans, | |||
982 | list_del("a_root->dirty_list); | 982 | list_del("a_root->dirty_list); |
983 | 983 | ||
984 | btrfs_tree_lock(quota_root->node); | 984 | btrfs_tree_lock(quota_root->node); |
985 | clean_tree_block(trans, tree_root, quota_root->node); | 985 | clean_tree_block(trans, tree_root->fs_info, quota_root->node); |
986 | btrfs_tree_unlock(quota_root->node); | 986 | btrfs_tree_unlock(quota_root->node); |
987 | btrfs_free_tree_block(trans, quota_root, quota_root->node, 0, 1); | 987 | btrfs_free_tree_block(trans, quota_root, quota_root->node, 0, 1); |
988 | 988 | ||
@@ -1001,6 +1001,110 @@ static void qgroup_dirty(struct btrfs_fs_info *fs_info, | |||
1001 | list_add(&qgroup->dirty, &fs_info->dirty_qgroups); | 1001 | list_add(&qgroup->dirty, &fs_info->dirty_qgroups); |
1002 | } | 1002 | } |
1003 | 1003 | ||
1004 | /* | ||
1005 | * The easy accounting, if we are adding/removing the only ref for an extent | ||
1006 | * then this qgroup and all of the parent qgroups get their reference and | ||
1007 | * exclusive counts adjusted. | ||
1008 | * | ||
1009 | * Caller should hold fs_info->qgroup_lock. | ||
1010 | */ | ||
1011 | static int __qgroup_excl_accounting(struct btrfs_fs_info *fs_info, | ||
1012 | struct ulist *tmp, u64 ref_root, | ||
1013 | u64 num_bytes, int sign) | ||
1014 | { | ||
1015 | struct btrfs_qgroup *qgroup; | ||
1016 | struct btrfs_qgroup_list *glist; | ||
1017 | struct ulist_node *unode; | ||
1018 | struct ulist_iterator uiter; | ||
1019 | int ret = 0; | ||
1020 | |||
1021 | qgroup = find_qgroup_rb(fs_info, ref_root); | ||
1022 | if (!qgroup) | ||
1023 | goto out; | ||
1024 | |||
1025 | qgroup->rfer += sign * num_bytes; | ||
1026 | qgroup->rfer_cmpr += sign * num_bytes; | ||
1027 | |||
1028 | WARN_ON(sign < 0 && qgroup->excl < num_bytes); | ||
1029 | qgroup->excl += sign * num_bytes; | ||
1030 | qgroup->excl_cmpr += sign * num_bytes; | ||
1031 | if (sign > 0) | ||
1032 | qgroup->reserved -= num_bytes; | ||
1033 | |||
1034 | qgroup_dirty(fs_info, qgroup); | ||
1035 | |||
1036 | /* Get all of the parent groups that contain this qgroup */ | ||
1037 | list_for_each_entry(glist, &qgroup->groups, next_group) { | ||
1038 | ret = ulist_add(tmp, glist->group->qgroupid, | ||
1039 | ptr_to_u64(glist->group), GFP_ATOMIC); | ||
1040 | if (ret < 0) | ||
1041 | goto out; | ||
1042 | } | ||
1043 | |||
1044 | /* Iterate all of the parents and adjust their reference counts */ | ||
1045 | ULIST_ITER_INIT(&uiter); | ||
1046 | while ((unode = ulist_next(tmp, &uiter))) { | ||
1047 | qgroup = u64_to_ptr(unode->aux); | ||
1048 | qgroup->rfer += sign * num_bytes; | ||
1049 | qgroup->rfer_cmpr += sign * num_bytes; | ||
1050 | WARN_ON(sign < 0 && qgroup->excl < num_bytes); | ||
1051 | qgroup->excl += sign * num_bytes; | ||
1052 | if (sign > 0) | ||
1053 | qgroup->reserved -= num_bytes; | ||
1054 | qgroup->excl_cmpr += sign * num_bytes; | ||
1055 | qgroup_dirty(fs_info, qgroup); | ||
1056 | |||
1057 | /* Add any parents of the parents */ | ||
1058 | list_for_each_entry(glist, &qgroup->groups, next_group) { | ||
1059 | ret = ulist_add(tmp, glist->group->qgroupid, | ||
1060 | ptr_to_u64(glist->group), GFP_ATOMIC); | ||
1061 | if (ret < 0) | ||
1062 | goto out; | ||
1063 | } | ||
1064 | } | ||
1065 | ret = 0; | ||
1066 | out: | ||
1067 | return ret; | ||
1068 | } | ||
1069 | |||
1070 | |||
1071 | /* | ||
1072 | * Quick path for updating qgroup with only excl refs. | ||
1073 | * | ||
1074 | * In that case, just updating all the parents will be enough; | ||
1075 | * otherwise we need to do a full rescan. | ||
1076 | * Caller should also hold fs_info->qgroup_lock. | ||
1077 | * | ||
1078 | * Return 0 for a quick update, >0 if a full rescan is needed | ||
1079 | * (in which case the INCONSISTENT flag is also set). | ||
1080 | * Return < 0 for other errors. | ||
1081 | */ | ||
1082 | static int quick_update_accounting(struct btrfs_fs_info *fs_info, | ||
1083 | struct ulist *tmp, u64 src, u64 dst, | ||
1084 | int sign) | ||
1085 | { | ||
1086 | struct btrfs_qgroup *qgroup; | ||
1087 | int ret = 1; | ||
1088 | int err = 0; | ||
1089 | |||
1090 | qgroup = find_qgroup_rb(fs_info, src); | ||
1091 | if (!qgroup) | ||
1092 | goto out; | ||
1093 | if (qgroup->excl == qgroup->rfer) { | ||
1094 | ret = 0; | ||
1095 | err = __qgroup_excl_accounting(fs_info, tmp, dst, | ||
1096 | qgroup->excl, sign); | ||
1097 | if (err < 0) { | ||
1098 | ret = err; | ||
1099 | goto out; | ||
1100 | } | ||
1101 | } | ||
1102 | out: | ||
1103 | if (ret) | ||
1104 | fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; | ||
1105 | return ret; | ||
1106 | } | ||
1107 | |||
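quick_update_accounting() keys off a simple invariant: when a qgroup's exclusive and referenced counts are equal, nothing it owns is shared, so attaching it to (or detaching it from) a parent moves exactly excl bytes up the hierarchy and the expensive rescan can be skipped. Schematically:

    /* sketch: the fast-path decision used by the relation updates */
    if (qgroup->excl == qgroup->rfer)
            /* all bytes exclusive: adjust every parent by excl directly */
            ret = __qgroup_excl_accounting(fs_info, tmp, dst,
                                           qgroup->excl, sign);
    else
            /* shared extents involved: flag it, a full rescan is needed */
            fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
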
1004 | int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, | 1108 | int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, |
1005 | struct btrfs_fs_info *fs_info, u64 src, u64 dst) | 1109 | struct btrfs_fs_info *fs_info, u64 src, u64 dst) |
1006 | { | 1110 | { |
@@ -1008,8 +1112,17 @@ int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, | |||
1008 | struct btrfs_qgroup *parent; | 1112 | struct btrfs_qgroup *parent; |
1009 | struct btrfs_qgroup *member; | 1113 | struct btrfs_qgroup *member; |
1010 | struct btrfs_qgroup_list *list; | 1114 | struct btrfs_qgroup_list *list; |
1115 | struct ulist *tmp; | ||
1011 | int ret = 0; | 1116 | int ret = 0; |
1012 | 1117 | ||
1118 | /* Check the level of src and dst first */ | ||
1119 | if (btrfs_qgroup_level(src) >= btrfs_qgroup_level(dst)) | ||
1120 | return -EINVAL; | ||
1121 | |||
1122 | tmp = ulist_alloc(GFP_NOFS); | ||
1123 | if (!tmp) | ||
1124 | return -ENOMEM; | ||
1125 | |||
1013 | mutex_lock(&fs_info->qgroup_ioctl_lock); | 1126 | mutex_lock(&fs_info->qgroup_ioctl_lock); |
1014 | quota_root = fs_info->quota_root; | 1127 | quota_root = fs_info->quota_root; |
1015 | if (!quota_root) { | 1128 | if (!quota_root) { |
@@ -1043,23 +1156,33 @@ int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, | |||
1043 | 1156 | ||
1044 | spin_lock(&fs_info->qgroup_lock); | 1157 | spin_lock(&fs_info->qgroup_lock); |
1045 | ret = add_relation_rb(quota_root->fs_info, src, dst); | 1158 | ret = add_relation_rb(quota_root->fs_info, src, dst); |
1159 | if (ret < 0) { | ||
1160 | spin_unlock(&fs_info->qgroup_lock); | ||
1161 | goto out; | ||
1162 | } | ||
1163 | ret = quick_update_accounting(fs_info, tmp, src, dst, 1); | ||
1046 | spin_unlock(&fs_info->qgroup_lock); | 1164 | spin_unlock(&fs_info->qgroup_lock); |
1047 | out: | 1165 | out: |
1048 | mutex_unlock(&fs_info->qgroup_ioctl_lock); | 1166 | mutex_unlock(&fs_info->qgroup_ioctl_lock); |
1167 | ulist_free(tmp); | ||
1049 | return ret; | 1168 | return ret; |
1050 | } | 1169 | } |
1051 | 1170 | ||
1052 | int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, | 1171 | int __del_qgroup_relation(struct btrfs_trans_handle *trans, |
1053 | struct btrfs_fs_info *fs_info, u64 src, u64 dst) | 1172 | struct btrfs_fs_info *fs_info, u64 src, u64 dst) |
1054 | { | 1173 | { |
1055 | struct btrfs_root *quota_root; | 1174 | struct btrfs_root *quota_root; |
1056 | struct btrfs_qgroup *parent; | 1175 | struct btrfs_qgroup *parent; |
1057 | struct btrfs_qgroup *member; | 1176 | struct btrfs_qgroup *member; |
1058 | struct btrfs_qgroup_list *list; | 1177 | struct btrfs_qgroup_list *list; |
1178 | struct ulist *tmp; | ||
1059 | int ret = 0; | 1179 | int ret = 0; |
1060 | int err; | 1180 | int err; |
1061 | 1181 | ||
1062 | mutex_lock(&fs_info->qgroup_ioctl_lock); | 1182 | tmp = ulist_alloc(GFP_NOFS); |
1183 | if (!tmp) | ||
1184 | return -ENOMEM; | ||
1185 | |||
1063 | quota_root = fs_info->quota_root; | 1186 | quota_root = fs_info->quota_root; |
1064 | if (!quota_root) { | 1187 | if (!quota_root) { |
1065 | ret = -EINVAL; | 1188 | ret = -EINVAL; |
@@ -1088,14 +1211,27 @@ exist: | |||
1088 | 1211 | ||
1089 | spin_lock(&fs_info->qgroup_lock); | 1212 | spin_lock(&fs_info->qgroup_lock); |
1090 | del_relation_rb(fs_info, src, dst); | 1213 | del_relation_rb(fs_info, src, dst); |
1214 | ret = quick_update_accounting(fs_info, tmp, src, dst, -1); | ||
1091 | spin_unlock(&fs_info->qgroup_lock); | 1215 | spin_unlock(&fs_info->qgroup_lock); |
1092 | out: | 1216 | out: |
1217 | ulist_free(tmp); | ||
1218 | return ret; | ||
1219 | } | ||
1220 | |||
1221 | int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, | ||
1222 | struct btrfs_fs_info *fs_info, u64 src, u64 dst) | ||
1223 | { | ||
1224 | int ret = 0; | ||
1225 | |||
1226 | mutex_lock(&fs_info->qgroup_ioctl_lock); | ||
1227 | ret = __del_qgroup_relation(trans, fs_info, src, dst); | ||
1093 | mutex_unlock(&fs_info->qgroup_ioctl_lock); | 1228 | mutex_unlock(&fs_info->qgroup_ioctl_lock); |
1229 | |||
1094 | return ret; | 1230 | return ret; |
1095 | } | 1231 | } |
1096 | 1232 | ||
1097 | int btrfs_create_qgroup(struct btrfs_trans_handle *trans, | 1233 | int btrfs_create_qgroup(struct btrfs_trans_handle *trans, |
1098 | struct btrfs_fs_info *fs_info, u64 qgroupid, char *name) | 1234 | struct btrfs_fs_info *fs_info, u64 qgroupid) |
1099 | { | 1235 | { |
1100 | struct btrfs_root *quota_root; | 1236 | struct btrfs_root *quota_root; |
1101 | struct btrfs_qgroup *qgroup; | 1237 | struct btrfs_qgroup *qgroup; |
@@ -1133,6 +1269,7 @@ int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, | |||
1133 | { | 1269 | { |
1134 | struct btrfs_root *quota_root; | 1270 | struct btrfs_root *quota_root; |
1135 | struct btrfs_qgroup *qgroup; | 1271 | struct btrfs_qgroup *qgroup; |
1272 | struct btrfs_qgroup_list *list; | ||
1136 | int ret = 0; | 1273 | int ret = 0; |
1137 | 1274 | ||
1138 | mutex_lock(&fs_info->qgroup_ioctl_lock); | 1275 | mutex_lock(&fs_info->qgroup_ioctl_lock); |
@@ -1147,15 +1284,24 @@ int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, | |||
1147 | ret = -ENOENT; | 1284 | ret = -ENOENT; |
1148 | goto out; | 1285 | goto out; |
1149 | } else { | 1286 | } else { |
1150 | /* check if there are no relations to this qgroup */ | 1287 | /* check if there are no children of this qgroup */ |
1151 | if (!list_empty(&qgroup->groups) || | 1288 | if (!list_empty(&qgroup->members)) { |
1152 | !list_empty(&qgroup->members)) { | ||
1153 | ret = -EBUSY; | 1289 | ret = -EBUSY; |
1154 | goto out; | 1290 | goto out; |
1155 | } | 1291 | } |
1156 | } | 1292 | } |
1157 | ret = del_qgroup_item(trans, quota_root, qgroupid); | 1293 | ret = del_qgroup_item(trans, quota_root, qgroupid); |
1158 | 1294 | ||
1295 | while (!list_empty(&qgroup->groups)) { | ||
1296 | list = list_first_entry(&qgroup->groups, | ||
1297 | struct btrfs_qgroup_list, next_group); | ||
1298 | ret = __del_qgroup_relation(trans, fs_info, | ||
1299 | qgroupid, | ||
1300 | list->group->qgroupid); | ||
1301 | if (ret) | ||
1302 | goto out; | ||
1303 | } | ||
1304 | |||
1159 | spin_lock(&fs_info->qgroup_lock); | 1305 | spin_lock(&fs_info->qgroup_lock); |
1160 | del_qgroup_rb(quota_root->fs_info, qgroupid); | 1306 | del_qgroup_rb(quota_root->fs_info, qgroupid); |
1161 | spin_unlock(&fs_info->qgroup_lock); | 1307 | spin_unlock(&fs_info->qgroup_lock); |
@@ -1184,23 +1330,27 @@ int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, | |||
1184 | ret = -ENOENT; | 1330 | ret = -ENOENT; |
1185 | goto out; | 1331 | goto out; |
1186 | } | 1332 | } |
1187 | ret = update_qgroup_limit_item(trans, quota_root, qgroupid, | 1333 | |
1188 | limit->flags, limit->max_rfer, | 1334 | spin_lock(&fs_info->qgroup_lock); |
1189 | limit->max_excl, limit->rsv_rfer, | 1335 | if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_RFER) |
1190 | limit->rsv_excl); | 1336 | qgroup->max_rfer = limit->max_rfer; |
1337 | if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) | ||
1338 | qgroup->max_excl = limit->max_excl; | ||
1339 | if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_RFER) | ||
1340 | qgroup->rsv_rfer = limit->rsv_rfer; | ||
1341 | if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_EXCL) | ||
1342 | qgroup->rsv_excl = limit->rsv_excl; | ||
1343 | qgroup->lim_flags |= limit->flags; | ||
1344 | |||
1345 | spin_unlock(&fs_info->qgroup_lock); | ||
1346 | |||
1347 | ret = update_qgroup_limit_item(trans, quota_root, qgroup); | ||
1191 | if (ret) { | 1348 | if (ret) { |
1192 | fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; | 1349 | fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; |
1193 | btrfs_info(fs_info, "unable to update quota limit for %llu", | 1350 | btrfs_info(fs_info, "unable to update quota limit for %llu", |
1194 | qgroupid); | 1351 | qgroupid); |
1195 | } | 1352 | } |
1196 | 1353 | ||
1197 | spin_lock(&fs_info->qgroup_lock); | ||
1198 | qgroup->lim_flags = limit->flags; | ||
1199 | qgroup->max_rfer = limit->max_rfer; | ||
1200 | qgroup->max_excl = limit->max_excl; | ||
1201 | qgroup->rsv_rfer = limit->rsv_rfer; | ||
1202 | qgroup->rsv_excl = limit->rsv_excl; | ||
1203 | spin_unlock(&fs_info->qgroup_lock); | ||
1204 | out: | 1354 | out: |
1205 | mutex_unlock(&fs_info->qgroup_ioctl_lock); | 1355 | mutex_unlock(&fs_info->qgroup_ioctl_lock); |
1206 | return ret; | 1356 | return ret; |
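btrfs_limit_qgroup() now treats limit->flags as a field mask, so userspace can change one limit without clobbering the others, and the in-memory qgroup is updated under qgroup_lock before the item is written out. A hypothetical caller-side illustration:

    /* hypothetical ioctl argument: raise only the referenced-bytes cap */
    struct btrfs_qgroup_limit lim = {
            .flags    = BTRFS_QGROUP_LIMIT_MAX_RFER,
            .max_rfer = 1ULL << 30, /* 1GiB; other limits stay untouched */
    };
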
@@ -1256,14 +1406,14 @@ static int comp_oper(struct btrfs_qgroup_operation *oper1, | |||
1256 | return -1; | 1406 | return -1; |
1257 | if (oper1->bytenr > oper2->bytenr) | 1407 | if (oper1->bytenr > oper2->bytenr) |
1258 | return 1; | 1408 | return 1; |
1259 | if (oper1->seq < oper2->seq) | ||
1260 | return -1; | ||
1261 | if (oper1->seq > oper2->seq) | ||
1262 | return 1; | ||
1263 | if (oper1->ref_root < oper2->ref_root) | 1409 | if (oper1->ref_root < oper2->ref_root) |
1264 | return -1; | 1410 | return -1; |
1265 | if (oper1->ref_root > oper2->ref_root) | 1411 | if (oper1->ref_root > oper2->ref_root) |
1266 | return 1; | 1412 | return 1; |
1413 | if (oper1->seq < oper2->seq) | ||
1414 | return -1; | ||
1415 | if (oper1->seq > oper2->seq) | ||
1416 | return 1; | ||
1267 | if (oper1->type < oper2->type) | 1417 | if (oper1->type < oper2->type) |
1268 | return -1; | 1418 | return -1; |
1269 | if (oper1->type > oper2->type) | 1419 | if (oper1->type > oper2->type) |
@@ -1372,19 +1522,10 @@ int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans, | |||
1372 | return 0; | 1522 | return 0; |
1373 | } | 1523 | } |
1374 | 1524 | ||
1375 | /* | ||
1376 | * The easy accounting, if we are adding/removing the only ref for an extent | ||
1377 | * then this qgroup and all of the parent qgroups get their refrence and | ||
1378 | * exclusive counts adjusted. | ||
1379 | */ | ||
1380 | static int qgroup_excl_accounting(struct btrfs_fs_info *fs_info, | 1525 | static int qgroup_excl_accounting(struct btrfs_fs_info *fs_info, |
1381 | struct btrfs_qgroup_operation *oper) | 1526 | struct btrfs_qgroup_operation *oper) |
1382 | { | 1527 | { |
1383 | struct btrfs_qgroup *qgroup; | ||
1384 | struct ulist *tmp; | 1528 | struct ulist *tmp; |
1385 | struct btrfs_qgroup_list *glist; | ||
1386 | struct ulist_node *unode; | ||
1387 | struct ulist_iterator uiter; | ||
1388 | int sign = 0; | 1529 | int sign = 0; |
1389 | int ret = 0; | 1530 | int ret = 0; |
1390 | 1531 | ||
@@ -1395,9 +1536,7 @@ static int qgroup_excl_accounting(struct btrfs_fs_info *fs_info, | |||
1395 | spin_lock(&fs_info->qgroup_lock); | 1536 | spin_lock(&fs_info->qgroup_lock); |
1396 | if (!fs_info->quota_root) | 1537 | if (!fs_info->quota_root) |
1397 | goto out; | 1538 | goto out; |
1398 | qgroup = find_qgroup_rb(fs_info, oper->ref_root); | 1539 | |
1399 | if (!qgroup) | ||
1400 | goto out; | ||
1401 | switch (oper->type) { | 1540 | switch (oper->type) { |
1402 | case BTRFS_QGROUP_OPER_ADD_EXCL: | 1541 | case BTRFS_QGROUP_OPER_ADD_EXCL: |
1403 | sign = 1; | 1542 | sign = 1; |
@@ -1408,43 +1547,8 @@ static int qgroup_excl_accounting(struct btrfs_fs_info *fs_info, | |||
1408 | default: | 1547 | default: |
1409 | ASSERT(0); | 1548 | ASSERT(0); |
1410 | } | 1549 | } |
1411 | qgroup->rfer += sign * oper->num_bytes; | 1550 | ret = __qgroup_excl_accounting(fs_info, tmp, oper->ref_root, |
1412 | qgroup->rfer_cmpr += sign * oper->num_bytes; | 1551 | oper->num_bytes, sign); |
1413 | |||
1414 | WARN_ON(sign < 0 && qgroup->excl < oper->num_bytes); | ||
1415 | qgroup->excl += sign * oper->num_bytes; | ||
1416 | qgroup->excl_cmpr += sign * oper->num_bytes; | ||
1417 | |||
1418 | qgroup_dirty(fs_info, qgroup); | ||
1419 | |||
1420 | /* Get all of the parent groups that contain this qgroup */ | ||
1421 | list_for_each_entry(glist, &qgroup->groups, next_group) { | ||
1422 | ret = ulist_add(tmp, glist->group->qgroupid, | ||
1423 | ptr_to_u64(glist->group), GFP_ATOMIC); | ||
1424 | if (ret < 0) | ||
1425 | goto out; | ||
1426 | } | ||
1427 | |||
1428 | /* Iterate all of the parents and adjust their reference counts */ | ||
1429 | ULIST_ITER_INIT(&uiter); | ||
1430 | while ((unode = ulist_next(tmp, &uiter))) { | ||
1431 | qgroup = u64_to_ptr(unode->aux); | ||
1432 | qgroup->rfer += sign * oper->num_bytes; | ||
1433 | qgroup->rfer_cmpr += sign * oper->num_bytes; | ||
1434 | WARN_ON(sign < 0 && qgroup->excl < oper->num_bytes); | ||
1435 | qgroup->excl += sign * oper->num_bytes; | ||
1436 | qgroup->excl_cmpr += sign * oper->num_bytes; | ||
1437 | qgroup_dirty(fs_info, qgroup); | ||
1438 | |||
1439 | /* Add any parents of the parents */ | ||
1440 | list_for_each_entry(glist, &qgroup->groups, next_group) { | ||
1441 | ret = ulist_add(tmp, glist->group->qgroupid, | ||
1442 | ptr_to_u64(glist->group), GFP_ATOMIC); | ||
1443 | if (ret < 0) | ||
1444 | goto out; | ||
1445 | } | ||
1446 | } | ||
1447 | ret = 0; | ||
1448 | out: | 1552 | out: |
1449 | spin_unlock(&fs_info->qgroup_lock); | 1553 | spin_unlock(&fs_info->qgroup_lock); |
1450 | ulist_free(tmp); | 1554 | ulist_free(tmp); |
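The body deleted above is exactly the work the new __qgroup_excl_accounting() helper has to perform: adjust the rfer/excl counters on the qgroup for ref_root and on every ancestor reached through the ulist. A sketch of that walk, assuming the helper keeps the removed logic (it folds the starting qgroup into the same loop, and must run under qgroup_lock, which the caller still holds):

    static int __qgroup_excl_accounting_sketch(struct btrfs_fs_info *fs_info,
                                               struct ulist *tmp, u64 ref_root,
                                               u64 num_bytes, int sign)
    {
            struct btrfs_qgroup *qgroup;
            struct btrfs_qgroup_list *glist;
            struct ulist_node *unode;
            struct ulist_iterator uiter;
            int ret;

            qgroup = find_qgroup_rb(fs_info, ref_root);
            if (!qgroup)
                    return 0;

            /* Seed the walk with the qgroup itself, then pull in parents. */
            ret = ulist_add(tmp, qgroup->qgroupid, ptr_to_u64(qgroup),
                            GFP_ATOMIC);
            if (ret < 0)
                    return ret;
            ULIST_ITER_INIT(&uiter);
            while ((unode = ulist_next(tmp, &uiter))) {
                    qgroup = u64_to_ptr(unode->aux);
                    qgroup->rfer += sign * num_bytes;
                    qgroup->rfer_cmpr += sign * num_bytes;
                    WARN_ON(sign < 0 && qgroup->excl < num_bytes);
                    qgroup->excl += sign * num_bytes;
                    qgroup->excl_cmpr += sign * num_bytes;
                    qgroup_dirty(fs_info, qgroup);
                    /* Queue any parents of this qgroup for the same update. */
                    list_for_each_entry(glist, &qgroup->groups, next_group) {
                            ret = ulist_add(tmp, glist->group->qgroupid,
                                            ptr_to_u64(glist->group),
                                            GFP_ATOMIC);
                            if (ret < 0)
                                    return ret;
                    }
            }
            return 0;
    }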
@@ -1845,7 +1949,7 @@ static int qgroup_shared_accounting(struct btrfs_trans_handle *trans, | |||
1845 | struct ulist *roots = NULL; | 1949 | struct ulist *roots = NULL; |
1846 | struct ulist *qgroups, *tmp; | 1950 | struct ulist *qgroups, *tmp; |
1847 | struct btrfs_qgroup *qgroup; | 1951 | struct btrfs_qgroup *qgroup; |
1848 | struct seq_list elem = {}; | 1952 | struct seq_list elem = SEQ_LIST_INIT(elem); |
1849 | u64 seq; | 1953 | u64 seq; |
1850 | int old_roots = 0; | 1954 | int old_roots = 0; |
1851 | int new_roots = 0; | 1955 | int new_roots = 0; |
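SEQ_LIST_INIT() replaces the empty-struct initializer so the embedded list head is valid from the start rather than merely zeroed. Presumably the macro expands to roughly the following (sketch; the real definition lives in the btrfs headers):

    /* Sketch: initialize both the list head and the sequence number. */
    #define SEQ_LIST_INIT(name)     \
            { .list = LIST_HEAD_INIT((name).list), .seq = 0 }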
@@ -1967,7 +2071,7 @@ static int qgroup_subtree_accounting(struct btrfs_trans_handle *trans, | |||
1967 | int err; | 2071 | int err; |
1968 | struct btrfs_qgroup *qg; | 2072 | struct btrfs_qgroup *qg; |
1969 | u64 root_obj = 0; | 2073 | u64 root_obj = 0; |
1970 | struct seq_list elem = {}; | 2074 | struct seq_list elem = SEQ_LIST_INIT(elem); |
1971 | 2075 | ||
1972 | parents = ulist_alloc(GFP_NOFS); | 2076 | parents = ulist_alloc(GFP_NOFS); |
1973 | if (!parents) | 2077 | if (!parents) |
@@ -2156,6 +2260,10 @@ int btrfs_run_qgroups(struct btrfs_trans_handle *trans, | |||
2156 | if (ret) | 2260 | if (ret) |
2157 | fs_info->qgroup_flags |= | 2261 | fs_info->qgroup_flags |= |
2158 | BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; | 2262 | BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; |
2263 | ret = update_qgroup_limit_item(trans, quota_root, qgroup); | ||
2264 | if (ret) | ||
2265 | fs_info->qgroup_flags |= | ||
2266 | BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; | ||
2159 | spin_lock(&fs_info->qgroup_lock); | 2267 | spin_lock(&fs_info->qgroup_lock); |
2160 | } | 2268 | } |
2161 | if (fs_info->quota_enabled) | 2269 | if (fs_info->quota_enabled) |
@@ -2219,6 +2327,11 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, | |||
2219 | ret = -EINVAL; | 2327 | ret = -EINVAL; |
2220 | goto out; | 2328 | goto out; |
2221 | } | 2329 | } |
2330 | |||
2331 | if ((srcgroup->qgroupid >> 48) <= (objectid >> 48)) { | ||
2332 | ret = -EINVAL; | ||
2333 | goto out; | ||
2334 | } | ||
2222 | ++i_qgroups; | 2335 | ++i_qgroups; |
2223 | } | 2336 | } |
2224 | } | 2337 | } |
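The new check rejects inherit targets whose level, encoded in the upper 16 bits of the qgroup ID, is not strictly higher than that of the new subvolume: a subvolume can only be made a member of a higher-level qgroup. A sketch of the encoding the comparison relies on (helper names are illustrative, not from this patch):

    /* A qgroupid packs a 16-bit level above a 48-bit object ID. */
    static inline u64 qgroup_level(u64 qgroupid)
    {
            return qgroupid >> 48;
    }

    static inline u64 qgroup_objectid(u64 qgroupid)
    {
            return qgroupid & ((1ULL << 48) - 1);
    }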
@@ -2230,17 +2343,6 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, | |||
2230 | if (ret) | 2343 | if (ret) |
2231 | goto out; | 2344 | goto out; |
2232 | 2345 | ||
2233 | if (inherit && inherit->flags & BTRFS_QGROUP_INHERIT_SET_LIMITS) { | ||
2234 | ret = update_qgroup_limit_item(trans, quota_root, objectid, | ||
2235 | inherit->lim.flags, | ||
2236 | inherit->lim.max_rfer, | ||
2237 | inherit->lim.max_excl, | ||
2238 | inherit->lim.rsv_rfer, | ||
2239 | inherit->lim.rsv_excl); | ||
2240 | if (ret) | ||
2241 | goto out; | ||
2242 | } | ||
2243 | |||
2244 | if (srcid) { | 2346 | if (srcid) { |
2245 | struct btrfs_root *srcroot; | 2347 | struct btrfs_root *srcroot; |
2246 | struct btrfs_key srckey; | 2348 | struct btrfs_key srckey; |
@@ -2286,6 +2388,22 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, | |||
2286 | goto unlock; | 2388 | goto unlock; |
2287 | } | 2389 | } |
2288 | 2390 | ||
2391 | if (inherit && inherit->flags & BTRFS_QGROUP_INHERIT_SET_LIMITS) { | ||
2392 | dstgroup->lim_flags = inherit->lim.flags; | ||
2393 | dstgroup->max_rfer = inherit->lim.max_rfer; | ||
2394 | dstgroup->max_excl = inherit->lim.max_excl; | ||
2395 | dstgroup->rsv_rfer = inherit->lim.rsv_rfer; | ||
2396 | dstgroup->rsv_excl = inherit->lim.rsv_excl; | ||
2397 | |||
2398 | ret = update_qgroup_limit_item(trans, quota_root, dstgroup); | ||
2399 | if (ret) { | ||
2400 | fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; | ||
2401 | btrfs_info(fs_info, "unable to update quota limit for %llu", | ||
2402 | dstgroup->qgroupid); | ||
2403 | goto unlock; | ||
2404 | } | ||
2405 | } | ||
2406 | |||
2289 | if (srcid) { | 2407 | if (srcid) { |
2290 | srcgroup = find_qgroup_rb(fs_info, srcid); | 2408 | srcgroup = find_qgroup_rb(fs_info, srcid); |
2291 | if (!srcgroup) | 2409 | if (!srcgroup) |
@@ -2302,6 +2420,14 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, | |||
2302 | dstgroup->excl_cmpr = level_size; | 2420 | dstgroup->excl_cmpr = level_size; |
2303 | srcgroup->excl = level_size; | 2421 | srcgroup->excl = level_size; |
2304 | srcgroup->excl_cmpr = level_size; | 2422 | srcgroup->excl_cmpr = level_size; |
2423 | |||
2424 | /* inherit the limit info */ | ||
2425 | dstgroup->lim_flags = srcgroup->lim_flags; | ||
2426 | dstgroup->max_rfer = srcgroup->max_rfer; | ||
2427 | dstgroup->max_excl = srcgroup->max_excl; | ||
2428 | dstgroup->rsv_rfer = srcgroup->rsv_rfer; | ||
2429 | dstgroup->rsv_excl = srcgroup->rsv_excl; | ||
2430 | |||
2305 | qgroup_dirty(fs_info, dstgroup); | 2431 | qgroup_dirty(fs_info, dstgroup); |
2306 | qgroup_dirty(fs_info, srcgroup); | 2432 | qgroup_dirty(fs_info, srcgroup); |
2307 | } | 2433 | } |
@@ -2358,12 +2484,6 @@ out: | |||
2358 | return ret; | 2484 | return ret; |
2359 | } | 2485 | } |
2360 | 2486 | ||
2361 | /* | ||
2362 | * reserve some space for a qgroup and all its parents. The reservation takes | ||
2363 | * place with start_transaction or dealloc_reserve, similar to ENOSPC | ||
2364 | * accounting. If not enough space is available, EDQUOT is returned. | ||
2365 | * We assume that the requested space is new for all qgroups. | ||
2366 | */ | ||
2367 | int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes) | 2487 | int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes) |
2368 | { | 2488 | { |
2369 | struct btrfs_root *quota_root; | 2489 | struct btrfs_root *quota_root; |
@@ -2513,7 +2633,7 @@ void assert_qgroups_uptodate(struct btrfs_trans_handle *trans) | |||
2513 | 2633 | ||
2514 | /* | 2634 | /* |
2515 | * returns < 0 on error, 0 when more leaves are to be scanned. | 2635 | * returns < 0 on error, 0 when more leaves are to be scanned. |
2516 | * returns 1 when done, 2 when done and FLAG_INCONSISTENT was cleared. | 2636 | * returns 1 when done. |
2517 | */ | 2637 | */ |
2518 | static int | 2638 | static int |
2519 | qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path, | 2639 | qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path, |
@@ -2522,7 +2642,7 @@ qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path, | |||
2522 | { | 2642 | { |
2523 | struct btrfs_key found; | 2643 | struct btrfs_key found; |
2524 | struct ulist *roots = NULL; | 2644 | struct ulist *roots = NULL; |
2525 | struct seq_list tree_mod_seq_elem = {}; | 2645 | struct seq_list tree_mod_seq_elem = SEQ_LIST_INIT(tree_mod_seq_elem); |
2526 | u64 num_bytes; | 2646 | u64 num_bytes; |
2527 | u64 seq; | 2647 | u64 seq; |
2528 | int new_roots; | 2648 | int new_roots; |
@@ -2618,6 +2738,7 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work) | |||
2618 | struct ulist *tmp = NULL, *qgroups = NULL; | 2738 | struct ulist *tmp = NULL, *qgroups = NULL; |
2619 | struct extent_buffer *scratch_leaf = NULL; | 2739 | struct extent_buffer *scratch_leaf = NULL; |
2620 | int err = -ENOMEM; | 2740 | int err = -ENOMEM; |
2741 | int ret = 0; | ||
2621 | 2742 | ||
2622 | path = btrfs_alloc_path(); | 2743 | path = btrfs_alloc_path(); |
2623 | if (!path) | 2744 | if (!path) |
@@ -2660,7 +2781,7 @@ out: | |||
2660 | mutex_lock(&fs_info->qgroup_rescan_lock); | 2781 | mutex_lock(&fs_info->qgroup_rescan_lock); |
2661 | fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN; | 2782 | fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN; |
2662 | 2783 | ||
2663 | if (err == 2 && | 2784 | if (err > 0 && |
2664 | fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) { | 2785 | fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) { |
2665 | fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; | 2786 | fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; |
2666 | } else if (err < 0) { | 2787 | } else if (err < 0) { |
@@ -2668,13 +2789,33 @@ out: | |||
2668 | } | 2789 | } |
2669 | mutex_unlock(&fs_info->qgroup_rescan_lock); | 2790 | mutex_unlock(&fs_info->qgroup_rescan_lock); |
2670 | 2791 | ||
2792 | /* | ||
2793 | * only update status, since the previous part has already updated the | ||
2794 | * qgroup info. | ||
2795 | */ | ||
2796 | trans = btrfs_start_transaction(fs_info->quota_root, 1); | ||
2797 | if (IS_ERR(trans)) { | ||
2798 | err = PTR_ERR(trans); | ||
2799 | btrfs_err(fs_info, | ||
2800 | "failed to start transaction for status update: %d\n", | ||
2801 | err); | ||
2802 | goto done; | ||
2803 | } | ||
2804 | ret = update_qgroup_status_item(trans, fs_info, fs_info->quota_root); | ||
2805 | if (ret < 0) { | ||
2806 | err = ret; | ||
2807 | btrfs_err(fs_info, "failed to update qgroup status: %d\n", err); | ||
2808 | } | ||
2809 | btrfs_end_transaction(trans, fs_info->quota_root); | ||
2810 | |||
2671 | if (err >= 0) { | 2811 | if (err >= 0) { |
2672 | btrfs_info(fs_info, "qgroup scan completed%s", | 2812 | btrfs_info(fs_info, "qgroup scan completed%s", |
2673 | err == 2 ? " (inconsistency flag cleared)" : ""); | 2813 | err > 0 ? " (inconsistency flag cleared)" : ""); |
2674 | } else { | 2814 | } else { |
2675 | btrfs_err(fs_info, "qgroup scan failed with %d", err); | 2815 | btrfs_err(fs_info, "qgroup scan failed with %d", err); |
2676 | } | 2816 | } |
2677 | 2817 | ||
2818 | done: | ||
2678 | complete_all(&fs_info->qgroup_rescan_completion); | 2819 | complete_all(&fs_info->qgroup_rescan_completion); |
2679 | } | 2820 | } |
2680 | 2821 | ||
@@ -2709,7 +2850,6 @@ qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid, | |||
2709 | mutex_unlock(&fs_info->qgroup_rescan_lock); | 2850 | mutex_unlock(&fs_info->qgroup_rescan_lock); |
2710 | goto err; | 2851 | goto err; |
2711 | } | 2852 | } |
2712 | |||
2713 | fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_RESCAN; | 2853 | fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_RESCAN; |
2714 | } | 2854 | } |
2715 | 2855 | ||
diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h index 18cc68ca3090..c5242aa9a4b2 100644 --- a/fs/btrfs/qgroup.h +++ b/fs/btrfs/qgroup.h | |||
@@ -70,8 +70,7 @@ int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, | |||
70 | int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, | 70 | int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, |
71 | struct btrfs_fs_info *fs_info, u64 src, u64 dst); | 71 | struct btrfs_fs_info *fs_info, u64 src, u64 dst); |
72 | int btrfs_create_qgroup(struct btrfs_trans_handle *trans, | 72 | int btrfs_create_qgroup(struct btrfs_trans_handle *trans, |
73 | struct btrfs_fs_info *fs_info, u64 qgroupid, | 73 | struct btrfs_fs_info *fs_info, u64 qgroupid); |
74 | char *name); | ||
75 | int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, | 74 | int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, |
76 | struct btrfs_fs_info *fs_info, u64 qgroupid); | 75 | struct btrfs_fs_info *fs_info, u64 qgroupid); |
77 | int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, | 76 | int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, |
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index 5264858ed768..fa72068bd256 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c | |||
@@ -237,12 +237,8 @@ int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info) | |||
237 | } | 237 | } |
238 | 238 | ||
239 | x = cmpxchg(&info->stripe_hash_table, NULL, table); | 239 | x = cmpxchg(&info->stripe_hash_table, NULL, table); |
240 | if (x) { | 240 | if (x) |
241 | if (is_vmalloc_addr(x)) | 241 | kvfree(x); |
242 | vfree(x); | ||
243 | else | ||
244 | kfree(x); | ||
245 | } | ||
246 | return 0; | 242 | return 0; |
247 | } | 243 | } |
248 | 244 | ||
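kvfree() collapses the is_vmalloc_addr() branch both here and in btrfs_free_stripe_hash_table() below; a sketch of what the helper does internally:

    /* Sketch of the kvfree() behavior the conversion relies on. */
    static void kvfree_sketch(const void *addr)
    {
            if (is_vmalloc_addr(addr))
                    vfree(addr);
            else
                    kfree(addr);
    }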
@@ -453,10 +449,7 @@ void btrfs_free_stripe_hash_table(struct btrfs_fs_info *info) | |||
453 | if (!info->stripe_hash_table) | 449 | if (!info->stripe_hash_table) |
454 | return; | 450 | return; |
455 | btrfs_clear_rbio_cache(info); | 451 | btrfs_clear_rbio_cache(info); |
456 | if (is_vmalloc_addr(info->stripe_hash_table)) | 452 | kvfree(info->stripe_hash_table); |
457 | vfree(info->stripe_hash_table); | ||
458 | else | ||
459 | kfree(info->stripe_hash_table); | ||
460 | info->stripe_hash_table = NULL; | 453 | info->stripe_hash_table = NULL; |
461 | } | 454 | } |
462 | 455 | ||
@@ -1807,8 +1800,7 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio) | |||
1807 | int err; | 1800 | int err; |
1808 | int i; | 1801 | int i; |
1809 | 1802 | ||
1810 | pointers = kzalloc(rbio->real_stripes * sizeof(void *), | 1803 | pointers = kcalloc(rbio->real_stripes, sizeof(void *), GFP_NOFS); |
1811 | GFP_NOFS); | ||
1812 | if (!pointers) { | 1804 | if (!pointers) { |
1813 | err = -ENOMEM; | 1805 | err = -ENOMEM; |
1814 | goto cleanup_io; | 1806 | goto cleanup_io; |
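The kzalloc(n * size) to kcalloc(n, size) conversions here and in scrub.c below are behavior-preserving but gain an overflow check on the multiplication; roughly:

    /* Sketch of the kcalloc() contract: zeroed array, checked multiply. */
    static void *kcalloc_sketch(size_t n, size_t size, gfp_t flags)
    {
            if (size != 0 && n > SIZE_MAX / size)
                    return NULL;    /* n * size would overflow */
            return kzalloc(n * size, flags);
    }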
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index d83085381bcc..74b24b01d574 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c | |||
@@ -3027,7 +3027,7 @@ int prealloc_file_extent_cluster(struct inode *inode, | |||
3027 | mutex_lock(&inode->i_mutex); | 3027 | mutex_lock(&inode->i_mutex); |
3028 | 3028 | ||
3029 | ret = btrfs_check_data_free_space(inode, cluster->end + | 3029 | ret = btrfs_check_data_free_space(inode, cluster->end + |
3030 | 1 - cluster->start); | 3030 | 1 - cluster->start, 0); |
3031 | if (ret) | 3031 | if (ret) |
3032 | goto out; | 3032 | goto out; |
3033 | 3033 | ||
@@ -3430,7 +3430,9 @@ static int block_use_full_backref(struct reloc_control *rc, | |||
3430 | } | 3430 | } |
3431 | 3431 | ||
3432 | static int delete_block_group_cache(struct btrfs_fs_info *fs_info, | 3432 | static int delete_block_group_cache(struct btrfs_fs_info *fs_info, |
3433 | struct inode *inode, u64 ino) | 3433 | struct btrfs_block_group_cache *block_group, |
3434 | struct inode *inode, | ||
3435 | u64 ino) | ||
3434 | { | 3436 | { |
3435 | struct btrfs_key key; | 3437 | struct btrfs_key key; |
3436 | struct btrfs_root *root = fs_info->tree_root; | 3438 | struct btrfs_root *root = fs_info->tree_root; |
@@ -3463,7 +3465,7 @@ truncate: | |||
3463 | goto out; | 3465 | goto out; |
3464 | } | 3466 | } |
3465 | 3467 | ||
3466 | ret = btrfs_truncate_free_space_cache(root, trans, inode); | 3468 | ret = btrfs_truncate_free_space_cache(root, trans, block_group, inode); |
3467 | 3469 | ||
3468 | btrfs_end_transaction(trans, root); | 3470 | btrfs_end_transaction(trans, root); |
3469 | btrfs_btree_balance_dirty(root); | 3471 | btrfs_btree_balance_dirty(root); |
@@ -3509,6 +3511,7 @@ static int find_data_references(struct reloc_control *rc, | |||
3509 | */ | 3511 | */ |
3510 | if (ref_root == BTRFS_ROOT_TREE_OBJECTID) { | 3512 | if (ref_root == BTRFS_ROOT_TREE_OBJECTID) { |
3511 | ret = delete_block_group_cache(rc->extent_root->fs_info, | 3513 | ret = delete_block_group_cache(rc->extent_root->fs_info, |
3514 | rc->block_group, | ||
3512 | NULL, ref_objectid); | 3515 | NULL, ref_objectid); |
3513 | if (ret != -ENOENT) | 3516 | if (ret != -ENOENT) |
3514 | return ret; | 3517 | return ret; |
@@ -4223,7 +4226,7 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) | |||
4223 | btrfs_free_path(path); | 4226 | btrfs_free_path(path); |
4224 | 4227 | ||
4225 | if (!IS_ERR(inode)) | 4228 | if (!IS_ERR(inode)) |
4226 | ret = delete_block_group_cache(fs_info, inode, 0); | 4229 | ret = delete_block_group_cache(fs_info, rc->block_group, inode, 0); |
4227 | else | 4230 | else |
4228 | ret = PTR_ERR(inode); | 4231 | ret = PTR_ERR(inode); |
4229 | 4232 | ||
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index ec57687c9a4d..ab5811545a98 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c | |||
@@ -964,9 +964,8 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) | |||
964 | * the statistics. | 964 | * the statistics. |
965 | */ | 965 | */ |
966 | 966 | ||
967 | sblocks_for_recheck = kzalloc(BTRFS_MAX_MIRRORS * | 967 | sblocks_for_recheck = kcalloc(BTRFS_MAX_MIRRORS, |
968 | sizeof(*sblocks_for_recheck), | 968 | sizeof(*sblocks_for_recheck), GFP_NOFS); |
969 | GFP_NOFS); | ||
970 | if (!sblocks_for_recheck) { | 969 | if (!sblocks_for_recheck) { |
971 | spin_lock(&sctx->stat_lock); | 970 | spin_lock(&sctx->stat_lock); |
972 | sctx->stat.malloc_errors++; | 971 | sctx->stat.malloc_errors++; |
@@ -2319,7 +2318,7 @@ static inline void __scrub_mark_bitmap(struct scrub_parity *sparity, | |||
2319 | unsigned long *bitmap, | 2318 | unsigned long *bitmap, |
2320 | u64 start, u64 len) | 2319 | u64 start, u64 len) |
2321 | { | 2320 | { |
2322 | int offset; | 2321 | u32 offset; |
2323 | int nsectors; | 2322 | int nsectors; |
2324 | int sectorsize = sparity->sctx->dev_root->sectorsize; | 2323 | int sectorsize = sparity->sctx->dev_root->sectorsize; |
2325 | 2324 | ||
@@ -2329,7 +2328,7 @@ static inline void __scrub_mark_bitmap(struct scrub_parity *sparity, | |||
2329 | } | 2328 | } |
2330 | 2329 | ||
2331 | start -= sparity->logic_start; | 2330 | start -= sparity->logic_start; |
2332 | offset = (int)do_div(start, sparity->stripe_len); | 2331 | start = div_u64_rem(start, sparity->stripe_len, &offset); |
2333 | offset /= sectorsize; | 2332 | offset /= sectorsize; |
2334 | nsectors = (int)len / sectorsize; | 2333 | nsectors = (int)len / sectorsize; |
2335 | 2334 | ||
@@ -2612,8 +2611,8 @@ static int get_raid56_logic_offset(u64 physical, int num, | |||
2612 | int j = 0; | 2611 | int j = 0; |
2613 | u64 stripe_nr; | 2612 | u64 stripe_nr; |
2614 | u64 last_offset; | 2613 | u64 last_offset; |
2615 | int stripe_index; | 2614 | u32 stripe_index; |
2616 | int rot; | 2615 | u32 rot; |
2617 | 2616 | ||
2618 | last_offset = (physical - map->stripes[num].physical) * | 2617 | last_offset = (physical - map->stripes[num].physical) * |
2619 | nr_data_stripes(map); | 2618 | nr_data_stripes(map); |
@@ -2624,12 +2623,11 @@ static int get_raid56_logic_offset(u64 physical, int num, | |||
2624 | for (i = 0; i < nr_data_stripes(map); i++) { | 2623 | for (i = 0; i < nr_data_stripes(map); i++) { |
2625 | *offset = last_offset + i * map->stripe_len; | 2624 | *offset = last_offset + i * map->stripe_len; |
2626 | 2625 | ||
2627 | stripe_nr = *offset; | 2626 | stripe_nr = div_u64(*offset, map->stripe_len); |
2628 | do_div(stripe_nr, map->stripe_len); | 2627 | stripe_nr = div_u64(stripe_nr, nr_data_stripes(map)); |
2629 | do_div(stripe_nr, nr_data_stripes(map)); | ||
2630 | 2628 | ||
2631 | /* Work out the disk rotation on this stripe-set */ | 2629 | /* Work out the disk rotation on this stripe-set */ |
2632 | rot = do_div(stripe_nr, map->num_stripes); | 2630 | stripe_nr = div_u64_rem(stripe_nr, map->num_stripes, &rot); |
2633 | /* calculate which stripe this data is located on */ | 2631 | /* calculate which stripe this data is located on */ |
2634 | rot += i; | 2632 | rot += i; |
2635 | stripe_index = rot % map->num_stripes; | 2633 | stripe_index = rot % map->num_stripes; |
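The do_div() conversions in this file all follow one pattern: do_div() divides its first argument in place and returns the remainder, while div_u64() returns the quotient and div_u64_rem() returns the quotient and stores a 32-bit remainder, which is why offset, rot and stripe_index become u32. In sketch form, on the stripe math above:

    /* Sketch contrasting the helper styles used by this conversion. */
    static u64 stripe_rotation(u64 offset, u32 stripe_len,
                               u32 num_stripes, u32 *rot)
    {
            u64 stripe_nr;

            stripe_nr = div_u64(offset, stripe_len);  /* quotient only */
            /* quotient returned, remainder stored, no in-place update */
            return div_u64_rem(stripe_nr, num_stripes, rot);
    }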
@@ -2995,10 +2993,9 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, | |||
2995 | int extent_mirror_num; | 2993 | int extent_mirror_num; |
2996 | int stop_loop = 0; | 2994 | int stop_loop = 0; |
2997 | 2995 | ||
2998 | nstripes = length; | ||
2999 | physical = map->stripes[num].physical; | 2996 | physical = map->stripes[num].physical; |
3000 | offset = 0; | 2997 | offset = 0; |
3001 | do_div(nstripes, map->stripe_len); | 2998 | nstripes = div_u64(length, map->stripe_len); |
3002 | if (map->type & BTRFS_BLOCK_GROUP_RAID0) { | 2999 | if (map->type & BTRFS_BLOCK_GROUP_RAID0) { |
3003 | offset = map->stripe_len * num; | 3000 | offset = map->stripe_len * num; |
3004 | increment = map->stripe_len * map->num_stripes; | 3001 | increment = map->stripe_len * map->num_stripes; |
@@ -3563,7 +3560,7 @@ static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info, | |||
3563 | int is_dev_replace) | 3560 | int is_dev_replace) |
3564 | { | 3561 | { |
3565 | int ret = 0; | 3562 | int ret = 0; |
3566 | int flags = WQ_FREEZABLE | WQ_UNBOUND; | 3563 | unsigned int flags = WQ_FREEZABLE | WQ_UNBOUND; |
3567 | int max_active = fs_info->thread_pool_size; | 3564 | int max_active = fs_info->thread_pool_size; |
3568 | 3565 | ||
3569 | if (fs_info->scrub_workers_refcnt == 0) { | 3566 | if (fs_info->scrub_workers_refcnt == 0) { |
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index d6033f540cc7..a1216f9b4917 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c | |||
@@ -3067,48 +3067,6 @@ static struct pending_dir_move *get_pending_dir_moves(struct send_ctx *sctx, | |||
3067 | return NULL; | 3067 | return NULL; |
3068 | } | 3068 | } |
3069 | 3069 | ||
3070 | static int path_loop(struct send_ctx *sctx, struct fs_path *name, | ||
3071 | u64 ino, u64 gen, u64 *ancestor_ino) | ||
3072 | { | ||
3073 | int ret = 0; | ||
3074 | u64 parent_inode = 0; | ||
3075 | u64 parent_gen = 0; | ||
3076 | u64 start_ino = ino; | ||
3077 | |||
3078 | *ancestor_ino = 0; | ||
3079 | while (ino != BTRFS_FIRST_FREE_OBJECTID) { | ||
3080 | fs_path_reset(name); | ||
3081 | |||
3082 | if (is_waiting_for_rm(sctx, ino)) | ||
3083 | break; | ||
3084 | if (is_waiting_for_move(sctx, ino)) { | ||
3085 | if (*ancestor_ino == 0) | ||
3086 | *ancestor_ino = ino; | ||
3087 | ret = get_first_ref(sctx->parent_root, ino, | ||
3088 | &parent_inode, &parent_gen, name); | ||
3089 | } else { | ||
3090 | ret = __get_cur_name_and_parent(sctx, ino, gen, | ||
3091 | &parent_inode, | ||
3092 | &parent_gen, name); | ||
3093 | if (ret > 0) { | ||
3094 | ret = 0; | ||
3095 | break; | ||
3096 | } | ||
3097 | } | ||
3098 | if (ret < 0) | ||
3099 | break; | ||
3100 | if (parent_inode == start_ino) { | ||
3101 | ret = 1; | ||
3102 | if (*ancestor_ino == 0) | ||
3103 | *ancestor_ino = ino; | ||
3104 | break; | ||
3105 | } | ||
3106 | ino = parent_inode; | ||
3107 | gen = parent_gen; | ||
3108 | } | ||
3109 | return ret; | ||
3110 | } | ||
3111 | |||
3112 | static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) | 3070 | static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) |
3113 | { | 3071 | { |
3114 | struct fs_path *from_path = NULL; | 3072 | struct fs_path *from_path = NULL; |
@@ -3120,7 +3078,6 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) | |||
3120 | struct waiting_dir_move *dm = NULL; | 3078 | struct waiting_dir_move *dm = NULL; |
3121 | u64 rmdir_ino = 0; | 3079 | u64 rmdir_ino = 0; |
3122 | int ret; | 3080 | int ret; |
3123 | u64 ancestor = 0; | ||
3124 | 3081 | ||
3125 | name = fs_path_alloc(); | 3082 | name = fs_path_alloc(); |
3126 | from_path = fs_path_alloc(); | 3083 | from_path = fs_path_alloc(); |
@@ -3152,22 +3109,6 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) | |||
3152 | goto out; | 3109 | goto out; |
3153 | 3110 | ||
3154 | sctx->send_progress = sctx->cur_ino + 1; | 3111 | sctx->send_progress = sctx->cur_ino + 1; |
3155 | ret = path_loop(sctx, name, pm->ino, pm->gen, &ancestor); | ||
3156 | if (ret) { | ||
3157 | LIST_HEAD(deleted_refs); | ||
3158 | ASSERT(ancestor > BTRFS_FIRST_FREE_OBJECTID); | ||
3159 | ret = add_pending_dir_move(sctx, pm->ino, pm->gen, ancestor, | ||
3160 | &pm->update_refs, &deleted_refs, | ||
3161 | pm->is_orphan); | ||
3162 | if (ret < 0) | ||
3163 | goto out; | ||
3164 | if (rmdir_ino) { | ||
3165 | dm = get_waiting_dir_move(sctx, pm->ino); | ||
3166 | ASSERT(dm); | ||
3167 | dm->rmdir_ino = rmdir_ino; | ||
3168 | } | ||
3169 | goto out; | ||
3170 | } | ||
3171 | fs_path_reset(name); | 3112 | fs_path_reset(name); |
3172 | to_path = name; | 3113 | to_path = name; |
3173 | name = NULL; | 3114 | name = NULL; |
@@ -3610,10 +3551,27 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); | |||
3610 | if (ret < 0) | 3551 | if (ret < 0) |
3611 | goto out; | 3552 | goto out; |
3612 | if (ret) { | 3553 | if (ret) { |
3554 | struct name_cache_entry *nce; | ||
3555 | |||
3613 | ret = orphanize_inode(sctx, ow_inode, ow_gen, | 3556 | ret = orphanize_inode(sctx, ow_inode, ow_gen, |
3614 | cur->full_path); | 3557 | cur->full_path); |
3615 | if (ret < 0) | 3558 | if (ret < 0) |
3616 | goto out; | 3559 | goto out; |
3560 | /* | ||
3561 | * Make sure we clear our orphanized inode's | ||
3562 | * name from the name cache. This is because the | ||
3563 | * inode ow_inode might be an ancestor of some | ||
3564 | * other inode that will be orphanized as well | ||
3565 | * later and has an inode number greater than | ||
3566 | * sctx->send_progress. We need to prevent | ||
3567 | * future name lookups from using the old name | ||
3568 | * and get instead the orphan name. | ||
3569 | */ | ||
3570 | nce = name_cache_search(sctx, ow_inode, ow_gen); | ||
3571 | if (nce) { | ||
3572 | name_cache_delete(sctx, nce); | ||
3573 | kfree(nce); | ||
3574 | } | ||
3617 | } else { | 3575 | } else { |
3618 | ret = send_unlink(sctx, cur->full_path); | 3576 | ret = send_unlink(sctx, cur->full_path); |
3619 | if (ret < 0) | 3577 | if (ret < 0) |
@@ -5852,19 +5810,20 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) | |||
5852 | ret = PTR_ERR(clone_root); | 5810 | ret = PTR_ERR(clone_root); |
5853 | goto out; | 5811 | goto out; |
5854 | } | 5812 | } |
5855 | clone_sources_to_rollback = i + 1; | ||
5856 | spin_lock(&clone_root->root_item_lock); | 5813 | spin_lock(&clone_root->root_item_lock); |
5857 | clone_root->send_in_progress++; | 5814 | if (!btrfs_root_readonly(clone_root) || |
5858 | if (!btrfs_root_readonly(clone_root)) { | 5815 | btrfs_root_dead(clone_root)) { |
5859 | spin_unlock(&clone_root->root_item_lock); | 5816 | spin_unlock(&clone_root->root_item_lock); |
5860 | srcu_read_unlock(&fs_info->subvol_srcu, index); | 5817 | srcu_read_unlock(&fs_info->subvol_srcu, index); |
5861 | ret = -EPERM; | 5818 | ret = -EPERM; |
5862 | goto out; | 5819 | goto out; |
5863 | } | 5820 | } |
5821 | clone_root->send_in_progress++; | ||
5864 | spin_unlock(&clone_root->root_item_lock); | 5822 | spin_unlock(&clone_root->root_item_lock); |
5865 | srcu_read_unlock(&fs_info->subvol_srcu, index); | 5823 | srcu_read_unlock(&fs_info->subvol_srcu, index); |
5866 | 5824 | ||
5867 | sctx->clone_roots[i].root = clone_root; | 5825 | sctx->clone_roots[i].root = clone_root; |
5826 | clone_sources_to_rollback = i + 1; | ||
5868 | } | 5827 | } |
5869 | vfree(clone_sources_tmp); | 5828 | vfree(clone_sources_tmp); |
5870 | clone_sources_tmp = NULL; | 5829 | clone_sources_tmp = NULL; |
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 05fef198ff94..f2c9f9db3b19 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c | |||
@@ -901,6 +901,15 @@ find_root: | |||
901 | if (IS_ERR(new_root)) | 901 | if (IS_ERR(new_root)) |
902 | return ERR_CAST(new_root); | 902 | return ERR_CAST(new_root); |
903 | 903 | ||
904 | if (!(sb->s_flags & MS_RDONLY)) { | ||
905 | int ret; | ||
906 | down_read(&fs_info->cleanup_work_sem); | ||
907 | ret = btrfs_orphan_cleanup(new_root); | ||
908 | up_read(&fs_info->cleanup_work_sem); | ||
909 | if (ret) | ||
910 | return ERR_PTR(ret); | ||
911 | } | ||
912 | |||
904 | dir_id = btrfs_root_dirid(&new_root->root_item); | 913 | dir_id = btrfs_root_dirid(&new_root->root_item); |
905 | setup_root: | 914 | setup_root: |
906 | location.objectid = dir_id; | 915 | location.objectid = dir_id; |
@@ -1714,7 +1723,7 @@ static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes) | |||
1714 | avail_space = device->total_bytes - device->bytes_used; | 1723 | avail_space = device->total_bytes - device->bytes_used; |
1715 | 1724 | ||
1716 | /* align with stripe_len */ | 1725 | /* align with stripe_len */ |
1717 | do_div(avail_space, BTRFS_STRIPE_LEN); | 1726 | avail_space = div_u64(avail_space, BTRFS_STRIPE_LEN); |
1718 | avail_space *= BTRFS_STRIPE_LEN; | 1727 | avail_space *= BTRFS_STRIPE_LEN; |
1719 | 1728 | ||
1720 | /* | 1729 | /* |
@@ -1908,6 +1917,17 @@ static struct file_system_type btrfs_fs_type = { | |||
1908 | }; | 1917 | }; |
1909 | MODULE_ALIAS_FS("btrfs"); | 1918 | MODULE_ALIAS_FS("btrfs"); |
1910 | 1919 | ||
1920 | static int btrfs_control_open(struct inode *inode, struct file *file) | ||
1921 | { | ||
1922 | /* | ||
1923 | * The control file's private_data is used to hold the | ||
1924 | * transaction when it is started and is used to keep | ||
1925 | * track of whether a transaction is already in progress. | ||
1926 | */ | ||
1927 | file->private_data = NULL; | ||
1928 | return 0; | ||
1929 | } | ||
1930 | |||
1911 | /* | 1931 | /* |
1912 | * used by btrfsctl to scan devices when no FS is mounted | 1932 | * used by btrfsctl to scan devices when no FS is mounted |
1913 | */ | 1933 | */ |
@@ -2009,6 +2029,7 @@ static const struct super_operations btrfs_super_ops = { | |||
2009 | }; | 2029 | }; |
2010 | 2030 | ||
2011 | static const struct file_operations btrfs_ctl_fops = { | 2031 | static const struct file_operations btrfs_ctl_fops = { |
2032 | .open = btrfs_control_open, | ||
2012 | .unlocked_ioctl = btrfs_control_ioctl, | 2033 | .unlocked_ioctl = btrfs_control_ioctl, |
2013 | .compat_ioctl = btrfs_control_ioctl, | 2034 | .compat_ioctl = btrfs_control_ioctl, |
2014 | .owner = THIS_MODULE, | 2035 | .owner = THIS_MODULE, |
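A hypothetical illustration of the invariant the new open() establishes: with private_data reset at open time, later ioctl code can use a plain NULL test to see whether a transaction is in progress on this file (the helper name is made up for the example):

    /* Illustrative only: the NULL reset at open enables this test. */
    static bool control_trans_in_progress(struct file *file)
    {
            return file->private_data != NULL;
    }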
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index 94edb0a2a026..e8a4c86d274d 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c | |||
@@ -459,7 +459,7 @@ static inline struct btrfs_fs_info *to_fs_info(struct kobject *kobj) | |||
459 | static char btrfs_unknown_feature_names[3][NUM_FEATURE_BITS][13]; | 459 | static char btrfs_unknown_feature_names[3][NUM_FEATURE_BITS][13]; |
460 | static struct btrfs_feature_attr btrfs_feature_attrs[3][NUM_FEATURE_BITS]; | 460 | static struct btrfs_feature_attr btrfs_feature_attrs[3][NUM_FEATURE_BITS]; |
461 | 461 | ||
462 | static u64 supported_feature_masks[3] = { | 462 | static const u64 supported_feature_masks[3] = { |
463 | [FEAT_COMPAT] = BTRFS_FEATURE_COMPAT_SUPP, | 463 | [FEAT_COMPAT] = BTRFS_FEATURE_COMPAT_SUPP, |
464 | [FEAT_COMPAT_RO] = BTRFS_FEATURE_COMPAT_RO_SUPP, | 464 | [FEAT_COMPAT_RO] = BTRFS_FEATURE_COMPAT_RO_SUPP, |
465 | [FEAT_INCOMPAT] = BTRFS_FEATURE_INCOMPAT_SUPP, | 465 | [FEAT_INCOMPAT] = BTRFS_FEATURE_INCOMPAT_SUPP, |
diff --git a/fs/btrfs/sysfs.h b/fs/btrfs/sysfs.h index f7dd298b3cf6..3a4bbed723fd 100644 --- a/fs/btrfs/sysfs.h +++ b/fs/btrfs/sysfs.h | |||
@@ -61,11 +61,23 @@ static struct btrfs_feature_attr btrfs_attr_##_name = { \ | |||
61 | BTRFS_FEAT_ATTR(name, FEAT_INCOMPAT, BTRFS_FEATURE_INCOMPAT, feature) | 61 | BTRFS_FEAT_ATTR(name, FEAT_INCOMPAT, BTRFS_FEATURE_INCOMPAT, feature) |
62 | 62 | ||
63 | /* convert from attribute */ | 63 | /* convert from attribute */ |
64 | #define to_btrfs_feature_attr(a) \ | 64 | static inline struct btrfs_feature_attr * |
65 | container_of(a, struct btrfs_feature_attr, kobj_attr) | 65 | to_btrfs_feature_attr(struct kobj_attribute *a) |
66 | #define attr_to_btrfs_attr(a) container_of(a, struct kobj_attribute, attr) | 66 | { |
67 | #define attr_to_btrfs_feature_attr(a) \ | 67 | return container_of(a, struct btrfs_feature_attr, kobj_attr); |
68 | to_btrfs_feature_attr(attr_to_btrfs_attr(a)) | 68 | } |
69 | |||
70 | static inline struct kobj_attribute *attr_to_btrfs_attr(struct attribute *attr) | ||
71 | { | ||
72 | return container_of(attr, struct kobj_attribute, attr); | ||
73 | } | ||
74 | |||
75 | static inline struct btrfs_feature_attr * | ||
76 | attr_to_btrfs_feature_attr(struct attribute *attr) | ||
77 | { | ||
78 | return to_btrfs_feature_attr(attr_to_btrfs_attr(attr)); | ||
79 | } | ||
80 | |||
69 | char *btrfs_printable_features(enum btrfs_feature_set set, u64 flags); | 81 | char *btrfs_printable_features(enum btrfs_feature_set set, u64 flags); |
70 | extern const char * const btrfs_feature_set_names[3]; | 82 | extern const char * const btrfs_feature_set_names[3]; |
71 | extern struct kobj_type space_info_ktype; | 83 | extern struct kobj_type space_info_ktype; |
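The macro-to-inline conversions are mechanical, but as static inlines the compiler now type-checks the argument, where the old macros would accept any pointer. The underlying pattern is plain container_of(): map a pointer to an embedded member back to its enclosing structure, e.g. (illustrative structure, not from this header):

    struct demo_holder {
            int flags;
            struct kobj_attribute kattr;    /* embedded member */
    };

    static inline struct demo_holder *demo_from_attr(struct kobj_attribute *a)
    {
            /* Same shape as to_btrfs_feature_attr(), but type-checked. */
            return container_of(a, struct demo_holder, kattr);
    }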
diff --git a/fs/btrfs/tests/qgroup-tests.c b/fs/btrfs/tests/qgroup-tests.c index 73f299ebdabb..c32a7ba76bca 100644 --- a/fs/btrfs/tests/qgroup-tests.c +++ b/fs/btrfs/tests/qgroup-tests.c | |||
@@ -232,7 +232,7 @@ static int test_no_shared_qgroup(struct btrfs_root *root) | |||
232 | init_dummy_trans(&trans); | 232 | init_dummy_trans(&trans); |
233 | 233 | ||
234 | test_msg("Qgroup basic add\n"); | 234 | test_msg("Qgroup basic add\n"); |
235 | ret = btrfs_create_qgroup(NULL, fs_info, 5, NULL); | 235 | ret = btrfs_create_qgroup(NULL, fs_info, 5); |
236 | if (ret) { | 236 | if (ret) { |
237 | test_msg("Couldn't create a qgroup %d\n", ret); | 237 | test_msg("Couldn't create a qgroup %d\n", ret); |
238 | return ret; | 238 | return ret; |
@@ -301,7 +301,7 @@ static int test_multiple_refs(struct btrfs_root *root) | |||
301 | test_msg("Qgroup multiple refs test\n"); | 301 | test_msg("Qgroup multiple refs test\n"); |
302 | 302 | ||
303 | /* We have 5 created already from the previous test */ | 303 | /* We have 5 created already from the previous test */ |
304 | ret = btrfs_create_qgroup(NULL, fs_info, 256, NULL); | 304 | ret = btrfs_create_qgroup(NULL, fs_info, 256); |
305 | if (ret) { | 305 | if (ret) { |
306 | test_msg("Couldn't create a qgroup %d\n", ret); | 306 | test_msg("Couldn't create a qgroup %d\n", ret); |
307 | return ret; | 307 | return ret; |
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 8be4278e25e8..5628e25250c0 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
@@ -35,7 +35,7 @@ | |||
35 | 35 | ||
36 | #define BTRFS_ROOT_TRANS_TAG 0 | 36 | #define BTRFS_ROOT_TRANS_TAG 0 |
37 | 37 | ||
38 | static unsigned int btrfs_blocked_trans_types[TRANS_STATE_MAX] = { | 38 | static const unsigned int btrfs_blocked_trans_types[TRANS_STATE_MAX] = { |
39 | [TRANS_STATE_RUNNING] = 0U, | 39 | [TRANS_STATE_RUNNING] = 0U, |
40 | [TRANS_STATE_BLOCKED] = (__TRANS_USERSPACE | | 40 | [TRANS_STATE_BLOCKED] = (__TRANS_USERSPACE | |
41 | __TRANS_START), | 41 | __TRANS_START), |
@@ -64,6 +64,9 @@ void btrfs_put_transaction(struct btrfs_transaction *transaction) | |||
64 | if (atomic_dec_and_test(&transaction->use_count)) { | 64 | if (atomic_dec_and_test(&transaction->use_count)) { |
65 | BUG_ON(!list_empty(&transaction->list)); | 65 | BUG_ON(!list_empty(&transaction->list)); |
66 | WARN_ON(!RB_EMPTY_ROOT(&transaction->delayed_refs.href_root)); | 66 | WARN_ON(!RB_EMPTY_ROOT(&transaction->delayed_refs.href_root)); |
67 | if (transaction->delayed_refs.pending_csums) | ||
68 | printk(KERN_ERR "pending csums is %llu\n", | ||
69 | transaction->delayed_refs.pending_csums); | ||
67 | while (!list_empty(&transaction->pending_chunks)) { | 70 | while (!list_empty(&transaction->pending_chunks)) { |
68 | struct extent_map *em; | 71 | struct extent_map *em; |
69 | 72 | ||
@@ -93,11 +96,8 @@ static void clear_btree_io_tree(struct extent_io_tree *tree) | |||
93 | */ | 96 | */ |
94 | ASSERT(!waitqueue_active(&state->wq)); | 97 | ASSERT(!waitqueue_active(&state->wq)); |
95 | free_extent_state(state); | 98 | free_extent_state(state); |
96 | if (need_resched()) { | 99 | |
97 | spin_unlock(&tree->lock); | 100 | cond_resched_lock(&tree->lock); |
98 | cond_resched(); | ||
99 | spin_lock(&tree->lock); | ||
100 | } | ||
101 | } | 101 | } |
102 | spin_unlock(&tree->lock); | 102 | spin_unlock(&tree->lock); |
103 | } | 103 | } |
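cond_resched_lock() folds the drop-reschedule-retake dance into one call; the open-coded pattern it replaces looks like this (its return value, whether a reschedule happened, is ignored here just as in the hunk):

    /* Sketch of what cond_resched_lock(&tree->lock) stands for. */
    static void resched_under_spinlock(spinlock_t *lock)
    {
            if (need_resched()) {
                    spin_unlock(lock);
                    cond_resched();
                    spin_lock(lock);
            }
    }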
@@ -222,10 +222,12 @@ loop: | |||
222 | atomic_set(&cur_trans->use_count, 2); | 222 | atomic_set(&cur_trans->use_count, 2); |
223 | cur_trans->have_free_bgs = 0; | 223 | cur_trans->have_free_bgs = 0; |
224 | cur_trans->start_time = get_seconds(); | 224 | cur_trans->start_time = get_seconds(); |
225 | cur_trans->dirty_bg_run = 0; | ||
225 | 226 | ||
226 | cur_trans->delayed_refs.href_root = RB_ROOT; | 227 | cur_trans->delayed_refs.href_root = RB_ROOT; |
227 | atomic_set(&cur_trans->delayed_refs.num_entries, 0); | 228 | atomic_set(&cur_trans->delayed_refs.num_entries, 0); |
228 | cur_trans->delayed_refs.num_heads_ready = 0; | 229 | cur_trans->delayed_refs.num_heads_ready = 0; |
230 | cur_trans->delayed_refs.pending_csums = 0; | ||
229 | cur_trans->delayed_refs.num_heads = 0; | 231 | cur_trans->delayed_refs.num_heads = 0; |
230 | cur_trans->delayed_refs.flushing = 0; | 232 | cur_trans->delayed_refs.flushing = 0; |
231 | cur_trans->delayed_refs.run_delayed_start = 0; | 233 | cur_trans->delayed_refs.run_delayed_start = 0; |
@@ -250,6 +252,9 @@ loop: | |||
250 | INIT_LIST_HEAD(&cur_trans->switch_commits); | 252 | INIT_LIST_HEAD(&cur_trans->switch_commits); |
251 | INIT_LIST_HEAD(&cur_trans->pending_ordered); | 253 | INIT_LIST_HEAD(&cur_trans->pending_ordered); |
252 | INIT_LIST_HEAD(&cur_trans->dirty_bgs); | 254 | INIT_LIST_HEAD(&cur_trans->dirty_bgs); |
255 | INIT_LIST_HEAD(&cur_trans->io_bgs); | ||
256 | mutex_init(&cur_trans->cache_write_mutex); | ||
257 | cur_trans->num_dirty_bgs = 0; | ||
253 | spin_lock_init(&cur_trans->dirty_bgs_lock); | 258 | spin_lock_init(&cur_trans->dirty_bgs_lock); |
254 | list_add_tail(&cur_trans->list, &fs_info->trans_list); | 259 | list_add_tail(&cur_trans->list, &fs_info->trans_list); |
255 | extent_io_tree_init(&cur_trans->dirty_pages, | 260 | extent_io_tree_init(&cur_trans->dirty_pages, |
@@ -721,7 +726,7 @@ int btrfs_should_end_transaction(struct btrfs_trans_handle *trans, | |||
721 | updates = trans->delayed_ref_updates; | 726 | updates = trans->delayed_ref_updates; |
722 | trans->delayed_ref_updates = 0; | 727 | trans->delayed_ref_updates = 0; |
723 | if (updates) { | 728 | if (updates) { |
724 | err = btrfs_run_delayed_refs(trans, root, updates); | 729 | err = btrfs_run_delayed_refs(trans, root, updates * 2); |
725 | if (err) /* Error code will also eval true */ | 730 | if (err) /* Error code will also eval true */ |
726 | return err; | 731 | return err; |
727 | } | 732 | } |
@@ -1057,6 +1062,7 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans, | |||
1057 | { | 1062 | { |
1058 | struct btrfs_fs_info *fs_info = root->fs_info; | 1063 | struct btrfs_fs_info *fs_info = root->fs_info; |
1059 | struct list_head *dirty_bgs = &trans->transaction->dirty_bgs; | 1064 | struct list_head *dirty_bgs = &trans->transaction->dirty_bgs; |
1065 | struct list_head *io_bgs = &trans->transaction->io_bgs; | ||
1060 | struct list_head *next; | 1066 | struct list_head *next; |
1061 | struct extent_buffer *eb; | 1067 | struct extent_buffer *eb; |
1062 | int ret; | 1068 | int ret; |
@@ -1110,7 +1116,7 @@ again: | |||
1110 | return ret; | 1116 | return ret; |
1111 | } | 1117 | } |
1112 | 1118 | ||
1113 | while (!list_empty(dirty_bgs)) { | 1119 | while (!list_empty(dirty_bgs) || !list_empty(io_bgs)) { |
1114 | ret = btrfs_write_dirty_block_groups(trans, root); | 1120 | ret = btrfs_write_dirty_block_groups(trans, root); |
1115 | if (ret) | 1121 | if (ret) |
1116 | return ret; | 1122 | return ret; |
@@ -1810,6 +1816,37 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1810 | return ret; | 1816 | return ret; |
1811 | } | 1817 | } |
1812 | 1818 | ||
1819 | if (!cur_trans->dirty_bg_run) { | ||
1820 | int run_it = 0; | ||
1821 | |||
1822 | /* this mutex is also taken before trying to set | ||
1823 | * block groups readonly. We need to make sure | ||
1824 | * that nobody has set a block group readonly | ||
1825 | * after a extents from that block group have been | ||
1826 | * allocated for cache files. btrfs_set_block_group_ro | ||
1827 | * will wait for the transaction to commit if it | ||
1828 | * finds dirty_bg_run = 1 | ||
1829 | * | ||
1830 | * The dirty_bg_run flag is also used to make sure only | ||
1831 | * one process starts all the block group IO. It wouldn't | ||
1832 | * hurt to have more than one go through, but there's no | ||
1833 | * real advantage to it either. | ||
1834 | */ | ||
1835 | mutex_lock(&root->fs_info->ro_block_group_mutex); | ||
1836 | if (!cur_trans->dirty_bg_run) { | ||
1837 | run_it = 1; | ||
1838 | cur_trans->dirty_bg_run = 1; | ||
1839 | } | ||
1840 | mutex_unlock(&root->fs_info->ro_block_group_mutex); | ||
1841 | |||
1842 | if (run_it) | ||
1843 | ret = btrfs_start_dirty_block_groups(trans, root); | ||
1844 | } | ||
1845 | if (ret) { | ||
1846 | btrfs_end_transaction(trans, root); | ||
1847 | return ret; | ||
1848 | } | ||
1849 | |||
1813 | spin_lock(&root->fs_info->trans_lock); | 1850 | spin_lock(&root->fs_info->trans_lock); |
1814 | list_splice(&trans->ordered, &cur_trans->pending_ordered); | 1851 | list_splice(&trans->ordered, &cur_trans->pending_ordered); |
1815 | if (cur_trans->state >= TRANS_STATE_COMMIT_START) { | 1852 | if (cur_trans->state >= TRANS_STATE_COMMIT_START) { |
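Distilled, the dirty_bg_run logic added above is a double-checked flag under ro_block_group_mutex: exactly one committer claims the block group IO, later committers skip it, and btrfs_set_block_group_ro can key off the same flag. The shape of the pattern:

    int run_it = 0;

    /* First committer to take the mutex claims the work. */
    mutex_lock(&root->fs_info->ro_block_group_mutex);
    if (!cur_trans->dirty_bg_run) {
            run_it = 1;
            cur_trans->dirty_bg_run = 1;
    }
    mutex_unlock(&root->fs_info->ro_block_group_mutex);

    if (run_it)
            ret = btrfs_start_dirty_block_groups(trans, root);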
@@ -2003,6 +2040,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
2003 | 2040 | ||
2004 | assert_qgroups_uptodate(trans); | 2041 | assert_qgroups_uptodate(trans); |
2005 | ASSERT(list_empty(&cur_trans->dirty_bgs)); | 2042 | ASSERT(list_empty(&cur_trans->dirty_bgs)); |
2043 | ASSERT(list_empty(&cur_trans->io_bgs)); | ||
2006 | update_super_roots(root); | 2044 | update_super_roots(root); |
2007 | 2045 | ||
2008 | btrfs_set_super_log_root(root->fs_info->super_copy, 0); | 2046 | btrfs_set_super_log_root(root->fs_info->super_copy, 0); |
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 937050a2b68e..0b24755596ba 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h | |||
@@ -64,9 +64,19 @@ struct btrfs_transaction { | |||
64 | struct list_head pending_ordered; | 64 | struct list_head pending_ordered; |
65 | struct list_head switch_commits; | 65 | struct list_head switch_commits; |
66 | struct list_head dirty_bgs; | 66 | struct list_head dirty_bgs; |
67 | struct list_head io_bgs; | ||
68 | u64 num_dirty_bgs; | ||
69 | |||
70 | /* | ||
71 | * we need to make sure block group deletion doesn't race with | ||
72 | * free space cache writeout. This mutex keeps them from stomping | ||
73 | * on each other | ||
74 | */ | ||
75 | struct mutex cache_write_mutex; | ||
67 | spinlock_t dirty_bgs_lock; | 76 | spinlock_t dirty_bgs_lock; |
68 | struct btrfs_delayed_ref_root delayed_refs; | 77 | struct btrfs_delayed_ref_root delayed_refs; |
69 | int aborted; | 78 | int aborted; |
79 | int dirty_bg_run; | ||
70 | }; | 80 | }; |
71 | 81 | ||
72 | #define __TRANS_FREEZABLE (1U << 0) | 82 | #define __TRANS_FREEZABLE (1U << 0) |
@@ -136,9 +146,11 @@ struct btrfs_pending_snapshot { | |||
136 | static inline void btrfs_set_inode_last_trans(struct btrfs_trans_handle *trans, | 146 | static inline void btrfs_set_inode_last_trans(struct btrfs_trans_handle *trans, |
137 | struct inode *inode) | 147 | struct inode *inode) |
138 | { | 148 | { |
149 | spin_lock(&BTRFS_I(inode)->lock); | ||
139 | BTRFS_I(inode)->last_trans = trans->transaction->transid; | 150 | BTRFS_I(inode)->last_trans = trans->transaction->transid; |
140 | BTRFS_I(inode)->last_sub_trans = BTRFS_I(inode)->root->log_transid; | 151 | BTRFS_I(inode)->last_sub_trans = BTRFS_I(inode)->root->log_transid; |
141 | BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->root->last_log_commit; | 152 | BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->root->last_log_commit; |
153 | spin_unlock(&BTRFS_I(inode)->lock); | ||
142 | } | 154 | } |
143 | 155 | ||
144 | int btrfs_end_transaction(struct btrfs_trans_handle *trans, | 156 | int btrfs_end_transaction(struct btrfs_trans_handle *trans, |
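With the setter above now taking BTRFS_I(inode)->lock, the three fields are published atomically; any reader that wants a consistent snapshot has to take the same lock. A sketch of a matching reader (illustrative, not a function from this patch):

    static u64 read_last_trans_sketch(struct inode *inode)
    {
            u64 last_trans;

            spin_lock(&BTRFS_I(inode)->lock);
            last_trans = BTRFS_I(inode)->last_trans;
            spin_unlock(&BTRFS_I(inode)->lock);
            return last_trans;
    }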
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index c5b8ba37f88e..a089b5944efc 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c | |||
@@ -492,11 +492,19 @@ insert: | |||
492 | 492 | ||
493 | if (btrfs_inode_generation(eb, src_item) == 0) { | 493 | if (btrfs_inode_generation(eb, src_item) == 0) { |
494 | struct extent_buffer *dst_eb = path->nodes[0]; | 494 | struct extent_buffer *dst_eb = path->nodes[0]; |
495 | const u64 ino_size = btrfs_inode_size(eb, src_item); | ||
495 | 496 | ||
497 | /* | ||
498 | * For regular files an ino_size == 0 is used only when | ||
499 | * logging that an inode exists, as part of a directory | ||
500 | * fsync, and the inode wasn't fsynced before. In this | ||
501 | * case don't set the size of the inode in the fs/subvol | ||
502 | * tree, otherwise we would be throwing valid data away. | ||
503 | */ | ||
496 | if (S_ISREG(btrfs_inode_mode(eb, src_item)) && | 504 | if (S_ISREG(btrfs_inode_mode(eb, src_item)) && |
497 | S_ISREG(btrfs_inode_mode(dst_eb, dst_item))) { | 505 | S_ISREG(btrfs_inode_mode(dst_eb, dst_item)) && |
506 | ino_size != 0) { | ||
498 | struct btrfs_map_token token; | 507 | struct btrfs_map_token token; |
499 | u64 ino_size = btrfs_inode_size(eb, src_item); | ||
500 | 508 | ||
501 | btrfs_init_map_token(&token); | 509 | btrfs_init_map_token(&token); |
502 | btrfs_set_token_inode_size(dst_eb, dst_item, | 510 | btrfs_set_token_inode_size(dst_eb, dst_item, |
@@ -1951,6 +1959,104 @@ out: | |||
1951 | return ret; | 1959 | return ret; |
1952 | } | 1960 | } |
1953 | 1961 | ||
1962 | static int replay_xattr_deletes(struct btrfs_trans_handle *trans, | ||
1963 | struct btrfs_root *root, | ||
1964 | struct btrfs_root *log, | ||
1965 | struct btrfs_path *path, | ||
1966 | const u64 ino) | ||
1967 | { | ||
1968 | struct btrfs_key search_key; | ||
1969 | struct btrfs_path *log_path; | ||
1970 | int i; | ||
1971 | int nritems; | ||
1972 | int ret; | ||
1973 | |||
1974 | log_path = btrfs_alloc_path(); | ||
1975 | if (!log_path) | ||
1976 | return -ENOMEM; | ||
1977 | |||
1978 | search_key.objectid = ino; | ||
1979 | search_key.type = BTRFS_XATTR_ITEM_KEY; | ||
1980 | search_key.offset = 0; | ||
1981 | again: | ||
1982 | ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0); | ||
1983 | if (ret < 0) | ||
1984 | goto out; | ||
1985 | process_leaf: | ||
1986 | nritems = btrfs_header_nritems(path->nodes[0]); | ||
1987 | for (i = path->slots[0]; i < nritems; i++) { | ||
1988 | struct btrfs_key key; | ||
1989 | struct btrfs_dir_item *di; | ||
1990 | struct btrfs_dir_item *log_di; | ||
1991 | u32 total_size; | ||
1992 | u32 cur; | ||
1993 | |||
1994 | btrfs_item_key_to_cpu(path->nodes[0], &key, i); | ||
1995 | if (key.objectid != ino || key.type != BTRFS_XATTR_ITEM_KEY) { | ||
1996 | ret = 0; | ||
1997 | goto out; | ||
1998 | } | ||
1999 | |||
2000 | di = btrfs_item_ptr(path->nodes[0], i, struct btrfs_dir_item); | ||
2001 | total_size = btrfs_item_size_nr(path->nodes[0], i); | ||
2002 | cur = 0; | ||
2003 | while (cur < total_size) { | ||
2004 | u16 name_len = btrfs_dir_name_len(path->nodes[0], di); | ||
2005 | u16 data_len = btrfs_dir_data_len(path->nodes[0], di); | ||
2006 | u32 this_len = sizeof(*di) + name_len + data_len; | ||
2007 | char *name; | ||
2008 | |||
2009 | name = kmalloc(name_len, GFP_NOFS); | ||
2010 | if (!name) { | ||
2011 | ret = -ENOMEM; | ||
2012 | goto out; | ||
2013 | } | ||
2014 | read_extent_buffer(path->nodes[0], name, | ||
2015 | (unsigned long)(di + 1), name_len); | ||
2016 | |||
2017 | log_di = btrfs_lookup_xattr(NULL, log, log_path, ino, | ||
2018 | name, name_len, 0); | ||
2019 | btrfs_release_path(log_path); | ||
2020 | if (!log_di) { | ||
2021 | /* Doesn't exist in log tree, so delete it. */ | ||
2022 | btrfs_release_path(path); | ||
2023 | di = btrfs_lookup_xattr(trans, root, path, ino, | ||
2024 | name, name_len, -1); | ||
2025 | kfree(name); | ||
2026 | if (IS_ERR(di)) { | ||
2027 | ret = PTR_ERR(di); | ||
2028 | goto out; | ||
2029 | } | ||
2030 | ASSERT(di); | ||
2031 | ret = btrfs_delete_one_dir_name(trans, root, | ||
2032 | path, di); | ||
2033 | if (ret) | ||
2034 | goto out; | ||
2035 | btrfs_release_path(path); | ||
2036 | search_key = key; | ||
2037 | goto again; | ||
2038 | } | ||
2039 | kfree(name); | ||
2040 | if (IS_ERR(log_di)) { | ||
2041 | ret = PTR_ERR(log_di); | ||
2042 | goto out; | ||
2043 | } | ||
2044 | cur += this_len; | ||
2045 | di = (struct btrfs_dir_item *)((char *)di + this_len); | ||
2046 | } | ||
2047 | } | ||
2048 | ret = btrfs_next_leaf(root, path); | ||
2049 | if (ret > 0) | ||
2050 | ret = 0; | ||
2051 | else if (ret == 0) | ||
2052 | goto process_leaf; | ||
2053 | out: | ||
2054 | btrfs_free_path(log_path); | ||
2055 | btrfs_release_path(path); | ||
2056 | return ret; | ||
2057 | } | ||
2058 | |||
2059 | |||
1954 | /* | 2060 | /* |
1955 | * deletion replay happens before we copy any new directory items | 2061 | * deletion replay happens before we copy any new directory items |
1956 | * out of the log or out of backreferences from inodes. It | 2062 | * out of the log or out of backreferences from inodes. It |
@@ -2104,6 +2210,10 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, | |||
2104 | 2210 | ||
2105 | inode_item = btrfs_item_ptr(eb, i, | 2211 | inode_item = btrfs_item_ptr(eb, i, |
2106 | struct btrfs_inode_item); | 2212 | struct btrfs_inode_item); |
2213 | ret = replay_xattr_deletes(wc->trans, root, log, | ||
2214 | path, key.objectid); | ||
2215 | if (ret) | ||
2216 | break; | ||
2107 | mode = btrfs_inode_mode(eb, inode_item); | 2217 | mode = btrfs_inode_mode(eb, inode_item); |
2108 | if (S_ISDIR(mode)) { | 2218 | if (S_ISDIR(mode)) { |
2109 | ret = replay_dir_deletes(wc->trans, | 2219 | ret = replay_dir_deletes(wc->trans, |
@@ -2230,7 +2340,8 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, | |||
2230 | if (trans) { | 2340 | if (trans) { |
2231 | btrfs_tree_lock(next); | 2341 | btrfs_tree_lock(next); |
2232 | btrfs_set_lock_blocking(next); | 2342 | btrfs_set_lock_blocking(next); |
2233 | clean_tree_block(trans, root, next); | 2343 | clean_tree_block(trans, root->fs_info, |
2344 | next); | ||
2234 | btrfs_wait_tree_block_writeback(next); | 2345 | btrfs_wait_tree_block_writeback(next); |
2235 | btrfs_tree_unlock(next); | 2346 | btrfs_tree_unlock(next); |
2236 | } | 2347 | } |
@@ -2308,7 +2419,8 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans, | |||
2308 | if (trans) { | 2419 | if (trans) { |
2309 | btrfs_tree_lock(next); | 2420 | btrfs_tree_lock(next); |
2310 | btrfs_set_lock_blocking(next); | 2421 | btrfs_set_lock_blocking(next); |
2311 | clean_tree_block(trans, root, next); | 2422 | clean_tree_block(trans, root->fs_info, |
2423 | next); | ||
2312 | btrfs_wait_tree_block_writeback(next); | 2424 | btrfs_wait_tree_block_writeback(next); |
2313 | btrfs_tree_unlock(next); | 2425 | btrfs_tree_unlock(next); |
2314 | } | 2426 | } |
@@ -2384,7 +2496,7 @@ static int walk_log_tree(struct btrfs_trans_handle *trans, | |||
2384 | if (trans) { | 2496 | if (trans) { |
2385 | btrfs_tree_lock(next); | 2497 | btrfs_tree_lock(next); |
2386 | btrfs_set_lock_blocking(next); | 2498 | btrfs_set_lock_blocking(next); |
2387 | clean_tree_block(trans, log, next); | 2499 | clean_tree_block(trans, log->fs_info, next); |
2388 | btrfs_wait_tree_block_writeback(next); | 2500 | btrfs_wait_tree_block_writeback(next); |
2389 | btrfs_tree_unlock(next); | 2501 | btrfs_tree_unlock(next); |
2390 | } | 2502 | } |
@@ -3020,6 +3132,7 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, | |||
3020 | struct btrfs_root *root, struct inode *inode, | 3132 | struct btrfs_root *root, struct inode *inode, |
3021 | struct btrfs_path *path, | 3133 | struct btrfs_path *path, |
3022 | struct btrfs_path *dst_path, int key_type, | 3134 | struct btrfs_path *dst_path, int key_type, |
3135 | struct btrfs_log_ctx *ctx, | ||
3023 | u64 min_offset, u64 *last_offset_ret) | 3136 | u64 min_offset, u64 *last_offset_ret) |
3024 | { | 3137 | { |
3025 | struct btrfs_key min_key; | 3138 | struct btrfs_key min_key; |
@@ -3104,6 +3217,8 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, | |||
3104 | src = path->nodes[0]; | 3217 | src = path->nodes[0]; |
3105 | nritems = btrfs_header_nritems(src); | 3218 | nritems = btrfs_header_nritems(src); |
3106 | for (i = path->slots[0]; i < nritems; i++) { | 3219 | for (i = path->slots[0]; i < nritems; i++) { |
3220 | struct btrfs_dir_item *di; | ||
3221 | |||
3107 | btrfs_item_key_to_cpu(src, &min_key, i); | 3222 | btrfs_item_key_to_cpu(src, &min_key, i); |
3108 | 3223 | ||
3109 | if (min_key.objectid != ino || min_key.type != key_type) | 3224 | if (min_key.objectid != ino || min_key.type != key_type) |
@@ -3114,6 +3229,37 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, | |||
3114 | err = ret; | 3229 | err = ret; |
3115 | goto done; | 3230 | goto done; |
3116 | } | 3231 | } |
3232 | |||
3233 | /* | ||
3234 | * We must make sure that when we log a directory entry, | ||
3235 | * the corresponding inode, after log replay, has a | ||
3236 | * matching link count. For example: | ||
3237 | * | ||
3238 | * touch foo | ||
3239 | * mkdir mydir | ||
3240 | * sync | ||
3241 | * ln foo mydir/bar | ||
3242 | * xfs_io -c "fsync" mydir | ||
3243 | * <crash> | ||
3244 | * <mount fs and log replay> | ||
3245 | * | ||
3246 | * Would result in an fsync log that, when replayed, leaves | ||
3247 | * our file inode with a link count of 1, yet with two | ||
3248 | * directory entries pointing to the same inode. | ||
3249 | * After removing one of the names, it would not be | ||
3250 | * possible to remove the other name, which always | ||
3251 | * resulted in stale file handle errors, and it would not | ||
3252 | * be possible to rmdir the parent directory, since | ||
3253 | * its i_size could never decrement to the value | ||
3254 | * BTRFS_EMPTY_DIR_SIZE, resulting in -ENOTEMPTY errors. | ||
3255 | */ | ||
3256 | di = btrfs_item_ptr(src, i, struct btrfs_dir_item); | ||
3257 | btrfs_dir_item_key_to_cpu(src, di, &tmp); | ||
3258 | if (ctx && | ||
3259 | (btrfs_dir_transid(src, di) == trans->transid || | ||
3260 | btrfs_dir_type(src, di) == BTRFS_FT_DIR) && | ||
3261 | tmp.type != BTRFS_ROOT_ITEM_KEY) | ||
3262 | ctx->log_new_dentries = true; | ||
3117 | } | 3263 | } |
3118 | path->slots[0] = nritems; | 3264 | path->slots[0] = nritems; |
3119 | 3265 | ||
@@ -3175,7 +3321,8 @@ done: | |||
3175 | static noinline int log_directory_changes(struct btrfs_trans_handle *trans, | 3321 | static noinline int log_directory_changes(struct btrfs_trans_handle *trans, |
3176 | struct btrfs_root *root, struct inode *inode, | 3322 | struct btrfs_root *root, struct inode *inode, |
3177 | struct btrfs_path *path, | 3323 | struct btrfs_path *path, |
3178 | struct btrfs_path *dst_path) | 3324 | struct btrfs_path *dst_path, |
3325 | struct btrfs_log_ctx *ctx) | ||
3179 | { | 3326 | { |
3180 | u64 min_key; | 3327 | u64 min_key; |
3181 | u64 max_key; | 3328 | u64 max_key; |
@@ -3187,7 +3334,7 @@ again: | |||
3187 | max_key = 0; | 3334 | max_key = 0; |
3188 | while (1) { | 3335 | while (1) { |
3189 | ret = log_dir_items(trans, root, inode, path, | 3336 | ret = log_dir_items(trans, root, inode, path, |
3190 | dst_path, key_type, min_key, | 3337 | dst_path, key_type, ctx, min_key, |
3191 | &max_key); | 3338 | &max_key); |
3192 | if (ret) | 3339 | if (ret) |
3193 | return ret; | 3340 | return ret; |
@@ -3963,7 +4110,7 @@ static int logged_inode_size(struct btrfs_root *log, struct inode *inode, | |||
3963 | if (ret < 0) { | 4110 | if (ret < 0) { |
3964 | return ret; | 4111 | return ret; |
3965 | } else if (ret > 0) { | 4112 | } else if (ret > 0) { |
3966 | *size_ret = i_size_read(inode); | 4113 | *size_ret = 0; |
3967 | } else { | 4114 | } else { |
3968 | struct btrfs_inode_item *item; | 4115 | struct btrfs_inode_item *item; |
3969 | 4116 | ||
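With this hunk logged_inode_size() reports 0 when the log tree holds no inode item for the inode at all, instead of echoing the in-memory i_size. A minimal sketch of the caller-side contract after the change (assuming the caller only needs to know how much was previously logged):

	u64 logged_isize = 0;

	ret = logged_inode_size(log, inode, path, &logged_isize);
	if (ret < 0)
		return ret;
	/*
	 * logged_isize == 0 now also covers "no inode item in the log
	 * yet"; before the change the current i_size was reported in
	 * that case, even though nothing had actually been logged.
	 */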
@@ -4070,10 +4217,8 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, | |||
4070 | if (S_ISDIR(inode->i_mode)) { | 4217 | if (S_ISDIR(inode->i_mode)) { |
4071 | int max_key_type = BTRFS_DIR_LOG_INDEX_KEY; | 4218 | int max_key_type = BTRFS_DIR_LOG_INDEX_KEY; |
4072 | 4219 | ||
4073 | if (inode_only == LOG_INODE_EXISTS) { | 4220 | if (inode_only == LOG_INODE_EXISTS) |
4074 | max_key_type = BTRFS_INODE_EXTREF_KEY; | 4221 | max_key_type = BTRFS_XATTR_ITEM_KEY; |
4075 | max_key.type = max_key_type; | ||
4076 | } | ||
4077 | ret = drop_objectid_items(trans, log, path, ino, max_key_type); | 4222 | ret = drop_objectid_items(trans, log, path, ino, max_key_type); |
4078 | } else { | 4223 | } else { |
4079 | if (inode_only == LOG_INODE_EXISTS) { | 4224 | if (inode_only == LOG_INODE_EXISTS) { |
@@ -4098,7 +4243,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, | |||
4098 | if (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, | 4243 | if (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, |
4099 | &BTRFS_I(inode)->runtime_flags)) { | 4244 | &BTRFS_I(inode)->runtime_flags)) { |
4100 | if (inode_only == LOG_INODE_EXISTS) { | 4245 | if (inode_only == LOG_INODE_EXISTS) { |
4101 | max_key.type = BTRFS_INODE_EXTREF_KEY; | 4246 | max_key.type = BTRFS_XATTR_ITEM_KEY; |
4102 | ret = drop_objectid_items(trans, log, path, ino, | 4247 | ret = drop_objectid_items(trans, log, path, ino, |
4103 | max_key.type); | 4248 | max_key.type); |
4104 | } else { | 4249 | } else { |
@@ -4106,20 +4251,19 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, | |||
4106 | &BTRFS_I(inode)->runtime_flags); | 4251 | &BTRFS_I(inode)->runtime_flags); |
4107 | clear_bit(BTRFS_INODE_COPY_EVERYTHING, | 4252 | clear_bit(BTRFS_INODE_COPY_EVERYTHING, |
4108 | &BTRFS_I(inode)->runtime_flags); | 4253 | &BTRFS_I(inode)->runtime_flags); |
4109 | ret = btrfs_truncate_inode_items(trans, log, | 4254 | while (1) { |
4110 | inode, 0, 0); | 4255 | ret = btrfs_truncate_inode_items(trans, |
4256 | log, inode, 0, 0); | ||
4257 | if (ret != -EAGAIN) | ||
4258 | break; | ||
4259 | } | ||
4111 | } | 4260 | } |
4112 | } else if (test_bit(BTRFS_INODE_COPY_EVERYTHING, | 4261 | } else if (test_and_clear_bit(BTRFS_INODE_COPY_EVERYTHING, |
4113 | &BTRFS_I(inode)->runtime_flags) || | 4262 | &BTRFS_I(inode)->runtime_flags) || |
4114 | inode_only == LOG_INODE_EXISTS) { | 4263 | inode_only == LOG_INODE_EXISTS) { |
4115 | if (inode_only == LOG_INODE_ALL) { | 4264 | if (inode_only == LOG_INODE_ALL) |
4116 | clear_bit(BTRFS_INODE_COPY_EVERYTHING, | ||
4117 | &BTRFS_I(inode)->runtime_flags); | ||
4118 | fast_search = true; | 4265 | fast_search = true; |
4119 | max_key.type = BTRFS_XATTR_ITEM_KEY; | 4266 | max_key.type = BTRFS_XATTR_ITEM_KEY; |
4120 | } else { | ||
4121 | max_key.type = BTRFS_INODE_EXTREF_KEY; | ||
4122 | } | ||
4123 | ret = drop_objectid_items(trans, log, path, ino, | 4267 | ret = drop_objectid_items(trans, log, path, ino, |
4124 | max_key.type); | 4268 | max_key.type); |
4125 | } else { | 4269 | } else { |
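Folding the separate test_bit()/clear_bit() pair into test_and_clear_bit() makes observing and consuming the flag a single atomic step; with the two-call form, a flag set by another task between the test and the clear was silently thrown away. A minimal sketch of the difference, reusing the flag from the code above purely for illustration:

	/* racy two-step: a bit set between the test and the clear
	 * is lost when the clear runs */
	if (test_bit(BTRFS_INODE_COPY_EVERYTHING,
		     &BTRFS_I(inode)->runtime_flags)) {
		clear_bit(BTRFS_INODE_COPY_EVERYTHING,
			  &BTRFS_I(inode)->runtime_flags);
		/* ... act on the flag ... */
	}

	/* atomic: the bit is read and cleared in one operation */
	if (test_and_clear_bit(BTRFS_INODE_COPY_EVERYTHING,
			       &BTRFS_I(inode)->runtime_flags)) {
		/* ... act on the flag ... */
	}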
@@ -4277,15 +4421,18 @@ log_extents: | |||
4277 | } | 4421 | } |
4278 | 4422 | ||
4279 | if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) { | 4423 | if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) { |
4280 | ret = log_directory_changes(trans, root, inode, path, dst_path); | 4424 | ret = log_directory_changes(trans, root, inode, path, dst_path, |
4425 | ctx); | ||
4281 | if (ret) { | 4426 | if (ret) { |
4282 | err = ret; | 4427 | err = ret; |
4283 | goto out_unlock; | 4428 | goto out_unlock; |
4284 | } | 4429 | } |
4285 | } | 4430 | } |
4286 | 4431 | ||
4432 | spin_lock(&BTRFS_I(inode)->lock); | ||
4287 | BTRFS_I(inode)->logged_trans = trans->transid; | 4433 | BTRFS_I(inode)->logged_trans = trans->transid; |
4288 | BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->last_sub_trans; | 4434 | BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->last_sub_trans; |
4435 | spin_unlock(&BTRFS_I(inode)->lock); | ||
4289 | out_unlock: | 4436 | out_unlock: |
4290 | if (unlikely(err)) | 4437 | if (unlikely(err)) |
4291 | btrfs_put_logged_extents(&logged_list); | 4438 | btrfs_put_logged_extents(&logged_list); |
@@ -4372,6 +4519,181 @@ out: | |||
4372 | return ret; | 4519 | return ret; |
4373 | } | 4520 | } |
4374 | 4521 | ||
4522 | struct btrfs_dir_list { | ||
4523 | u64 ino; | ||
4524 | struct list_head list; | ||
4525 | }; | ||
4526 | |||
4527 | /* | ||
4528 | * Log the inodes of the new dentries of a directory. See log_dir_items() for | ||
4529 | * details about why it is needed. | ||
4530 | * This is a recursive operation - if an existing dentry corresponds to a | ||
4531 | * directory, that directory's new entries are logged too (same behaviour as | ||
4532 | * ext3/4, xfs, f2fs, reiserfs, nilfs2). Note that when logging the inodes | ||
4533 | * the dentries point to we do not lock their i_mutex, otherwise lockdep | ||
4534 | * complains about the following circular lock dependency / possible deadlock: | ||
4535 | * | ||
4536 | * CPU0 CPU1 | ||
4537 | * ---- ---- | ||
4538 | * lock(&type->i_mutex_dir_key#3/2); | ||
4539 | * lock(sb_internal#2); | ||
4540 | * lock(&type->i_mutex_dir_key#3/2); | ||
4541 | * lock(&sb->s_type->i_mutex_key#14); | ||
4542 | * | ||
4543 | * Where sb_internal is the lock (a counter that works as a lock) acquired by | ||
4544 | * sb_start_intwrite() in btrfs_start_transaction(). | ||
4545 | * Not locking i_mutex of the inodes is still safe because: | ||
4546 | * | ||
4547 | * 1) For regular files we log with a mode of LOG_INODE_EXISTS. It's possible | ||
4548 | * that while logging the inode new references (names) are added or removed | ||
4549 | * from the inode, leaving the logged inode item with a link count that does | ||
4550 | * not match the number of logged inode reference items. This is fine because | ||
4551 | * at log replay time we compute the real number of links and correct the | ||
4552 | * link count in the inode item (see replay_one_buffer() and | ||
4553 | * link_to_fixup_dir()); | ||
4554 | * | ||
4555 | * 2) For directories we log with a mode of LOG_INODE_ALL. It's possible that | ||
4556 | * while logging the inode's items new items with keys BTRFS_DIR_ITEM_KEY and | ||
4557 | * BTRFS_DIR_INDEX_KEY are added to fs/subvol tree and the logged inode item | ||
4558 | * has a size that doesn't match the sum of the lengths of all the logged | ||
4559 | * names. This does not result in a problem because if a dir_item key is | ||
4560 | * logged but its matching dir_index key is not logged, at log replay time we | ||
4561 | * don't use it to replay the respective name (see replay_one_name()). On the | ||
4562 | * other hand if only the dir_index key ends up being logged, the respective | ||
4563 | * name is added to the fs/subvol tree with both the dir_item and dir_index | ||
4564 | * keys created (see replay_one_name()). | ||
4565 | * The directory's inode item with a wrong i_size is not a problem either, | ||
4566 | * since we don't use it at log replay time to set the i_size in the inode | ||
4567 | * item of the fs/subvol tree (see overwrite_item()). | ||
4568 | */ | ||
4569 | static int log_new_dir_dentries(struct btrfs_trans_handle *trans, | ||
4570 | struct btrfs_root *root, | ||
4571 | struct inode *start_inode, | ||
4572 | struct btrfs_log_ctx *ctx) | ||
4573 | { | ||
4574 | struct btrfs_root *log = root->log_root; | ||
4575 | struct btrfs_path *path; | ||
4576 | LIST_HEAD(dir_list); | ||
4577 | struct btrfs_dir_list *dir_elem; | ||
4578 | int ret = 0; | ||
4579 | |||
4580 | path = btrfs_alloc_path(); | ||
4581 | if (!path) | ||
4582 | return -ENOMEM; | ||
4583 | |||
4584 | dir_elem = kmalloc(sizeof(*dir_elem), GFP_NOFS); | ||
4585 | if (!dir_elem) { | ||
4586 | btrfs_free_path(path); | ||
4587 | return -ENOMEM; | ||
4588 | } | ||
4589 | dir_elem->ino = btrfs_ino(start_inode); | ||
4590 | list_add_tail(&dir_elem->list, &dir_list); | ||
4591 | |||
4592 | while (!list_empty(&dir_list)) { | ||
4593 | struct extent_buffer *leaf; | ||
4594 | struct btrfs_key min_key; | ||
4595 | int nritems; | ||
4596 | int i; | ||
4597 | |||
4598 | dir_elem = list_first_entry(&dir_list, struct btrfs_dir_list, | ||
4599 | list); | ||
4600 | if (ret) | ||
4601 | goto next_dir_inode; | ||
4602 | |||
4603 | min_key.objectid = dir_elem->ino; | ||
4604 | min_key.type = BTRFS_DIR_ITEM_KEY; | ||
4605 | min_key.offset = 0; | ||
4606 | again: | ||
4607 | btrfs_release_path(path); | ||
4608 | ret = btrfs_search_forward(log, &min_key, path, trans->transid); | ||
4609 | if (ret < 0) { | ||
4610 | goto next_dir_inode; | ||
4611 | } else if (ret > 0) { | ||
4612 | ret = 0; | ||
4613 | goto next_dir_inode; | ||
4614 | } | ||
4615 | |||
4616 | process_leaf: | ||
4617 | leaf = path->nodes[0]; | ||
4618 | nritems = btrfs_header_nritems(leaf); | ||
4619 | for (i = path->slots[0]; i < nritems; i++) { | ||
4620 | struct btrfs_dir_item *di; | ||
4621 | struct btrfs_key di_key; | ||
4622 | struct inode *di_inode; | ||
4623 | struct btrfs_dir_list *new_dir_elem; | ||
4624 | int log_mode = LOG_INODE_EXISTS; | ||
4625 | int type; | ||
4626 | |||
4627 | btrfs_item_key_to_cpu(leaf, &min_key, i); | ||
4628 | if (min_key.objectid != dir_elem->ino || | ||
4629 | min_key.type != BTRFS_DIR_ITEM_KEY) | ||
4630 | goto next_dir_inode; | ||
4631 | |||
4632 | di = btrfs_item_ptr(leaf, i, struct btrfs_dir_item); | ||
4633 | type = btrfs_dir_type(leaf, di); | ||
4634 | if (btrfs_dir_transid(leaf, di) < trans->transid && | ||
4635 | type != BTRFS_FT_DIR) | ||
4636 | continue; | ||
4637 | btrfs_dir_item_key_to_cpu(leaf, di, &di_key); | ||
4638 | if (di_key.type == BTRFS_ROOT_ITEM_KEY) | ||
4639 | continue; | ||
4640 | |||
4641 | di_inode = btrfs_iget(root->fs_info->sb, &di_key, | ||
4642 | root, NULL); | ||
4643 | if (IS_ERR(di_inode)) { | ||
4644 | ret = PTR_ERR(di_inode); | ||
4645 | goto next_dir_inode; | ||
4646 | } | ||
4647 | |||
4648 | if (btrfs_inode_in_log(di_inode, trans->transid)) { | ||
4649 | iput(di_inode); | ||
4650 | continue; | ||
4651 | } | ||
4652 | |||
4653 | ctx->log_new_dentries = false; | ||
4654 | if (type == BTRFS_FT_DIR) | ||
4655 | log_mode = LOG_INODE_ALL; | ||
4656 | btrfs_release_path(path); | ||
4657 | ret = btrfs_log_inode(trans, root, di_inode, | ||
4658 | log_mode, 0, LLONG_MAX, ctx); | ||
4659 | iput(di_inode); | ||
4660 | if (ret) | ||
4661 | goto next_dir_inode; | ||
4662 | if (ctx->log_new_dentries) { | ||
4663 | new_dir_elem = kmalloc(sizeof(*new_dir_elem), | ||
4664 | GFP_NOFS); | ||
4665 | if (!new_dir_elem) { | ||
4666 | ret = -ENOMEM; | ||
4667 | goto next_dir_inode; | ||
4668 | } | ||
4669 | new_dir_elem->ino = di_key.objectid; | ||
4670 | list_add_tail(&new_dir_elem->list, &dir_list); | ||
4671 | } | ||
4672 | break; | ||
4673 | } | ||
4674 | if (i == nritems) { | ||
4675 | ret = btrfs_next_leaf(log, path); | ||
4676 | if (ret < 0) { | ||
4677 | goto next_dir_inode; | ||
4678 | } else if (ret > 0) { | ||
4679 | ret = 0; | ||
4680 | goto next_dir_inode; | ||
4681 | } | ||
4682 | goto process_leaf; | ||
4683 | } | ||
4684 | if (min_key.offset < (u64)-1) { | ||
4685 | min_key.offset++; | ||
4686 | goto again; | ||
4687 | } | ||
4688 | next_dir_inode: | ||
4689 | list_del(&dir_elem->list); | ||
4690 | kfree(dir_elem); | ||
4691 | } | ||
4692 | |||
4693 | btrfs_free_path(path); | ||
4694 | return ret; | ||
4695 | } | ||
4696 | |||
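log_new_dir_dentries() replaces literal recursion with a FIFO work queue: the starting directory is queued, and every subdirectory found behind a new dentry is appended to the tail, giving a breadth-first walk with constant stack depth. A reduced model of the loop (process_dir() is a hypothetical stand-in for the leaf scan above, which may append more entries to the list):

	LIST_HEAD(dir_list);
	struct btrfs_dir_list *elem;
	int ret = 0;

	elem = kmalloc(sizeof(*elem), GFP_NOFS);
	if (!elem)
		return -ENOMEM;
	elem->ino = btrfs_ino(start_inode);
	list_add_tail(&elem->list, &dir_list);

	while (!list_empty(&dir_list)) {
		elem = list_first_entry(&dir_list,
					struct btrfs_dir_list, list);
		if (!ret)	/* after an error, just drain and free */
			ret = process_dir(elem->ino, &dir_list);
		list_del(&elem->list);
		kfree(elem);
	}
	return ret;

Note that the dequeue-and-free step runs unconditionally, so even an early error leaves no queued elements behind.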
4375 | /* | 4697 | /* |
4376 | * helper function around btrfs_log_inode to make sure newly created | 4698 | * helper function around btrfs_log_inode to make sure newly created |
4377 | * parent directories also end up in the log. A minimal inode and backref | 4699 | * parent directories also end up in the log. A minimal inode and backref |
@@ -4394,6 +4716,8 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, | |||
4394 | const struct dentry * const first_parent = parent; | 4716 | const struct dentry * const first_parent = parent; |
4395 | const bool did_unlink = (BTRFS_I(inode)->last_unlink_trans > | 4717 | const bool did_unlink = (BTRFS_I(inode)->last_unlink_trans > |
4396 | last_committed); | 4718 | last_committed); |
4719 | bool log_dentries = false; | ||
4720 | struct inode *orig_inode = inode; | ||
4397 | 4721 | ||
4398 | sb = inode->i_sb; | 4722 | sb = inode->i_sb; |
4399 | 4723 | ||
@@ -4449,6 +4773,9 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, | |||
4449 | goto end_trans; | 4773 | goto end_trans; |
4450 | } | 4774 | } |
4451 | 4775 | ||
4776 | if (S_ISDIR(inode->i_mode) && ctx && ctx->log_new_dentries) | ||
4777 | log_dentries = true; | ||
4778 | |||
4452 | while (1) { | 4779 | while (1) { |
4453 | if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb) | 4780 | if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb) |
4454 | break; | 4781 | break; |
@@ -4485,7 +4812,10 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, | |||
4485 | dput(old_parent); | 4812 | dput(old_parent); |
4486 | old_parent = parent; | 4813 | old_parent = parent; |
4487 | } | 4814 | } |
4488 | ret = 0; | 4815 | if (log_dentries) |
4816 | ret = log_new_dir_dentries(trans, root, orig_inode, ctx); | ||
4817 | else | ||
4818 | ret = 0; | ||
4489 | end_trans: | 4819 | end_trans: |
4490 | dput(old_parent); | 4820 | dput(old_parent); |
4491 | if (ret < 0) { | 4821 | if (ret < 0) { |
diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h index 154990c26dcb..6916a781ea02 100644 --- a/fs/btrfs/tree-log.h +++ b/fs/btrfs/tree-log.h | |||
@@ -29,6 +29,7 @@ struct btrfs_log_ctx { | |||
29 | int log_ret; | 29 | int log_ret; |
30 | int log_transid; | 30 | int log_transid; |
31 | int io_err; | 31 | int io_err; |
32 | bool log_new_dentries; | ||
32 | struct list_head list; | 33 | struct list_head list; |
33 | }; | 34 | }; |
34 | 35 | ||
@@ -37,6 +38,7 @@ static inline void btrfs_init_log_ctx(struct btrfs_log_ctx *ctx) | |||
37 | ctx->log_ret = 0; | 38 | ctx->log_ret = 0; |
38 | ctx->log_transid = 0; | 39 | ctx->log_transid = 0; |
39 | ctx->io_err = 0; | 40 | ctx->io_err = 0; |
41 | ctx->log_new_dentries = false; | ||
40 | INIT_LIST_HEAD(&ctx->list); | 42 | INIT_LIST_HEAD(&ctx->list); |
41 | } | 43 | } |
42 | 44 | ||
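The context is stack-allocated per fsync and the new flag acts as an out-parameter: log_dir_items() raises it, and btrfs_log_inode_parent() reacts to it at the end. Roughly how a caller such as btrfs_sync_file() threads it through (a sketch of the call site, not the full function):

	struct btrfs_log_ctx ctx;

	btrfs_init_log_ctx(&ctx);	/* log_new_dentries starts false */
	ret = btrfs_log_dentry_safe(trans, root, dentry, start, end, &ctx);
	/*
	 * If dentry was a directory with entries created in this
	 * transaction, ctx.log_new_dentries was set along the way and
	 * log_new_dir_dentries() already ran before this returned.
	 */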
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 8222f6f74147..8bcd2a007517 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
@@ -366,8 +366,8 @@ loop_lock: | |||
366 | btrfsic_submit_bio(cur->bi_rw, cur); | 366 | btrfsic_submit_bio(cur->bi_rw, cur); |
367 | num_run++; | 367 | num_run++; |
368 | batch_run++; | 368 | batch_run++; |
369 | if (need_resched()) | 369 | |
370 | cond_resched(); | 370 | cond_resched(); |
371 | 371 | ||
372 | /* | 372 | /* |
373 | * we made progress, there is more work to do and the bdi | 373 | * we made progress, there is more work to do and the bdi |
@@ -400,8 +400,7 @@ loop_lock: | |||
400 | * against it before looping | 400 | * against it before looping |
401 | */ | 401 | */ |
402 | last_waited = ioc->last_waited; | 402 | last_waited = ioc->last_waited; |
403 | if (need_resched()) | 403 | cond_resched(); |
404 | cond_resched(); | ||
405 | continue; | 404 | continue; |
406 | } | 405 | } |
407 | spin_lock(&device->io_lock); | 406 | spin_lock(&device->io_lock); |
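The dropped `if (need_resched())` guards were redundant: cond_resched() performs that check itself and is effectively a no-op when no reschedule is pending, so the open-coded test only duplicated work. In effect:

	/* before: caller re-implements the check cond_resched() does */
	if (need_resched())
		cond_resched();

	/* after: equivalent behaviour, one call */
	cond_resched();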
@@ -609,8 +608,7 @@ error: | |||
609 | return ERR_PTR(-ENOMEM); | 608 | return ERR_PTR(-ENOMEM); |
610 | } | 609 | } |
611 | 610 | ||
612 | void btrfs_close_extra_devices(struct btrfs_fs_info *fs_info, | 611 | void btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices, int step) |
613 | struct btrfs_fs_devices *fs_devices, int step) | ||
614 | { | 612 | { |
615 | struct btrfs_device *device, *next; | 613 | struct btrfs_device *device, *next; |
616 | struct btrfs_device *latest_dev = NULL; | 614 | struct btrfs_device *latest_dev = NULL; |
@@ -1136,11 +1134,11 @@ int find_free_dev_extent(struct btrfs_trans_handle *trans, | |||
1136 | path = btrfs_alloc_path(); | 1134 | path = btrfs_alloc_path(); |
1137 | if (!path) | 1135 | if (!path) |
1138 | return -ENOMEM; | 1136 | return -ENOMEM; |
1139 | again: | 1137 | |
1140 | max_hole_start = search_start; | 1138 | max_hole_start = search_start; |
1141 | max_hole_size = 0; | 1139 | max_hole_size = 0; |
1142 | hole_size = 0; | ||
1143 | 1140 | ||
1141 | again: | ||
1144 | if (search_start >= search_end || device->is_tgtdev_for_dev_replace) { | 1142 | if (search_start >= search_end || device->is_tgtdev_for_dev_replace) { |
1145 | ret = -ENOSPC; | 1143 | ret = -ENOSPC; |
1146 | goto out; | 1144 | goto out; |
@@ -1233,21 +1231,23 @@ next: | |||
1233 | * allocated dev extents, and when shrinking the device, | 1231 | * allocated dev extents, and when shrinking the device, |
1234 | * search_end may be smaller than search_start. | 1232 | * search_end may be smaller than search_start. |
1235 | */ | 1233 | */ |
1236 | if (search_end > search_start) | 1234 | if (search_end > search_start) { |
1237 | hole_size = search_end - search_start; | 1235 | hole_size = search_end - search_start; |
1238 | 1236 | ||
1239 | if (hole_size > max_hole_size) { | 1237 | if (contains_pending_extent(trans, device, &search_start, |
1240 | max_hole_start = search_start; | 1238 | hole_size)) { |
1241 | max_hole_size = hole_size; | 1239 | btrfs_release_path(path); |
1242 | } | 1240 | goto again; |
1241 | } | ||
1243 | 1242 | ||
1244 | if (contains_pending_extent(trans, device, &search_start, hole_size)) { | 1243 | if (hole_size > max_hole_size) { |
1245 | btrfs_release_path(path); | 1244 | max_hole_start = search_start; |
1246 | goto again; | 1245 | max_hole_size = hole_size; |
1246 | } | ||
1247 | } | 1247 | } |
1248 | 1248 | ||
1249 | /* See above. */ | 1249 | /* See above. */ |
1250 | if (hole_size < num_bytes) | 1250 | if (max_hole_size < num_bytes) |
1251 | ret = -ENOSPC; | 1251 | ret = -ENOSPC; |
1252 | else | 1252 | else |
1253 | ret = 0; | 1253 | ret = 0; |
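The reordered tail of find_free_dev_extent() fixes two subtle points: the trailing hole must be tested against pending (not yet committed) extents before it may grow max_hole_size, and the final space check has to judge the best hole found anywhere on the device rather than whatever hole_size happened to hold last. Condensed, with the identifiers from the function above:

	if (search_end > search_start) {
		hole_size = search_end - search_start;
		/* a pending extent may overlap the trailing hole;
		 * restart the scan past it if so */
		if (contains_pending_extent(trans, device,
					    &search_start, hole_size)) {
			btrfs_release_path(path);
			goto again;
		}
		if (hole_size > max_hole_size) {
			max_hole_start = search_start;
			max_hole_size = hole_size;
		}
	}

	/* judge the best hole, not the last one examined */
	if (max_hole_size < num_bytes)
		ret = -ENOSPC;
	else
		ret = 0;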
@@ -2487,8 +2487,7 @@ int btrfs_grow_device(struct btrfs_trans_handle *trans, | |||
2487 | } | 2487 | } |
2488 | 2488 | ||
2489 | static int btrfs_free_chunk(struct btrfs_trans_handle *trans, | 2489 | static int btrfs_free_chunk(struct btrfs_trans_handle *trans, |
2490 | struct btrfs_root *root, | 2490 | struct btrfs_root *root, u64 chunk_objectid, |
2491 | u64 chunk_tree, u64 chunk_objectid, | ||
2492 | u64 chunk_offset) | 2491 | u64 chunk_offset) |
2493 | { | 2492 | { |
2494 | int ret; | 2493 | int ret; |
@@ -2580,7 +2579,6 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans, | |||
2580 | struct map_lookup *map; | 2579 | struct map_lookup *map; |
2581 | u64 dev_extent_len = 0; | 2580 | u64 dev_extent_len = 0; |
2582 | u64 chunk_objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; | 2581 | u64 chunk_objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; |
2583 | u64 chunk_tree = root->fs_info->chunk_root->objectid; | ||
2584 | int i, ret = 0; | 2582 | int i, ret = 0; |
2585 | 2583 | ||
2586 | /* Just in case */ | 2584 | /* Just in case */ |
@@ -2634,8 +2632,7 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans, | |||
2634 | } | 2632 | } |
2635 | } | 2633 | } |
2636 | } | 2634 | } |
2637 | ret = btrfs_free_chunk(trans, root, chunk_tree, chunk_objectid, | 2635 | ret = btrfs_free_chunk(trans, root, chunk_objectid, chunk_offset); |
2638 | chunk_offset); | ||
2639 | if (ret) { | 2636 | if (ret) { |
2640 | btrfs_abort_transaction(trans, root, ret); | 2637 | btrfs_abort_transaction(trans, root, ret); |
2641 | goto out; | 2638 | goto out; |
@@ -2664,8 +2661,8 @@ out: | |||
2664 | } | 2661 | } |
2665 | 2662 | ||
2666 | static int btrfs_relocate_chunk(struct btrfs_root *root, | 2663 | static int btrfs_relocate_chunk(struct btrfs_root *root, |
2667 | u64 chunk_tree, u64 chunk_objectid, | 2664 | u64 chunk_objectid, |
2668 | u64 chunk_offset) | 2665 | u64 chunk_offset) |
2669 | { | 2666 | { |
2670 | struct btrfs_root *extent_root; | 2667 | struct btrfs_root *extent_root; |
2671 | struct btrfs_trans_handle *trans; | 2668 | struct btrfs_trans_handle *trans; |
@@ -2707,7 +2704,6 @@ static int btrfs_relocate_sys_chunks(struct btrfs_root *root) | |||
2707 | struct btrfs_chunk *chunk; | 2704 | struct btrfs_chunk *chunk; |
2708 | struct btrfs_key key; | 2705 | struct btrfs_key key; |
2709 | struct btrfs_key found_key; | 2706 | struct btrfs_key found_key; |
2710 | u64 chunk_tree = chunk_root->root_key.objectid; | ||
2711 | u64 chunk_type; | 2707 | u64 chunk_type; |
2712 | bool retried = false; | 2708 | bool retried = false; |
2713 | int failed = 0; | 2709 | int failed = 0; |
@@ -2744,7 +2740,7 @@ again: | |||
2744 | btrfs_release_path(path); | 2740 | btrfs_release_path(path); |
2745 | 2741 | ||
2746 | if (chunk_type & BTRFS_BLOCK_GROUP_SYSTEM) { | 2742 | if (chunk_type & BTRFS_BLOCK_GROUP_SYSTEM) { |
2747 | ret = btrfs_relocate_chunk(chunk_root, chunk_tree, | 2743 | ret = btrfs_relocate_chunk(chunk_root, |
2748 | found_key.objectid, | 2744 | found_key.objectid, |
2749 | found_key.offset); | 2745 | found_key.offset); |
2750 | if (ret == -ENOSPC) | 2746 | if (ret == -ENOSPC) |
@@ -3022,7 +3018,7 @@ static int chunk_drange_filter(struct extent_buffer *leaf, | |||
3022 | 3018 | ||
3023 | stripe_offset = btrfs_stripe_offset(leaf, stripe); | 3019 | stripe_offset = btrfs_stripe_offset(leaf, stripe); |
3024 | stripe_length = btrfs_chunk_length(leaf, chunk); | 3020 | stripe_length = btrfs_chunk_length(leaf, chunk); |
3025 | do_div(stripe_length, factor); | 3021 | stripe_length = div_u64(stripe_length, factor); |
3026 | 3022 | ||
3027 | if (stripe_offset < bargs->pend && | 3023 | if (stripe_offset < bargs->pend && |
3028 | stripe_offset + stripe_length > bargs->pstart) | 3024 | stripe_offset + stripe_length > bargs->pstart) |
@@ -3255,7 +3251,6 @@ again: | |||
3255 | } | 3251 | } |
3256 | 3252 | ||
3257 | ret = btrfs_relocate_chunk(chunk_root, | 3253 | ret = btrfs_relocate_chunk(chunk_root, |
3258 | chunk_root->root_key.objectid, | ||
3259 | found_key.objectid, | 3254 | found_key.objectid, |
3260 | found_key.offset); | 3255 | found_key.offset); |
3261 | if (ret && ret != -ENOSPC) | 3256 | if (ret && ret != -ENOSPC) |
@@ -3957,7 +3952,6 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) | |||
3957 | struct btrfs_dev_extent *dev_extent = NULL; | 3952 | struct btrfs_dev_extent *dev_extent = NULL; |
3958 | struct btrfs_path *path; | 3953 | struct btrfs_path *path; |
3959 | u64 length; | 3954 | u64 length; |
3960 | u64 chunk_tree; | ||
3961 | u64 chunk_objectid; | 3955 | u64 chunk_objectid; |
3962 | u64 chunk_offset; | 3956 | u64 chunk_offset; |
3963 | int ret; | 3957 | int ret; |
@@ -4027,13 +4021,11 @@ again: | |||
4027 | break; | 4021 | break; |
4028 | } | 4022 | } |
4029 | 4023 | ||
4030 | chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent); | ||
4031 | chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent); | 4024 | chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent); |
4032 | chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent); | 4025 | chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent); |
4033 | btrfs_release_path(path); | 4026 | btrfs_release_path(path); |
4034 | 4027 | ||
4035 | ret = btrfs_relocate_chunk(root, chunk_tree, chunk_objectid, | 4028 | ret = btrfs_relocate_chunk(root, chunk_objectid, chunk_offset); |
4036 | chunk_offset); | ||
4037 | if (ret && ret != -ENOSPC) | 4029 | if (ret && ret != -ENOSPC) |
4038 | goto done; | 4030 | goto done; |
4039 | if (ret == -ENOSPC) | 4031 | if (ret == -ENOSPC) |
@@ -4131,7 +4123,7 @@ static int btrfs_cmp_device_info(const void *a, const void *b) | |||
4131 | return 0; | 4123 | return 0; |
4132 | } | 4124 | } |
4133 | 4125 | ||
4134 | static struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = { | 4126 | static const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = { |
4135 | [BTRFS_RAID_RAID10] = { | 4127 | [BTRFS_RAID_RAID10] = { |
4136 | .sub_stripes = 2, | 4128 | .sub_stripes = 2, |
4137 | .dev_stripes = 1, | 4129 | .dev_stripes = 1, |
@@ -4289,7 +4281,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, | |||
4289 | max_chunk_size = min(div_factor(fs_devices->total_rw_bytes, 1), | 4281 | max_chunk_size = min(div_factor(fs_devices->total_rw_bytes, 1), |
4290 | max_chunk_size); | 4282 | max_chunk_size); |
4291 | 4283 | ||
4292 | devices_info = kzalloc(sizeof(*devices_info) * fs_devices->rw_devices, | 4284 | devices_info = kcalloc(fs_devices->rw_devices, sizeof(*devices_info), |
4293 | GFP_NOFS); | 4285 | GFP_NOFS); |
4294 | if (!devices_info) | 4286 | if (!devices_info) |
4295 | return -ENOMEM; | 4287 | return -ENOMEM; |
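The kzalloc-to-kcalloc switch is about the unchecked multiplication: kzalloc(n * size, ...) can wrap on a huge n and return a buffer smaller than intended, while kcalloc(n, size, ...) detects the overflow and returns NULL. Side by side:

	struct btrfs_device_info *devices_info;

	/* unchecked multiply: an absurd rw_devices count could wrap
	 * and yield an undersized, zeroed buffer */
	devices_info = kzalloc(sizeof(*devices_info) * fs_devices->rw_devices,
			       GFP_NOFS);

	/* checked multiply: NULL on overflow as well as on OOM */
	devices_info = kcalloc(fs_devices->rw_devices, sizeof(*devices_info),
			       GFP_NOFS);
	if (!devices_info)
		return -ENOMEM;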
@@ -4400,8 +4392,8 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, | |||
4400 | */ | 4392 | */ |
4401 | if (stripe_size * data_stripes > max_chunk_size) { | 4393 | if (stripe_size * data_stripes > max_chunk_size) { |
4402 | u64 mask = (1ULL << 24) - 1; | 4394 | u64 mask = (1ULL << 24) - 1; |
4403 | stripe_size = max_chunk_size; | 4395 | |
4404 | do_div(stripe_size, data_stripes); | 4396 | stripe_size = div_u64(max_chunk_size, data_stripes); |
4405 | 4397 | ||
4406 | /* bump the answer up to a 16MB boundary */ | 4398 | /* bump the answer up to a 16MB boundary */ |
4407 | stripe_size = (stripe_size + mask) & ~mask; | 4399 | stripe_size = (stripe_size + mask) & ~mask; |
@@ -4413,10 +4405,10 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, | |||
4413 | stripe_size = devices_info[ndevs-1].max_avail; | 4405 | stripe_size = devices_info[ndevs-1].max_avail; |
4414 | } | 4406 | } |
4415 | 4407 | ||
4416 | do_div(stripe_size, dev_stripes); | 4408 | stripe_size = div_u64(stripe_size, dev_stripes); |
4417 | 4409 | ||
4418 | /* align to BTRFS_STRIPE_LEN */ | 4410 | /* align to BTRFS_STRIPE_LEN */ |
4419 | do_div(stripe_size, raid_stripe_len); | 4411 | stripe_size = div_u64(stripe_size, raid_stripe_len); |
4420 | stripe_size *= raid_stripe_len; | 4412 | stripe_size *= raid_stripe_len; |
4421 | 4413 | ||
4422 | map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS); | 4414 | map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS); |
@@ -4954,7 +4946,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, | |||
4954 | u64 stripe_nr_orig; | 4946 | u64 stripe_nr_orig; |
4955 | u64 stripe_nr_end; | 4947 | u64 stripe_nr_end; |
4956 | u64 stripe_len; | 4948 | u64 stripe_len; |
4957 | int stripe_index; | 4949 | u32 stripe_index; |
4958 | int i; | 4950 | int i; |
4959 | int ret = 0; | 4951 | int ret = 0; |
4960 | int num_stripes; | 4952 | int num_stripes; |
@@ -4995,7 +4987,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, | |||
4995 | * stripe_nr counts the total number of stripes we have to stride | 4987 | * stripe_nr counts the total number of stripes we have to stride |
4996 | * to get to this block | 4988 | * to get to this block |
4997 | */ | 4989 | */ |
4998 | do_div(stripe_nr, stripe_len); | 4990 | stripe_nr = div64_u64(stripe_nr, stripe_len); |
4999 | 4991 | ||
5000 | stripe_offset = stripe_nr * stripe_len; | 4992 | stripe_offset = stripe_nr * stripe_len; |
5001 | BUG_ON(offset < stripe_offset); | 4993 | BUG_ON(offset < stripe_offset); |
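The do_div() conversions in this file are easy to misread because the macro and the helpers have different shapes: do_div(x, y) divides in place (x becomes the quotient) and returns the remainder, while the linux/math64.h functions return the quotient. The mapping used by these hunks, with concrete values for illustration:

	u64 x = 1000003, q;
	u32 rem;

	rem = do_div(x, 16);			/* x = 62500, rem = 3 */
	q = div_u64(1000003, 16);		/* q = 62500 (u32 divisor) */
	q = div64_u64(1ULL << 40, 1ULL << 33);	/* q = 128 (u64 divisor) */
	q = div_u64_rem(1000003, 16, &rem);	/* q = 62500, rem = 3 */

Picking the right helper also documents the divisor width, which is why several variables switch from int to u32 alongside.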
@@ -5011,7 +5003,8 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, | |||
5011 | /* allow a write of a full stripe, but make sure we don't | 5003 | /* allow a write of a full stripe, but make sure we don't |
5012 | * allow straddling of stripes | 5004 | * allow straddling of stripes |
5013 | */ | 5005 | */ |
5014 | do_div(raid56_full_stripe_start, full_stripe_len); | 5006 | raid56_full_stripe_start = div64_u64(raid56_full_stripe_start, |
5007 | full_stripe_len); | ||
5015 | raid56_full_stripe_start *= full_stripe_len; | 5008 | raid56_full_stripe_start *= full_stripe_len; |
5016 | } | 5009 | } |
5017 | 5010 | ||
@@ -5136,7 +5129,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, | |||
5136 | stripe_index = 0; | 5129 | stripe_index = 0; |
5137 | stripe_nr_orig = stripe_nr; | 5130 | stripe_nr_orig = stripe_nr; |
5138 | stripe_nr_end = ALIGN(offset + *length, map->stripe_len); | 5131 | stripe_nr_end = ALIGN(offset + *length, map->stripe_len); |
5139 | do_div(stripe_nr_end, map->stripe_len); | 5132 | stripe_nr_end = div_u64(stripe_nr_end, map->stripe_len); |
5140 | stripe_end_offset = stripe_nr_end * map->stripe_len - | 5133 | stripe_end_offset = stripe_nr_end * map->stripe_len - |
5141 | (offset + *length); | 5134 | (offset + *length); |
5142 | 5135 | ||
@@ -5144,7 +5137,8 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, | |||
5144 | if (rw & REQ_DISCARD) | 5137 | if (rw & REQ_DISCARD) |
5145 | num_stripes = min_t(u64, map->num_stripes, | 5138 | num_stripes = min_t(u64, map->num_stripes, |
5146 | stripe_nr_end - stripe_nr_orig); | 5139 | stripe_nr_end - stripe_nr_orig); |
5147 | stripe_index = do_div(stripe_nr, map->num_stripes); | 5140 | stripe_nr = div_u64_rem(stripe_nr, map->num_stripes, |
5141 | &stripe_index); | ||
5148 | if (!(rw & (REQ_WRITE | REQ_DISCARD | REQ_GET_READ_MIRRORS))) | 5142 | if (!(rw & (REQ_WRITE | REQ_DISCARD | REQ_GET_READ_MIRRORS))) |
5149 | mirror_num = 1; | 5143 | mirror_num = 1; |
5150 | } else if (map->type & BTRFS_BLOCK_GROUP_RAID1) { | 5144 | } else if (map->type & BTRFS_BLOCK_GROUP_RAID1) { |
@@ -5170,9 +5164,9 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, | |||
5170 | } | 5164 | } |
5171 | 5165 | ||
5172 | } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) { | 5166 | } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) { |
5173 | int factor = map->num_stripes / map->sub_stripes; | 5167 | u32 factor = map->num_stripes / map->sub_stripes; |
5174 | 5168 | ||
5175 | stripe_index = do_div(stripe_nr, factor); | 5169 | stripe_nr = div_u64_rem(stripe_nr, factor, &stripe_index); |
5176 | stripe_index *= map->sub_stripes; | 5170 | stripe_index *= map->sub_stripes; |
5177 | 5171 | ||
5178 | if (rw & (REQ_WRITE | REQ_GET_READ_MIRRORS)) | 5172 | if (rw & (REQ_WRITE | REQ_GET_READ_MIRRORS)) |
@@ -5198,8 +5192,8 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, | |||
5198 | ((rw & (REQ_WRITE | REQ_GET_READ_MIRRORS)) || | 5192 | ((rw & (REQ_WRITE | REQ_GET_READ_MIRRORS)) || |
5199 | mirror_num > 1)) { | 5193 | mirror_num > 1)) { |
5200 | /* push stripe_nr back to the start of the full stripe */ | 5194 | /* push stripe_nr back to the start of the full stripe */ |
5201 | stripe_nr = raid56_full_stripe_start; | 5195 | stripe_nr = div_u64(raid56_full_stripe_start, |
5202 | do_div(stripe_nr, stripe_len * nr_data_stripes(map)); | 5196 | stripe_len * nr_data_stripes(map)); |
5203 | 5197 | ||
5204 | /* RAID[56] write or recovery. Return all stripes */ | 5198 | /* RAID[56] write or recovery. Return all stripes */ |
5205 | num_stripes = map->num_stripes; | 5199 | num_stripes = map->num_stripes; |
@@ -5209,32 +5203,32 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, | |||
5209 | stripe_index = 0; | 5203 | stripe_index = 0; |
5210 | stripe_offset = 0; | 5204 | stripe_offset = 0; |
5211 | } else { | 5205 | } else { |
5212 | u64 tmp; | ||
5213 | |||
5214 | /* | 5206 | /* |
5215 | * Mirror #0 or #1 means the original data block. | 5207 | * Mirror #0 or #1 means the original data block. |
5216 | * Mirror #2 is RAID5 parity block. | 5208 | * Mirror #2 is RAID5 parity block. |
5217 | * Mirror #3 is RAID6 Q block. | 5209 | * Mirror #3 is RAID6 Q block. |
5218 | */ | 5210 | */ |
5219 | stripe_index = do_div(stripe_nr, nr_data_stripes(map)); | 5211 | stripe_nr = div_u64_rem(stripe_nr, |
5212 | nr_data_stripes(map), &stripe_index); | ||
5220 | if (mirror_num > 1) | 5213 | if (mirror_num > 1) |
5221 | stripe_index = nr_data_stripes(map) + | 5214 | stripe_index = nr_data_stripes(map) + |
5222 | mirror_num - 2; | 5215 | mirror_num - 2; |
5223 | 5216 | ||
5224 | /* We distribute the parity blocks across stripes */ | 5217 | /* We distribute the parity blocks across stripes */ |
5225 | tmp = stripe_nr + stripe_index; | 5218 | div_u64_rem(stripe_nr + stripe_index, map->num_stripes, |
5226 | stripe_index = do_div(tmp, map->num_stripes); | 5219 | &stripe_index); |
5227 | if (!(rw & (REQ_WRITE | REQ_DISCARD | | 5220 | if (!(rw & (REQ_WRITE | REQ_DISCARD | |
5228 | REQ_GET_READ_MIRRORS)) && mirror_num <= 1) | 5221 | REQ_GET_READ_MIRRORS)) && mirror_num <= 1) |
5229 | mirror_num = 1; | 5222 | mirror_num = 1; |
5230 | } | 5223 | } |
5231 | } else { | 5224 | } else { |
5232 | /* | 5225 | /* |
5233 | * after this do_div call, stripe_nr is the number of stripes | 5226 | * after this, stripe_nr is the number of stripes on this |
5234 | * on this device we have to walk to find the data, and | 5227 | * device we have to walk to find the data, and stripe_index is |
5235 | * stripe_index is the number of our device in the stripe array | 5228 | * the number of our device in the stripe array |
5236 | */ | 5229 | */ |
5237 | stripe_index = do_div(stripe_nr, map->num_stripes); | 5230 | stripe_nr = div_u64_rem(stripe_nr, map->num_stripes, |
5231 | &stripe_index); | ||
5238 | mirror_num = stripe_index + 1; | 5232 | mirror_num = stripe_index + 1; |
5239 | } | 5233 | } |
5240 | BUG_ON(stripe_index >= map->num_stripes); | 5234 | BUG_ON(stripe_index >= map->num_stripes); |
@@ -5261,7 +5255,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, | |||
5261 | need_raid_map && ((rw & (REQ_WRITE | REQ_GET_READ_MIRRORS)) || | 5255 | need_raid_map && ((rw & (REQ_WRITE | REQ_GET_READ_MIRRORS)) || |
5262 | mirror_num > 1)) { | 5256 | mirror_num > 1)) { |
5263 | u64 tmp; | 5257 | u64 tmp; |
5264 | int i, rot; | 5258 | unsigned rot; |
5265 | 5259 | ||
5266 | bbio->raid_map = (u64 *)((void *)bbio->stripes + | 5260 | bbio->raid_map = (u64 *)((void *)bbio->stripes + |
5267 | sizeof(struct btrfs_bio_stripe) * | 5261 | sizeof(struct btrfs_bio_stripe) * |
@@ -5269,8 +5263,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, | |||
5269 | sizeof(int) * tgtdev_indexes); | 5263 | sizeof(int) * tgtdev_indexes); |
5270 | 5264 | ||
5271 | /* Work out the disk rotation on this stripe-set */ | 5265 | /* Work out the disk rotation on this stripe-set */ |
5272 | tmp = stripe_nr; | 5266 | div_u64_rem(stripe_nr, num_stripes, &rot); |
5273 | rot = do_div(tmp, num_stripes); | ||
5274 | 5267 | ||
5275 | /* Fill in the logical address of each stripe */ | 5268 | /* Fill in the logical address of each stripe */ |
5276 | tmp = stripe_nr * nr_data_stripes(map); | 5269 | tmp = stripe_nr * nr_data_stripes(map); |
@@ -5285,8 +5278,8 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, | |||
5285 | } | 5278 | } |
5286 | 5279 | ||
5287 | if (rw & REQ_DISCARD) { | 5280 | if (rw & REQ_DISCARD) { |
5288 | int factor = 0; | 5281 | u32 factor = 0; |
5289 | int sub_stripes = 0; | 5282 | u32 sub_stripes = 0; |
5290 | u64 stripes_per_dev = 0; | 5283 | u64 stripes_per_dev = 0; |
5291 | u32 remaining_stripes = 0; | 5284 | u32 remaining_stripes = 0; |
5292 | u32 last_stripe = 0; | 5285 | u32 last_stripe = 0; |
@@ -5437,9 +5430,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, | |||
5437 | } | 5430 | } |
5438 | } | 5431 | } |
5439 | if (found) { | 5432 | if (found) { |
5440 | u64 length = map->stripe_len; | 5433 | if (physical_of_found + map->stripe_len <= |
5441 | |||
5442 | if (physical_of_found + length <= | ||
5443 | dev_replace->cursor_left) { | 5434 | dev_replace->cursor_left) { |
5444 | struct btrfs_bio_stripe *tgtdev_stripe = | 5435 | struct btrfs_bio_stripe *tgtdev_stripe = |
5445 | bbio->stripes + num_stripes; | 5436 | bbio->stripes + num_stripes; |
@@ -5535,15 +5526,15 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, | |||
5535 | rmap_len = map->stripe_len; | 5526 | rmap_len = map->stripe_len; |
5536 | 5527 | ||
5537 | if (map->type & BTRFS_BLOCK_GROUP_RAID10) | 5528 | if (map->type & BTRFS_BLOCK_GROUP_RAID10) |
5538 | do_div(length, map->num_stripes / map->sub_stripes); | 5529 | length = div_u64(length, map->num_stripes / map->sub_stripes); |
5539 | else if (map->type & BTRFS_BLOCK_GROUP_RAID0) | 5530 | else if (map->type & BTRFS_BLOCK_GROUP_RAID0) |
5540 | do_div(length, map->num_stripes); | 5531 | length = div_u64(length, map->num_stripes); |
5541 | else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) { | 5532 | else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) { |
5542 | do_div(length, nr_data_stripes(map)); | 5533 | length = div_u64(length, nr_data_stripes(map)); |
5543 | rmap_len = map->stripe_len * nr_data_stripes(map); | 5534 | rmap_len = map->stripe_len * nr_data_stripes(map); |
5544 | } | 5535 | } |
5545 | 5536 | ||
5546 | buf = kzalloc(sizeof(u64) * map->num_stripes, GFP_NOFS); | 5537 | buf = kcalloc(map->num_stripes, sizeof(u64), GFP_NOFS); |
5547 | BUG_ON(!buf); /* -ENOMEM */ | 5538 | BUG_ON(!buf); /* -ENOMEM */ |
5548 | 5539 | ||
5549 | for (i = 0; i < map->num_stripes; i++) { | 5540 | for (i = 0; i < map->num_stripes; i++) { |
@@ -5554,11 +5545,11 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, | |||
5554 | continue; | 5545 | continue; |
5555 | 5546 | ||
5556 | stripe_nr = physical - map->stripes[i].physical; | 5547 | stripe_nr = physical - map->stripes[i].physical; |
5557 | do_div(stripe_nr, map->stripe_len); | 5548 | stripe_nr = div_u64(stripe_nr, map->stripe_len); |
5558 | 5549 | ||
5559 | if (map->type & BTRFS_BLOCK_GROUP_RAID10) { | 5550 | if (map->type & BTRFS_BLOCK_GROUP_RAID10) { |
5560 | stripe_nr = stripe_nr * map->num_stripes + i; | 5551 | stripe_nr = stripe_nr * map->num_stripes + i; |
5561 | do_div(stripe_nr, map->sub_stripes); | 5552 | stripe_nr = div_u64(stripe_nr, map->sub_stripes); |
5562 | } else if (map->type & BTRFS_BLOCK_GROUP_RAID0) { | 5553 | } else if (map->type & BTRFS_BLOCK_GROUP_RAID0) { |
5563 | stripe_nr = stripe_nr * map->num_stripes + i; | 5554 | stripe_nr = stripe_nr * map->num_stripes + i; |
5564 | } /* else if RAID[56], multiply by nr_data_stripes(). | 5555 | } /* else if RAID[56], multiply by nr_data_stripes(). |
@@ -5835,8 +5826,8 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, | |||
5835 | u64 length = 0; | 5826 | u64 length = 0; |
5836 | u64 map_length; | 5827 | u64 map_length; |
5837 | int ret; | 5828 | int ret; |
5838 | int dev_nr = 0; | 5829 | int dev_nr; |
5839 | int total_devs = 1; | 5830 | int total_devs; |
5840 | struct btrfs_bio *bbio = NULL; | 5831 | struct btrfs_bio *bbio = NULL; |
5841 | 5832 | ||
5842 | length = bio->bi_iter.bi_size; | 5833 | length = bio->bi_iter.bi_size; |
@@ -5877,11 +5868,10 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, | |||
5877 | BUG(); | 5868 | BUG(); |
5878 | } | 5869 | } |
5879 | 5870 | ||
5880 | while (dev_nr < total_devs) { | 5871 | for (dev_nr = 0; dev_nr < total_devs; dev_nr++) { |
5881 | dev = bbio->stripes[dev_nr].dev; | 5872 | dev = bbio->stripes[dev_nr].dev; |
5882 | if (!dev || !dev->bdev || (rw & WRITE && !dev->writeable)) { | 5873 | if (!dev || !dev->bdev || (rw & WRITE && !dev->writeable)) { |
5883 | bbio_error(bbio, first_bio, logical); | 5874 | bbio_error(bbio, first_bio, logical); |
5884 | dev_nr++; | ||
5885 | continue; | 5875 | continue; |
5886 | } | 5876 | } |
5887 | 5877 | ||
@@ -5894,7 +5884,6 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, | |||
5894 | ret = breakup_stripe_bio(root, bbio, first_bio, dev, | 5884 | ret = breakup_stripe_bio(root, bbio, first_bio, dev, |
5895 | dev_nr, rw, async_submit); | 5885 | dev_nr, rw, async_submit); |
5896 | BUG_ON(ret); | 5886 | BUG_ON(ret); |
5897 | dev_nr++; | ||
5898 | continue; | 5887 | continue; |
5899 | } | 5888 | } |
5900 | 5889 | ||
@@ -5909,7 +5898,6 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, | |||
5909 | submit_stripe_bio(root, bbio, bio, | 5898 | submit_stripe_bio(root, bbio, bio, |
5910 | bbio->stripes[dev_nr].physical, dev_nr, rw, | 5899 | bbio->stripes[dev_nr].physical, dev_nr, rw, |
5911 | async_submit); | 5900 | async_submit); |
5912 | dev_nr++; | ||
5913 | } | 5901 | } |
5914 | btrfs_bio_counter_dec(root->fs_info); | 5902 | btrfs_bio_counter_dec(root->fs_info); |
5915 | return 0; | 5903 | return 0; |
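Rewriting the submission loop as a for statement is more than cosmetic: in the while form every continue path had to carry its own dev_nr++, and a missed increment means spinning on the same stripe forever. Schematically (skip_this_stripe() and submit() are hypothetical stand-ins):

	/* before: each early 'continue' needs its own increment */
	while (dev_nr < total_devs) {
		if (skip_this_stripe()) {
			dev_nr++;	/* easy to forget */
			continue;
		}
		submit(dev_nr);
		dev_nr++;
	}

	/* after: the increment runs on every path, continue included */
	for (dev_nr = 0; dev_nr < total_devs; dev_nr++) {
		if (skip_this_stripe())
			continue;
		submit(dev_nr);
	}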
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 83069dec6898..ebc31331a837 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h | |||
@@ -421,8 +421,7 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, | |||
421 | int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, | 421 | int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, |
422 | struct btrfs_fs_devices **fs_devices_ret); | 422 | struct btrfs_fs_devices **fs_devices_ret); |
423 | int btrfs_close_devices(struct btrfs_fs_devices *fs_devices); | 423 | int btrfs_close_devices(struct btrfs_fs_devices *fs_devices); |
424 | void btrfs_close_extra_devices(struct btrfs_fs_info *fs_info, | 424 | void btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices, int step); |
425 | struct btrfs_fs_devices *fs_devices, int step); | ||
426 | int btrfs_find_device_missing_or_by_path(struct btrfs_root *root, | 425 | int btrfs_find_device_missing_or_by_path(struct btrfs_root *root, |
427 | char *device_path, | 426 | char *device_path, |
428 | struct btrfs_device **device); | 427 | struct btrfs_device **device); |
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index 883b93623bc5..45ea704be030 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c | |||
@@ -364,22 +364,42 @@ const struct xattr_handler *btrfs_xattr_handlers[] = { | |||
364 | /* | 364 | /* |
365 | * Check if the attribute is in a supported namespace. | 365 | * Check if the attribute is in a supported namespace. |
366 | * | 366 | * |
367 | * This applied after the check for the synthetic attributes in the system | 367 | * This is applied after the check for the synthetic attributes in the system |
368 | * namespace. | 368 | * namespace. |
369 | */ | 369 | */ |
370 | static bool btrfs_is_valid_xattr(const char *name) | 370 | static int btrfs_is_valid_xattr(const char *name) |
371 | { | 371 | { |
372 | return !strncmp(name, XATTR_SECURITY_PREFIX, | 372 | int len = strlen(name); |
373 | XATTR_SECURITY_PREFIX_LEN) || | 373 | int prefixlen = 0; |
374 | !strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN) || | 374 | |
375 | !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) || | 375 | if (!strncmp(name, XATTR_SECURITY_PREFIX, |
376 | !strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) || | 376 | XATTR_SECURITY_PREFIX_LEN)) |
377 | !strncmp(name, XATTR_BTRFS_PREFIX, XATTR_BTRFS_PREFIX_LEN); | 377 | prefixlen = XATTR_SECURITY_PREFIX_LEN; |
378 | else if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN)) | ||
379 | prefixlen = XATTR_SYSTEM_PREFIX_LEN; | ||
380 | else if (!strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN)) | ||
381 | prefixlen = XATTR_TRUSTED_PREFIX_LEN; | ||
382 | else if (!strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN)) | ||
383 | prefixlen = XATTR_USER_PREFIX_LEN; | ||
384 | else if (!strncmp(name, XATTR_BTRFS_PREFIX, XATTR_BTRFS_PREFIX_LEN)) | ||
385 | prefixlen = XATTR_BTRFS_PREFIX_LEN; | ||
386 | else | ||
387 | return -EOPNOTSUPP; | ||
388 | |||
389 | /* | ||
390 | * The name cannot consist of just a prefix | ||
391 | */ | ||
392 | if (len <= prefixlen) | ||
393 | return -EINVAL; | ||
394 | |||
395 | return 0; | ||
378 | } | 396 | } |
379 | 397 | ||
380 | ssize_t btrfs_getxattr(struct dentry *dentry, const char *name, | 398 | ssize_t btrfs_getxattr(struct dentry *dentry, const char *name, |
381 | void *buffer, size_t size) | 399 | void *buffer, size_t size) |
382 | { | 400 | { |
401 | int ret; | ||
402 | |||
383 | /* | 403 | /* |
384 | * If this is a request for a synthetic attribute in the system.* | 404 | * If this is a request for a synthetic attribute in the system.* |
385 | * namespace use the generic infrastructure to resolve a handler | 405 | * namespace use the generic infrastructure to resolve a handler |
@@ -388,8 +408,9 @@ ssize_t btrfs_getxattr(struct dentry *dentry, const char *name, | |||
388 | if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN)) | 408 | if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN)) |
389 | return generic_getxattr(dentry, name, buffer, size); | 409 | return generic_getxattr(dentry, name, buffer, size); |
390 | 410 | ||
391 | if (!btrfs_is_valid_xattr(name)) | 411 | ret = btrfs_is_valid_xattr(name); |
392 | return -EOPNOTSUPP; | 412 | if (ret) |
413 | return ret; | ||
393 | return __btrfs_getxattr(dentry->d_inode, name, buffer, size); | 414 | return __btrfs_getxattr(dentry->d_inode, name, buffer, size); |
394 | } | 415 | } |
395 | 416 | ||
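Returning an errno instead of a bool lets the helper distinguish an unknown namespace from a known prefix with nothing after it. Expected behaviour under the new rules (illustrative calls only):

	ret = btrfs_is_valid_xattr("user.myattr");	/* 0: valid name */
	ret = btrfs_is_valid_xattr("user.");		/* -EINVAL: bare prefix */
	ret = btrfs_is_valid_xattr("foo.myattr");	/* -EOPNOTSUPP: unknown
							   namespace */

The callers below then forward the specific error instead of collapsing everything into -EOPNOTSUPP.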
@@ -397,6 +418,7 @@ int btrfs_setxattr(struct dentry *dentry, const char *name, const void *value, | |||
397 | size_t size, int flags) | 418 | size_t size, int flags) |
398 | { | 419 | { |
399 | struct btrfs_root *root = BTRFS_I(dentry->d_inode)->root; | 420 | struct btrfs_root *root = BTRFS_I(dentry->d_inode)->root; |
421 | int ret; | ||
400 | 422 | ||
401 | /* | 423 | /* |
402 | * The permission on security.* and system.* is not checked | 424 | * The permission on security.* and system.* is not checked |
@@ -413,8 +435,9 @@ int btrfs_setxattr(struct dentry *dentry, const char *name, const void *value, | |||
413 | if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN)) | 435 | if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN)) |
414 | return generic_setxattr(dentry, name, value, size, flags); | 436 | return generic_setxattr(dentry, name, value, size, flags); |
415 | 437 | ||
416 | if (!btrfs_is_valid_xattr(name)) | 438 | ret = btrfs_is_valid_xattr(name); |
417 | return -EOPNOTSUPP; | 439 | if (ret) |
440 | return ret; | ||
418 | 441 | ||
419 | if (!strncmp(name, XATTR_BTRFS_PREFIX, XATTR_BTRFS_PREFIX_LEN)) | 442 | if (!strncmp(name, XATTR_BTRFS_PREFIX, XATTR_BTRFS_PREFIX_LEN)) |
420 | return btrfs_set_prop(dentry->d_inode, name, | 443 | return btrfs_set_prop(dentry->d_inode, name, |
@@ -430,6 +453,7 @@ int btrfs_setxattr(struct dentry *dentry, const char *name, const void *value, | |||
430 | int btrfs_removexattr(struct dentry *dentry, const char *name) | 453 | int btrfs_removexattr(struct dentry *dentry, const char *name) |
431 | { | 454 | { |
432 | struct btrfs_root *root = BTRFS_I(dentry->d_inode)->root; | 455 | struct btrfs_root *root = BTRFS_I(dentry->d_inode)->root; |
456 | int ret; | ||
433 | 457 | ||
434 | /* | 458 | /* |
435 | * The permission on security.* and system.* is not checked | 459 | * The permission on security.* and system.* is not checked |
@@ -446,8 +470,9 @@ int btrfs_removexattr(struct dentry *dentry, const char *name) | |||
446 | if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN)) | 470 | if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN)) |
447 | return generic_removexattr(dentry, name); | 471 | return generic_removexattr(dentry, name); |
448 | 472 | ||
449 | if (!btrfs_is_valid_xattr(name)) | 473 | ret = btrfs_is_valid_xattr(name); |
450 | return -EOPNOTSUPP; | 474 | if (ret) |
475 | return ret; | ||
451 | 476 | ||
452 | if (!strncmp(name, XATTR_BTRFS_PREFIX, XATTR_BTRFS_PREFIX_LEN)) | 477 | if (!strncmp(name, XATTR_BTRFS_PREFIX, XATTR_BTRFS_PREFIX_LEN)) |
453 | return btrfs_set_prop(dentry->d_inode, name, | 478 | return btrfs_set_prop(dentry->d_inode, name, |
diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c index fb22fd8d8fb8..82990b8f872b 100644 --- a/fs/btrfs/zlib.c +++ b/fs/btrfs/zlib.c | |||
@@ -403,7 +403,7 @@ next: | |||
403 | return ret; | 403 | return ret; |
404 | } | 404 | } |
405 | 405 | ||
406 | struct btrfs_compress_op btrfs_zlib_compress = { | 406 | const struct btrfs_compress_op btrfs_zlib_compress = { |
407 | .alloc_workspace = zlib_alloc_workspace, | 407 | .alloc_workspace = zlib_alloc_workspace, |
408 | .free_workspace = zlib_free_workspace, | 408 | .free_workspace = zlib_free_workspace, |
409 | .compress_pages = zlib_compress_pages, | 409 | .compress_pages = zlib_compress_pages, |
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 155ab9c0246b..e162bcd105ee 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c | |||
@@ -1146,6 +1146,10 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping, | |||
1146 | inode, page, (int)pos, (int)len); | 1146 | inode, page, (int)pos, (int)len); |
1147 | 1147 | ||
1148 | r = ceph_update_writeable_page(file, pos, len, page); | 1148 | r = ceph_update_writeable_page(file, pos, len, page); |
1149 | if (r < 0) | ||
1150 | page_cache_release(page); | ||
1151 | else | ||
1152 | *pagep = page; | ||
1149 | } while (r == -EAGAIN); | 1153 | } while (r == -EAGAIN); |
1150 | 1154 | ||
1151 | return r; | 1155 | return r; |
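The fix closes a page reference leak in ->write_begin(): the function must either hand the locked page back through *pagep on success or drop the reference it took on failure (a further -EAGAIN iteration then grabs a fresh page). A sketch of the repaired loop, assuming the page comes from grab_cache_page_write_begin() as earlier in this function:

	do {
		page = grab_cache_page_write_begin(mapping, index, 0);
		if (!page)
			return -ENOMEM;

		r = ceph_update_writeable_page(file, pos, len, page);
		if (r < 0)
			page_cache_release(page);	/* drop ref on failure */
		else
			*pagep = page;			/* hand it to the caller */
	} while (r == -EAGAIN);

	return r;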
@@ -1534,19 +1538,27 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page) | |||
1534 | 1538 | ||
1535 | osd_req_op_extent_osd_data_pages(req, 1, &page, len, 0, false, false); | 1539 | osd_req_op_extent_osd_data_pages(req, 1, &page, len, 0, false, false); |
1536 | 1540 | ||
1537 | err = osd_req_op_xattr_init(req, 0, CEPH_OSD_OP_CMPXATTR, | 1541 | { |
1538 | "inline_version", &inline_version, | 1542 | __le64 xattr_buf = cpu_to_le64(inline_version); |
1539 | sizeof(inline_version), | 1543 | err = osd_req_op_xattr_init(req, 0, CEPH_OSD_OP_CMPXATTR, |
1540 | CEPH_OSD_CMPXATTR_OP_GT, | 1544 | "inline_version", &xattr_buf, |
1541 | CEPH_OSD_CMPXATTR_MODE_U64); | 1545 | sizeof(xattr_buf), |
1542 | if (err) | 1546 | CEPH_OSD_CMPXATTR_OP_GT, |
1543 | goto out_put; | 1547 | CEPH_OSD_CMPXATTR_MODE_U64); |
1544 | 1548 | if (err) | |
1545 | err = osd_req_op_xattr_init(req, 2, CEPH_OSD_OP_SETXATTR, | 1549 | goto out_put; |
1546 | "inline_version", &inline_version, | 1550 | } |
1547 | sizeof(inline_version), 0, 0); | 1551 | |
1548 | if (err) | 1552 | { |
1549 | goto out_put; | 1553 | char xattr_buf[32]; |
1554 | int xattr_len = snprintf(xattr_buf, sizeof(xattr_buf), | ||
1555 | "%llu", inline_version); | ||
1556 | err = osd_req_op_xattr_init(req, 2, CEPH_OSD_OP_SETXATTR, | ||
1557 | "inline_version", | ||
1558 | xattr_buf, xattr_len, 0, 0); | ||
1559 | if (err) | ||
1560 | goto out_put; | ||
1561 | } | ||
1550 | 1562 | ||
1551 | ceph_osdc_build_request(req, 0, NULL, CEPH_NOSNAP, &inode->i_mtime); | 1563 | ceph_osdc_build_request(req, 0, NULL, CEPH_NOSNAP, &inode->i_mtime); |
1552 | err = ceph_osdc_start_request(&fsc->client->osdc, req, false); | 1564 | err = ceph_osdc_start_request(&fsc->client->osdc, req, false); |
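After this hunk the inline version is marshalled in two different forms, matching what each OSD op expects: the CMPXATTR guard sends a fixed-width little-endian u64 for the CEPH_OSD_CMPXATTR_MODE_U64 comparison, while the SETXATTR that records the new version stores a decimal string. Previously the raw host-endian u64 was handed to both, which was only coincidentally correct. The essence of the conversion:

	__le64 cmp_buf = cpu_to_le64(inline_version);	/* compare operand */

	char set_buf[32];
	int set_len = snprintf(set_buf, sizeof(set_buf), "%llu",
			       inline_version);		/* stored value */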
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 8172775428a0..11631c4c7d14 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c | |||
@@ -896,6 +896,18 @@ int ceph_is_any_caps(struct inode *inode) | |||
896 | return ret; | 896 | return ret; |
897 | } | 897 | } |
898 | 898 | ||
899 | static void drop_inode_snap_realm(struct ceph_inode_info *ci) | ||
900 | { | ||
901 | struct ceph_snap_realm *realm = ci->i_snap_realm; | ||
902 | spin_lock(&realm->inodes_with_caps_lock); | ||
903 | list_del_init(&ci->i_snap_realm_item); | ||
904 | ci->i_snap_realm_counter++; | ||
905 | ci->i_snap_realm = NULL; | ||
906 | spin_unlock(&realm->inodes_with_caps_lock); | ||
907 | ceph_put_snap_realm(ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc, | ||
908 | realm); | ||
909 | } | ||
910 | |||
899 | /* | 911 | /* |
900 | * Remove a cap. Take steps to deal with a racing iterate_session_caps. | 912 | * Remove a cap. Take steps to deal with a racing iterate_session_caps. |
901 | * | 913 | * |
@@ -946,15 +958,13 @@ void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release) | |||
946 | if (removed) | 958 | if (removed) |
947 | ceph_put_cap(mdsc, cap); | 959 | ceph_put_cap(mdsc, cap); |
948 | 960 | ||
949 | if (!__ceph_is_any_caps(ci) && ci->i_snap_realm) { | 961 | /* when reconnect denied, we remove session caps forcibly, |
950 | struct ceph_snap_realm *realm = ci->i_snap_realm; | 962 | * i_wr_ref can be non-zero. If there are ongoing writes, |
951 | spin_lock(&realm->inodes_with_caps_lock); | 963 | * keep i_snap_realm. |
952 | list_del_init(&ci->i_snap_realm_item); | 964 | */ |
953 | ci->i_snap_realm_counter++; | 965 | if (!__ceph_is_any_caps(ci) && ci->i_wr_ref == 0 && ci->i_snap_realm) |
954 | ci->i_snap_realm = NULL; | 966 | drop_inode_snap_realm(ci); |
955 | spin_unlock(&realm->inodes_with_caps_lock); | 967 | |
956 | ceph_put_snap_realm(mdsc, realm); | ||
957 | } | ||
958 | if (!__ceph_is_any_real_caps(ci)) | 968 | if (!__ceph_is_any_real_caps(ci)) |
959 | __cap_delay_cancel(mdsc, ci); | 969 | __cap_delay_cancel(mdsc, ci); |
960 | } | 970 | } |
@@ -1394,6 +1404,13 @@ int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask) | |||
1394 | int was = ci->i_dirty_caps; | 1404 | int was = ci->i_dirty_caps; |
1395 | int dirty = 0; | 1405 | int dirty = 0; |
1396 | 1406 | ||
1407 | if (!ci->i_auth_cap) { | ||
1408 | pr_warn("__mark_dirty_caps %p %llx mask %s, " | ||
1409 | "but no auth cap (session was closed?)\n", | ||
1410 | inode, ceph_ino(inode), ceph_cap_string(mask)); | ||
1411 | return 0; | ||
1412 | } | ||
1413 | |||
1397 | dout("__mark_dirty_caps %p %s dirty %s -> %s\n", &ci->vfs_inode, | 1414 | dout("__mark_dirty_caps %p %s dirty %s -> %s\n", &ci->vfs_inode, |
1398 | ceph_cap_string(mask), ceph_cap_string(was), | 1415 | ceph_cap_string(mask), ceph_cap_string(was), |
1399 | ceph_cap_string(was | mask)); | 1416 | ceph_cap_string(was | mask)); |
@@ -1404,7 +1421,6 @@ int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask) | |||
1404 | ci->i_snap_realm->cached_context); | 1421 | ci->i_snap_realm->cached_context); |
1405 | dout(" inode %p now dirty snapc %p auth cap %p\n", | 1422 | dout(" inode %p now dirty snapc %p auth cap %p\n", |
1406 | &ci->vfs_inode, ci->i_head_snapc, ci->i_auth_cap); | 1423 | &ci->vfs_inode, ci->i_head_snapc, ci->i_auth_cap); |
1407 | WARN_ON(!ci->i_auth_cap); | ||
1408 | BUG_ON(!list_empty(&ci->i_dirty_item)); | 1424 | BUG_ON(!list_empty(&ci->i_dirty_item)); |
1409 | spin_lock(&mdsc->cap_dirty_lock); | 1425 | spin_lock(&mdsc->cap_dirty_lock); |
1410 | list_add(&ci->i_dirty_item, &mdsc->cap_dirty); | 1426 | list_add(&ci->i_dirty_item, &mdsc->cap_dirty); |
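Replacing the late WARN_ON(!ci->i_auth_cap) with an up-front check turns a condition that could only be noticed after the fact into a guard clause: caps are simply not marked dirty when there is no auth cap, which can legitimately happen once the MDS has closed the session. The general shape of the transformation:

	/* before: dirty state is set up first, then a warning fires
	 * if the auth cap turns out to be missing */
	WARN_ON(!ci->i_auth_cap);

	/* after: validate first, bail out harmlessly */
	if (!ci->i_auth_cap) {
		pr_warn("%s: inode %p has no auth cap (session was closed?)\n",
			__func__, inode);
		return 0;	/* nothing gets marked dirty */
	}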
@@ -1545,7 +1561,19 @@ retry_locked: | |||
1545 | if (!mdsc->stopping && inode->i_nlink > 0) { | 1561 | if (!mdsc->stopping && inode->i_nlink > 0) { |
1546 | if (want) { | 1562 | if (want) { |
1547 | retain |= CEPH_CAP_ANY; /* be greedy */ | 1563 | retain |= CEPH_CAP_ANY; /* be greedy */ |
1564 | } else if (S_ISDIR(inode->i_mode) && | ||
1565 | (issued & CEPH_CAP_FILE_SHARED) && | ||
1566 | __ceph_dir_is_complete(ci)) { | ||
1567 | /* | ||
1568 | * If a directory is complete, we want to keep | ||
1569 | * the exclusive cap. So that MDS does not end up | ||
1570 | * revoking the shared cap on every create/unlink | ||
1571 | * operation. | ||
1572 | */ | ||
1573 | want = CEPH_CAP_ANY_SHARED | CEPH_CAP_FILE_EXCL; | ||
1574 | retain |= want; | ||
1548 | } else { | 1575 | } else { |
1576 | |||
1549 | retain |= CEPH_CAP_ANY_SHARED; | 1577 | retain |= CEPH_CAP_ANY_SHARED; |
1550 | /* | 1578 | /* |
1551 | * keep RD only if we didn't have the file open RW, | 1579 | * keep RD only if we didn't have the file open RW, |
@@ -2309,6 +2337,9 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had) | |||
2309 | wake = 1; | 2337 | wake = 1; |
2310 | } | 2338 | } |
2311 | } | 2339 | } |
2340 | /* see comment in __ceph_remove_cap() */ | ||
2341 | if (!__ceph_is_any_caps(ci) && ci->i_snap_realm) | ||
2342 | drop_inode_snap_realm(ci); | ||
2312 | } | 2343 | } |
2313 | spin_unlock(&ci->i_ceph_lock); | 2344 | spin_unlock(&ci->i_ceph_lock); |
2314 | 2345 | ||
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 83e9976f7189..e729b79812b4 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c | |||
@@ -281,6 +281,7 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) | |||
281 | /* can we use the dcache? */ | 281 | /* can we use the dcache? */ |
282 | spin_lock(&ci->i_ceph_lock); | 282 | spin_lock(&ci->i_ceph_lock); |
283 | if ((ctx->pos == 2 || fi->dentry) && | 283 | if ((ctx->pos == 2 || fi->dentry) && |
284 | ceph_test_mount_opt(fsc, DCACHE) && | ||
284 | !ceph_test_mount_opt(fsc, NOASYNCREADDIR) && | 285 | !ceph_test_mount_opt(fsc, NOASYNCREADDIR) && |
285 | ceph_snap(inode) != CEPH_SNAPDIR && | 286 | ceph_snap(inode) != CEPH_SNAPDIR && |
286 | __ceph_dir_is_complete_ordered(ci) && | 287 | __ceph_dir_is_complete_ordered(ci) && |
@@ -336,16 +337,23 @@ more: | |||
336 | ceph_mdsc_put_request(req); | 337 | ceph_mdsc_put_request(req); |
337 | return err; | 338 | return err; |
338 | } | 339 | } |
339 | req->r_inode = inode; | ||
340 | ihold(inode); | ||
341 | req->r_dentry = dget(file->f_path.dentry); | ||
342 | /* hints to request -> mds selection code */ | 340 | /* hints to request -> mds selection code */ |
343 | req->r_direct_mode = USE_AUTH_MDS; | 341 | req->r_direct_mode = USE_AUTH_MDS; |
344 | req->r_direct_hash = ceph_frag_value(frag); | 342 | req->r_direct_hash = ceph_frag_value(frag); |
345 | req->r_direct_is_hash = true; | 343 | req->r_direct_is_hash = true; |
346 | req->r_path2 = kstrdup(fi->last_name, GFP_NOFS); | 344 | if (fi->last_name) { |
345 | req->r_path2 = kstrdup(fi->last_name, GFP_NOFS); | ||
346 | if (!req->r_path2) { | ||
347 | ceph_mdsc_put_request(req); | ||
348 | return -ENOMEM; | ||
349 | } | ||
350 | } | ||
347 | req->r_readdir_offset = fi->next_offset; | 351 | req->r_readdir_offset = fi->next_offset; |
348 | req->r_args.readdir.frag = cpu_to_le32(frag); | 352 | req->r_args.readdir.frag = cpu_to_le32(frag); |
353 | |||
354 | req->r_inode = inode; | ||
355 | ihold(inode); | ||
356 | req->r_dentry = dget(file->f_path.dentry); | ||
349 | err = ceph_mdsc_do_request(mdsc, NULL, req); | 357 | err = ceph_mdsc_do_request(mdsc, NULL, req); |
350 | if (err < 0) { | 358 | if (err < 0) { |
351 | ceph_mdsc_put_request(req); | 359 | ceph_mdsc_put_request(req); |
@@ -629,6 +637,7 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry, | |||
629 | fsc->mount_options->snapdir_name, | 637 | fsc->mount_options->snapdir_name, |
630 | dentry->d_name.len) && | 638 | dentry->d_name.len) && |
631 | !is_root_ceph_dentry(dir, dentry) && | 639 | !is_root_ceph_dentry(dir, dentry) && |
640 | ceph_test_mount_opt(fsc, DCACHE) && | ||
632 | __ceph_dir_is_complete(ci) && | 641 | __ceph_dir_is_complete(ci) && |
633 | (__ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1))) { | 642 | (__ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1))) { |
634 | spin_unlock(&ci->i_ceph_lock); | 643 | spin_unlock(&ci->i_ceph_lock); |
@@ -755,10 +764,15 @@ static int ceph_symlink(struct inode *dir, struct dentry *dentry, | |||
755 | err = PTR_ERR(req); | 764 | err = PTR_ERR(req); |
756 | goto out; | 765 | goto out; |
757 | } | 766 | } |
758 | req->r_dentry = dget(dentry); | ||
759 | req->r_num_caps = 2; | ||
760 | req->r_path2 = kstrdup(dest, GFP_NOFS); | 767 | req->r_path2 = kstrdup(dest, GFP_NOFS); |
768 | if (!req->r_path2) { | ||
769 | err = -ENOMEM; | ||
770 | ceph_mdsc_put_request(req); | ||
771 | goto out; | ||
772 | } | ||
761 | req->r_locked_dir = dir; | 773 | req->r_locked_dir = dir; |
774 | req->r_dentry = dget(dentry); | ||
775 | req->r_num_caps = 2; | ||
762 | req->r_dentry_drop = CEPH_CAP_FILE_SHARED; | 776 | req->r_dentry_drop = CEPH_CAP_FILE_SHARED; |
763 | req->r_dentry_unless = CEPH_CAP_FILE_EXCL; | 777 | req->r_dentry_unless = CEPH_CAP_FILE_EXCL; |
764 | err = ceph_mdsc_do_request(mdsc, dir, req); | 778 | err = ceph_mdsc_do_request(mdsc, dir, req); |
@@ -933,16 +947,20 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
933 | struct ceph_fs_client *fsc = ceph_sb_to_client(old_dir->i_sb); | 947 | struct ceph_fs_client *fsc = ceph_sb_to_client(old_dir->i_sb); |
934 | struct ceph_mds_client *mdsc = fsc->mdsc; | 948 | struct ceph_mds_client *mdsc = fsc->mdsc; |
935 | struct ceph_mds_request *req; | 949 | struct ceph_mds_request *req; |
950 | int op = CEPH_MDS_OP_RENAME; | ||
936 | int err; | 951 | int err; |
937 | 952 | ||
938 | if (ceph_snap(old_dir) != ceph_snap(new_dir)) | 953 | if (ceph_snap(old_dir) != ceph_snap(new_dir)) |
939 | return -EXDEV; | 954 | return -EXDEV; |
940 | if (ceph_snap(old_dir) != CEPH_NOSNAP || | 955 | if (ceph_snap(old_dir) != CEPH_NOSNAP) { |
941 | ceph_snap(new_dir) != CEPH_NOSNAP) | 956 | if (old_dir == new_dir && ceph_snap(old_dir) == CEPH_SNAPDIR) |
942 | return -EROFS; | 957 | op = CEPH_MDS_OP_RENAMESNAP; |
958 | else | ||
959 | return -EROFS; | ||
960 | } | ||
943 | dout("rename dir %p dentry %p to dir %p dentry %p\n", | 961 | dout("rename dir %p dentry %p to dir %p dentry %p\n", |
944 | old_dir, old_dentry, new_dir, new_dentry); | 962 | old_dir, old_dentry, new_dir, new_dentry); |
945 | req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_RENAME, USE_AUTH_MDS); | 963 | req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS); |
946 | if (IS_ERR(req)) | 964 | if (IS_ERR(req)) |
947 | return PTR_ERR(req); | 965 | return PTR_ERR(req); |
948 | ihold(old_dir); | 966 | ihold(old_dir); |
@@ -1240,11 +1258,12 @@ static int ceph_dir_fsync(struct file *file, loff_t start, loff_t end, | |||
1240 | dout("dir_fsync %p wait on tid %llu (until %llu)\n", | 1258 | dout("dir_fsync %p wait on tid %llu (until %llu)\n", |
1241 | inode, req->r_tid, last_tid); | 1259 | inode, req->r_tid, last_tid); |
1242 | if (req->r_timeout) { | 1260 | if (req->r_timeout) { |
1243 | ret = wait_for_completion_timeout( | 1261 | unsigned long time_left = wait_for_completion_timeout( |
1244 | &req->r_safe_completion, req->r_timeout); | 1262 | &req->r_safe_completion, |
1245 | if (ret > 0) | 1263 | req->r_timeout); |
1264 | if (time_left > 0) | ||
1246 | ret = 0; | 1265 | ret = 0; |
1247 | else if (ret == 0) | 1266 | else |
1248 | ret = -EIO; /* timed out */ | 1267 | ret = -EIO; /* timed out */ |
1249 | } else { | 1268 | } else { |
1250 | wait_for_completion(&req->r_safe_completion); | 1269 | wait_for_completion(&req->r_safe_completion); |
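
The ceph_dir_fsync() fix above hinges on the return type of wait_for_completion_timeout(): it returns an unsigned long that is 0 on timeout and the remaining jiffies otherwise, so storing it in the signed ret that also carries error codes conflated the two domains. A small sketch of the corrected idiom, with a stub standing in for the kernel API:

#include <stdio.h>

/* stand-in for the kernel API: returns remaining "jiffies", 0 on timeout */
static unsigned long wait_stub(unsigned long timeout)
{
	return timeout / 2;	/* pretend we were woken with time to spare */
}

static int do_wait(unsigned long timeout)
{
	int ret = 0;

	if (timeout) {
		/* keep the unsigned result out of the signed error slot */
		unsigned long time_left = wait_stub(timeout);

		if (time_left > 0)
			ret = 0;
		else
			ret = -5;	/* stands in for -EIO: timed out */
	}
	return ret;
}

int main(void)
{
	printf("ret=%d\n", do_wait(100));
	return 0;
}
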
@@ -1372,6 +1391,7 @@ const struct inode_operations ceph_snapdir_iops = { | |||
1372 | .getattr = ceph_getattr, | 1391 | .getattr = ceph_getattr, |
1373 | .mkdir = ceph_mkdir, | 1392 | .mkdir = ceph_mkdir, |
1374 | .rmdir = ceph_unlink, | 1393 | .rmdir = ceph_unlink, |
1394 | .rename = ceph_rename, | ||
1375 | }; | 1395 | }; |
1376 | 1396 | ||
1377 | const struct dentry_operations ceph_dentry_ops = { | 1397 | const struct dentry_operations ceph_dentry_ops = { |
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 71c073f38e54..0a2eb32ffe43 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c | |||
@@ -1021,6 +1021,33 @@ static void cleanup_cap_releases(struct ceph_mds_session *session) | |||
1021 | spin_unlock(&session->s_cap_lock); | 1021 | spin_unlock(&session->s_cap_lock); |
1022 | } | 1022 | } |
1023 | 1023 | ||
1024 | static void cleanup_session_requests(struct ceph_mds_client *mdsc, | ||
1025 | struct ceph_mds_session *session) | ||
1026 | { | ||
1027 | struct ceph_mds_request *req; | ||
1028 | struct rb_node *p; | ||
1029 | |||
1030 | dout("cleanup_session_requests mds%d\n", session->s_mds); | ||
1031 | mutex_lock(&mdsc->mutex); | ||
1032 | while (!list_empty(&session->s_unsafe)) { | ||
1033 | req = list_first_entry(&session->s_unsafe, | ||
1034 | struct ceph_mds_request, r_unsafe_item); | ||
1035 | list_del_init(&req->r_unsafe_item); | ||
1036 | pr_info(" dropping unsafe request %llu\n", req->r_tid); | ||
1037 | __unregister_request(mdsc, req); | ||
1038 | } | ||
1039 | /* zero r_attempts, so kick_requests() will re-send requests */ | ||
1040 | p = rb_first(&mdsc->request_tree); | ||
1041 | while (p) { | ||
1042 | req = rb_entry(p, struct ceph_mds_request, r_node); | ||
1043 | p = rb_next(p); | ||
1044 | if (req->r_session && | ||
1045 | req->r_session->s_mds == session->s_mds) | ||
1046 | req->r_attempts = 0; | ||
1047 | } | ||
1048 | mutex_unlock(&mdsc->mutex); | ||
1049 | } | ||
1050 | |||
1024 | /* | 1051 | /* |
1025 | * Helper to safely iterate over all caps associated with a session, with | 1052 | * Helper to safely iterate over all caps associated with a session, with |
1026 | * special care taken to handle a racing __ceph_remove_cap(). | 1053 | * special care taken to handle a racing __ceph_remove_cap(). |
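
cleanup_session_requests() above walks the request tree by fetching rb_next() before operating on the current entry, the standard shape for traversals that may unlink the node in hand. The same shape in a self-contained list walk (illustrative names):

#include <stdio.h>

struct req {
	int mds;
	int attempts;
	struct req *next;
};

/* mutate matching entries while walking; fetch ->next before touching
 * the current node, exactly as the rb_first()/rb_next() loop does */
static void reset_attempts(struct req *head, int mds)
{
	struct req *p = head;

	while (p) {
		struct req *cur = p;

		p = p->next;		/* advance first */
		if (cur->mds == mds)
			cur->attempts = 0;
	}
}

int main(void)
{
	struct req b = { .mds = 1, .attempts = 3, .next = NULL };
	struct req a = { .mds = 0, .attempts = 2, .next = &b };

	reset_attempts(&a, 1);
	printf("%d %d\n", a.attempts, b.attempts);	/* 2 0 */
	return 0;
}
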
@@ -1098,7 +1125,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap, | |||
1098 | cap, ci, &ci->vfs_inode); | 1125 | cap, ci, &ci->vfs_inode); |
1099 | spin_lock(&ci->i_ceph_lock); | 1126 | spin_lock(&ci->i_ceph_lock); |
1100 | __ceph_remove_cap(cap, false); | 1127 | __ceph_remove_cap(cap, false); |
1101 | if (!__ceph_is_any_real_caps(ci)) { | 1128 | if (!ci->i_auth_cap) { |
1102 | struct ceph_mds_client *mdsc = | 1129 | struct ceph_mds_client *mdsc = |
1103 | ceph_sb_to_client(inode->i_sb)->mdsc; | 1130 | ceph_sb_to_client(inode->i_sb)->mdsc; |
1104 | 1131 | ||
@@ -1120,13 +1147,6 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap, | |||
1120 | mdsc->num_cap_flushing--; | 1147 | mdsc->num_cap_flushing--; |
1121 | drop = 1; | 1148 | drop = 1; |
1122 | } | 1149 | } |
1123 | if (drop && ci->i_wrbuffer_ref) { | ||
1124 | pr_info(" dropping dirty data for %p %lld\n", | ||
1125 | inode, ceph_ino(inode)); | ||
1126 | ci->i_wrbuffer_ref = 0; | ||
1127 | ci->i_wrbuffer_ref_head = 0; | ||
1128 | drop++; | ||
1129 | } | ||
1130 | spin_unlock(&mdsc->cap_dirty_lock); | 1150 | spin_unlock(&mdsc->cap_dirty_lock); |
1131 | } | 1151 | } |
1132 | spin_unlock(&ci->i_ceph_lock); | 1152 | spin_unlock(&ci->i_ceph_lock); |
@@ -1853,7 +1873,7 @@ static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry, | |||
1853 | */ | 1873 | */ |
1854 | static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, | 1874 | static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, |
1855 | struct ceph_mds_request *req, | 1875 | struct ceph_mds_request *req, |
1856 | int mds) | 1876 | int mds, bool drop_cap_releases) |
1857 | { | 1877 | { |
1858 | struct ceph_msg *msg; | 1878 | struct ceph_msg *msg; |
1859 | struct ceph_mds_request_head *head; | 1879 | struct ceph_mds_request_head *head; |
@@ -1937,6 +1957,12 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, | |||
1937 | releases += ceph_encode_inode_release(&p, | 1957 | releases += ceph_encode_inode_release(&p, |
1938 | req->r_old_dentry->d_inode, | 1958 | req->r_old_dentry->d_inode, |
1939 | mds, req->r_old_inode_drop, req->r_old_inode_unless, 0); | 1959 | mds, req->r_old_inode_drop, req->r_old_inode_unless, 0); |
1960 | |||
1961 | if (drop_cap_releases) { | ||
1962 | releases = 0; | ||
1963 | p = msg->front.iov_base + req->r_request_release_offset; | ||
1964 | } | ||
1965 | |||
1940 | head->num_releases = cpu_to_le16(releases); | 1966 | head->num_releases = cpu_to_le16(releases); |
1941 | 1967 | ||
1942 | /* time stamp */ | 1968 | /* time stamp */ |
@@ -1989,7 +2015,7 @@ static void complete_request(struct ceph_mds_client *mdsc, | |||
1989 | */ | 2015 | */ |
1990 | static int __prepare_send_request(struct ceph_mds_client *mdsc, | 2016 | static int __prepare_send_request(struct ceph_mds_client *mdsc, |
1991 | struct ceph_mds_request *req, | 2017 | struct ceph_mds_request *req, |
1992 | int mds) | 2018 | int mds, bool drop_cap_releases) |
1993 | { | 2019 | { |
1994 | struct ceph_mds_request_head *rhead; | 2020 | struct ceph_mds_request_head *rhead; |
1995 | struct ceph_msg *msg; | 2021 | struct ceph_msg *msg; |
@@ -2048,7 +2074,7 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc, | |||
2048 | ceph_msg_put(req->r_request); | 2074 | ceph_msg_put(req->r_request); |
2049 | req->r_request = NULL; | 2075 | req->r_request = NULL; |
2050 | } | 2076 | } |
2051 | msg = create_request_message(mdsc, req, mds); | 2077 | msg = create_request_message(mdsc, req, mds, drop_cap_releases); |
2052 | if (IS_ERR(msg)) { | 2078 | if (IS_ERR(msg)) { |
2053 | req->r_err = PTR_ERR(msg); | 2079 | req->r_err = PTR_ERR(msg); |
2054 | complete_request(mdsc, req); | 2080 | complete_request(mdsc, req); |
@@ -2132,7 +2158,7 @@ static int __do_request(struct ceph_mds_client *mdsc, | |||
2132 | if (req->r_request_started == 0) /* note request start time */ | 2158 | if (req->r_request_started == 0) /* note request start time */ |
2133 | req->r_request_started = jiffies; | 2159 | req->r_request_started = jiffies; |
2134 | 2160 | ||
2135 | err = __prepare_send_request(mdsc, req, mds); | 2161 | err = __prepare_send_request(mdsc, req, mds, false); |
2136 | if (!err) { | 2162 | if (!err) { |
2137 | ceph_msg_get(req->r_request); | 2163 | ceph_msg_get(req->r_request); |
2138 | ceph_con_send(&session->s_con, req->r_request); | 2164 | ceph_con_send(&session->s_con, req->r_request); |
@@ -2590,6 +2616,7 @@ static void handle_session(struct ceph_mds_session *session, | |||
2590 | case CEPH_SESSION_CLOSE: | 2616 | case CEPH_SESSION_CLOSE: |
2591 | if (session->s_state == CEPH_MDS_SESSION_RECONNECTING) | 2617 | if (session->s_state == CEPH_MDS_SESSION_RECONNECTING) |
2592 | pr_info("mds%d reconnect denied\n", session->s_mds); | 2618 | pr_info("mds%d reconnect denied\n", session->s_mds); |
2619 | cleanup_session_requests(mdsc, session); | ||
2593 | remove_session_caps(session); | 2620 | remove_session_caps(session); |
2594 | wake = 2; /* for good measure */ | 2621 | wake = 2; /* for good measure */ |
2595 | wake_up_all(&mdsc->session_close_wq); | 2622 | wake_up_all(&mdsc->session_close_wq); |
@@ -2658,7 +2685,7 @@ static void replay_unsafe_requests(struct ceph_mds_client *mdsc, | |||
2658 | 2685 | ||
2659 | mutex_lock(&mdsc->mutex); | 2686 | mutex_lock(&mdsc->mutex); |
2660 | list_for_each_entry_safe(req, nreq, &session->s_unsafe, r_unsafe_item) { | 2687 | list_for_each_entry_safe(req, nreq, &session->s_unsafe, r_unsafe_item) { |
2661 | err = __prepare_send_request(mdsc, req, session->s_mds); | 2688 | err = __prepare_send_request(mdsc, req, session->s_mds, true); |
2662 | if (!err) { | 2689 | if (!err) { |
2663 | ceph_msg_get(req->r_request); | 2690 | ceph_msg_get(req->r_request); |
2664 | ceph_con_send(&session->s_con, req->r_request); | 2691 | ceph_con_send(&session->s_con, req->r_request); |
@@ -2679,7 +2706,8 @@ static void replay_unsafe_requests(struct ceph_mds_client *mdsc, | |||
2679 | continue; /* only old requests */ | 2706 | continue; /* only old requests */ |
2680 | if (req->r_session && | 2707 | if (req->r_session && |
2681 | req->r_session->s_mds == session->s_mds) { | 2708 | req->r_session->s_mds == session->s_mds) { |
2682 | err = __prepare_send_request(mdsc, req, session->s_mds); | 2709 | err = __prepare_send_request(mdsc, req, |
2710 | session->s_mds, true); | ||
2683 | if (!err) { | 2711 | if (!err) { |
2684 | ceph_msg_get(req->r_request); | 2712 | ceph_msg_get(req->r_request); |
2685 | ceph_con_send(&session->s_con, req->r_request); | 2713 | ceph_con_send(&session->s_con, req->r_request); |
@@ -2864,7 +2892,8 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, | |||
2864 | spin_unlock(&session->s_cap_lock); | 2892 | spin_unlock(&session->s_cap_lock); |
2865 | 2893 | ||
2866 | /* trim unused caps to reduce MDS's cache rejoin time */ | 2894 | /* trim unused caps to reduce MDS's cache rejoin time */ |
2867 | shrink_dcache_parent(mdsc->fsc->sb->s_root); | 2895 | if (mdsc->fsc->sb->s_root) |
2896 | shrink_dcache_parent(mdsc->fsc->sb->s_root); | ||
2868 | 2897 | ||
2869 | ceph_con_close(&session->s_con); | 2898 | ceph_con_close(&session->s_con); |
2870 | ceph_con_open(&session->s_con, | 2899 | ceph_con_open(&session->s_con, |
@@ -3133,7 +3162,7 @@ static void handle_lease(struct ceph_mds_client *mdsc, | |||
3133 | di->lease_renew_from && | 3162 | di->lease_renew_from && |
3134 | di->lease_renew_after == 0) { | 3163 | di->lease_renew_after == 0) { |
3135 | unsigned long duration = | 3164 | unsigned long duration = |
3136 | le32_to_cpu(h->duration_ms) * HZ / 1000; | 3165 | msecs_to_jiffies(le32_to_cpu(h->duration_ms)); |
3137 | 3166 | ||
3138 | di->lease_seq = seq; | 3167 | di->lease_seq = seq; |
3139 | dentry->d_time = di->lease_renew_from + duration; | 3168 | dentry->d_time = di->lease_renew_from + duration; |
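
The lease-duration change above matters because the open-coded duration_ms * HZ / 1000 multiplies in 32 bits before dividing and can wrap for large durations; msecs_to_jiffies() widens (and clamps) first. A userspace demonstration of the wrap, assuming a 64-bit long:

#include <stdio.h>

int main(void)
{
	unsigned int duration_ms = 5000000;	/* ~83 minutes */
	unsigned int hz = 1000;			/* model HZ */

	/* old form: the 32-bit multiply wraps before the divide */
	unsigned int bad = duration_ms * hz / 1000;
	/* widening first -- roughly what msecs_to_jiffies does, which
	 * also clamps oversized values -- keeps the result intact */
	unsigned long good = (unsigned long)duration_ms * hz / 1000;

	printf("bad=%u good=%lu\n", bad, good);	/* bad=705032 good=5000000 */
	return 0;
}
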
diff --git a/fs/ceph/strings.c b/fs/ceph/strings.c index 51cc23e48111..89e6bc321df3 100644 --- a/fs/ceph/strings.c +++ b/fs/ceph/strings.c | |||
@@ -75,6 +75,7 @@ const char *ceph_mds_op_name(int op) | |||
75 | case CEPH_MDS_OP_LSSNAP: return "lssnap"; | 75 | case CEPH_MDS_OP_LSSNAP: return "lssnap"; |
76 | case CEPH_MDS_OP_MKSNAP: return "mksnap"; | 76 | case CEPH_MDS_OP_MKSNAP: return "mksnap"; |
77 | case CEPH_MDS_OP_RMSNAP: return "rmsnap"; | 77 | case CEPH_MDS_OP_RMSNAP: return "rmsnap"; |
78 | case CEPH_MDS_OP_RENAMESNAP: return "renamesnap"; | ||
78 | case CEPH_MDS_OP_SETFILELOCK: return "setfilelock"; | 79 | case CEPH_MDS_OP_SETFILELOCK: return "setfilelock"; |
79 | case CEPH_MDS_OP_GETFILELOCK: return "getfilelock"; | 80 | case CEPH_MDS_OP_GETFILELOCK: return "getfilelock"; |
80 | } | 81 | } |
diff --git a/fs/ceph/super.c b/fs/ceph/super.c index a63997b8bcff..e463ebd69a9c 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c | |||
@@ -345,6 +345,11 @@ static int parse_mount_options(struct ceph_mount_options **pfsopt, | |||
345 | fsopt->rsize = CEPH_RSIZE_DEFAULT; | 345 | fsopt->rsize = CEPH_RSIZE_DEFAULT; |
346 | fsopt->rasize = CEPH_RASIZE_DEFAULT; | 346 | fsopt->rasize = CEPH_RASIZE_DEFAULT; |
347 | fsopt->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL); | 347 | fsopt->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL); |
348 | if (!fsopt->snapdir_name) { | ||
349 | err = -ENOMEM; | ||
350 | goto out; | ||
351 | } | ||
352 | |||
348 | fsopt->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT; | 353 | fsopt->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT; |
349 | fsopt->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT; | 354 | fsopt->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT; |
350 | fsopt->cap_release_safety = CEPH_CAP_RELEASE_SAFETY_DEFAULT; | 355 | fsopt->cap_release_safety = CEPH_CAP_RELEASE_SAFETY_DEFAULT; |
@@ -406,31 +411,20 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root) | |||
406 | { | 411 | { |
407 | struct ceph_fs_client *fsc = ceph_sb_to_client(root->d_sb); | 412 | struct ceph_fs_client *fsc = ceph_sb_to_client(root->d_sb); |
408 | struct ceph_mount_options *fsopt = fsc->mount_options; | 413 | struct ceph_mount_options *fsopt = fsc->mount_options; |
409 | struct ceph_options *opt = fsc->client->options; | 414 | size_t pos; |
410 | 415 | int ret; | |
411 | if (opt->flags & CEPH_OPT_FSID) | 416 | |
412 | seq_printf(m, ",fsid=%pU", &opt->fsid); | 417 | /* a comma between MNT/MS and client options */ |
413 | if (opt->flags & CEPH_OPT_NOSHARE) | 418 | seq_putc(m, ','); |
414 | seq_puts(m, ",noshare"); | 419 | pos = m->count; |
415 | if (opt->flags & CEPH_OPT_NOCRC) | 420 | |
416 | seq_puts(m, ",nocrc"); | 421 | ret = ceph_print_client_options(m, fsc->client); |
417 | if (opt->flags & CEPH_OPT_NOMSGAUTH) | 422 | if (ret) |
418 | seq_puts(m, ",nocephx_require_signatures"); | 423 | return ret; |
419 | if ((opt->flags & CEPH_OPT_TCP_NODELAY) == 0) | 424 | |
420 | seq_puts(m, ",notcp_nodelay"); | 425 | /* retract our comma if no client options */ |
421 | 426 | if (m->count == pos) | |
422 | if (opt->name) | 427 | m->count--; |
423 | seq_printf(m, ",name=%s", opt->name); | ||
424 | if (opt->key) | ||
425 | seq_puts(m, ",secret=<hidden>"); | ||
426 | |||
427 | if (opt->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT) | ||
428 | seq_printf(m, ",mount_timeout=%d", opt->mount_timeout); | ||
429 | if (opt->osd_idle_ttl != CEPH_OSD_IDLE_TTL_DEFAULT) | ||
430 | seq_printf(m, ",osd_idle_ttl=%d", opt->osd_idle_ttl); | ||
431 | if (opt->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT) | ||
432 | seq_printf(m, ",osdkeepalivetimeout=%d", | ||
433 | opt->osd_keepalive_timeout); | ||
434 | 428 | ||
435 | if (fsopt->flags & CEPH_MOUNT_OPT_DIRSTAT) | 429 | if (fsopt->flags & CEPH_MOUNT_OPT_DIRSTAT) |
436 | seq_puts(m, ",dirstat"); | 430 | seq_puts(m, ",dirstat"); |
@@ -438,14 +432,10 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root) | |||
438 | seq_puts(m, ",norbytes"); | 432 | seq_puts(m, ",norbytes"); |
439 | if (fsopt->flags & CEPH_MOUNT_OPT_NOASYNCREADDIR) | 433 | if (fsopt->flags & CEPH_MOUNT_OPT_NOASYNCREADDIR) |
440 | seq_puts(m, ",noasyncreaddir"); | 434 | seq_puts(m, ",noasyncreaddir"); |
441 | if (fsopt->flags & CEPH_MOUNT_OPT_DCACHE) | 435 | if ((fsopt->flags & CEPH_MOUNT_OPT_DCACHE) == 0) |
442 | seq_puts(m, ",dcache"); | ||
443 | else | ||
444 | seq_puts(m, ",nodcache"); | 436 | seq_puts(m, ",nodcache"); |
445 | if (fsopt->flags & CEPH_MOUNT_OPT_FSCACHE) | 437 | if (fsopt->flags & CEPH_MOUNT_OPT_FSCACHE) |
446 | seq_puts(m, ",fsc"); | 438 | seq_puts(m, ",fsc"); |
447 | else | ||
448 | seq_puts(m, ",nofsc"); | ||
449 | 439 | ||
450 | #ifdef CONFIG_CEPH_FS_POSIX_ACL | 440 | #ifdef CONFIG_CEPH_FS_POSIX_ACL |
451 | if (fsopt->sb_flags & MS_POSIXACL) | 441 | if (fsopt->sb_flags & MS_POSIXACL) |
@@ -477,6 +467,7 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root) | |||
477 | seq_printf(m, ",readdir_max_bytes=%d", fsopt->max_readdir_bytes); | 467 | seq_printf(m, ",readdir_max_bytes=%d", fsopt->max_readdir_bytes); |
478 | if (strcmp(fsopt->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT)) | 468 | if (strcmp(fsopt->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT)) |
479 | seq_printf(m, ",snapdirname=%s", fsopt->snapdir_name); | 469 | seq_printf(m, ",snapdirname=%s", fsopt->snapdir_name); |
470 | |||
480 | return 0; | 471 | return 0; |
481 | } | 472 | } |
482 | 473 | ||
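
The rewritten ceph_show_options() relies on a small seq_file trick: emit the separator optimistically, record the buffer position, and retract the separator if the callee printed nothing. The same pattern with a plain buffer (all names illustrative):

#include <stdio.h>
#include <string.h>

struct buf {
	char data[128];
	size_t count;
};

static void buf_putc(struct buf *b, char c)
{
	if (b->count < sizeof(b->data) - 1)
		b->data[b->count++] = c;
}

static void buf_puts(struct buf *b, const char *s)
{
	while (*s)
		buf_putc(b, *s++);
}

/* stand-in for ceph_print_client_options(): may print nothing */
static void print_client_options(struct buf *b, int have_opts)
{
	if (have_opts)
		buf_puts(b, "name=admin");
}

int main(void)
{
	struct buf b = { .count = 0 };
	size_t pos;

	buf_puts(&b, "rw");
	buf_putc(&b, ',');		/* optimistic separator */
	pos = b.count;
	print_client_options(&b, 0);
	if (b.count == pos)		/* nothing printed: retract comma */
		b.count--;
	b.data[b.count] = '\0';
	printf("%s\n", b.data);		/* prints "rw", not "rw," */
	return 0;
}
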
@@ -730,6 +721,11 @@ static struct dentry *open_root_dentry(struct ceph_fs_client *fsc, | |||
730 | if (IS_ERR(req)) | 721 | if (IS_ERR(req)) |
731 | return ERR_CAST(req); | 722 | return ERR_CAST(req); |
732 | req->r_path1 = kstrdup(path, GFP_NOFS); | 723 | req->r_path1 = kstrdup(path, GFP_NOFS); |
724 | if (!req->r_path1) { | ||
725 | root = ERR_PTR(-ENOMEM); | ||
726 | goto out; | ||
727 | } | ||
728 | |||
733 | req->r_ino1.ino = CEPH_INO_ROOT; | 729 | req->r_ino1.ino = CEPH_INO_ROOT; |
734 | req->r_ino1.snap = CEPH_NOSNAP; | 730 | req->r_ino1.snap = CEPH_NOSNAP; |
735 | req->r_started = started; | 731 | req->r_started = started; |
diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 04c8124ed30e..fa20e1318939 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h | |||
@@ -36,7 +36,8 @@ | |||
36 | #define CEPH_MOUNT_OPT_DCACHE (1<<9) /* use dcache for readdir etc */ | 36 | #define CEPH_MOUNT_OPT_DCACHE (1<<9) /* use dcache for readdir etc */ |
37 | #define CEPH_MOUNT_OPT_FSCACHE (1<<10) /* use fscache */ | 37 | #define CEPH_MOUNT_OPT_FSCACHE (1<<10) /* use fscache */ |
38 | 38 | ||
39 | #define CEPH_MOUNT_OPT_DEFAULT (CEPH_MOUNT_OPT_RBYTES) | 39 | #define CEPH_MOUNT_OPT_DEFAULT (CEPH_MOUNT_OPT_RBYTES | \ |
40 | CEPH_MOUNT_OPT_DCACHE) | ||
40 | 41 | ||
41 | #define ceph_set_mount_opt(fsc, opt) \ | 42 | #define ceph_set_mount_opt(fsc, opt) \ |
42 | (fsc)->mount_options->flags |= CEPH_MOUNT_OPT_##opt; | 43 | (fsc)->mount_options->flags |= CEPH_MOUNT_OPT_##opt; |
@@ -881,7 +882,6 @@ extern int ceph_mmap(struct file *file, struct vm_area_struct *vma); | |||
881 | 882 | ||
882 | /* file.c */ | 883 | /* file.c */ |
883 | extern const struct file_operations ceph_file_fops; | 884 | extern const struct file_operations ceph_file_fops; |
884 | extern const struct address_space_operations ceph_aops; | ||
885 | 885 | ||
886 | extern int ceph_open(struct inode *inode, struct file *file); | 886 | extern int ceph_open(struct inode *inode, struct file *file); |
887 | extern int ceph_atomic_open(struct inode *dir, struct dentry *dentry, | 887 | extern int ceph_atomic_open(struct inode *dir, struct dentry *dentry, |
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index 5a492caf34cb..5c4c9c256931 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c | |||
@@ -877,16 +877,23 @@ static int ceph_sync_setxattr(struct dentry *dentry, const char *name, | |||
877 | err = PTR_ERR(req); | 877 | err = PTR_ERR(req); |
878 | goto out; | 878 | goto out; |
879 | } | 879 | } |
880 | req->r_inode = inode; | 880 | |
881 | ihold(inode); | ||
882 | req->r_inode_drop = CEPH_CAP_XATTR_SHARED; | ||
883 | req->r_num_caps = 1; | ||
884 | req->r_args.setxattr.flags = cpu_to_le32(flags); | 881 | req->r_args.setxattr.flags = cpu_to_le32(flags); |
885 | req->r_path2 = kstrdup(name, GFP_NOFS); | 882 | req->r_path2 = kstrdup(name, GFP_NOFS); |
883 | if (!req->r_path2) { | ||
884 | ceph_mdsc_put_request(req); | ||
885 | err = -ENOMEM; | ||
886 | goto out; | ||
887 | } | ||
886 | 888 | ||
887 | req->r_pagelist = pagelist; | 889 | req->r_pagelist = pagelist; |
888 | pagelist = NULL; | 890 | pagelist = NULL; |
889 | 891 | ||
892 | req->r_inode = inode; | ||
893 | ihold(inode); | ||
894 | req->r_num_caps = 1; | ||
895 | req->r_inode_drop = CEPH_CAP_XATTR_SHARED; | ||
896 | |||
890 | dout("xattr.ver (before): %lld\n", ci->i_xattrs.version); | 897 | dout("xattr.ver (before): %lld\n", ci->i_xattrs.version); |
891 | err = ceph_mdsc_do_request(mdsc, NULL, req); | 898 | err = ceph_mdsc_do_request(mdsc, NULL, req); |
892 | ceph_mdsc_put_request(req); | 899 | ceph_mdsc_put_request(req); |
@@ -1019,12 +1026,14 @@ static int ceph_send_removexattr(struct dentry *dentry, const char *name) | |||
1019 | USE_AUTH_MDS); | 1026 | USE_AUTH_MDS); |
1020 | if (IS_ERR(req)) | 1027 | if (IS_ERR(req)) |
1021 | return PTR_ERR(req); | 1028 | return PTR_ERR(req); |
1029 | req->r_path2 = kstrdup(name, GFP_NOFS); | ||
1030 | if (!req->r_path2) | ||
1031 | return -ENOMEM; | ||
1032 | |||
1022 | req->r_inode = inode; | 1033 | req->r_inode = inode; |
1023 | ihold(inode); | 1034 | ihold(inode); |
1024 | req->r_inode_drop = CEPH_CAP_XATTR_SHARED; | ||
1025 | req->r_num_caps = 1; | 1035 | req->r_num_caps = 1; |
1026 | req->r_path2 = kstrdup(name, GFP_NOFS); | 1036 | req->r_inode_drop = CEPH_CAP_XATTR_SHARED; |
1027 | |||
1028 | err = ceph_mdsc_do_request(mdsc, NULL, req); | 1037 | err = ceph_mdsc_do_request(mdsc, NULL, req); |
1029 | ceph_mdsc_put_request(req); | 1038 | ceph_mdsc_put_request(req); |
1030 | return err; | 1039 | return err; |
diff --git a/fs/exec.c b/fs/exec.c --- a/fs/exec.c +++ b/fs/exec.c | |||
@@ -1275,6 +1275,53 @@ static void check_unsafe_exec(struct linux_binprm *bprm) | |||
1275 | spin_unlock(&p->fs->lock); | 1275 | spin_unlock(&p->fs->lock); |
1276 | } | 1276 | } |
1277 | 1277 | ||
1278 | static void bprm_fill_uid(struct linux_binprm *bprm) | ||
1279 | { | ||
1280 | struct inode *inode; | ||
1281 | unsigned int mode; | ||
1282 | kuid_t uid; | ||
1283 | kgid_t gid; | ||
1284 | |||
1285 | /* clear any previous set[ug]id data from a previous binary */ | ||
1286 | bprm->cred->euid = current_euid(); | ||
1287 | bprm->cred->egid = current_egid(); | ||
1288 | |||
1289 | if (bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID) | ||
1290 | return; | ||
1291 | |||
1292 | if (task_no_new_privs(current)) | ||
1293 | return; | ||
1294 | |||
1295 | inode = file_inode(bprm->file); | ||
1296 | mode = READ_ONCE(inode->i_mode); | ||
1297 | if (!(mode & (S_ISUID|S_ISGID))) | ||
1298 | return; | ||
1299 | |||
1300 | /* Be careful if suid/sgid is set */ | ||
1301 | mutex_lock(&inode->i_mutex); | ||
1302 | |||
1303 | /* reload atomically mode/uid/gid now that lock held */ | ||
1304 | mode = inode->i_mode; | ||
1305 | uid = inode->i_uid; | ||
1306 | gid = inode->i_gid; | ||
1307 | mutex_unlock(&inode->i_mutex); | ||
1308 | |||
1309 | /* We ignore suid/sgid if there are no mappings for them in the ns */ | ||
1310 | if (!kuid_has_mapping(bprm->cred->user_ns, uid) || | ||
1311 | !kgid_has_mapping(bprm->cred->user_ns, gid)) | ||
1312 | return; | ||
1313 | |||
1314 | if (mode & S_ISUID) { | ||
1315 | bprm->per_clear |= PER_CLEAR_ON_SETID; | ||
1316 | bprm->cred->euid = uid; | ||
1317 | } | ||
1318 | |||
1319 | if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) { | ||
1320 | bprm->per_clear |= PER_CLEAR_ON_SETID; | ||
1321 | bprm->cred->egid = gid; | ||
1322 | } | ||
1323 | } | ||
1324 | |||
1278 | /* | 1325 | /* |
1279 | * Fill the binprm structure from the inode. | 1326 | * Fill the binprm structure from the inode. |
1280 | * Check permissions, then read the first 128 (BINPRM_BUF_SIZE) bytes | 1327 | * Check permissions, then read the first 128 (BINPRM_BUF_SIZE) bytes |
@@ -1283,36 +1330,9 @@ static void check_unsafe_exec(struct linux_binprm *bprm) | |||
1283 | */ | 1330 | */ |
1284 | int prepare_binprm(struct linux_binprm *bprm) | 1331 | int prepare_binprm(struct linux_binprm *bprm) |
1285 | { | 1332 | { |
1286 | struct inode *inode = file_inode(bprm->file); | ||
1287 | umode_t mode = inode->i_mode; | ||
1288 | int retval; | 1333 | int retval; |
1289 | 1334 | ||
1290 | 1335 | bprm_fill_uid(bprm); | |
1291 | /* clear any previous set[ug]id data from a previous binary */ | ||
1292 | bprm->cred->euid = current_euid(); | ||
1293 | bprm->cred->egid = current_egid(); | ||
1294 | |||
1295 | if (!(bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID) && | ||
1296 | !task_no_new_privs(current) && | ||
1297 | kuid_has_mapping(bprm->cred->user_ns, inode->i_uid) && | ||
1298 | kgid_has_mapping(bprm->cred->user_ns, inode->i_gid)) { | ||
1299 | /* Set-uid? */ | ||
1300 | if (mode & S_ISUID) { | ||
1301 | bprm->per_clear |= PER_CLEAR_ON_SETID; | ||
1302 | bprm->cred->euid = inode->i_uid; | ||
1303 | } | ||
1304 | |||
1305 | /* Set-gid? */ | ||
1306 | /* | ||
1307 | * If setgid is set but no group execute bit then this | ||
1308 | * is a candidate for mandatory locking, not a setgid | ||
1309 | * executable. | ||
1310 | */ | ||
1311 | if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) { | ||
1312 | bprm->per_clear |= PER_CLEAR_ON_SETID; | ||
1313 | bprm->cred->egid = inode->i_gid; | ||
1314 | } | ||
1315 | } | ||
1316 | 1336 | ||
1317 | /* fill in binprm security blob */ | 1337 | /* fill in binprm security blob */ |
1318 | retval = security_bprm_set_creds(bprm); | 1338 | retval = security_bprm_set_creds(bprm); |
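
One rule in bprm_fill_uid() deserves emphasis: setgid without group-execute marks a mandatory-locking candidate, not a setgid executable, so only the S_ISGID|S_IXGRP combination escalates credentials. The predicate, checked standalone with the <sys/stat.h> bits:

#include <stdio.h>
#include <sys/stat.h>

static void classify(mode_t mode)
{
	if (mode & S_ISUID)
		printf("setuid executable\n");
	if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP))
		printf("setgid executable\n");
	else if (mode & S_ISGID)
		printf("setgid without group-exec: mandatory-locking candidate\n");
}

int main(void)
{
	classify(S_ISUID | 0755);	/* setuid */
	classify(S_ISGID | 0755);	/* setgid, g+x set */
	classify(S_ISGID | 0644);	/* locking candidate, not setgid exec */
	return 0;
}
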
diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig index efea5d5c44ce..18228c201f7f 100644 --- a/fs/ext4/Kconfig +++ b/fs/ext4/Kconfig | |||
@@ -64,6 +64,23 @@ config EXT4_FS_SECURITY | |||
64 | If you are not using a security module that requires using | 64 | If you are not using a security module that requires using |
65 | extended attributes for file security labels, say N. | 65 | extended attributes for file security labels, say N. |
66 | 66 | ||
67 | config EXT4_FS_ENCRYPTION | ||
68 | bool "Ext4 Encryption" | ||
69 | depends on EXT4_FS | ||
70 | select CRYPTO_AES | ||
71 | select CRYPTO_CBC | ||
72 | select CRYPTO_ECB | ||
73 | select CRYPTO_XTS | ||
74 | select CRYPTO_CTS | ||
75 | select CRYPTO_SHA256 | ||
76 | select KEYS | ||
77 | select ENCRYPTED_KEYS | ||
78 | help | ||
79 | Enable encryption of ext4 files and directories. This | ||
80 | feature is similar to ecryptfs, but it is more memory | ||
81 | efficient since it avoids caching the encrypted and | ||
82 | decrypted pages in the page cache. | ||
83 | |||
67 | config EXT4_DEBUG | 84 | config EXT4_DEBUG |
68 | bool "EXT4 debugging support" | 85 | bool "EXT4 debugging support" |
69 | depends on EXT4_FS | 86 | depends on EXT4_FS |
diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile index 0310fec2ee3d..75285ea9aa05 100644 --- a/fs/ext4/Makefile +++ b/fs/ext4/Makefile | |||
@@ -8,7 +8,9 @@ ext4-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o page-io.o \ | |||
8 | ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \ | 8 | ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \ |
9 | ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o \ | 9 | ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o \ |
10 | mmp.o indirect.o extents_status.o xattr.o xattr_user.o \ | 10 | mmp.o indirect.o extents_status.o xattr.o xattr_user.o \ |
11 | xattr_trusted.o inline.o | 11 | xattr_trusted.o inline.o readpage.o |
12 | 12 | ||
13 | ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o | 13 | ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o |
14 | ext4-$(CONFIG_EXT4_FS_SECURITY) += xattr_security.o | 14 | ext4-$(CONFIG_EXT4_FS_SECURITY) += xattr_security.o |
15 | ext4-$(CONFIG_EXT4_FS_ENCRYPTION) += crypto_policy.o crypto.o \ | ||
16 | crypto_key.o crypto_fname.o | ||
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c index d40c8dbbb0d6..69b1e73026a5 100644 --- a/fs/ext4/acl.c +++ b/fs/ext4/acl.c | |||
@@ -4,11 +4,6 @@ | |||
4 | * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de> | 4 | * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de> |
5 | */ | 5 | */ |
6 | 6 | ||
7 | #include <linux/init.h> | ||
8 | #include <linux/sched.h> | ||
9 | #include <linux/slab.h> | ||
10 | #include <linux/capability.h> | ||
11 | #include <linux/fs.h> | ||
12 | #include "ext4_jbd2.h" | 7 | #include "ext4_jbd2.h" |
13 | #include "ext4.h" | 8 | #include "ext4.h" |
14 | #include "xattr.h" | 9 | #include "xattr.h" |
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 83a6f497c4e0..955bf49a7945 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c | |||
@@ -14,7 +14,6 @@ | |||
14 | #include <linux/time.h> | 14 | #include <linux/time.h> |
15 | #include <linux/capability.h> | 15 | #include <linux/capability.h> |
16 | #include <linux/fs.h> | 16 | #include <linux/fs.h> |
17 | #include <linux/jbd2.h> | ||
18 | #include <linux/quotaops.h> | 17 | #include <linux/quotaops.h> |
19 | #include <linux/buffer_head.h> | 18 | #include <linux/buffer_head.h> |
20 | #include "ext4.h" | 19 | #include "ext4.h" |
@@ -641,8 +640,6 @@ ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode, | |||
641 | * fail EDQUOT for metadata, but we do account for it. | 640 | * fail EDQUOT for metadata, but we do account for it. |
642 | */ | 641 | */ |
643 | if (!(*errp) && (flags & EXT4_MB_DELALLOC_RESERVED)) { | 642 | if (!(*errp) && (flags & EXT4_MB_DELALLOC_RESERVED)) { |
644 | spin_lock(&EXT4_I(inode)->i_block_reservation_lock); | ||
645 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); | ||
646 | dquot_alloc_block_nofail(inode, | 643 | dquot_alloc_block_nofail(inode, |
647 | EXT4_C2B(EXT4_SB(inode->i_sb), ar.len)); | 644 | EXT4_C2B(EXT4_SB(inode->i_sb), ar.len)); |
648 | } | 645 | } |
diff --git a/fs/ext4/bitmap.c b/fs/ext4/bitmap.c index b610779a958c..4a606afb171f 100644 --- a/fs/ext4/bitmap.c +++ b/fs/ext4/bitmap.c | |||
@@ -8,7 +8,6 @@ | |||
8 | */ | 8 | */ |
9 | 9 | ||
10 | #include <linux/buffer_head.h> | 10 | #include <linux/buffer_head.h> |
11 | #include <linux/jbd2.h> | ||
12 | #include "ext4.h" | 11 | #include "ext4.h" |
13 | 12 | ||
14 | unsigned int ext4_count_free(char *bitmap, unsigned int numchars) | 13 | unsigned int ext4_count_free(char *bitmap, unsigned int numchars) |
diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c index 41eb9dcfac7e..3522340c7a99 100644 --- a/fs/ext4/block_validity.c +++ b/fs/ext4/block_validity.c | |||
@@ -16,7 +16,6 @@ | |||
16 | #include <linux/swap.h> | 16 | #include <linux/swap.h> |
17 | #include <linux/pagemap.h> | 17 | #include <linux/pagemap.h> |
18 | #include <linux/blkdev.h> | 18 | #include <linux/blkdev.h> |
19 | #include <linux/mutex.h> | ||
20 | #include <linux/slab.h> | 19 | #include <linux/slab.h> |
21 | #include "ext4.h" | 20 | #include "ext4.h" |
22 | 21 | ||
diff --git a/fs/ext4/crypto.c b/fs/ext4/crypto.c new file mode 100644 index 000000000000..8ff15273ab0c --- /dev/null +++ b/fs/ext4/crypto.c | |||
@@ -0,0 +1,558 @@ | |||
1 | /* | ||
2 | * linux/fs/ext4/crypto.c | ||
3 | * | ||
4 | * Copyright (C) 2015, Google, Inc. | ||
5 | * | ||
6 | * This contains encryption functions for ext4 | ||
7 | * | ||
8 | * Written by Michael Halcrow, 2014. | ||
9 | * | ||
10 | * Filename encryption additions | ||
11 | * Uday Savagaonkar, 2014 | ||
12 | * Encryption policy handling additions | ||
13 | * Ildar Muslukhov, 2014 | ||
14 | * | ||
15 | * This has not yet undergone a rigorous security audit. | ||
16 | * | ||
17 | * The usage of AES-XTS should conform to recommendations in NIST | ||
18 | * Special Publication 800-38E and IEEE P1619/D16. | ||
19 | */ | ||
20 | |||
21 | #include <crypto/hash.h> | ||
22 | #include <crypto/sha.h> | ||
23 | #include <keys/user-type.h> | ||
24 | #include <keys/encrypted-type.h> | ||
25 | #include <linux/crypto.h> | ||
26 | #include <linux/ecryptfs.h> | ||
27 | #include <linux/gfp.h> | ||
28 | #include <linux/kernel.h> | ||
29 | #include <linux/key.h> | ||
30 | #include <linux/list.h> | ||
31 | #include <linux/mempool.h> | ||
32 | #include <linux/module.h> | ||
33 | #include <linux/mutex.h> | ||
34 | #include <linux/random.h> | ||
35 | #include <linux/scatterlist.h> | ||
36 | #include <linux/spinlock_types.h> | ||
37 | |||
38 | #include "ext4_extents.h" | ||
39 | #include "xattr.h" | ||
40 | |||
41 | /* Encryption added and removed here! (L: */ | ||
42 | |||
43 | static unsigned int num_prealloc_crypto_pages = 32; | ||
44 | static unsigned int num_prealloc_crypto_ctxs = 128; | ||
45 | |||
46 | module_param(num_prealloc_crypto_pages, uint, 0444); | ||
47 | MODULE_PARM_DESC(num_prealloc_crypto_pages, | ||
48 | "Number of crypto pages to preallocate"); | ||
49 | module_param(num_prealloc_crypto_ctxs, uint, 0444); | ||
50 | MODULE_PARM_DESC(num_prealloc_crypto_ctxs, | ||
51 | "Number of crypto contexts to preallocate"); | ||
52 | |||
53 | static mempool_t *ext4_bounce_page_pool; | ||
54 | |||
55 | static LIST_HEAD(ext4_free_crypto_ctxs); | ||
56 | static DEFINE_SPINLOCK(ext4_crypto_ctx_lock); | ||
57 | |||
58 | /** | ||
59 | * ext4_release_crypto_ctx() - Releases an encryption context | ||
60 | * @ctx: The encryption context to release. | ||
61 | * | ||
62 | * If the encryption context was allocated from the pre-allocated pool, returns | ||
63 | * it to that pool. Else, frees it. | ||
64 | * | ||
65 | * If there's a bounce page in the context, this frees that. | ||
66 | */ | ||
67 | void ext4_release_crypto_ctx(struct ext4_crypto_ctx *ctx) | ||
68 | { | ||
69 | unsigned long flags; | ||
70 | |||
71 | if (ctx->bounce_page) { | ||
72 | if (ctx->flags & EXT4_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL) | ||
73 | __free_page(ctx->bounce_page); | ||
74 | else | ||
75 | mempool_free(ctx->bounce_page, ext4_bounce_page_pool); | ||
76 | ctx->bounce_page = NULL; | ||
77 | } | ||
78 | ctx->control_page = NULL; | ||
79 | if (ctx->flags & EXT4_CTX_REQUIRES_FREE_ENCRYPT_FL) { | ||
80 | if (ctx->tfm) | ||
81 | crypto_free_tfm(ctx->tfm); | ||
82 | kfree(ctx); | ||
83 | } else { | ||
84 | spin_lock_irqsave(&ext4_crypto_ctx_lock, flags); | ||
85 | list_add(&ctx->free_list, &ext4_free_crypto_ctxs); | ||
86 | spin_unlock_irqrestore(&ext4_crypto_ctx_lock, flags); | ||
87 | } | ||
88 | } | ||
89 | |||
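
ext4_release_crypto_ctx() is the release half of a two-tier allocator: contexts that overflowed the preallocated pool are freed outright, pool-born contexts are recycled, and a flag records the origin. A single-threaded userspace model of that shape (locking elided, names illustrative):

#include <stdlib.h>

#define CTX_REQUIRES_FREE 0x1	/* mirrors EXT4_CTX_REQUIRES_FREE_ENCRYPT_FL */

struct ctx {
	int flags;
	struct ctx *next;
};

static struct ctx *free_list;	/* stand-in for ext4_free_crypto_ctxs */

static void ctx_release(struct ctx *c)
{
	if (c->flags & CTX_REQUIRES_FREE) {
		free(c);		/* overflow allocation: free outright */
	} else {
		c->next = free_list;	/* pool-born: recycle */
		free_list = c;
	}
}

int main(void)
{
	struct ctx *pre = calloc(1, sizeof(*pre));
	struct ctx *extra = calloc(1, sizeof(*extra));

	if (!pre || !extra)
		return 1;
	extra->flags |= CTX_REQUIRES_FREE;
	ctx_release(pre);	/* goes back on free_list */
	ctx_release(extra);	/* actually freed */
	free(free_list);	/* tear down the model pool */
	return 0;
}
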
90 | /** | ||
91 | * ext4_alloc_and_init_crypto_ctx() - Allocates and inits an encryption context | ||
92 | * @mask: The allocation mask. | ||
93 | * | ||
94 | * Return: An allocated and initialized encryption context on success; an | ||
95 | * ERR_PTR-encoded error otherwise (never NULL). | ||
96 | */ | ||
97 | static struct ext4_crypto_ctx *ext4_alloc_and_init_crypto_ctx(gfp_t mask) | ||
98 | { | ||
99 | struct ext4_crypto_ctx *ctx = kzalloc(sizeof(struct ext4_crypto_ctx), | ||
100 | mask); | ||
101 | |||
102 | if (!ctx) | ||
103 | return ERR_PTR(-ENOMEM); | ||
104 | return ctx; | ||
105 | } | ||
106 | |||
107 | /** | ||
108 | * ext4_get_crypto_ctx() - Gets an encryption context | ||
109 | * @inode: The inode for which we are doing the crypto | ||
110 | * | ||
111 | * Allocates and initializes an encryption context. | ||
112 | * | ||
113 | * Return: An allocated and initialized encryption context on success; an | ||
114 | * ERR_PTR-encoded error otherwise (never NULL). | ||
115 | */ | ||
116 | struct ext4_crypto_ctx *ext4_get_crypto_ctx(struct inode *inode) | ||
117 | { | ||
118 | struct ext4_crypto_ctx *ctx = NULL; | ||
119 | int res = 0; | ||
120 | unsigned long flags; | ||
121 | struct ext4_encryption_key *key = &EXT4_I(inode)->i_encryption_key; | ||
122 | |||
123 | if (!ext4_read_workqueue) | ||
124 | ext4_init_crypto(); | ||
125 | |||
126 | /* | ||
127 | * We first try getting the ctx from a free list because in | ||
128 | * the common case the ctx will have an allocated and | ||
129 | * initialized crypto tfm, so it's probably a worthwhile | ||
130 | * optimization. For the bounce page, we first try getting it | ||
131 | * from the kernel allocator because that's just about as fast | ||
132 | * as getting it from a list and because a cache of free pages | ||
133 | * should generally be a "last resort" option for a filesystem | ||
134 | * to be able to do its job. | ||
135 | */ | ||
136 | spin_lock_irqsave(&ext4_crypto_ctx_lock, flags); | ||
137 | ctx = list_first_entry_or_null(&ext4_free_crypto_ctxs, | ||
138 | struct ext4_crypto_ctx, free_list); | ||
139 | if (ctx) | ||
140 | list_del(&ctx->free_list); | ||
141 | spin_unlock_irqrestore(&ext4_crypto_ctx_lock, flags); | ||
142 | if (!ctx) { | ||
143 | ctx = ext4_alloc_and_init_crypto_ctx(GFP_NOFS); | ||
144 | if (IS_ERR(ctx)) { | ||
145 | res = PTR_ERR(ctx); | ||
146 | goto out; | ||
147 | } | ||
148 | ctx->flags |= EXT4_CTX_REQUIRES_FREE_ENCRYPT_FL; | ||
149 | } else { | ||
150 | ctx->flags &= ~EXT4_CTX_REQUIRES_FREE_ENCRYPT_FL; | ||
151 | } | ||
152 | |||
153 | /* Allocate a new Crypto API context if we don't already have | ||
154 | * one or if it isn't the right mode. */ | ||
155 | BUG_ON(key->mode == EXT4_ENCRYPTION_MODE_INVALID); | ||
156 | if (ctx->tfm && (ctx->mode != key->mode)) { | ||
157 | crypto_free_tfm(ctx->tfm); | ||
158 | ctx->tfm = NULL; | ||
159 | ctx->mode = EXT4_ENCRYPTION_MODE_INVALID; | ||
160 | } | ||
161 | if (!ctx->tfm) { | ||
162 | switch (key->mode) { | ||
163 | case EXT4_ENCRYPTION_MODE_AES_256_XTS: | ||
164 | ctx->tfm = crypto_ablkcipher_tfm( | ||
165 | crypto_alloc_ablkcipher("xts(aes)", 0, 0)); | ||
166 | break; | ||
167 | case EXT4_ENCRYPTION_MODE_AES_256_GCM: | ||
168 | /* TODO(mhalcrow): AEAD w/ gcm(aes); | ||
169 | * crypto_aead_setauthsize() */ | ||
170 | ctx->tfm = ERR_PTR(-ENOTSUPP); | ||
171 | break; | ||
172 | default: | ||
173 | BUG(); | ||
174 | } | ||
175 | if (IS_ERR_OR_NULL(ctx->tfm)) { | ||
176 | res = PTR_ERR(ctx->tfm); | ||
177 | ctx->tfm = NULL; | ||
178 | goto out; | ||
179 | } | ||
180 | ctx->mode = key->mode; | ||
181 | } | ||
182 | BUG_ON(key->size != ext4_encryption_key_size(key->mode)); | ||
183 | |||
184 | /* There shouldn't be a bounce page attached to the crypto | ||
185 | * context at this point. */ | ||
186 | BUG_ON(ctx->bounce_page); | ||
187 | |||
188 | out: | ||
189 | if (res) { | ||
190 | if (!IS_ERR_OR_NULL(ctx)) | ||
191 | ext4_release_crypto_ctx(ctx); | ||
192 | ctx = ERR_PTR(res); | ||
193 | } | ||
194 | return ctx; | ||
195 | } | ||
196 | |||
197 | struct workqueue_struct *ext4_read_workqueue; | ||
198 | static DEFINE_MUTEX(crypto_init); | ||
199 | |||
200 | /** | ||
201 | * ext4_exit_crypto() - Shutdown the ext4 encryption system | ||
202 | */ | ||
203 | void ext4_exit_crypto(void) | ||
204 | { | ||
205 | struct ext4_crypto_ctx *pos, *n; | ||
206 | |||
207 | list_for_each_entry_safe(pos, n, &ext4_free_crypto_ctxs, free_list) { | ||
208 | if (pos->bounce_page) { | ||
209 | if (pos->flags & | ||
210 | EXT4_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL) { | ||
211 | __free_page(pos->bounce_page); | ||
212 | } else { | ||
213 | mempool_free(pos->bounce_page, | ||
214 | ext4_bounce_page_pool); | ||
215 | } | ||
216 | } | ||
217 | if (pos->tfm) | ||
218 | crypto_free_tfm(pos->tfm); | ||
219 | kfree(pos); | ||
220 | } | ||
221 | INIT_LIST_HEAD(&ext4_free_crypto_ctxs); | ||
222 | if (ext4_bounce_page_pool) | ||
223 | mempool_destroy(ext4_bounce_page_pool); | ||
224 | ext4_bounce_page_pool = NULL; | ||
225 | if (ext4_read_workqueue) | ||
226 | destroy_workqueue(ext4_read_workqueue); | ||
227 | ext4_read_workqueue = NULL; | ||
228 | } | ||
229 | |||
230 | /** | ||
231 | * ext4_init_crypto() - Set up for ext4 encryption. | ||
232 | * | ||
233 | * We only call this when we start accessing encrypted files, since it | ||
234 | * results in memory getting allocated that wouldn't otherwise be used. | ||
235 | * | ||
236 | * Return: Zero on success, non-zero otherwise. | ||
237 | */ | ||
238 | int ext4_init_crypto(void) | ||
239 | { | ||
240 | int i, res; | ||
241 | |||
242 | mutex_lock(&crypto_init); | ||
243 | if (ext4_read_workqueue) | ||
244 | goto already_initialized; | ||
245 | ext4_read_workqueue = alloc_workqueue("ext4_crypto", WQ_HIGHPRI, 0); | ||
246 | if (!ext4_read_workqueue) { | ||
247 | res = -ENOMEM; | ||
248 | goto fail; | ||
249 | } | ||
250 | |||
251 | for (i = 0; i < num_prealloc_crypto_ctxs; i++) { | ||
252 | struct ext4_crypto_ctx *ctx; | ||
253 | |||
254 | ctx = ext4_alloc_and_init_crypto_ctx(GFP_KERNEL); | ||
255 | if (IS_ERR(ctx)) { | ||
256 | res = PTR_ERR(ctx); | ||
257 | goto fail; | ||
258 | } | ||
259 | list_add(&ctx->free_list, &ext4_free_crypto_ctxs); | ||
260 | } | ||
261 | |||
262 | ext4_bounce_page_pool = | ||
263 | mempool_create_page_pool(num_prealloc_crypto_pages, 0); | ||
264 | if (!ext4_bounce_page_pool) { | ||
265 | res = -ENOMEM; | ||
266 | goto fail; | ||
267 | } | ||
268 | already_initialized: | ||
269 | mutex_unlock(&crypto_init); | ||
270 | return 0; | ||
271 | fail: | ||
272 | ext4_exit_crypto(); | ||
273 | mutex_unlock(&crypto_init); | ||
274 | return res; | ||
275 | } | ||
276 | |||
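
ext4_init_crypto() gates one-shot setup behind a mutex, using the workqueue pointer itself as the "already initialized" flag and unwinding through ext4_exit_crypto() on failure. The same gate in a pthreads sketch (illustrative names):

#include <pthread.h>
#include <stdlib.h>

static pthread_mutex_t init_lock = PTHREAD_MUTEX_INITIALIZER;
static void *resource;		/* doubles as the "initialized" flag,
				   like ext4_read_workqueue above */

static int lazy_init(void)
{
	int res = 0;

	pthread_mutex_lock(&init_lock);
	if (resource)		/* already initialized: cheap exit */
		goto out;
	resource = malloc(64);
	if (!resource)
		res = -1;	/* a fuller version would unwind partial
				   setup here, as ext4_exit_crypto() does */
out:
	pthread_mutex_unlock(&init_lock);
	return res;
}

int main(void)
{
	return lazy_init() || lazy_init();	/* second call is a no-op */
}
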
277 | void ext4_restore_control_page(struct page *data_page) | ||
278 | { | ||
279 | struct ext4_crypto_ctx *ctx = | ||
280 | (struct ext4_crypto_ctx *)page_private(data_page); | ||
281 | |||
282 | set_page_private(data_page, (unsigned long)NULL); | ||
283 | ClearPagePrivate(data_page); | ||
284 | unlock_page(data_page); | ||
285 | ext4_release_crypto_ctx(ctx); | ||
286 | } | ||
287 | |||
288 | /** | ||
289 | * ext4_crypt_complete() - The completion callback for page encryption | ||
290 | * @req: The asynchronous encryption request context | ||
291 | * @res: The result of the encryption operation | ||
292 | */ | ||
293 | static void ext4_crypt_complete(struct crypto_async_request *req, int res) | ||
294 | { | ||
295 | struct ext4_completion_result *ecr = req->data; | ||
296 | |||
297 | if (res == -EINPROGRESS) | ||
298 | return; | ||
299 | ecr->res = res; | ||
300 | complete(&ecr->completion); | ||
301 | } | ||
302 | |||
303 | typedef enum { | ||
304 | EXT4_DECRYPT = 0, | ||
305 | EXT4_ENCRYPT, | ||
306 | } ext4_direction_t; | ||
307 | |||
308 | static int ext4_page_crypto(struct ext4_crypto_ctx *ctx, | ||
309 | struct inode *inode, | ||
310 | ext4_direction_t rw, | ||
311 | pgoff_t index, | ||
312 | struct page *src_page, | ||
313 | struct page *dest_page) | ||
314 | |||
315 | { | ||
316 | u8 xts_tweak[EXT4_XTS_TWEAK_SIZE]; | ||
317 | struct ablkcipher_request *req = NULL; | ||
318 | DECLARE_EXT4_COMPLETION_RESULT(ecr); | ||
319 | struct scatterlist dst, src; | ||
320 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
321 | struct crypto_ablkcipher *atfm = __crypto_ablkcipher_cast(ctx->tfm); | ||
322 | int res = 0; | ||
323 | |||
324 | BUG_ON(!ctx->tfm); | ||
325 | BUG_ON(ctx->mode != ei->i_encryption_key.mode); | ||
326 | |||
327 | if (ctx->mode != EXT4_ENCRYPTION_MODE_AES_256_XTS) { | ||
328 | printk_ratelimited(KERN_ERR | ||
329 | "%s: unsupported crypto algorithm: %d\n", | ||
330 | __func__, ctx->mode); | ||
331 | return -ENOTSUPP; | ||
332 | } | ||
333 | |||
334 | crypto_ablkcipher_clear_flags(atfm, ~0); | ||
335 | crypto_tfm_set_flags(ctx->tfm, CRYPTO_TFM_REQ_WEAK_KEY); | ||
336 | |||
337 | res = crypto_ablkcipher_setkey(atfm, ei->i_encryption_key.raw, | ||
338 | ei->i_encryption_key.size); | ||
339 | if (res) { | ||
340 | printk_ratelimited(KERN_ERR | ||
341 | "%s: crypto_ablkcipher_setkey() failed\n", | ||
342 | __func__); | ||
343 | return res; | ||
344 | } | ||
345 | req = ablkcipher_request_alloc(atfm, GFP_NOFS); | ||
346 | if (!req) { | ||
347 | printk_ratelimited(KERN_ERR | ||
348 | "%s: crypto_request_alloc() failed\n", | ||
349 | __func__); | ||
350 | return -ENOMEM; | ||
351 | } | ||
352 | ablkcipher_request_set_callback( | ||
353 | req, CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, | ||
354 | ext4_crypt_complete, &ecr); | ||
355 | |||
356 | BUILD_BUG_ON(EXT4_XTS_TWEAK_SIZE < sizeof(index)); | ||
357 | memcpy(xts_tweak, &index, sizeof(index)); | ||
358 | memset(&xts_tweak[sizeof(index)], 0, | ||
359 | EXT4_XTS_TWEAK_SIZE - sizeof(index)); | ||
360 | |||
361 | sg_init_table(&dst, 1); | ||
362 | sg_set_page(&dst, dest_page, PAGE_CACHE_SIZE, 0); | ||
363 | sg_init_table(&src, 1); | ||
364 | sg_set_page(&src, src_page, PAGE_CACHE_SIZE, 0); | ||
365 | ablkcipher_request_set_crypt(req, &src, &dst, PAGE_CACHE_SIZE, | ||
366 | xts_tweak); | ||
367 | if (rw == EXT4_DECRYPT) | ||
368 | res = crypto_ablkcipher_decrypt(req); | ||
369 | else | ||
370 | res = crypto_ablkcipher_encrypt(req); | ||
371 | if (res == -EINPROGRESS || res == -EBUSY) { | ||
372 | BUG_ON(req->base.data != &ecr); | ||
373 | wait_for_completion(&ecr.completion); | ||
374 | res = ecr.res; | ||
375 | } | ||
376 | ablkcipher_request_free(req); | ||
377 | if (res) { | ||
378 | printk_ratelimited( | ||
379 | KERN_ERR | ||
380 | "%s: crypto_ablkcipher_encrypt() returned %d\n", | ||
381 | __func__, res); | ||
382 | return res; | ||
383 | } | ||
384 | return 0; | ||
385 | } | ||
386 | |||
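
The tweak handling in ext4_page_crypto() serializes the page index into the low bytes of the tweak buffer and zero-fills the remainder, giving every page a distinct XTS tweak. The packing, reimplemented standalone (the 16-byte size mirrors EXT4_XTS_TWEAK_SIZE and should be treated as an assumption of this sketch):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define TWEAK_SIZE 16	/* assumed to match EXT4_XTS_TWEAK_SIZE */

static void build_tweak(uint8_t tweak[TWEAK_SIZE], uint64_t index)
{
	/* low bytes carry the page index (native endianness, as in the
	 * memcpy above); the remainder is zeroed */
	memcpy(tweak, &index, sizeof(index));
	memset(tweak + sizeof(index), 0, TWEAK_SIZE - sizeof(index));
}

int main(void)
{
	uint8_t tweak[TWEAK_SIZE];

	build_tweak(tweak, 42);
	for (int i = 0; i < TWEAK_SIZE; i++)
		printf("%02x", tweak[i]);
	printf("\n");
	return 0;
}
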
387 | /** | ||
388 | * ext4_encrypt() - Encrypts a page | ||
389 | * @inode: The inode for which the encryption should take place | ||
390 | * @plaintext_page: The page to encrypt. Must be locked. | ||
391 | * | ||
392 | * Allocates a ciphertext page and encrypts plaintext_page into it using the ctx | ||
393 | * encryption context. | ||
394 | * | ||
395 | * Called on the page write path. The caller must call | ||
396 | * ext4_restore_control_page() on the returned ciphertext page to | ||
397 | * release the bounce buffer and the encryption context. | ||
398 | * | ||
399 | * Return: An allocated page with the encrypted content on success. Else, an | ||
400 | * ERR_PTR-encoded error (never NULL). | ||
401 | */ | ||
402 | struct page *ext4_encrypt(struct inode *inode, | ||
403 | struct page *plaintext_page) | ||
404 | { | ||
405 | struct ext4_crypto_ctx *ctx; | ||
406 | struct page *ciphertext_page = NULL; | ||
407 | int err; | ||
408 | |||
409 | BUG_ON(!PageLocked(plaintext_page)); | ||
410 | |||
411 | ctx = ext4_get_crypto_ctx(inode); | ||
412 | if (IS_ERR(ctx)) | ||
413 | return (struct page *) ctx; | ||
414 | |||
415 | /* The encryption operation will require a bounce page. */ | ||
416 | ciphertext_page = alloc_page(GFP_NOFS); | ||
417 | if (!ciphertext_page) { | ||
418 | /* This is a potential bottleneck, but at least we'll have | ||
419 | * forward progress. */ | ||
420 | ciphertext_page = mempool_alloc(ext4_bounce_page_pool, | ||
421 | GFP_NOFS); | ||
422 | if (WARN_ON_ONCE(!ciphertext_page)) { | ||
423 | ciphertext_page = mempool_alloc(ext4_bounce_page_pool, | ||
424 | GFP_NOFS | __GFP_WAIT); | ||
425 | } | ||
426 | ctx->flags &= ~EXT4_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL; | ||
427 | } else { | ||
428 | ctx->flags |= EXT4_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL; | ||
429 | } | ||
430 | ctx->bounce_page = ciphertext_page; | ||
431 | ctx->control_page = plaintext_page; | ||
432 | err = ext4_page_crypto(ctx, inode, EXT4_ENCRYPT, plaintext_page->index, | ||
433 | plaintext_page, ciphertext_page); | ||
434 | if (err) { | ||
435 | ext4_release_crypto_ctx(ctx); | ||
436 | return ERR_PTR(err); | ||
437 | } | ||
438 | SetPagePrivate(ciphertext_page); | ||
439 | set_page_private(ciphertext_page, (unsigned long)ctx); | ||
440 | lock_page(ciphertext_page); | ||
441 | return ciphertext_page; | ||
442 | } | ||
443 | |||
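
ext4_encrypt() tries the general allocator for its bounce page before dipping into the reserved mempool, keeping the pool for genuine memory pressure, and flags which path was taken so release can undo the right one. A userspace model where a single static page stands in for the mempool (names illustrative):

#include <stdlib.h>

#define PAGE_SZ   4096
#define FROM_HEAP 0x1	/* mirrors EXT4_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL */

static char reserve_page[PAGE_SZ];	/* stand-in for the mempool */
static int reserve_busy;

static void *get_bounce(int *flags)
{
	void *p = malloc(PAGE_SZ);	/* fast path: general allocator */

	if (p) {
		*flags |= FROM_HEAP;
		return p;
	}
	if (reserve_busy)
		return NULL;	/* a real mempool would sleep instead */
	reserve_busy = 1;	/* last resort: the reserved page */
	*flags &= ~FROM_HEAP;
	return reserve_page;
}

static void put_bounce(void *p, int flags)
{
	if (flags & FROM_HEAP)
		free(p);	/* heap-born: free it */
	else
		reserve_busy = 0;	/* pool-born: return to the pool */
}

int main(void)
{
	int flags = 0;
	void *p = get_bounce(&flags);

	if (p)
		put_bounce(p, flags);
	return 0;
}
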
444 | /** | ||
445 | * ext4_decrypt() - Decrypts a page in-place | ||
446 | * @ctx: The encryption context. | ||
447 | * @page: The page to decrypt. Must be locked. | ||
448 | * | ||
449 | * Decrypts page in-place using the ctx encryption context. | ||
450 | * | ||
451 | * Called from the read completion callback. | ||
452 | * | ||
453 | * Return: Zero on success, non-zero otherwise. | ||
454 | */ | ||
455 | int ext4_decrypt(struct ext4_crypto_ctx *ctx, struct page *page) | ||
456 | { | ||
457 | BUG_ON(!PageLocked(page)); | ||
458 | |||
459 | return ext4_page_crypto(ctx, page->mapping->host, | ||
460 | EXT4_DECRYPT, page->index, page, page); | ||
461 | } | ||
462 | |||
463 | /* | ||
464 | * Convenience function which takes care of allocating and | ||
465 | * deallocating the encryption context | ||
466 | */ | ||
467 | int ext4_decrypt_one(struct inode *inode, struct page *page) | ||
468 | { | ||
469 | int ret; | ||
470 | |||
471 | struct ext4_crypto_ctx *ctx = ext4_get_crypto_ctx(inode); | ||
472 | |||
473 | if (!ctx) | ||
474 | return -ENOMEM; | ||
475 | ret = ext4_decrypt(ctx, page); | ||
476 | ext4_release_crypto_ctx(ctx); | ||
477 | return ret; | ||
478 | } | ||
479 | |||
480 | int ext4_encrypted_zeroout(struct inode *inode, struct ext4_extent *ex) | ||
481 | { | ||
482 | struct ext4_crypto_ctx *ctx; | ||
483 | struct page *ciphertext_page = NULL; | ||
484 | struct bio *bio; | ||
485 | ext4_lblk_t lblk = ex->ee_block; | ||
486 | ext4_fsblk_t pblk = ext4_ext_pblock(ex); | ||
487 | unsigned int len = ext4_ext_get_actual_len(ex); | ||
488 | int err = 0; | ||
489 | |||
490 | BUG_ON(inode->i_sb->s_blocksize != PAGE_CACHE_SIZE); | ||
491 | |||
492 | ctx = ext4_get_crypto_ctx(inode); | ||
493 | if (IS_ERR(ctx)) | ||
494 | return PTR_ERR(ctx); | ||
495 | |||
496 | ciphertext_page = alloc_page(GFP_NOFS); | ||
497 | if (!ciphertext_page) { | ||
498 | /* This is a potential bottleneck, but at least we'll have | ||
499 | * forward progress. */ | ||
500 | ciphertext_page = mempool_alloc(ext4_bounce_page_pool, | ||
501 | GFP_NOFS); | ||
502 | if (WARN_ON_ONCE(!ciphertext_page)) { | ||
503 | ciphertext_page = mempool_alloc(ext4_bounce_page_pool, | ||
504 | GFP_NOFS | __GFP_WAIT); | ||
505 | } | ||
506 | ctx->flags &= ~EXT4_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL; | ||
507 | } else { | ||
508 | ctx->flags |= EXT4_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL; | ||
509 | } | ||
510 | ctx->bounce_page = ciphertext_page; | ||
511 | |||
512 | while (len--) { | ||
513 | err = ext4_page_crypto(ctx, inode, EXT4_ENCRYPT, lblk, | ||
514 | ZERO_PAGE(0), ciphertext_page); | ||
515 | if (err) | ||
516 | goto errout; | ||
517 | |||
518 | bio = bio_alloc(GFP_KERNEL, 1); | ||
519 | if (!bio) { | ||
520 | err = -ENOMEM; | ||
521 | goto errout; | ||
522 | } | ||
523 | bio->bi_bdev = inode->i_sb->s_bdev; | ||
524 | bio->bi_iter.bi_sector = pblk; | ||
525 | err = bio_add_page(bio, ciphertext_page, | ||
526 | inode->i_sb->s_blocksize, 0); | ||
527 | if (err) { | ||
528 | bio_put(bio); | ||
529 | goto errout; | ||
530 | } | ||
531 | err = submit_bio_wait(WRITE, bio); | ||
532 | if (err) | ||
533 | goto errout; | ||
534 | } | ||
535 | err = 0; | ||
536 | errout: | ||
537 | ext4_release_crypto_ctx(ctx); | ||
538 | return err; | ||
539 | } | ||
540 | |||
541 | bool ext4_valid_contents_enc_mode(uint32_t mode) | ||
542 | { | ||
543 | return (mode == EXT4_ENCRYPTION_MODE_AES_256_XTS); | ||
544 | } | ||
545 | |||
546 | /** | ||
547 | * ext4_validate_encryption_key_size() - Validate the encryption key size | ||
548 | * @mode: The key mode. | ||
549 | * @size: The key size to validate. | ||
550 | * | ||
551 | * Return: The validated key size for @mode. Zero if invalid. | ||
552 | */ | ||
553 | uint32_t ext4_validate_encryption_key_size(uint32_t mode, uint32_t size) | ||
554 | { | ||
555 | if (size == ext4_encryption_key_size(mode)) | ||
556 | return size; | ||
557 | return 0; | ||
558 | } | ||
diff --git a/fs/ext4/crypto_fname.c b/fs/ext4/crypto_fname.c new file mode 100644 index 000000000000..ca2f5948c1ac --- /dev/null +++ b/fs/ext4/crypto_fname.c | |||
@@ -0,0 +1,709 @@ | |||
1 | /* | ||
2 | * linux/fs/ext4/crypto_fname.c | ||
3 | * | ||
4 | * Copyright (C) 2015, Google, Inc. | ||
5 | * | ||
6 | * This contains functions for filename crypto management in ext4 | ||
7 | * | ||
8 | * Written by Uday Savagaonkar, 2014. | ||
9 | * | ||
10 | * This has not yet undergone a rigorous security audit. | ||
11 | * | ||
12 | */ | ||
13 | |||
14 | #include <crypto/hash.h> | ||
15 | #include <crypto/sha.h> | ||
16 | #include <keys/encrypted-type.h> | ||
17 | #include <keys/user-type.h> | ||
18 | #include <linux/crypto.h> | ||
19 | #include <linux/gfp.h> | ||
20 | #include <linux/kernel.h> | ||
21 | #include <linux/key.h> | ||
22 | #include <linux/key.h> | ||
23 | #include <linux/list.h> | ||
24 | #include <linux/mempool.h> | ||
25 | #include <linux/random.h> | ||
26 | #include <linux/scatterlist.h> | ||
27 | #include <linux/spinlock_types.h> | ||
28 | |||
29 | #include "ext4.h" | ||
30 | #include "ext4_crypto.h" | ||
31 | #include "xattr.h" | ||
32 | |||
33 | /** | ||
34 | * ext4_dir_crypt_complete() - Completion callback for filename crypto | ||
35 | */ | ||
36 | static void ext4_dir_crypt_complete(struct crypto_async_request *req, int res) | ||
37 | { | ||
38 | struct ext4_completion_result *ecr = req->data; | ||
39 | |||
40 | if (res == -EINPROGRESS) | ||
41 | return; | ||
42 | ecr->res = res; | ||
43 | complete(&ecr->completion); | ||
44 | } | ||
45 | |||
46 | bool ext4_valid_filenames_enc_mode(uint32_t mode) | ||
47 | { | ||
48 | return (mode == EXT4_ENCRYPTION_MODE_AES_256_CTS); | ||
49 | } | ||
50 | |||
51 | /** | ||
52 | * ext4_fname_encrypt() - Encrypt a filename | ||
53 | * | ||
54 | * This function encrypts the input filename, and returns the length of the | ||
55 | * ciphertext. Errors are returned as negative numbers. We trust the caller to | ||
56 | * allocate sufficient memory to oname string. | ||
57 | */ | ||
58 | static int ext4_fname_encrypt(struct ext4_fname_crypto_ctx *ctx, | ||
59 | const struct qstr *iname, | ||
60 | struct ext4_str *oname) | ||
61 | { | ||
62 | u32 ciphertext_len; | ||
63 | struct ablkcipher_request *req = NULL; | ||
64 | DECLARE_EXT4_COMPLETION_RESULT(ecr); | ||
65 | struct crypto_ablkcipher *tfm = ctx->ctfm; | ||
66 | int res = 0; | ||
67 | char iv[EXT4_CRYPTO_BLOCK_SIZE]; | ||
68 | struct scatterlist sg[1]; | ||
69 | char *workbuf; | ||
70 | |||
71 | if (iname->len <= 0 || iname->len > ctx->lim) | ||
72 | return -EIO; | ||
73 | |||
74 | ciphertext_len = (iname->len < EXT4_CRYPTO_BLOCK_SIZE) ? | ||
75 | EXT4_CRYPTO_BLOCK_SIZE : iname->len; | ||
76 | ciphertext_len = (ciphertext_len > ctx->lim) | ||
77 | ? ctx->lim : ciphertext_len; | ||
78 | |||
79 | /* Allocate request */ | ||
80 | req = ablkcipher_request_alloc(tfm, GFP_NOFS); | ||
81 | if (!req) { | ||
82 | printk_ratelimited( | ||
83 | KERN_ERR "%s: crypto_request_alloc() failed\n", __func__); | ||
84 | return -ENOMEM; | ||
85 | } | ||
86 | ablkcipher_request_set_callback(req, | ||
87 | CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, | ||
88 | ext4_dir_crypt_complete, &ecr); | ||
89 | |||
90 | /* Map the workpage */ | ||
91 | workbuf = kmap(ctx->workpage); | ||
92 | |||
93 | /* Copy the input */ | ||
94 | memcpy(workbuf, iname->name, iname->len); | ||
95 | if (iname->len < ciphertext_len) | ||
96 | memset(workbuf + iname->len, 0, ciphertext_len - iname->len); | ||
97 | |||
98 | /* Initialize IV */ | ||
99 | memset(iv, 0, EXT4_CRYPTO_BLOCK_SIZE); | ||
100 | |||
101 | /* Create encryption request */ | ||
102 | sg_init_table(sg, 1); | ||
103 | sg_set_page(sg, ctx->workpage, PAGE_SIZE, 0); | ||
104 | ablkcipher_request_set_crypt(req, sg, sg, iname->len, iv); | ||
105 | res = crypto_ablkcipher_encrypt(req); | ||
106 | if (res == -EINPROGRESS || res == -EBUSY) { | ||
107 | BUG_ON(req->base.data != &ecr); | ||
108 | wait_for_completion(&ecr.completion); | ||
109 | res = ecr.res; | ||
110 | } | ||
111 | if (res >= 0) { | ||
112 | /* Copy the result to output */ | ||
113 | memcpy(oname->name, workbuf, ciphertext_len); | ||
114 | res = ciphertext_len; | ||
115 | } | ||
116 | kunmap(ctx->workpage); | ||
117 | ablkcipher_request_free(req); | ||
118 | if (res < 0) { | ||
119 | printk_ratelimited( | ||
120 | KERN_ERR "%s: Error (error code %d)\n", __func__, res); | ||
121 | } | ||
122 | oname->len = ciphertext_len; | ||
123 | return res; | ||
124 | } | ||
125 | |||
126 | /* | ||
127 | * ext4_fname_decrypt() | ||
128 | * This function decrypts the input filename and returns | ||
129 | * the length of the plaintext. | ||
130 | * Errors are returned as negative numbers. | ||
131 | * We trust the caller to allocate sufficient memory for the oname string. | ||
132 | */ | ||
133 | static int ext4_fname_decrypt(struct ext4_fname_crypto_ctx *ctx, | ||
134 | const struct ext4_str *iname, | ||
135 | struct ext4_str *oname) | ||
136 | { | ||
137 | struct ext4_str tmp_in[2], tmp_out[1]; | ||
138 | struct ablkcipher_request *req = NULL; | ||
139 | DECLARE_EXT4_COMPLETION_RESULT(ecr); | ||
140 | struct scatterlist sg[1]; | ||
141 | struct crypto_ablkcipher *tfm = ctx->ctfm; | ||
142 | int res = 0; | ||
143 | char iv[EXT4_CRYPTO_BLOCK_SIZE]; | ||
144 | char *workbuf; | ||
145 | |||
146 | if (iname->len <= 0 || iname->len > ctx->lim) | ||
147 | return -EIO; | ||
148 | |||
149 | tmp_in[0].name = iname->name; | ||
150 | tmp_in[0].len = iname->len; | ||
151 | tmp_out[0].name = oname->name; | ||
152 | |||
153 | /* Allocate request */ | ||
154 | req = ablkcipher_request_alloc(tfm, GFP_NOFS); | ||
155 | if (!req) { | ||
156 | printk_ratelimited( | ||
157 | KERN_ERR "%s: crypto_request_alloc() failed\n", __func__); | ||
158 | return -ENOMEM; | ||
159 | } | ||
160 | ablkcipher_request_set_callback(req, | ||
161 | CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, | ||
162 | ext4_dir_crypt_complete, &ecr); | ||
163 | |||
164 | /* Map the workpage */ | ||
165 | workbuf = kmap(ctx->workpage); | ||
166 | |||
167 | /* Copy the input */ | ||
168 | memcpy(workbuf, iname->name, iname->len); | ||
169 | |||
170 | /* Initialize IV */ | ||
171 | memset(iv, 0, EXT4_CRYPTO_BLOCK_SIZE); | ||
172 | |||
173 | /* Create decryption request */ | ||
174 | sg_init_table(sg, 1); | ||
175 | sg_set_page(sg, ctx->workpage, PAGE_SIZE, 0); | ||
176 | ablkcipher_request_set_crypt(req, sg, sg, iname->len, iv); | ||
177 | res = crypto_ablkcipher_decrypt(req); | ||
178 | if (res == -EINPROGRESS || res == -EBUSY) { | ||
179 | BUG_ON(req->base.data != &ecr); | ||
180 | wait_for_completion(&ecr.completion); | ||
181 | res = ecr.res; | ||
182 | } | ||
183 | if (res >= 0) { | ||
184 | /* Copy the result to output */ | ||
185 | memcpy(oname->name, workbuf, iname->len); | ||
186 | res = iname->len; | ||
187 | } | ||
188 | kunmap(ctx->workpage); | ||
189 | ablkcipher_request_free(req); | ||
190 | if (res < 0) { | ||
191 | printk_ratelimited( | ||
192 | KERN_ERR "%s: Error in ext4_fname_decrypt (error code %d)\n", | ||
193 | __func__, res); | ||
194 | return res; | ||
195 | } | ||
196 | |||
197 | oname->len = strnlen(oname->name, iname->len); | ||
198 | return oname->len; | ||
199 | } | ||
200 | |||
201 | /** | ||
202 | * ext4_fname_encode_digest() - | ||
203 | * | ||
204 | * Encodes the input digest using characters from the set [a-zA-Z0-9_+]. | ||
205 | * The encoded string is roughly 4/3 times the size of the input string. | ||
206 | */ | ||
207 | int ext4_fname_encode_digest(char *dst, char *src, u32 len) | ||
208 | { | ||
209 | static const char *lookup_table = | ||
210 | "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_+"; | ||
211 | u32 current_chunk, num_chunks, i; | ||
212 | char tmp_buf[3]; | ||
213 | u32 c0, c1, c2, c3; | ||
214 | |||
215 | current_chunk = 0; | ||
216 | num_chunks = len/3; | ||
217 | for (i = 0; i < num_chunks; i++) { | ||
218 | c0 = src[3*i] & 0x3f; | ||
219 | c1 = (((src[3*i]>>6)&0x3) | ((src[3*i+1] & 0xf)<<2)) & 0x3f; | ||
220 | c2 = (((src[3*i+1]>>4)&0xf) | ((src[3*i+2] & 0x3)<<4)) & 0x3f; | ||
221 | c3 = (src[3*i+2]>>2) & 0x3f; | ||
222 | dst[4*i] = lookup_table[c0]; | ||
223 | dst[4*i+1] = lookup_table[c1]; | ||
224 | dst[4*i+2] = lookup_table[c2]; | ||
225 | dst[4*i+3] = lookup_table[c3]; | ||
226 | } | ||
227 | if (i*3 < len) { | ||
228 | memset(tmp_buf, 0, 3); | ||
229 | memcpy(tmp_buf, &src[3*i], len-3*i); | ||
230 | c0 = tmp_buf[0] & 0x3f; | ||
231 | c1 = (((tmp_buf[0]>>6)&0x3) | ((tmp_buf[1] & 0xf)<<2)) & 0x3f; | ||
232 | c2 = (((tmp_buf[1]>>4)&0xf) | ((tmp_buf[2] & 0x3)<<4)) & 0x3f; | ||
233 | c3 = (tmp_buf[2]>>2) & 0x3f; | ||
234 | dst[4*i] = lookup_table[c0]; | ||
235 | dst[4*i+1] = lookup_table[c1]; | ||
236 | dst[4*i+2] = lookup_table[c2]; | ||
237 | dst[4*i+3] = lookup_table[c3]; | ||
238 | i++; | ||
239 | } | ||
240 | return (i * 4); | ||
241 | } | ||
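For illustration, a hypothetical stand-alone user-space mirror of the same encoding; it restates the little-endian 3-byte to 4-character packing above in a single loop, and uses unsigned char so the right shifts cannot sign-extend (the kernel version tolerates signed char because it masks every chunk with 0x3f):

    #include <string.h>

    /* User-space mirror of ext4_fname_encode_digest(): same alphabet,
     * same 6-bit chunking, zero-padded final group. */
    static int encode_digest(char *dst, const unsigned char *src,
                             unsigned int len)
    {
            static const char *tbl =
                    "abcdefghijklmnopqrstuvwxyz"
                    "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_+";
            unsigned int i = 0, j = 0;

            while (i < len) {
                    unsigned char b[3] = { 0, 0, 0 };
                    unsigned int n = (len - i < 3) ? len - i : 3;

                    memcpy(b, src + i, n);
                    dst[j++] = tbl[b[0] & 0x3f];
                    dst[j++] = tbl[((b[0] >> 6) | (b[1] << 2)) & 0x3f];
                    dst[j++] = tbl[((b[1] >> 4) | (b[2] << 4)) & 0x3f];
                    dst[j++] = tbl[(b[2] >> 2) & 0x3f];
                    i += 3;
            }
            return j;       /* 4 per 3 input bytes: the ~4/3 expansion */
    }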
242 | |||
243 | /** | ||
244 | * ext4_fname_hash() - | ||
245 | * | ||
246 | * This function computes the hash of the input filename and sets the output | ||
247 | * buffer to the *encoded* digest, returning the length of the encoded | ||
248 | * digest. Errors are returned as negative numbers. We trust the caller | ||
249 | * to allocate sufficient memory for the oname string. | ||
250 | */ | ||
251 | static int ext4_fname_hash(struct ext4_fname_crypto_ctx *ctx, | ||
252 | const struct ext4_str *iname, | ||
253 | struct ext4_str *oname) | ||
254 | { | ||
255 | struct scatterlist sg; | ||
256 | struct hash_desc desc = { | ||
257 | .tfm = (struct crypto_hash *)ctx->htfm, | ||
258 | .flags = CRYPTO_TFM_REQ_MAY_SLEEP | ||
259 | }; | ||
260 | int res = 0; | ||
261 | |||
262 | if (iname->len <= EXT4_FNAME_CRYPTO_DIGEST_SIZE) { | ||
263 | res = ext4_fname_encode_digest(oname->name, iname->name, | ||
264 | iname->len); | ||
265 | oname->len = res; | ||
266 | return res; | ||
267 | } | ||
268 | |||
269 | sg_init_one(&sg, iname->name, iname->len); | ||
270 | res = crypto_hash_init(&desc); | ||
271 | if (res) { | ||
272 | printk(KERN_ERR | ||
273 | "%s: Error initializing crypto hash; res = [%d]\n", | ||
274 | __func__, res); | ||
275 | goto out; | ||
276 | } | ||
277 | res = crypto_hash_update(&desc, &sg, iname->len); | ||
278 | if (res) { | ||
279 | printk(KERN_ERR | ||
280 | "%s: Error updating crypto hash; res = [%d]\n", | ||
281 | __func__, res); | ||
282 | goto out; | ||
283 | } | ||
284 | res = crypto_hash_final(&desc, | ||
285 | &oname->name[EXT4_FNAME_CRYPTO_DIGEST_SIZE]); | ||
286 | if (res) { | ||
287 | printk(KERN_ERR | ||
288 | "%s: Error finalizing crypto hash; res = [%d]\n", | ||
289 | __func__, res); | ||
290 | goto out; | ||
291 | } | ||
292 | /* Encode the digest as a printable string--this will increase the | ||
293 | * size of the digest */ | ||
294 | oname->name[0] = 'I'; | ||
295 | res = ext4_fname_encode_digest(oname->name+1, | ||
296 | &oname->name[EXT4_FNAME_CRYPTO_DIGEST_SIZE], | ||
297 | EXT4_FNAME_CRYPTO_DIGEST_SIZE) + 1; | ||
298 | oname->len = res; | ||
299 | out: | ||
300 | return res; | ||
301 | } | ||
302 | |||
303 | /** | ||
304 | * ext4_free_fname_crypto_ctx() - | ||
305 | * | ||
306 | * Frees up a crypto context. | ||
307 | */ | ||
308 | void ext4_free_fname_crypto_ctx(struct ext4_fname_crypto_ctx *ctx) | ||
309 | { | ||
310 | if (ctx == NULL || IS_ERR(ctx)) | ||
311 | return; | ||
312 | |||
313 | if (ctx->ctfm && !IS_ERR(ctx->ctfm)) | ||
314 | crypto_free_ablkcipher(ctx->ctfm); | ||
315 | if (ctx->htfm && !IS_ERR(ctx->htfm)) | ||
316 | crypto_free_hash(ctx->htfm); | ||
317 | if (ctx->workpage && !IS_ERR(ctx->workpage)) | ||
318 | __free_page(ctx->workpage); | ||
319 | kfree(ctx); | ||
320 | } | ||
321 | |||
322 | /** | ||
323 | * ext4_put_fname_crypto_ctx() - | ||
324 | * | ||
325 | * Returns the crypto context to the free list. If the free list is above a | ||
326 | * threshold, completely frees up the context and returns the memory. | ||
327 | * | ||
328 | * TODO: Currently we directly free the crypto context. Eventually we should | ||
329 | * add code to return it to a free list. Such an approach will increase the | ||
330 | * efficiency of directory lookups. | ||
331 | */ | ||
332 | void ext4_put_fname_crypto_ctx(struct ext4_fname_crypto_ctx **ctx) | ||
333 | { | ||
334 | if (*ctx == NULL || IS_ERR(*ctx)) | ||
335 | return; | ||
336 | ext4_free_fname_crypto_ctx(*ctx); | ||
337 | *ctx = NULL; | ||
338 | } | ||
339 | |||
340 | /** | ||
341 | * ext4_search_fname_crypto_ctx() - find a cached context for @key (stub: | ||
342 | * currently always returns NULL, so a fresh context is allocated per call) | ||
342 | */ | ||
343 | static struct ext4_fname_crypto_ctx *ext4_search_fname_crypto_ctx( | ||
344 | const struct ext4_encryption_key *key) | ||
345 | { | ||
346 | return NULL; | ||
347 | } | ||
348 | |||
349 | /** | ||
350 | * ext4_alloc_fname_crypto_ctx() - allocate a new filename crypto context | ||
351 | */ | ||
352 | struct ext4_fname_crypto_ctx *ext4_alloc_fname_crypto_ctx( | ||
353 | const struct ext4_encryption_key *key) | ||
354 | { | ||
355 | struct ext4_fname_crypto_ctx *ctx; | ||
356 | |||
357 | ctx = kmalloc(sizeof(struct ext4_fname_crypto_ctx), GFP_NOFS); | ||
358 | if (ctx == NULL) | ||
359 | return ERR_PTR(-ENOMEM); | ||
360 | if (key->mode == EXT4_ENCRYPTION_MODE_INVALID) { | ||
361 | /* This will automatically set the key mode to invalid, | ||
362 | * since EXT4_ENCRYPTION_MODE_INVALID is zero */ | ||
363 | memset(&ctx->key, 0, sizeof(ctx->key)); | ||
364 | } else { | ||
365 | memcpy(&ctx->key, key, sizeof(struct ext4_encryption_key)); | ||
366 | } | ||
367 | ctx->has_valid_key = (EXT4_ENCRYPTION_MODE_INVALID == key->mode) | ||
368 | ? 0 : 1; | ||
369 | ctx->ctfm_key_is_ready = 0; | ||
370 | ctx->ctfm = NULL; | ||
371 | ctx->htfm = NULL; | ||
372 | ctx->workpage = NULL; | ||
373 | return ctx; | ||
374 | } | ||
375 | |||
376 | /** | ||
377 | * ext4_get_fname_crypto_ctx() - | ||
378 | * | ||
379 | * Allocates a free crypto context and initializes it to hold | ||
380 | * the crypto material for the inode. | ||
381 | * | ||
382 | * Return: NULL if not encrypted, an ERR_PTR() on error, a valid pointer otherwise. | ||
383 | */ | ||
384 | struct ext4_fname_crypto_ctx *ext4_get_fname_crypto_ctx( | ||
385 | struct inode *inode, u32 max_ciphertext_len) | ||
386 | { | ||
387 | struct ext4_fname_crypto_ctx *ctx; | ||
388 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
389 | int res; | ||
390 | |||
391 | /* Check if the crypto policy is set on the inode */ | ||
392 | res = ext4_encrypted_inode(inode); | ||
393 | if (res == 0) | ||
394 | return NULL; | ||
395 | |||
396 | if (!ext4_has_encryption_key(inode)) | ||
397 | ext4_generate_encryption_key(inode); | ||
398 | |||
399 | /* Get a crypto context based on the key. | ||
400 | * A new context is allocated if no context matches the requested key. | ||
401 | */ | ||
402 | ctx = ext4_search_fname_crypto_ctx(&(ei->i_encryption_key)); | ||
403 | if (ctx == NULL) | ||
404 | ctx = ext4_alloc_fname_crypto_ctx(&(ei->i_encryption_key)); | ||
405 | if (IS_ERR(ctx)) | ||
406 | return ctx; | ||
407 | |||
408 | if (ctx->has_valid_key) { | ||
409 | if (ctx->key.mode != EXT4_ENCRYPTION_MODE_AES_256_CTS) { | ||
410 | printk_once(KERN_WARNING | ||
411 | "ext4: unsupported key mode %d\n", | ||
412 | ctx->key.mode); | ||
413 | return ERR_PTR(-ENOKEY); | ||
414 | } | ||
415 | |||
416 | /* As a first cut, we allocate a new tfm on every call. | ||
417 | * Later, we will keep the tfm around in case the key gets | ||
418 | * re-used */ | ||
419 | if (ctx->ctfm == NULL) { | ||
420 | ctx->ctfm = crypto_alloc_ablkcipher("cts(cbc(aes))", | ||
421 | 0, 0); | ||
422 | } | ||
423 | if (IS_ERR(ctx->ctfm)) { | ||
424 | res = PTR_ERR(ctx->ctfm); | ||
425 | printk( | ||
426 | KERN_DEBUG "%s: error (%d) allocating crypto tfm\n", | ||
427 | __func__, res); | ||
428 | ctx->ctfm = NULL; | ||
429 | ext4_put_fname_crypto_ctx(&ctx); | ||
430 | return ERR_PTR(res); | ||
431 | } | ||
432 | if (ctx->ctfm == NULL) { | ||
433 | printk( | ||
434 | KERN_DEBUG "%s: could not allocate crypto tfm\n", | ||
435 | __func__); | ||
436 | ext4_put_fname_crypto_ctx(&ctx); | ||
437 | return ERR_PTR(-ENOMEM); | ||
438 | } | ||
439 | if (ctx->workpage == NULL) | ||
440 | ctx->workpage = alloc_page(GFP_NOFS); | ||
441 | if (IS_ERR(ctx->workpage)) { | ||
442 | res = PTR_ERR(ctx->workpage); | ||
443 | printk( | ||
444 | KERN_DEBUG "%s: error (%d) allocating work page\n", | ||
445 | __func__, res); | ||
446 | ctx->workpage = NULL; | ||
447 | ext4_put_fname_crypto_ctx(&ctx); | ||
448 | return ERR_PTR(res); | ||
449 | } | ||
450 | if (ctx->workpage == NULL) { | ||
451 | printk( | ||
452 | KERN_DEBUG "%s: could not allocate work page\n", | ||
453 | __func__); | ||
454 | ext4_put_fname_crypto_ctx(&ctx); | ||
455 | return ERR_PTR(-ENOMEM); | ||
456 | } | ||
457 | ctx->lim = max_ciphertext_len; | ||
458 | crypto_ablkcipher_clear_flags(ctx->ctfm, ~0); | ||
459 | crypto_tfm_set_flags(crypto_ablkcipher_tfm(ctx->ctfm), | ||
460 | CRYPTO_TFM_REQ_WEAK_KEY); | ||
461 | |||
462 | /* If we are lucky, we will get a context that is already | ||
463 | * set up with the right key. Else, we will have to | ||
464 | * set the key */ | ||
465 | if (!ctx->ctfm_key_is_ready) { | ||
466 | /* Since our crypto objectives for filename | ||
467 | * encryption are pretty weak, we directly use | ||
468 | * the inode master key */ | ||
469 | res = crypto_ablkcipher_setkey(ctx->ctfm, | ||
470 | ctx->key.raw, ctx->key.size); | ||
471 | if (res) { | ||
472 | ext4_put_fname_crypto_ctx(&ctx); | ||
473 | return ERR_PTR(-EIO); | ||
474 | } | ||
475 | ctx->ctfm_key_is_ready = 1; | ||
476 | } else { | ||
477 | /* In the current implementation, key should never be | ||
478 | * marked "ready" for a context that has just been | ||
479 | * allocated. So we should never reach here */ | ||
480 | BUG(); | ||
481 | } | ||
482 | } | ||
483 | if (ctx->htfm == NULL) | ||
484 | ctx->htfm = crypto_alloc_hash("sha256", 0, CRYPTO_ALG_ASYNC); | ||
485 | if (IS_ERR(ctx->htfm)) { | ||
486 | res = PTR_ERR(ctx->htfm); | ||
487 | printk(KERN_DEBUG "%s: error (%d) allocating hash tfm\n", | ||
488 | __func__, res); | ||
489 | ctx->htfm = NULL; | ||
490 | ext4_put_fname_crypto_ctx(&ctx); | ||
491 | return ERR_PTR(res); | ||
492 | } | ||
493 | if (ctx->htfm == NULL) { | ||
494 | printk(KERN_DEBUG "%s: could not allocate hash tfm\n", | ||
495 | __func__); | ||
496 | ext4_put_fname_crypto_ctx(&ctx); | ||
497 | return ERR_PTR(-ENOMEM); | ||
498 | } | ||
499 | |||
500 | return ctx; | ||
501 | } | ||
502 | |||
503 | /** | ||
504 | * ext4_fname_crypto_round_up() - | ||
505 | * | ||
506 | * Return: @size rounded up to the next multiple of @blksize | ||
507 | */ | ||
508 | u32 ext4_fname_crypto_round_up(u32 size, u32 blksize) | ||
509 | { | ||
510 | return ((size+blksize-1)/blksize)*blksize; | ||
511 | } | ||
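A few worked values, assuming EXT4_CRYPTO_BLOCK_SIZE is the usual 16-byte AES block size:

    ext4_fname_crypto_round_up(1, 16);   /* -> 16 */
    ext4_fname_crypto_round_up(16, 16);  /* -> 16 */
    ext4_fname_crypto_round_up(17, 16);  /* -> 32 */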
512 | |||
513 | /** | ||
514 | * ext4_fname_crypto_namelen_on_disk() - compute the on-disk length of a name | ||
515 | */ | ||
516 | int ext4_fname_crypto_namelen_on_disk(struct ext4_fname_crypto_ctx *ctx, | ||
517 | u32 namelen) | ||
518 | { | ||
519 | u32 ciphertext_len; | ||
520 | |||
521 | if (ctx == NULL) | ||
522 | return -EIO; | ||
523 | if (!(ctx->has_valid_key)) | ||
524 | return -EACCES; | ||
525 | ciphertext_len = (namelen < EXT4_CRYPTO_BLOCK_SIZE) ? | ||
526 | EXT4_CRYPTO_BLOCK_SIZE : namelen; | ||
527 | ciphertext_len = (ciphertext_len > ctx->lim) | ||
528 | ? ctx->lim : ciphertext_len; | ||
529 | return (int) ciphertext_len; | ||
530 | } | ||
531 | |||
532 | /** | ||
533 | * ext4_fname_crypto_alloc_buffer() - | ||
534 | * | ||
535 | * Allocates an output buffer that is sufficient for the crypto operation | ||
536 | * specified by the context and the direction. | ||
537 | */ | ||
538 | int ext4_fname_crypto_alloc_buffer(struct ext4_fname_crypto_ctx *ctx, | ||
539 | u32 ilen, struct ext4_str *crypto_str) | ||
540 | { | ||
541 | unsigned int olen; | ||
542 | |||
543 | if (!ctx) | ||
544 | return -EIO; | ||
545 | olen = ext4_fname_crypto_round_up(ilen, EXT4_CRYPTO_BLOCK_SIZE); | ||
546 | crypto_str->len = olen; | ||
547 | if (olen < EXT4_FNAME_CRYPTO_DIGEST_SIZE*2) | ||
548 | olen = EXT4_FNAME_CRYPTO_DIGEST_SIZE*2; | ||
549 | /* Allocated buffer can hold one more character to null-terminate the | ||
550 | * string */ | ||
551 | crypto_str->name = kmalloc(olen+1, GFP_NOFS); | ||
552 | if (!(crypto_str->name)) | ||
553 | return -ENOMEM; | ||
554 | return 0; | ||
555 | } | ||
556 | |||
557 | /** | ||
558 | * ext4_fname_crypto_free_buffer() - | ||
559 | * | ||
560 | * Frees the buffer allocated for crypto operation. | ||
561 | */ | ||
562 | void ext4_fname_crypto_free_buffer(struct ext4_str *crypto_str) | ||
563 | { | ||
564 | if (!crypto_str) | ||
565 | return; | ||
566 | kfree(crypto_str->name); | ||
567 | crypto_str->name = NULL; | ||
568 | } | ||
569 | |||
570 | /** | ||
571 | * ext4_fname_disk_to_usr() - converts a filename from disk space to user space | ||
572 | */ | ||
573 | int _ext4_fname_disk_to_usr(struct ext4_fname_crypto_ctx *ctx, | ||
574 | const struct ext4_str *iname, | ||
575 | struct ext4_str *oname) | ||
576 | { | ||
577 | if (ctx == NULL) | ||
578 | return -EIO; | ||
579 | if (iname->len < 3) { | ||
580 | /* Check for . and .. */ | ||
581 | if (iname->name[0] == '.' && iname->name[iname->len-1] == '.') { | ||
582 | oname->name[0] = '.'; | ||
583 | oname->name[iname->len-1] = '.'; | ||
584 | oname->len = iname->len; | ||
585 | return oname->len; | ||
586 | } | ||
587 | } | ||
588 | if (ctx->has_valid_key) | ||
589 | return ext4_fname_decrypt(ctx, iname, oname); | ||
590 | else | ||
591 | return ext4_fname_hash(ctx, iname, oname); | ||
592 | } | ||
593 | |||
594 | int ext4_fname_disk_to_usr(struct ext4_fname_crypto_ctx *ctx, | ||
595 | const struct ext4_dir_entry_2 *de, | ||
596 | struct ext4_str *oname) | ||
597 | { | ||
598 | struct ext4_str iname = {.name = (unsigned char *) de->name, | ||
599 | .len = de->name_len }; | ||
600 | |||
601 | return _ext4_fname_disk_to_usr(ctx, &iname, oname); | ||
602 | } | ||
603 | |||
604 | |||
605 | /** | ||
606 | * ext4_fname_usr_to_disk() - converts a filename from user space to disk space | ||
607 | */ | ||
608 | int ext4_fname_usr_to_disk(struct ext4_fname_crypto_ctx *ctx, | ||
609 | const struct qstr *iname, | ||
610 | struct ext4_str *oname) | ||
611 | { | ||
612 | int res; | ||
613 | |||
614 | if (ctx == NULL) | ||
615 | return -EIO; | ||
616 | if (iname->len < 3) { | ||
617 | /* Check for . and .. */ | ||
618 | if (iname->name[0] == '.' && | ||
619 | iname->name[iname->len-1] == '.') { | ||
620 | oname->name[0] = '.'; | ||
621 | oname->name[iname->len-1] = '.'; | ||
622 | oname->len = iname->len; | ||
623 | return oname->len; | ||
624 | } | ||
625 | } | ||
626 | if (ctx->has_valid_key) { | ||
627 | res = ext4_fname_encrypt(ctx, iname, oname); | ||
628 | return res; | ||
629 | } | ||
630 | /* Without a proper key, a user is not allowed to modify the filenames | ||
631 | * in a directory. Consequently, a user space name cannot be mapped to | ||
632 | * a disk-space name */ | ||
633 | return -EACCES; | ||
634 | } | ||
635 | |||
636 | /* | ||
637 | * Calculate the htree hash from a filename from user space | ||
638 | */ | ||
639 | int ext4_fname_usr_to_hash(struct ext4_fname_crypto_ctx *ctx, | ||
640 | const struct qstr *iname, | ||
641 | struct dx_hash_info *hinfo) | ||
642 | { | ||
643 | struct ext4_str tmp, tmp2; | ||
644 | int ret = 0; | ||
645 | |||
646 | if (!ctx || !ctx->has_valid_key || | ||
647 | ((iname->name[0] == '.') && | ||
648 | ((iname->len == 1) || | ||
649 | ((iname->name[1] == '.') && (iname->len == 2))))) { | ||
650 | ext4fs_dirhash(iname->name, iname->len, hinfo); | ||
651 | return 0; | ||
652 | } | ||
653 | |||
654 | /* First encrypt the plaintext name */ | ||
655 | ret = ext4_fname_crypto_alloc_buffer(ctx, iname->len, &tmp); | ||
656 | if (ret < 0) | ||
657 | return ret; | ||
658 | |||
659 | ret = ext4_fname_encrypt(ctx, iname, &tmp); | ||
660 | if (ret < 0) | ||
661 | goto out; | ||
662 | |||
663 | tmp2.len = (4 * ((EXT4_FNAME_CRYPTO_DIGEST_SIZE + 2) / 3)) + 1; | ||
664 | tmp2.name = kmalloc(tmp2.len + 1, GFP_KERNEL); | ||
665 | if (tmp2.name == NULL) { | ||
666 | ret = -ENOMEM; | ||
667 | goto out; | ||
668 | } | ||
669 | |||
670 | ret = ext4_fname_hash(ctx, &tmp, &tmp2); | ||
671 | if (ret > 0) | ||
672 | ext4fs_dirhash(tmp2.name, tmp2.len, hinfo); | ||
673 | ext4_fname_crypto_free_buffer(&tmp2); | ||
674 | out: | ||
675 | ext4_fname_crypto_free_buffer(&tmp); | ||
676 | return ret; | ||
677 | } | ||
678 | |||
679 | /** | ||
680 | * ext4_fname_disk_to_hash() - converts a disk-space filename to an htree hash | ||
681 | */ | ||
682 | int ext4_fname_disk_to_hash(struct ext4_fname_crypto_ctx *ctx, | ||
683 | const struct ext4_dir_entry_2 *de, | ||
684 | struct dx_hash_info *hinfo) | ||
685 | { | ||
686 | struct ext4_str iname = {.name = (unsigned char *) de->name, | ||
687 | .len = de->name_len}; | ||
688 | struct ext4_str tmp; | ||
689 | int ret; | ||
690 | |||
691 | if (!ctx || | ||
692 | ((iname.name[0] == '.') && | ||
693 | ((iname.len == 1) || | ||
694 | ((iname.name[1] == '.') && (iname.len == 2))))) { | ||
695 | ext4fs_dirhash(iname.name, iname.len, hinfo); | ||
696 | return 0; | ||
697 | } | ||
698 | |||
699 | tmp.len = (4 * ((EXT4_FNAME_CRYPTO_DIGEST_SIZE + 2) / 3)) + 1; | ||
700 | tmp.name = kmalloc(tmp.len + 1, GFP_KERNEL); | ||
701 | if (tmp.name == NULL) | ||
702 | return -ENOMEM; | ||
703 | |||
704 | ret = ext4_fname_hash(ctx, &iname, &tmp); | ||
705 | if (ret > 0) | ||
706 | ext4fs_dirhash(tmp.name, tmp.len, hinfo); | ||
707 | ext4_fname_crypto_free_buffer(&tmp); | ||
708 | return ret; | ||
709 | } | ||
diff --git a/fs/ext4/crypto_key.c b/fs/ext4/crypto_key.c new file mode 100644 index 000000000000..c8392af8abbb --- /dev/null +++ b/fs/ext4/crypto_key.c | |||
@@ -0,0 +1,165 @@ | |||
1 | /* | ||
2 | * linux/fs/ext4/crypto_key.c | ||
3 | * | ||
4 | * Copyright (C) 2015, Google, Inc. | ||
5 | * | ||
6 | * This contains encryption key functions for ext4 | ||
7 | * | ||
8 | * Written by Michael Halcrow, Ildar Muslukhov, and Uday Savagaonkar, 2015. | ||
9 | */ | ||
10 | |||
11 | #include <keys/encrypted-type.h> | ||
12 | #include <keys/user-type.h> | ||
13 | #include <linux/random.h> | ||
14 | #include <linux/scatterlist.h> | ||
15 | #include <uapi/linux/keyctl.h> | ||
16 | |||
17 | #include "ext4.h" | ||
18 | #include "xattr.h" | ||
19 | |||
20 | static void derive_crypt_complete(struct crypto_async_request *req, int rc) | ||
21 | { | ||
22 | struct ext4_completion_result *ecr = req->data; | ||
23 | |||
24 | if (rc == -EINPROGRESS) | ||
25 | return; | ||
26 | |||
27 | ecr->res = rc; | ||
28 | complete(&ecr->completion); | ||
29 | } | ||
30 | |||
31 | /** | ||
32 | * ext4_derive_key_aes() - Derive a key using AES-128-ECB | ||
33 | * @deriving_key: Encryption key used for derivation. | ||
34 | * @source_key: Source key to which to apply derivation. | ||
35 | * @derived_key: Derived key. | ||
36 | * | ||
37 | * Return: Zero on success; non-zero otherwise. | ||
38 | */ | ||
39 | static int ext4_derive_key_aes(char deriving_key[EXT4_AES_128_ECB_KEY_SIZE], | ||
40 | char source_key[EXT4_AES_256_XTS_KEY_SIZE], | ||
41 | char derived_key[EXT4_AES_256_XTS_KEY_SIZE]) | ||
42 | { | ||
43 | int res = 0; | ||
44 | struct ablkcipher_request *req = NULL; | ||
45 | DECLARE_EXT4_COMPLETION_RESULT(ecr); | ||
46 | struct scatterlist src_sg, dst_sg; | ||
47 | struct crypto_ablkcipher *tfm = crypto_alloc_ablkcipher("ecb(aes)", 0, | ||
48 | 0); | ||
49 | |||
50 | if (IS_ERR(tfm)) { | ||
51 | res = PTR_ERR(tfm); | ||
52 | tfm = NULL; | ||
53 | goto out; | ||
54 | } | ||
55 | crypto_ablkcipher_set_flags(tfm, CRYPTO_TFM_REQ_WEAK_KEY); | ||
56 | req = ablkcipher_request_alloc(tfm, GFP_NOFS); | ||
57 | if (!req) { | ||
58 | res = -ENOMEM; | ||
59 | goto out; | ||
60 | } | ||
61 | ablkcipher_request_set_callback(req, | ||
62 | CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, | ||
63 | derive_crypt_complete, &ecr); | ||
64 | res = crypto_ablkcipher_setkey(tfm, deriving_key, | ||
65 | EXT4_AES_128_ECB_KEY_SIZE); | ||
66 | if (res < 0) | ||
67 | goto out; | ||
68 | sg_init_one(&src_sg, source_key, EXT4_AES_256_XTS_KEY_SIZE); | ||
69 | sg_init_one(&dst_sg, derived_key, EXT4_AES_256_XTS_KEY_SIZE); | ||
70 | ablkcipher_request_set_crypt(req, &src_sg, &dst_sg, | ||
71 | EXT4_AES_256_XTS_KEY_SIZE, NULL); | ||
72 | res = crypto_ablkcipher_encrypt(req); | ||
73 | if (res == -EINPROGRESS || res == -EBUSY) { | ||
74 | BUG_ON(req->base.data != &ecr); | ||
75 | wait_for_completion(&ecr.completion); | ||
76 | res = ecr.res; | ||
77 | } | ||
78 | |||
79 | out: | ||
80 | if (req) | ||
81 | ablkcipher_request_free(req); | ||
82 | if (tfm) | ||
83 | crypto_free_ablkcipher(tfm); | ||
84 | return res; | ||
85 | } | ||
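The derivation is deterministic, so it can be reproduced in user space for testing. Below is a sketch using OpenSSL's EVP API; the helper name is hypothetical, and the 16- and 64-byte sizes are assumptions standing in for EXT4_AES_128_ECB_KEY_SIZE and EXT4_AES_256_XTS_KEY_SIZE:

    #include <openssl/evp.h>

    /* Sketch: derived = AES-128-ECB(key = deriving_key, data = source_key),
     * mirroring ext4_derive_key_aes() above. */
    static int derive_key_aes(const unsigned char deriving_key[16],
                              const unsigned char source_key[64],
                              unsigned char derived_key[64])
    {
            EVP_CIPHER_CTX *c = EVP_CIPHER_CTX_new();
            int outl = 0, tmpl = 0, ok;

            if (!c)
                    return -1;
            ok = EVP_EncryptInit_ex(c, EVP_aes_128_ecb(), NULL,
                                    deriving_key, NULL) &&
                 EVP_CIPHER_CTX_set_padding(c, 0) && /* 64 is block-aligned */
                 EVP_EncryptUpdate(c, derived_key, &outl, source_key, 64) &&
                 EVP_EncryptFinal_ex(c, derived_key + outl, &tmpl);
            EVP_CIPHER_CTX_free(c);
            return ok ? 0 : -1;
    }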
86 | |||
87 | /** | ||
88 | * ext4_generate_encryption_key() - generates an encryption key | ||
89 | * @inode: The inode to generate the encryption key for. | ||
90 | */ | ||
91 | int ext4_generate_encryption_key(struct inode *inode) | ||
92 | { | ||
93 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
94 | struct ext4_encryption_key *crypt_key = &ei->i_encryption_key; | ||
95 | char full_key_descriptor[EXT4_KEY_DESC_PREFIX_SIZE + | ||
96 | (EXT4_KEY_DESCRIPTOR_SIZE * 2) + 1]; | ||
97 | struct key *keyring_key = NULL; | ||
98 | struct ext4_encryption_key *master_key; | ||
99 | struct ext4_encryption_context ctx; | ||
100 | struct user_key_payload *ukp; | ||
101 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | ||
102 | int res = ext4_xattr_get(inode, EXT4_XATTR_INDEX_ENCRYPTION, | ||
103 | EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, | ||
104 | &ctx, sizeof(ctx)); | ||
105 | |||
106 | if (res != sizeof(ctx)) { | ||
107 | if (res > 0) | ||
108 | res = -EINVAL; | ||
109 | goto out; | ||
110 | } | ||
111 | res = 0; | ||
112 | |||
113 | if (S_ISREG(inode->i_mode)) | ||
114 | crypt_key->mode = ctx.contents_encryption_mode; | ||
115 | else if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) | ||
116 | crypt_key->mode = ctx.filenames_encryption_mode; | ||
117 | else { | ||
118 | printk(KERN_ERR "ext4 crypto: Unsupported inode type.\n"); | ||
119 | BUG(); | ||
120 | } | ||
121 | crypt_key->size = ext4_encryption_key_size(crypt_key->mode); | ||
122 | BUG_ON(!crypt_key->size); | ||
123 | if (DUMMY_ENCRYPTION_ENABLED(sbi)) { | ||
124 | memset(crypt_key->raw, 0x42, EXT4_AES_256_XTS_KEY_SIZE); | ||
125 | goto out; | ||
126 | } | ||
127 | memcpy(full_key_descriptor, EXT4_KEY_DESC_PREFIX, | ||
128 | EXT4_KEY_DESC_PREFIX_SIZE); | ||
129 | sprintf(full_key_descriptor + EXT4_KEY_DESC_PREFIX_SIZE, | ||
130 | "%*phN", EXT4_KEY_DESCRIPTOR_SIZE, | ||
131 | ctx.master_key_descriptor); | ||
132 | full_key_descriptor[EXT4_KEY_DESC_PREFIX_SIZE + | ||
133 | (2 * EXT4_KEY_DESCRIPTOR_SIZE)] = '\0'; | ||
134 | keyring_key = request_key(&key_type_logon, full_key_descriptor, NULL); | ||
135 | if (IS_ERR(keyring_key)) { | ||
136 | res = PTR_ERR(keyring_key); | ||
137 | keyring_key = NULL; | ||
138 | goto out; | ||
139 | } | ||
140 | BUG_ON(keyring_key->type != &key_type_logon); | ||
141 | ukp = ((struct user_key_payload *)keyring_key->payload.data); | ||
142 | if (ukp->datalen != sizeof(struct ext4_encryption_key)) { | ||
143 | res = -EINVAL; | ||
144 | goto out; | ||
145 | } | ||
146 | master_key = (struct ext4_encryption_key *)ukp->data; | ||
147 | BUILD_BUG_ON(EXT4_AES_128_ECB_KEY_SIZE != | ||
148 | EXT4_KEY_DERIVATION_NONCE_SIZE); | ||
149 | BUG_ON(master_key->size != EXT4_AES_256_XTS_KEY_SIZE); | ||
150 | res = ext4_derive_key_aes(ctx.nonce, master_key->raw, crypt_key->raw); | ||
151 | out: | ||
152 | if (keyring_key) | ||
153 | key_put(keyring_key); | ||
154 | if (res < 0) | ||
155 | crypt_key->mode = EXT4_ENCRYPTION_MODE_INVALID; | ||
156 | return res; | ||
157 | } | ||
158 | |||
159 | int ext4_has_encryption_key(struct inode *inode) | ||
160 | { | ||
161 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
162 | struct ext4_encryption_key *crypt_key = &ei->i_encryption_key; | ||
163 | |||
164 | return (crypt_key->mode != EXT4_ENCRYPTION_MODE_INVALID); | ||
165 | } | ||
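For the request_key() lookup in ext4_generate_encryption_key() to succeed, user space must first install a logon key whose description is the "ext4:" prefix followed by the hex master_key_descriptor. A sketch with libkeyutils; the payload layout below is an assumption mirroring the mode/raw/size fields of struct ext4_encryption_key, and the descriptor string is a placeholder:

    #include <keyutils.h>
    #include <string.h>

    struct ext4_key_payload {       /* assumed layout: mode, raw, size */
            unsigned int mode;
            unsigned char raw[64];
            unsigned int size;
    };

    static key_serial_t install_master_key(const unsigned char raw[64])
    {
            struct ext4_key_payload p;

            memset(&p, 0, sizeof(p));
            p.mode = 1;             /* EXT4_ENCRYPTION_MODE_AES_256_XTS */
            memcpy(p.raw, raw, 64);
            p.size = 64;
            /* "ext4:" + hex form of the policy's master_key_descriptor */
            return add_key("logon", "ext4:0123456789abcdef",
                           &p, sizeof(p), KEY_SPEC_SESSION_KEYRING);
    }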
diff --git a/fs/ext4/crypto_policy.c b/fs/ext4/crypto_policy.c new file mode 100644 index 000000000000..30eaf9e9864a --- /dev/null +++ b/fs/ext4/crypto_policy.c | |||
@@ -0,0 +1,194 @@ | |||
1 | /* | ||
2 | * linux/fs/ext4/crypto_policy.c | ||
3 | * | ||
4 | * Copyright (C) 2015, Google, Inc. | ||
5 | * | ||
6 | * This contains encryption policy functions for ext4 | ||
7 | * | ||
8 | * Written by Michael Halcrow, 2015. | ||
9 | */ | ||
10 | |||
11 | #include <linux/random.h> | ||
12 | #include <linux/string.h> | ||
13 | #include <linux/types.h> | ||
14 | |||
15 | #include "ext4.h" | ||
16 | #include "xattr.h" | ||
17 | |||
18 | static int ext4_inode_has_encryption_context(struct inode *inode) | ||
19 | { | ||
20 | int res = ext4_xattr_get(inode, EXT4_XATTR_INDEX_ENCRYPTION, | ||
21 | EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, NULL, 0); | ||
22 | return (res > 0); | ||
23 | } | ||
24 | |||
25 | /* | ||
26 | * check whether the policy is consistent with the encryption context | ||
27 | * for the inode | ||
28 | */ | ||
29 | static int ext4_is_encryption_context_consistent_with_policy( | ||
30 | struct inode *inode, const struct ext4_encryption_policy *policy) | ||
31 | { | ||
32 | struct ext4_encryption_context ctx; | ||
33 | int res = ext4_xattr_get(inode, EXT4_XATTR_INDEX_ENCRYPTION, | ||
34 | EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, &ctx, | ||
35 | sizeof(ctx)); | ||
36 | if (res != sizeof(ctx)) | ||
37 | return 0; | ||
38 | return (memcmp(ctx.master_key_descriptor, policy->master_key_descriptor, | ||
39 | EXT4_KEY_DESCRIPTOR_SIZE) == 0 && | ||
40 | (ctx.contents_encryption_mode == | ||
41 | policy->contents_encryption_mode) && | ||
42 | (ctx.filenames_encryption_mode == | ||
43 | policy->filenames_encryption_mode)); | ||
44 | } | ||
45 | |||
46 | static int ext4_create_encryption_context_from_policy( | ||
47 | struct inode *inode, const struct ext4_encryption_policy *policy) | ||
48 | { | ||
49 | struct ext4_encryption_context ctx; | ||
50 | int res = 0; | ||
51 | |||
52 | ctx.format = EXT4_ENCRYPTION_CONTEXT_FORMAT_V1; | ||
53 | memcpy(ctx.master_key_descriptor, policy->master_key_descriptor, | ||
54 | EXT4_KEY_DESCRIPTOR_SIZE); | ||
55 | if (!ext4_valid_contents_enc_mode(policy->contents_encryption_mode)) { | ||
56 | printk(KERN_WARNING | ||
57 | "%s: Invalid contents encryption mode %d\n", __func__, | ||
58 | policy->contents_encryption_mode); | ||
59 | res = -EINVAL; | ||
60 | goto out; | ||
61 | } | ||
62 | if (!ext4_valid_filenames_enc_mode(policy->filenames_encryption_mode)) { | ||
63 | printk(KERN_WARNING | ||
64 | "%s: Invalid filenames encryption mode %d\n", __func__, | ||
65 | policy->filenames_encryption_mode); | ||
66 | res = -EINVAL; | ||
67 | goto out; | ||
68 | } | ||
69 | ctx.contents_encryption_mode = policy->contents_encryption_mode; | ||
70 | ctx.filenames_encryption_mode = policy->filenames_encryption_mode; | ||
71 | BUILD_BUG_ON(sizeof(ctx.nonce) != EXT4_KEY_DERIVATION_NONCE_SIZE); | ||
72 | get_random_bytes(ctx.nonce, EXT4_KEY_DERIVATION_NONCE_SIZE); | ||
73 | |||
74 | res = ext4_xattr_set(inode, EXT4_XATTR_INDEX_ENCRYPTION, | ||
75 | EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, &ctx, | ||
76 | sizeof(ctx), 0); | ||
77 | out: | ||
78 | if (!res) | ||
79 | ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT); | ||
80 | return res; | ||
81 | } | ||
82 | |||
83 | int ext4_process_policy(const struct ext4_encryption_policy *policy, | ||
84 | struct inode *inode) | ||
85 | { | ||
86 | if (policy->version != 0) | ||
87 | return -EINVAL; | ||
88 | |||
89 | if (!ext4_inode_has_encryption_context(inode)) { | ||
90 | if (!ext4_empty_dir(inode)) | ||
91 | return -ENOTEMPTY; | ||
92 | return ext4_create_encryption_context_from_policy(inode, | ||
93 | policy); | ||
94 | } | ||
95 | |||
96 | if (ext4_is_encryption_context_consistent_with_policy(inode, policy)) | ||
97 | return 0; | ||
98 | |||
99 | printk(KERN_WARNING "%s: Policy inconsistent with encryption context\n", | ||
100 | __func__); | ||
101 | return -EINVAL; | ||
102 | } | ||
103 | |||
104 | int ext4_get_policy(struct inode *inode, struct ext4_encryption_policy *policy) | ||
105 | { | ||
106 | struct ext4_encryption_context ctx; | ||
107 | |||
108 | int res = ext4_xattr_get(inode, EXT4_XATTR_INDEX_ENCRYPTION, | ||
109 | EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, | ||
110 | &ctx, sizeof(ctx)); | ||
111 | if (res != sizeof(ctx)) | ||
112 | return -ENOENT; | ||
113 | if (ctx.format != EXT4_ENCRYPTION_CONTEXT_FORMAT_V1) | ||
114 | return -EINVAL; | ||
115 | policy->version = 0; | ||
116 | policy->contents_encryption_mode = ctx.contents_encryption_mode; | ||
117 | policy->filenames_encryption_mode = ctx.filenames_encryption_mode; | ||
118 | memcpy(&policy->master_key_descriptor, ctx.master_key_descriptor, | ||
119 | EXT4_KEY_DESCRIPTOR_SIZE); | ||
120 | return 0; | ||
121 | } | ||
122 | |||
123 | int ext4_is_child_context_consistent_with_parent(struct inode *parent, | ||
124 | struct inode *child) | ||
125 | { | ||
126 | struct ext4_encryption_context parent_ctx, child_ctx; | ||
127 | int res; | ||
128 | |||
129 | if ((parent == NULL) || (child == NULL)) { | ||
130 | pr_err("parent %p child %p\n", parent, child); | ||
131 | BUG_ON(1); | ||
132 | } | ||
133 | /* no restrictions if the parent directory is not encrypted */ | ||
134 | if (!ext4_encrypted_inode(parent)) | ||
135 | return 1; | ||
136 | res = ext4_xattr_get(parent, EXT4_XATTR_INDEX_ENCRYPTION, | ||
137 | EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, | ||
138 | &parent_ctx, sizeof(parent_ctx)); | ||
139 | if (res != sizeof(parent_ctx)) | ||
140 | return 0; | ||
141 | /* if the child directory is not encrypted, this is always a problem */ | ||
142 | if (!ext4_encrypted_inode(child)) | ||
143 | return 0; | ||
144 | res = ext4_xattr_get(child, EXT4_XATTR_INDEX_ENCRYPTION, | ||
145 | EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, | ||
146 | &child_ctx, sizeof(child_ctx)); | ||
147 | if (res != sizeof(child_ctx)) | ||
148 | return 0; | ||
149 | return (memcmp(parent_ctx.master_key_descriptor, | ||
150 | child_ctx.master_key_descriptor, | ||
151 | EXT4_KEY_DESCRIPTOR_SIZE) == 0 && | ||
152 | (parent_ctx.contents_encryption_mode == | ||
153 | child_ctx.contents_encryption_mode) && | ||
154 | (parent_ctx.filenames_encryption_mode == | ||
155 | child_ctx.filenames_encryption_mode)); | ||
156 | } | ||
157 | |||
158 | /** | ||
159 | * ext4_inherit_context() - Sets a child context from its parent | ||
160 | * @parent: Parent inode from which the context is inherited. | ||
161 | * @child: Child inode that inherits the context from @parent. | ||
162 | * | ||
163 | * Return: Zero on success, non-zero otherwise | ||
164 | */ | ||
165 | int ext4_inherit_context(struct inode *parent, struct inode *child) | ||
166 | { | ||
167 | struct ext4_encryption_context ctx; | ||
168 | int res = ext4_xattr_get(parent, EXT4_XATTR_INDEX_ENCRYPTION, | ||
169 | EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, | ||
170 | &ctx, sizeof(ctx)); | ||
171 | |||
172 | if (res != sizeof(ctx)) { | ||
173 | if (DUMMY_ENCRYPTION_ENABLED(EXT4_SB(parent->i_sb))) { | ||
174 | ctx.format = EXT4_ENCRYPTION_CONTEXT_FORMAT_V1; | ||
175 | ctx.contents_encryption_mode = | ||
176 | EXT4_ENCRYPTION_MODE_AES_256_XTS; | ||
177 | ctx.filenames_encryption_mode = | ||
178 | EXT4_ENCRYPTION_MODE_AES_256_CTS; | ||
179 | memset(ctx.master_key_descriptor, 0x42, | ||
180 | EXT4_KEY_DESCRIPTOR_SIZE); | ||
181 | res = 0; | ||
182 | } else { | ||
183 | goto out; | ||
184 | } | ||
185 | } | ||
186 | get_random_bytes(ctx.nonce, EXT4_KEY_DERIVATION_NONCE_SIZE); | ||
187 | res = ext4_xattr_set(child, EXT4_XATTR_INDEX_ENCRYPTION, | ||
188 | EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, &ctx, | ||
189 | sizeof(ctx), 0); | ||
190 | out: | ||
191 | if (!res) | ||
192 | ext4_set_inode_flag(child, EXT4_INODE_ENCRYPT); | ||
193 | return res; | ||
194 | } | ||
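A directory is placed under a policy from user space with the EXT4_IOC_SET_ENCRYPTION_POLICY ioctl defined later in this patch, which lands in ext4_process_policy() above. A hedged sketch follows; the struct layout is an assumption based only on the fields the policy code reads:

    #include <fcntl.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <unistd.h>

    struct ext4_encryption_policy {         /* assumed layout */
            char version;                   /* must be 0 */
            char contents_encryption_mode;
            char filenames_encryption_mode;
            char master_key_descriptor[8];  /* EXT4_KEY_DESCRIPTOR_SIZE */
    };

    #define EXT4_IOC_SET_ENCRYPTION_POLICY \
            _IOR('f', 19, struct ext4_encryption_policy)

    /* Apply a policy to an (empty) directory. */
    static int set_policy(const char *dir, const char desc[8])
    {
            struct ext4_encryption_policy p = {
                    .version = 0,
                    .contents_encryption_mode = 1,  /* AES-256-XTS */
                    .filenames_encryption_mode = 4, /* AES-256-CTS */
            };
            int fd, ret;

            memcpy(p.master_key_descriptor, desc, 8);
            fd = open(dir, O_RDONLY);
            if (fd < 0)
                    return -1;
            ret = ioctl(fd, EXT4_IOC_SET_ENCRYPTION_POLICY, &p);
            close(fd);
            return ret;
    }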
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index c24143ea9c08..61db51a5ce4c 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c | |||
@@ -22,10 +22,8 @@ | |||
22 | */ | 22 | */ |
23 | 23 | ||
24 | #include <linux/fs.h> | 24 | #include <linux/fs.h> |
25 | #include <linux/jbd2.h> | ||
26 | #include <linux/buffer_head.h> | 25 | #include <linux/buffer_head.h> |
27 | #include <linux/slab.h> | 26 | #include <linux/slab.h> |
28 | #include <linux/rbtree.h> | ||
29 | #include "ext4.h" | 27 | #include "ext4.h" |
30 | #include "xattr.h" | 28 | #include "xattr.h" |
31 | 29 | ||
@@ -110,7 +108,10 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) | |||
110 | int err; | 108 | int err; |
111 | struct inode *inode = file_inode(file); | 109 | struct inode *inode = file_inode(file); |
112 | struct super_block *sb = inode->i_sb; | 110 | struct super_block *sb = inode->i_sb; |
111 | struct buffer_head *bh = NULL; | ||
113 | int dir_has_error = 0; | 112 | int dir_has_error = 0; |
113 | struct ext4_fname_crypto_ctx *enc_ctx = NULL; | ||
114 | struct ext4_str fname_crypto_str = {.name = NULL, .len = 0}; | ||
114 | 115 | ||
115 | if (is_dx_dir(inode)) { | 116 | if (is_dx_dir(inode)) { |
116 | err = ext4_dx_readdir(file, ctx); | 117 | err = ext4_dx_readdir(file, ctx); |
@@ -127,17 +128,28 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) | |||
127 | 128 | ||
128 | if (ext4_has_inline_data(inode)) { | 129 | if (ext4_has_inline_data(inode)) { |
129 | int has_inline_data = 1; | 130 | int has_inline_data = 1; |
130 | int ret = ext4_read_inline_dir(file, ctx, | 131 | err = ext4_read_inline_dir(file, ctx, |
131 | &has_inline_data); | 132 | &has_inline_data); |
132 | if (has_inline_data) | 133 | if (has_inline_data) |
133 | return ret; | 134 | return err; |
135 | } | ||
136 | |||
137 | enc_ctx = ext4_get_fname_crypto_ctx(inode, EXT4_NAME_LEN); | ||
138 | if (IS_ERR(enc_ctx)) | ||
139 | return PTR_ERR(enc_ctx); | ||
140 | if (enc_ctx) { | ||
141 | err = ext4_fname_crypto_alloc_buffer(enc_ctx, EXT4_NAME_LEN, | ||
142 | &fname_crypto_str); | ||
143 | if (err < 0) { | ||
144 | ext4_put_fname_crypto_ctx(&enc_ctx); | ||
145 | return err; | ||
146 | } | ||
134 | } | 147 | } |
135 | 148 | ||
136 | offset = ctx->pos & (sb->s_blocksize - 1); | 149 | offset = ctx->pos & (sb->s_blocksize - 1); |
137 | 150 | ||
138 | while (ctx->pos < inode->i_size) { | 151 | while (ctx->pos < inode->i_size) { |
139 | struct ext4_map_blocks map; | 152 | struct ext4_map_blocks map; |
140 | struct buffer_head *bh = NULL; | ||
141 | 153 | ||
142 | map.m_lblk = ctx->pos >> EXT4_BLOCK_SIZE_BITS(sb); | 154 | map.m_lblk = ctx->pos >> EXT4_BLOCK_SIZE_BITS(sb); |
143 | map.m_len = 1; | 155 | map.m_len = 1; |
@@ -180,6 +192,7 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) | |||
180 | (unsigned long long)ctx->pos); | 192 | (unsigned long long)ctx->pos); |
181 | ctx->pos += sb->s_blocksize - offset; | 193 | ctx->pos += sb->s_blocksize - offset; |
182 | brelse(bh); | 194 | brelse(bh); |
195 | bh = NULL; | ||
183 | continue; | 196 | continue; |
184 | } | 197 | } |
185 | set_buffer_verified(bh); | 198 | set_buffer_verified(bh); |
@@ -226,25 +239,44 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) | |||
226 | offset += ext4_rec_len_from_disk(de->rec_len, | 239 | offset += ext4_rec_len_from_disk(de->rec_len, |
227 | sb->s_blocksize); | 240 | sb->s_blocksize); |
228 | if (le32_to_cpu(de->inode)) { | 241 | if (le32_to_cpu(de->inode)) { |
229 | if (!dir_emit(ctx, de->name, | 242 | if (enc_ctx == NULL) { |
230 | de->name_len, | 243 | /* Directory is not encrypted */ |
231 | le32_to_cpu(de->inode), | 244 | if (!dir_emit(ctx, de->name, |
232 | get_dtype(sb, de->file_type))) { | 245 | de->name_len, |
233 | brelse(bh); | 246 | le32_to_cpu(de->inode), |
234 | return 0; | 247 | get_dtype(sb, de->file_type))) |
248 | goto done; | ||
249 | } else { | ||
250 | /* Directory is encrypted */ | ||
251 | err = ext4_fname_disk_to_usr(enc_ctx, | ||
252 | de, &fname_crypto_str); | ||
253 | if (err < 0) | ||
254 | goto errout; | ||
255 | if (!dir_emit(ctx, | ||
256 | fname_crypto_str.name, err, | ||
257 | le32_to_cpu(de->inode), | ||
258 | get_dtype(sb, de->file_type))) | ||
259 | goto done; | ||
235 | } | 260 | } |
236 | } | 261 | } |
237 | ctx->pos += ext4_rec_len_from_disk(de->rec_len, | 262 | ctx->pos += ext4_rec_len_from_disk(de->rec_len, |
238 | sb->s_blocksize); | 263 | sb->s_blocksize); |
239 | } | 264 | } |
240 | offset = 0; | 265 | if ((ctx->pos < inode->i_size) && !dir_relax(inode)) |
266 | goto done; | ||
241 | brelse(bh); | 267 | brelse(bh); |
242 | if (ctx->pos < inode->i_size) { | 268 | bh = NULL; |
243 | if (!dir_relax(inode)) | 269 | offset = 0; |
244 | return 0; | ||
245 | } | ||
246 | } | 270 | } |
247 | return 0; | 271 | done: |
272 | err = 0; | ||
273 | errout: | ||
274 | #ifdef CONFIG_EXT4_FS_ENCRYPTION | ||
275 | ext4_put_fname_crypto_ctx(&enc_ctx); | ||
276 | ext4_fname_crypto_free_buffer(&fname_crypto_str); | ||
277 | #endif | ||
278 | brelse(bh); | ||
279 | return err; | ||
248 | } | 280 | } |
249 | 281 | ||
250 | static inline int is_32bit_api(void) | 282 | static inline int is_32bit_api(void) |
@@ -384,10 +416,15 @@ void ext4_htree_free_dir_info(struct dir_private_info *p) | |||
384 | 416 | ||
385 | /* | 417 | /* |
386 | * Given a directory entry, enter it into the fname rb tree. | 418 | * Given a directory entry, enter it into the fname rb tree. |
419 | * | ||
420 | * When filename encryption is enabled, the dirent will hold the | ||
421 | * encrypted filename, while the htree will hold decrypted filename. | ||
422 | * The decrypted filename is passed in via ent_name. parameter. | ||
387 | */ | 423 | */ |
388 | int ext4_htree_store_dirent(struct file *dir_file, __u32 hash, | 424 | int ext4_htree_store_dirent(struct file *dir_file, __u32 hash, |
389 | __u32 minor_hash, | 425 | __u32 minor_hash, |
390 | struct ext4_dir_entry_2 *dirent) | 426 | struct ext4_dir_entry_2 *dirent, |
427 | struct ext4_str *ent_name) | ||
391 | { | 428 | { |
392 | struct rb_node **p, *parent = NULL; | 429 | struct rb_node **p, *parent = NULL; |
393 | struct fname *fname, *new_fn; | 430 | struct fname *fname, *new_fn; |
@@ -398,17 +435,17 @@ int ext4_htree_store_dirent(struct file *dir_file, __u32 hash, | |||
398 | p = &info->root.rb_node; | 435 | p = &info->root.rb_node; |
399 | 436 | ||
400 | /* Create and allocate the fname structure */ | 437 | /* Create and allocate the fname structure */ |
401 | len = sizeof(struct fname) + dirent->name_len + 1; | 438 | len = sizeof(struct fname) + ent_name->len + 1; |
402 | new_fn = kzalloc(len, GFP_KERNEL); | 439 | new_fn = kzalloc(len, GFP_KERNEL); |
403 | if (!new_fn) | 440 | if (!new_fn) |
404 | return -ENOMEM; | 441 | return -ENOMEM; |
405 | new_fn->hash = hash; | 442 | new_fn->hash = hash; |
406 | new_fn->minor_hash = minor_hash; | 443 | new_fn->minor_hash = minor_hash; |
407 | new_fn->inode = le32_to_cpu(dirent->inode); | 444 | new_fn->inode = le32_to_cpu(dirent->inode); |
408 | new_fn->name_len = dirent->name_len; | 445 | new_fn->name_len = ent_name->len; |
409 | new_fn->file_type = dirent->file_type; | 446 | new_fn->file_type = dirent->file_type; |
410 | memcpy(new_fn->name, dirent->name, dirent->name_len); | 447 | memcpy(new_fn->name, ent_name->name, ent_name->len); |
411 | new_fn->name[dirent->name_len] = 0; | 448 | new_fn->name[ent_name->len] = 0; |
412 | 449 | ||
413 | while (*p) { | 450 | while (*p) { |
414 | parent = *p; | 451 | parent = *p; |
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index c8eb32eefc3c..ef267adce19a 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h | |||
@@ -422,7 +422,7 @@ enum { | |||
422 | EXT4_INODE_DIRTY = 8, | 422 | EXT4_INODE_DIRTY = 8, |
423 | EXT4_INODE_COMPRBLK = 9, /* One or more compressed clusters */ | 423 | EXT4_INODE_COMPRBLK = 9, /* One or more compressed clusters */ |
424 | EXT4_INODE_NOCOMPR = 10, /* Don't compress */ | 424 | EXT4_INODE_NOCOMPR = 10, /* Don't compress */ |
425 | EXT4_INODE_ENCRYPT = 11, /* Compression error */ | 425 | EXT4_INODE_ENCRYPT = 11, /* Encrypted file */ |
426 | /* End compression flags --- maybe not all used */ | 426 | /* End compression flags --- maybe not all used */ |
427 | EXT4_INODE_INDEX = 12, /* hash-indexed directory */ | 427 | EXT4_INODE_INDEX = 12, /* hash-indexed directory */ |
428 | EXT4_INODE_IMAGIC = 13, /* AFS directory */ | 428 | EXT4_INODE_IMAGIC = 13, /* AFS directory */ |
@@ -582,6 +582,15 @@ enum { | |||
582 | #define EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER 0x0010 | 582 | #define EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER 0x0010 |
583 | #define EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER 0x0020 | 583 | #define EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER 0x0020 |
584 | 584 | ||
585 | /* Encryption algorithms */ | ||
586 | #define EXT4_ENCRYPTION_MODE_INVALID 0 | ||
587 | #define EXT4_ENCRYPTION_MODE_AES_256_XTS 1 | ||
588 | #define EXT4_ENCRYPTION_MODE_AES_256_GCM 2 | ||
589 | #define EXT4_ENCRYPTION_MODE_AES_256_CBC 3 | ||
590 | #define EXT4_ENCRYPTION_MODE_AES_256_CTS 4 | ||
591 | |||
592 | #include "ext4_crypto.h" | ||
593 | |||
585 | /* | 594 | /* |
586 | * ioctl commands | 595 | * ioctl commands |
587 | */ | 596 | */ |
@@ -603,6 +612,9 @@ enum { | |||
603 | #define EXT4_IOC_RESIZE_FS _IOW('f', 16, __u64) | 612 | #define EXT4_IOC_RESIZE_FS _IOW('f', 16, __u64) |
604 | #define EXT4_IOC_SWAP_BOOT _IO('f', 17) | 613 | #define EXT4_IOC_SWAP_BOOT _IO('f', 17) |
605 | #define EXT4_IOC_PRECACHE_EXTENTS _IO('f', 18) | 614 | #define EXT4_IOC_PRECACHE_EXTENTS _IO('f', 18) |
615 | #define EXT4_IOC_SET_ENCRYPTION_POLICY _IOR('f', 19, struct ext4_encryption_policy) | ||
616 | #define EXT4_IOC_GET_ENCRYPTION_PWSALT _IOW('f', 20, __u8[16]) | ||
617 | #define EXT4_IOC_GET_ENCRYPTION_POLICY _IOW('f', 21, struct ext4_encryption_policy) | ||
606 | 618 | ||
607 | #if defined(__KERNEL__) && defined(CONFIG_COMPAT) | 619 | #if defined(__KERNEL__) && defined(CONFIG_COMPAT) |
608 | /* | 620 | /* |
@@ -939,6 +951,11 @@ struct ext4_inode_info { | |||
939 | 951 | ||
940 | /* Precomputed uuid+inum+igen checksum for seeding inode checksums */ | 952 | /* Precomputed uuid+inum+igen checksum for seeding inode checksums */ |
941 | __u32 i_csum_seed; | 953 | __u32 i_csum_seed; |
954 | |||
955 | #ifdef CONFIG_EXT4_FS_ENCRYPTION | ||
956 | /* Encryption params */ | ||
957 | struct ext4_encryption_key i_encryption_key; | ||
958 | #endif | ||
942 | }; | 959 | }; |
943 | 960 | ||
944 | /* | 961 | /* |
@@ -1142,7 +1159,8 @@ struct ext4_super_block { | |||
1142 | __le32 s_raid_stripe_width; /* blocks on all data disks (N*stride)*/ | 1159 | __le32 s_raid_stripe_width; /* blocks on all data disks (N*stride)*/ |
1143 | __u8 s_log_groups_per_flex; /* FLEX_BG group size */ | 1160 | __u8 s_log_groups_per_flex; /* FLEX_BG group size */ |
1144 | __u8 s_checksum_type; /* metadata checksum algorithm used */ | 1161 | __u8 s_checksum_type; /* metadata checksum algorithm used */ |
1145 | __le16 s_reserved_pad; | 1162 | __u8 s_encryption_level; /* versioning level for encryption */ |
1163 | __u8 s_reserved_pad; /* Padding to next 32bits */ | ||
1146 | __le64 s_kbytes_written; /* nr of lifetime kilobytes written */ | 1164 | __le64 s_kbytes_written; /* nr of lifetime kilobytes written */ |
1147 | __le32 s_snapshot_inum; /* Inode number of active snapshot */ | 1165 | __le32 s_snapshot_inum; /* Inode number of active snapshot */ |
1148 | __le32 s_snapshot_id; /* sequential ID of active snapshot */ | 1166 | __le32 s_snapshot_id; /* sequential ID of active snapshot */ |
@@ -1169,7 +1187,9 @@ struct ext4_super_block { | |||
1169 | __le32 s_overhead_clusters; /* overhead blocks/clusters in fs */ | 1187 | __le32 s_overhead_clusters; /* overhead blocks/clusters in fs */ |
1170 | __le32 s_backup_bgs[2]; /* groups with sparse_super2 SBs */ | 1188 | __le32 s_backup_bgs[2]; /* groups with sparse_super2 SBs */ |
1171 | __u8 s_encrypt_algos[4]; /* Encryption algorithms in use */ | 1189 | __u8 s_encrypt_algos[4]; /* Encryption algorithms in use */ |
1172 | __le32 s_reserved[105]; /* Padding to the end of the block */ | 1190 | __u8 s_encrypt_pw_salt[16]; /* Salt used for string2key algorithm */ |
1191 | __le32 s_lpf_ino; /* Location of the lost+found inode */ | ||
1192 | __le32 s_reserved[100]; /* Padding to the end of the block */ | ||
1173 | __le32 s_checksum; /* crc32c(superblock) */ | 1193 | __le32 s_checksum; /* crc32c(superblock) */ |
1174 | }; | 1194 | }; |
1175 | 1195 | ||
@@ -1180,8 +1200,16 @@ struct ext4_super_block { | |||
1180 | /* | 1200 | /* |
1181 | * run-time mount flags | 1201 | * run-time mount flags |
1182 | */ | 1202 | */ |
1183 | #define EXT4_MF_MNTDIR_SAMPLED 0x0001 | 1203 | #define EXT4_MF_MNTDIR_SAMPLED 0x0001 |
1184 | #define EXT4_MF_FS_ABORTED 0x0002 /* Fatal error detected */ | 1204 | #define EXT4_MF_FS_ABORTED 0x0002 /* Fatal error detected */ |
1205 | #define EXT4_MF_TEST_DUMMY_ENCRYPTION 0x0004 | ||
1206 | |||
1207 | #ifdef CONFIG_EXT4_FS_ENCRYPTION | ||
1208 | #define DUMMY_ENCRYPTION_ENABLED(sbi) (unlikely((sbi)->s_mount_flags & \ | ||
1209 | EXT4_MF_TEST_DUMMY_ENCRYPTION)) | ||
1210 | #else | ||
1211 | #define DUMMY_ENCRYPTION_ENABLED(sbi) (0) | ||
1212 | #endif | ||
1185 | 1213 | ||
1186 | /* Number of quota types we support */ | 1214 | /* Number of quota types we support */ |
1187 | #define EXT4_MAXQUOTAS 2 | 1215 | #define EXT4_MAXQUOTAS 2 |
@@ -1351,6 +1379,12 @@ struct ext4_sb_info { | |||
1351 | struct ratelimit_state s_err_ratelimit_state; | 1379 | struct ratelimit_state s_err_ratelimit_state; |
1352 | struct ratelimit_state s_warning_ratelimit_state; | 1380 | struct ratelimit_state s_warning_ratelimit_state; |
1353 | struct ratelimit_state s_msg_ratelimit_state; | 1381 | struct ratelimit_state s_msg_ratelimit_state; |
1382 | |||
1383 | #ifdef CONFIG_EXT4_FS_ENCRYPTION | ||
1384 | /* Encryption */ | ||
1385 | uint32_t s_file_encryption_mode; | ||
1386 | uint32_t s_dir_encryption_mode; | ||
1387 | #endif | ||
1354 | }; | 1388 | }; |
1355 | 1389 | ||
1356 | static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb) | 1390 | static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb) |
@@ -1466,6 +1500,18 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei) | |||
1466 | #define EXT4_SB(sb) (sb) | 1500 | #define EXT4_SB(sb) (sb) |
1467 | #endif | 1501 | #endif |
1468 | 1502 | ||
1503 | /* | ||
1504 | * Returns true if the inode is encrypted | ||
1505 | */ | ||
1506 | static inline int ext4_encrypted_inode(struct inode *inode) | ||
1507 | { | ||
1508 | #ifdef CONFIG_EXT4_FS_ENCRYPTION | ||
1509 | return ext4_test_inode_flag(inode, EXT4_INODE_ENCRYPT); | ||
1510 | #else | ||
1511 | return 0; | ||
1512 | #endif | ||
1513 | } | ||
1514 | |||
1469 | #define NEXT_ORPHAN(inode) EXT4_I(inode)->i_dtime | 1515 | #define NEXT_ORPHAN(inode) EXT4_I(inode)->i_dtime |
1470 | 1516 | ||
1471 | /* | 1517 | /* |
@@ -1575,8 +1621,9 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei) | |||
1575 | EXT4_FEATURE_INCOMPAT_EXTENTS| \ | 1621 | EXT4_FEATURE_INCOMPAT_EXTENTS| \ |
1576 | EXT4_FEATURE_INCOMPAT_64BIT| \ | 1622 | EXT4_FEATURE_INCOMPAT_64BIT| \ |
1577 | EXT4_FEATURE_INCOMPAT_FLEX_BG| \ | 1623 | EXT4_FEATURE_INCOMPAT_FLEX_BG| \ |
1578 | EXT4_FEATURE_INCOMPAT_MMP | \ | 1624 | EXT4_FEATURE_INCOMPAT_MMP | \ |
1579 | EXT4_FEATURE_INCOMPAT_INLINE_DATA) | 1625 | EXT4_FEATURE_INCOMPAT_INLINE_DATA | \ |
1626 | EXT4_FEATURE_INCOMPAT_ENCRYPT) | ||
1580 | #define EXT4_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \ | 1627 | #define EXT4_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \ |
1581 | EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \ | 1628 | EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \ |
1582 | EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \ | 1629 | EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \ |
@@ -2001,6 +2048,99 @@ extern unsigned ext4_free_clusters_after_init(struct super_block *sb, | |||
2001 | struct ext4_group_desc *gdp); | 2048 | struct ext4_group_desc *gdp); |
2002 | ext4_fsblk_t ext4_inode_to_goal_block(struct inode *); | 2049 | ext4_fsblk_t ext4_inode_to_goal_block(struct inode *); |
2003 | 2050 | ||
2051 | /* crypto_policy.c */ | ||
2052 | int ext4_is_child_context_consistent_with_parent(struct inode *parent, | ||
2053 | struct inode *child); | ||
2054 | int ext4_inherit_context(struct inode *parent, struct inode *child); | ||
2055 | void ext4_to_hex(char *dst, char *src, size_t src_size); | ||
2056 | int ext4_process_policy(const struct ext4_encryption_policy *policy, | ||
2057 | struct inode *inode); | ||
2058 | int ext4_get_policy(struct inode *inode, | ||
2059 | struct ext4_encryption_policy *policy); | ||
2060 | |||
2061 | /* crypto.c */ | ||
2062 | bool ext4_valid_contents_enc_mode(uint32_t mode); | ||
2063 | uint32_t ext4_validate_encryption_key_size(uint32_t mode, uint32_t size); | ||
2064 | extern struct workqueue_struct *ext4_read_workqueue; | ||
2065 | struct ext4_crypto_ctx *ext4_get_crypto_ctx(struct inode *inode); | ||
2066 | void ext4_release_crypto_ctx(struct ext4_crypto_ctx *ctx); | ||
2067 | void ext4_restore_control_page(struct page *data_page); | ||
2068 | struct page *ext4_encrypt(struct inode *inode, | ||
2069 | struct page *plaintext_page); | ||
2070 | int ext4_decrypt(struct ext4_crypto_ctx *ctx, struct page *page); | ||
2071 | int ext4_decrypt_one(struct inode *inode, struct page *page); | ||
2072 | int ext4_encrypted_zeroout(struct inode *inode, struct ext4_extent *ex); | ||
2073 | |||
2074 | #ifdef CONFIG_EXT4_FS_ENCRYPTION | ||
2075 | int ext4_init_crypto(void); | ||
2076 | void ext4_exit_crypto(void); | ||
2077 | static inline int ext4_sb_has_crypto(struct super_block *sb) | ||
2078 | { | ||
2079 | return EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_ENCRYPT); | ||
2080 | } | ||
2081 | #else | ||
2082 | static inline int ext4_init_crypto(void) { return 0; } | ||
2083 | static inline void ext4_exit_crypto(void) { } | ||
2084 | static inline int ext4_sb_has_crypto(struct super_block *sb) | ||
2085 | { | ||
2086 | return 0; | ||
2087 | } | ||
2088 | #endif | ||
2089 | |||
2090 | /* crypto_fname.c */ | ||
2091 | bool ext4_valid_filenames_enc_mode(uint32_t mode); | ||
2092 | u32 ext4_fname_crypto_round_up(u32 size, u32 blksize); | ||
2093 | int ext4_fname_crypto_alloc_buffer(struct ext4_fname_crypto_ctx *ctx, | ||
2094 | u32 ilen, struct ext4_str *crypto_str); | ||
2095 | int _ext4_fname_disk_to_usr(struct ext4_fname_crypto_ctx *ctx, | ||
2096 | const struct ext4_str *iname, | ||
2097 | struct ext4_str *oname); | ||
2098 | int ext4_fname_disk_to_usr(struct ext4_fname_crypto_ctx *ctx, | ||
2099 | const struct ext4_dir_entry_2 *de, | ||
2100 | struct ext4_str *oname); | ||
2101 | int ext4_fname_usr_to_disk(struct ext4_fname_crypto_ctx *ctx, | ||
2102 | const struct qstr *iname, | ||
2103 | struct ext4_str *oname); | ||
2104 | int ext4_fname_usr_to_hash(struct ext4_fname_crypto_ctx *ctx, | ||
2105 | const struct qstr *iname, | ||
2106 | struct dx_hash_info *hinfo); | ||
2107 | int ext4_fname_disk_to_hash(struct ext4_fname_crypto_ctx *ctx, | ||
2108 | const struct ext4_dir_entry_2 *de, | ||
2109 | struct dx_hash_info *hinfo); | ||
2110 | int ext4_fname_crypto_namelen_on_disk(struct ext4_fname_crypto_ctx *ctx, | ||
2111 | u32 namelen); | ||
2112 | |||
2113 | #ifdef CONFIG_EXT4_FS_ENCRYPTION | ||
2114 | void ext4_put_fname_crypto_ctx(struct ext4_fname_crypto_ctx **ctx); | ||
2115 | struct ext4_fname_crypto_ctx *ext4_get_fname_crypto_ctx(struct inode *inode, | ||
2116 | u32 max_len); | ||
2117 | void ext4_fname_crypto_free_buffer(struct ext4_str *crypto_str); | ||
2118 | #else | ||
2119 | static inline | ||
2120 | void ext4_put_fname_crypto_ctx(struct ext4_fname_crypto_ctx **ctx) { } | ||
2121 | static inline | ||
2122 | struct ext4_fname_crypto_ctx *ext4_get_fname_crypto_ctx(struct inode *inode, | ||
2123 | u32 max_len) | ||
2124 | { | ||
2125 | return NULL; | ||
2126 | } | ||
2127 | static inline void ext4_fname_crypto_free_buffer(struct ext4_str *p) { } | ||
2128 | #endif | ||
2129 | |||
2130 | |||
2131 | /* crypto_key.c */ | ||
2132 | int ext4_generate_encryption_key(struct inode *inode); | ||
2133 | |||
2134 | #ifdef CONFIG_EXT4_FS_ENCRYPTION | ||
2135 | int ext4_has_encryption_key(struct inode *inode); | ||
2136 | #else | ||
2137 | static inline int ext4_has_encryption_key(struct inode *inode) | ||
2138 | { | ||
2139 | return 0; | ||
2140 | } | ||
2141 | #endif | ||
2142 | |||
2143 | |||
2004 | /* dir.c */ | 2144 | /* dir.c */ |
2005 | extern int __ext4_check_dir_entry(const char *, unsigned int, struct inode *, | 2145 | extern int __ext4_check_dir_entry(const char *, unsigned int, struct inode *, |
2006 | struct file *, | 2146 | struct file *, |
@@ -2011,17 +2151,20 @@ extern int __ext4_check_dir_entry(const char *, unsigned int, struct inode *, | |||
2011 | unlikely(__ext4_check_dir_entry(__func__, __LINE__, (dir), (filp), \ | 2151 | unlikely(__ext4_check_dir_entry(__func__, __LINE__, (dir), (filp), \ |
2012 | (de), (bh), (buf), (size), (offset))) | 2152 | (de), (bh), (buf), (size), (offset))) |
2013 | extern int ext4_htree_store_dirent(struct file *dir_file, __u32 hash, | 2153 | extern int ext4_htree_store_dirent(struct file *dir_file, __u32 hash, |
2014 | __u32 minor_hash, | 2154 | __u32 minor_hash, |
2015 | struct ext4_dir_entry_2 *dirent); | 2155 | struct ext4_dir_entry_2 *dirent, |
2156 | struct ext4_str *ent_name); | ||
2016 | extern void ext4_htree_free_dir_info(struct dir_private_info *p); | 2157 | extern void ext4_htree_free_dir_info(struct dir_private_info *p); |
2017 | extern int ext4_find_dest_de(struct inode *dir, struct inode *inode, | 2158 | extern int ext4_find_dest_de(struct inode *dir, struct inode *inode, |
2018 | struct buffer_head *bh, | 2159 | struct buffer_head *bh, |
2019 | void *buf, int buf_size, | 2160 | void *buf, int buf_size, |
2020 | const char *name, int namelen, | 2161 | const char *name, int namelen, |
2021 | struct ext4_dir_entry_2 **dest_de); | 2162 | struct ext4_dir_entry_2 **dest_de); |
2022 | void ext4_insert_dentry(struct inode *inode, | 2163 | int ext4_insert_dentry(struct inode *dir, |
2164 | struct inode *inode, | ||
2023 | struct ext4_dir_entry_2 *de, | 2165 | struct ext4_dir_entry_2 *de, |
2024 | int buf_size, | 2166 | int buf_size, |
2167 | const struct qstr *iname, | ||
2025 | const char *name, int namelen); | 2168 | const char *name, int namelen); |
2026 | static inline void ext4_update_dx_flag(struct inode *inode) | 2169 | static inline void ext4_update_dx_flag(struct inode *inode) |
2027 | { | 2170 | { |
@@ -2099,6 +2242,7 @@ extern int ext4_group_add_blocks(handle_t *handle, struct super_block *sb, | |||
2099 | extern int ext4_trim_fs(struct super_block *, struct fstrim_range *); | 2242 | extern int ext4_trim_fs(struct super_block *, struct fstrim_range *); |
2100 | 2243 | ||
2101 | /* inode.c */ | 2244 | /* inode.c */ |
2245 | int ext4_inode_is_fast_symlink(struct inode *inode); | ||
2102 | struct buffer_head *ext4_getblk(handle_t *, struct inode *, ext4_lblk_t, int); | 2246 | struct buffer_head *ext4_getblk(handle_t *, struct inode *, ext4_lblk_t, int); |
2103 | struct buffer_head *ext4_bread(handle_t *, struct inode *, ext4_lblk_t, int); | 2247 | struct buffer_head *ext4_bread(handle_t *, struct inode *, ext4_lblk_t, int); |
2104 | int ext4_get_block_write(struct inode *inode, sector_t iblock, | 2248 | int ext4_get_block_write(struct inode *inode, sector_t iblock, |
@@ -2189,6 +2333,7 @@ extern int ext4_generic_delete_entry(handle_t *handle, | |||
2189 | void *entry_buf, | 2333 | void *entry_buf, |
2190 | int buf_size, | 2334 | int buf_size, |
2191 | int csum_size); | 2335 | int csum_size); |
2336 | extern int ext4_empty_dir(struct inode *inode); | ||
2192 | 2337 | ||
2193 | /* resize.c */ | 2338 | /* resize.c */ |
2194 | extern int ext4_group_add(struct super_block *sb, | 2339 | extern int ext4_group_add(struct super_block *sb, |
@@ -2698,6 +2843,10 @@ static inline void ext4_set_de_type(struct super_block *sb, | |||
2698 | de->file_type = ext4_type_by_mode[(mode & S_IFMT)>>S_SHIFT]; | 2843 | de->file_type = ext4_type_by_mode[(mode & S_IFMT)>>S_SHIFT]; |
2699 | } | 2844 | } |
2700 | 2845 | ||
2846 | /* readpages.c */ | ||
2847 | extern int ext4_mpage_readpages(struct address_space *mapping, | ||
2848 | struct list_head *pages, struct page *page, | ||
2849 | unsigned nr_pages); | ||
2701 | 2850 | ||
2702 | /* symlink.c */ | 2851 | /* symlink.c */ |
2703 | extern const struct inode_operations ext4_symlink_inode_operations; | 2852 | extern const struct inode_operations ext4_symlink_inode_operations; |
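The crypto.c declarations above carry the core of the write path: ext4_encrypt() substitutes a ciphertext "bounce" page for the plaintext page, and ext4_restore_control_page() trades it back once the I/O has completed. A hedged sketch of the expected calling pattern follows; the function name and the trimmed bio plumbing are illustrative, not part of the patch, and the real write-path consumer lives elsewhere in this series.

/*
 * Illustrative only: how a writeback path is expected to use the
 * crypto API declared above.  ext4_encrypt() allocates the crypto
 * context and bounce page internally and returns the ciphertext page
 * (or an ERR_PTR on failure).  Kernel context assumed.
 */
static int example_encrypt_for_writeback(struct inode *inode,
					 struct page *plaintext_page)
{
	struct page *ciphertext_page;

	ciphertext_page = ext4_encrypt(inode, plaintext_page);
	if (IS_ERR(ciphertext_page))
		return PTR_ERR(ciphertext_page);

	/* ... point the outgoing bio at ciphertext_page here ... */

	/* Once the write completes, release the bounce page and its
	 * context, recovering the original (control) page. */
	ext4_restore_control_page(ciphertext_page);
	return 0;
}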
diff --git a/fs/ext4/ext4_crypto.h b/fs/ext4/ext4_crypto.h new file mode 100644 index 000000000000..c2ba35a914b6 --- /dev/null +++ b/fs/ext4/ext4_crypto.h | |||
@@ -0,0 +1,147 @@ | |||
1 | /* | ||
2 | * linux/fs/ext4/ext4_crypto.h | ||
3 | * | ||
4 | * Copyright (C) 2015, Google, Inc. | ||
5 | * | ||
6 | * This contains encryption header content for ext4 | ||
7 | * | ||
8 | * Written by Michael Halcrow, 2015. | ||
9 | */ | ||
10 | |||
11 | #ifndef _EXT4_CRYPTO_H | ||
12 | #define _EXT4_CRYPTO_H | ||
13 | |||
14 | #include <linux/fs.h> | ||
15 | |||
16 | #define EXT4_KEY_DESCRIPTOR_SIZE 8 | ||
17 | |||
18 | /* Policy provided via an ioctl on the topmost directory */ | ||
19 | struct ext4_encryption_policy { | ||
20 | char version; | ||
21 | char contents_encryption_mode; | ||
22 | char filenames_encryption_mode; | ||
23 | char master_key_descriptor[EXT4_KEY_DESCRIPTOR_SIZE]; | ||
24 | } __attribute__((__packed__)); | ||
25 | |||
26 | #define EXT4_ENCRYPTION_CONTEXT_FORMAT_V1 1 | ||
27 | #define EXT4_KEY_DERIVATION_NONCE_SIZE 16 | ||
28 | |||
29 | /** | ||
30 | * Encryption context for inode | ||
31 | * | ||
32 | * Protector format: | ||
33 | * 1 byte: Protector format (1 = this version) | ||
34 | * 1 byte: File contents encryption mode | ||
35 | * 1 byte: File names encryption mode | ||
36 | * 1 byte: Reserved | ||
37 | * 8 bytes: Master Key descriptor | ||
38 | * 16 bytes: Encryption Key derivation nonce | ||
39 | */ | ||
40 | struct ext4_encryption_context { | ||
41 | char format; | ||
42 | char contents_encryption_mode; | ||
43 | char filenames_encryption_mode; | ||
44 | char reserved; | ||
45 | char master_key_descriptor[EXT4_KEY_DESCRIPTOR_SIZE]; | ||
46 | char nonce[EXT4_KEY_DERIVATION_NONCE_SIZE]; | ||
47 | } __attribute__((__packed__)); | ||
48 | |||
49 | /* Encryption parameters */ | ||
50 | #define EXT4_XTS_TWEAK_SIZE 16 | ||
51 | #define EXT4_AES_128_ECB_KEY_SIZE 16 | ||
52 | #define EXT4_AES_256_GCM_KEY_SIZE 32 | ||
53 | #define EXT4_AES_256_CBC_KEY_SIZE 32 | ||
54 | #define EXT4_AES_256_CTS_KEY_SIZE 32 | ||
55 | #define EXT4_AES_256_XTS_KEY_SIZE 64 | ||
56 | #define EXT4_MAX_KEY_SIZE 64 | ||
57 | |||
58 | #define EXT4_KEY_DESC_PREFIX "ext4:" | ||
59 | #define EXT4_KEY_DESC_PREFIX_SIZE 5 | ||
60 | |||
61 | struct ext4_encryption_key { | ||
62 | uint32_t mode; | ||
63 | char raw[EXT4_MAX_KEY_SIZE]; | ||
64 | uint32_t size; | ||
65 | }; | ||
66 | |||
67 | #define EXT4_CTX_REQUIRES_FREE_ENCRYPT_FL 0x00000001 | ||
68 | #define EXT4_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL 0x00000002 | ||
69 | |||
70 | struct ext4_crypto_ctx { | ||
71 | struct crypto_tfm *tfm; /* Crypto API context */ | ||
72 | struct page *bounce_page; /* Ciphertext page on write path */ | ||
73 | struct page *control_page; /* Original page on write path */ | ||
74 | struct bio *bio; /* The bio for this context */ | ||
75 | struct work_struct work; /* Work queue for read complete path */ | ||
76 | struct list_head free_list; /* Free list */ | ||
77 | int flags; /* Flags */ | ||
78 | int mode; /* Encryption mode for tfm */ | ||
79 | }; | ||
80 | |||
81 | struct ext4_completion_result { | ||
82 | struct completion completion; | ||
83 | int res; | ||
84 | }; | ||
85 | |||
86 | #define DECLARE_EXT4_COMPLETION_RESULT(ecr) \ | ||
87 | struct ext4_completion_result ecr = { \ | ||
88 | COMPLETION_INITIALIZER((ecr).completion), 0 } | ||
89 | |||
90 | static inline int ext4_encryption_key_size(int mode) | ||
91 | { | ||
92 | switch (mode) { | ||
93 | case EXT4_ENCRYPTION_MODE_AES_256_XTS: | ||
94 | return EXT4_AES_256_XTS_KEY_SIZE; | ||
95 | case EXT4_ENCRYPTION_MODE_AES_256_GCM: | ||
96 | return EXT4_AES_256_GCM_KEY_SIZE; | ||
97 | case EXT4_ENCRYPTION_MODE_AES_256_CBC: | ||
98 | return EXT4_AES_256_CBC_KEY_SIZE; | ||
99 | case EXT4_ENCRYPTION_MODE_AES_256_CTS: | ||
100 | return EXT4_AES_256_CTS_KEY_SIZE; | ||
101 | default: | ||
102 | BUG(); | ||
103 | } | ||
104 | return 0; | ||
105 | } | ||
106 | |||
107 | #define EXT4_FNAME_NUM_SCATTER_ENTRIES 4 | ||
108 | #define EXT4_CRYPTO_BLOCK_SIZE 16 | ||
109 | #define EXT4_FNAME_CRYPTO_DIGEST_SIZE 32 | ||
110 | |||
111 | struct ext4_str { | ||
112 | unsigned char *name; | ||
113 | u32 len; | ||
114 | }; | ||
115 | |||
116 | struct ext4_fname_crypto_ctx { | ||
117 | u32 lim; | ||
118 | char tmp_buf[EXT4_CRYPTO_BLOCK_SIZE]; | ||
119 | struct crypto_ablkcipher *ctfm; | ||
120 | struct crypto_hash *htfm; | ||
121 | struct page *workpage; | ||
122 | struct ext4_encryption_key key; | ||
123 | unsigned has_valid_key : 1; | ||
124 | unsigned ctfm_key_is_ready : 1; | ||
125 | }; | ||
126 | |||
127 | /** | ||
128 | * For encrypted symlinks, the ciphertext length is stored at the beginning | ||
129 | * of the string in little-endian format. | ||
130 | */ | ||
131 | struct ext4_encrypted_symlink_data { | ||
132 | __le16 len; | ||
133 | char encrypted_path[1]; | ||
134 | } __attribute__((__packed__)); | ||
135 | |||
136 | /** | ||
137 | * This function is used to calculate the disk space required to | ||
138 | * store a filename of length l in encrypted symlink format. | ||
139 | */ | ||
140 | static inline u32 encrypted_symlink_data_len(u32 l) | ||
141 | { | ||
142 | if (l < EXT4_CRYPTO_BLOCK_SIZE) | ||
143 | l = EXT4_CRYPTO_BLOCK_SIZE; | ||
144 | return (l + sizeof(struct ext4_encrypted_symlink_data) - 1); | ||
145 | } | ||
146 | |||
147 | #endif /* _EXT4_CRYPTO_H */ | ||
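Both on-disk structures in this header are packed, so their sizes follow directly from the field widths, and encrypted_symlink_data_len() reduces to simple arithmetic over them. A compile-time sanity check, kernel context assumed and the helper name purely illustrative:

/* Layout checks derived from the packed definitions above:
 * context = 1 + 1 + 1 + 1 + 8 + 16 = 28 bytes,
 * policy  = 1 + 1 + 1 + 8          = 11 bytes. */
static inline void ext4_crypto_layout_checks(void)
{
	BUILD_BUG_ON(sizeof(struct ext4_encryption_context) != 28);
	BUILD_BUG_ON(sizeof(struct ext4_encryption_policy) != 11);
	/*
	 * encrypted_symlink_data_len() pads short names up to one
	 * 16-byte crypto block: a 5-byte name costs
	 * 16 + sizeof(__le16) = 18 bytes on disk, while a 20-byte
	 * name costs 20 + 2 = 22.
	 */
}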
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index bed43081720f..973816bfe4a9 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c | |||
@@ -1717,12 +1717,6 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, | |||
1717 | { | 1717 | { |
1718 | unsigned short ext1_ee_len, ext2_ee_len; | 1718 | unsigned short ext1_ee_len, ext2_ee_len; |
1719 | 1719 | ||
1720 | /* | ||
1721 | * Make sure that both extents are initialized. We don't merge | ||
1722 | * unwritten extents so that we can be sure that end_io code has | ||
1723 | * the extent that was written properly split out and conversion to | ||
1724 | * initialized is trivial. | ||
1725 | */ | ||
1726 | if (ext4_ext_is_unwritten(ex1) != ext4_ext_is_unwritten(ex2)) | 1720 | if (ext4_ext_is_unwritten(ex1) != ext4_ext_is_unwritten(ex2)) |
1727 | return 0; | 1721 | return 0; |
1728 | 1722 | ||
@@ -3128,6 +3122,9 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex) | |||
3128 | ee_len = ext4_ext_get_actual_len(ex); | 3122 | ee_len = ext4_ext_get_actual_len(ex); |
3129 | ee_pblock = ext4_ext_pblock(ex); | 3123 | ee_pblock = ext4_ext_pblock(ex); |
3130 | 3124 | ||
3125 | if (ext4_encrypted_inode(inode)) | ||
3126 | return ext4_encrypted_zeroout(inode, ex); | ||
3127 | |||
3131 | ret = sb_issue_zeroout(inode->i_sb, ee_pblock, ee_len, GFP_NOFS); | 3128 | ret = sb_issue_zeroout(inode->i_sb, ee_pblock, ee_len, GFP_NOFS); |
3132 | if (ret > 0) | 3129 | if (ret > 0) |
3133 | ret = 0; | 3130 | ret = 0; |
@@ -4535,19 +4532,7 @@ got_allocated_blocks: | |||
4535 | */ | 4532 | */ |
4536 | reserved_clusters = get_reserved_cluster_alloc(inode, | 4533 | reserved_clusters = get_reserved_cluster_alloc(inode, |
4537 | map->m_lblk, allocated); | 4534 | map->m_lblk, allocated); |
4538 | if (map_from_cluster) { | 4535 | if (!map_from_cluster) { |
4539 | if (reserved_clusters) { | ||
4540 | /* | ||
4541 | * We have clusters reserved for this range. | ||
4542 | * But since we are not doing actual allocation | ||
4543 | * and are simply using blocks from previously | ||
4544 | * allocated cluster, we should release the | ||
4545 | * reservation and not claim quota. | ||
4546 | */ | ||
4547 | ext4_da_update_reserve_space(inode, | ||
4548 | reserved_clusters, 0); | ||
4549 | } | ||
4550 | } else { | ||
4551 | BUG_ON(allocated_clusters < reserved_clusters); | 4536 | BUG_ON(allocated_clusters < reserved_clusters); |
4552 | if (reserved_clusters < allocated_clusters) { | 4537 | if (reserved_clusters < allocated_clusters) { |
4553 | struct ext4_inode_info *ei = EXT4_I(inode); | 4538 | struct ext4_inode_info *ei = EXT4_I(inode); |
@@ -4803,12 +4788,6 @@ static long ext4_zero_range(struct file *file, loff_t offset, | |||
4803 | else | 4788 | else |
4804 | max_blocks -= lblk; | 4789 | max_blocks -= lblk; |
4805 | 4790 | ||
4806 | flags = EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT | | ||
4807 | EXT4_GET_BLOCKS_CONVERT_UNWRITTEN | | ||
4808 | EXT4_EX_NOCACHE; | ||
4809 | if (mode & FALLOC_FL_KEEP_SIZE) | ||
4810 | flags |= EXT4_GET_BLOCKS_KEEP_SIZE; | ||
4811 | |||
4812 | mutex_lock(&inode->i_mutex); | 4791 | mutex_lock(&inode->i_mutex); |
4813 | 4792 | ||
4814 | /* | 4793 | /* |
@@ -4825,15 +4804,28 @@ static long ext4_zero_range(struct file *file, loff_t offset, | |||
4825 | ret = inode_newsize_ok(inode, new_size); | 4804 | ret = inode_newsize_ok(inode, new_size); |
4826 | if (ret) | 4805 | if (ret) |
4827 | goto out_mutex; | 4806 | goto out_mutex; |
4828 | /* | ||
4829 | * If we have a partial block after EOF we have to allocate | ||
4830 | * the entire block. | ||
4831 | */ | ||
4832 | if (partial_end) | ||
4833 | max_blocks += 1; | ||
4834 | } | 4807 | } |
4835 | 4808 | ||
4809 | flags = EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT; | ||
4810 | if (mode & FALLOC_FL_KEEP_SIZE) | ||
4811 | flags |= EXT4_GET_BLOCKS_KEEP_SIZE; | ||
4812 | |||
4813 | /* Preallocate the range including the unaligned edges */ | ||
4814 | if (partial_begin || partial_end) { | ||
4815 | ret = ext4_alloc_file_blocks(file, | ||
4816 | round_down(offset, 1 << blkbits) >> blkbits, | ||
4817 | (round_up((offset + len), 1 << blkbits) - | ||
4818 | round_down(offset, 1 << blkbits)) >> blkbits, | ||
4819 | new_size, flags, mode); | ||
4820 | if (ret) | ||
4821 | goto out_mutex; | ||
4822 | |||
4823 | } | ||
4824 | |||
4825 | /* Zero range excluding the unaligned edges */ | ||
4836 | if (max_blocks > 0) { | 4826 | if (max_blocks > 0) { |
4827 | flags |= (EXT4_GET_BLOCKS_CONVERT_UNWRITTEN | | ||
4828 | EXT4_EX_NOCACHE); | ||
4837 | 4829 | ||
4838 | /* Now release the pages and zero block aligned part of pages*/ | 4830 | /* Now release the pages and zero block aligned part of pages*/ |
4839 | truncate_pagecache_range(inode, start, end - 1); | 4831 | truncate_pagecache_range(inode, start, end - 1); |
@@ -4847,19 +4839,6 @@ static long ext4_zero_range(struct file *file, loff_t offset, | |||
4847 | flags, mode); | 4839 | flags, mode); |
4848 | if (ret) | 4840 | if (ret) |
4849 | goto out_dio; | 4841 | goto out_dio; |
4850 | /* | ||
4851 | * Remove entire range from the extent status tree. | ||
4852 | * | ||
4853 | * ext4_es_remove_extent(inode, lblk, max_blocks) is | ||
4854 | * NOT sufficient. I'm not sure why this is the case, | ||
4855 | * but let's be conservative and remove the extent | ||
4856 | * status tree for the entire inode. There should be | ||
4857 | * no outstanding delalloc extents thanks to the | ||
4858 | * filemap_write_and_wait_range() call above. | ||
4859 | */ | ||
4860 | ret = ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS); | ||
4861 | if (ret) | ||
4862 | goto out_dio; | ||
4863 | } | 4842 | } |
4864 | if (!partial_begin && !partial_end) | 4843 | if (!partial_begin && !partial_end) |
4865 | goto out_dio; | 4844 | goto out_dio; |
@@ -4922,6 +4901,20 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) | |||
4922 | ext4_lblk_t lblk; | 4901 | ext4_lblk_t lblk; |
4923 | unsigned int blkbits = inode->i_blkbits; | 4902 | unsigned int blkbits = inode->i_blkbits; |
4924 | 4903 | ||
4904 | /* | ||
4905 | * Encrypted inodes can't handle collapse range or insert | ||
4906 | * range since we would need to re-encrypt blocks with a | ||
4907 | * different IV or XTS tweak (which are based on the logical | ||
4908 | * block number). | ||
4909 | * | ||
4910 | * XXX It's not clear why zero range isn't working, but we'll | ||
4911 | * leave it disabled for encrypted inodes for now. This is a | ||
4912 | * bug we should fix.... | ||
4913 | */ | ||
4914 | if (ext4_encrypted_inode(inode) && | ||
4915 | (mode & (FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE))) | ||
4916 | return -EOPNOTSUPP; | ||
4917 | |||
4925 | /* Return error if mode is not supported */ | 4918 | /* Return error if mode is not supported */ |
4926 | if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | | 4919 | if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | |
4927 | FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE)) | 4920 | FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE)) |
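The restructured ext4_zero_range() above preallocates the unaligned edge blocks in full (as unwritten extents) and only converts the block-aligned middle, so the round_down()/round_up() arithmetic decides how much extra space a misaligned request touches. A standalone sketch with illustrative numbers, assuming 4 KiB blocks (the values are examples, not from the patch):

/* Illustrative only: the edge computation of ext4_zero_range() for
 * blkbits == 12 (4 KiB blocks). */
#include <stdio.h>

#define ROUND_DOWN(x, y) ((x) & ~((y) - 1))
#define ROUND_UP(x, y)   ROUND_DOWN((x) + (y) - 1, (y))

int main(void)
{
	unsigned int blkbits = 12;
	long long offset = 5000, len = 10000;
	long long start = ROUND_DOWN(offset, 1LL << blkbits);
	long long end = ROUND_UP(offset + len, 1LL << blkbits);

	/* 5000 and 15000 are both mid-block, so blocks 1..3
	 * (bytes 4096..16383) get preallocated:
	 * (16384 - 4096) >> 12 == 3 blocks for a 10000-byte request. */
	printf("first block %lld, nblocks %lld\n",
	       start >> blkbits, (end - start) >> blkbits);
	return 0;
}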
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c index e04d45733976..d33d5a6852b9 100644 --- a/fs/ext4/extents_status.c +++ b/fs/ext4/extents_status.c | |||
@@ -9,12 +9,10 @@ | |||
9 | * | 9 | * |
10 | * Ext4 extents status tree core functions. | 10 | * Ext4 extents status tree core functions. |
11 | */ | 11 | */ |
12 | #include <linux/rbtree.h> | ||
13 | #include <linux/list_sort.h> | 12 | #include <linux/list_sort.h> |
14 | #include <linux/proc_fs.h> | 13 | #include <linux/proc_fs.h> |
15 | #include <linux/seq_file.h> | 14 | #include <linux/seq_file.h> |
16 | #include "ext4.h" | 15 | #include "ext4.h" |
17 | #include "extents_status.h" | ||
18 | 16 | ||
19 | #include <trace/events/ext4.h> | 17 | #include <trace/events/ext4.h> |
20 | 18 | ||
diff --git a/fs/ext4/file.c b/fs/ext4/file.c index e576d682b353..0613c256c344 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c | |||
@@ -20,7 +20,6 @@ | |||
20 | 20 | ||
21 | #include <linux/time.h> | 21 | #include <linux/time.h> |
22 | #include <linux/fs.h> | 22 | #include <linux/fs.h> |
23 | #include <linux/jbd2.h> | ||
24 | #include <linux/mount.h> | 23 | #include <linux/mount.h> |
25 | #include <linux/path.h> | 24 | #include <linux/path.h> |
26 | #include <linux/quotaops.h> | 25 | #include <linux/quotaops.h> |
@@ -221,6 +220,13 @@ static const struct vm_operations_struct ext4_file_vm_ops = { | |||
221 | 220 | ||
222 | static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma) | 221 | static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma) |
223 | { | 222 | { |
223 | struct inode *inode = file->f_mapping->host; | ||
224 | |||
225 | if (ext4_encrypted_inode(inode)) { | ||
226 | int err = ext4_generate_encryption_key(inode); | ||
227 | if (err) | ||
228 | return 0; | ||
229 | } | ||
224 | file_accessed(file); | 230 | file_accessed(file); |
225 | if (IS_DAX(file_inode(file))) { | 231 | if (IS_DAX(file_inode(file))) { |
226 | vma->vm_ops = &ext4_dax_vm_ops; | 232 | vma->vm_ops = &ext4_dax_vm_ops; |
@@ -238,6 +244,7 @@ static int ext4_file_open(struct inode * inode, struct file * filp) | |||
238 | struct vfsmount *mnt = filp->f_path.mnt; | 244 | struct vfsmount *mnt = filp->f_path.mnt; |
239 | struct path path; | 245 | struct path path; |
240 | char buf[64], *cp; | 246 | char buf[64], *cp; |
247 | int ret; | ||
241 | 248 | ||
242 | if (unlikely(!(sbi->s_mount_flags & EXT4_MF_MNTDIR_SAMPLED) && | 249 | if (unlikely(!(sbi->s_mount_flags & EXT4_MF_MNTDIR_SAMPLED) && |
243 | !(sb->s_flags & MS_RDONLY))) { | 250 | !(sb->s_flags & MS_RDONLY))) { |
@@ -276,11 +283,17 @@ static int ext4_file_open(struct inode * inode, struct file * filp) | |||
276 | * writing and the journal is present | 283 | * writing and the journal is present |
277 | */ | 284 | */ |
278 | if (filp->f_mode & FMODE_WRITE) { | 285 | if (filp->f_mode & FMODE_WRITE) { |
279 | int ret = ext4_inode_attach_jinode(inode); | 286 | ret = ext4_inode_attach_jinode(inode); |
280 | if (ret < 0) | 287 | if (ret < 0) |
281 | return ret; | 288 | return ret; |
282 | } | 289 | } |
283 | return dquot_file_open(inode, filp); | 290 | ret = dquot_file_open(inode, filp); |
291 | if (!ret && ext4_encrypted_inode(inode)) { | ||
292 | ret = ext4_generate_encryption_key(inode); | ||
293 | if (ret) | ||
294 | ret = -EACCES; | ||
295 | } | ||
296 | return ret; | ||
284 | } | 297 | } |
285 | 298 | ||
286 | /* | 299 | /* |
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index a8bc47f75fa0..e9d632e9aa4b 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c | |||
@@ -26,7 +26,6 @@ | |||
26 | #include <linux/fs.h> | 26 | #include <linux/fs.h> |
27 | #include <linux/sched.h> | 27 | #include <linux/sched.h> |
28 | #include <linux/writeback.h> | 28 | #include <linux/writeback.h> |
29 | #include <linux/jbd2.h> | ||
30 | #include <linux/blkdev.h> | 29 | #include <linux/blkdev.h> |
31 | 30 | ||
32 | #include "ext4.h" | 31 | #include "ext4.h" |
diff --git a/fs/ext4/hash.c b/fs/ext4/hash.c index 3d586f02883e..e026aa941fd5 100644 --- a/fs/ext4/hash.c +++ b/fs/ext4/hash.c | |||
@@ -10,7 +10,6 @@ | |||
10 | */ | 10 | */ |
11 | 11 | ||
12 | #include <linux/fs.h> | 12 | #include <linux/fs.h> |
13 | #include <linux/jbd2.h> | ||
14 | #include <linux/cryptohash.h> | 13 | #include <linux/cryptohash.h> |
15 | #include "ext4.h" | 14 | #include "ext4.h" |
16 | 15 | ||
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index ac644c31ca67..2cf18a2d5c72 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c | |||
@@ -14,7 +14,6 @@ | |||
14 | 14 | ||
15 | #include <linux/time.h> | 15 | #include <linux/time.h> |
16 | #include <linux/fs.h> | 16 | #include <linux/fs.h> |
17 | #include <linux/jbd2.h> | ||
18 | #include <linux/stat.h> | 17 | #include <linux/stat.h> |
19 | #include <linux/string.h> | 18 | #include <linux/string.h> |
20 | #include <linux/quotaops.h> | 19 | #include <linux/quotaops.h> |
@@ -997,6 +996,12 @@ got: | |||
997 | ei->i_block_group = group; | 996 | ei->i_block_group = group; |
998 | ei->i_last_alloc_group = ~0; | 997 | ei->i_last_alloc_group = ~0; |
999 | 998 | ||
999 | /* If the directory is encrypted, then we should encrypt the inode. */ | ||
1000 | if ((S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode)) && | ||
1001 | (ext4_encrypted_inode(dir) || | ||
1002 | DUMMY_ENCRYPTION_ENABLED(sbi))) | ||
1003 | ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT); | ||
1004 | |||
1000 | ext4_set_inode_flags(inode); | 1005 | ext4_set_inode_flags(inode); |
1001 | if (IS_DIRSYNC(inode)) | 1006 | if (IS_DIRSYNC(inode)) |
1002 | ext4_handle_sync(handle); | 1007 | ext4_handle_sync(handle); |
@@ -1029,11 +1034,28 @@ got: | |||
1029 | ext4_set_inode_state(inode, EXT4_STATE_NEW); | 1034 | ext4_set_inode_state(inode, EXT4_STATE_NEW); |
1030 | 1035 | ||
1031 | ei->i_extra_isize = EXT4_SB(sb)->s_want_extra_isize; | 1036 | ei->i_extra_isize = EXT4_SB(sb)->s_want_extra_isize; |
1032 | 1037 | #ifdef CONFIG_EXT4_FS_ENCRYPTION | |
1038 | if ((sbi->s_file_encryption_mode == EXT4_ENCRYPTION_MODE_INVALID) && | ||
1039 | (sbi->s_dir_encryption_mode == EXT4_ENCRYPTION_MODE_INVALID)) { | ||
1040 | ei->i_inline_off = 0; | ||
1041 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, | ||
1042 | EXT4_FEATURE_INCOMPAT_INLINE_DATA)) | ||
1043 | ext4_set_inode_state(inode, | ||
1044 | EXT4_STATE_MAY_INLINE_DATA); | ||
1045 | } else { | ||
1046 | /* Inline data and encryption are incompatible. | ||
1047 | * We turn off inline data since encryption is enabled. */ | ||
1048 | ei->i_inline_off = 1; | ||
1049 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, | ||
1050 | EXT4_FEATURE_INCOMPAT_INLINE_DATA)) | ||
1051 | ext4_clear_inode_state(inode, | ||
1052 | EXT4_STATE_MAY_INLINE_DATA); | ||
1053 | } | ||
1054 | #else | ||
1033 | ei->i_inline_off = 0; | 1055 | ei->i_inline_off = 0; |
1034 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_INLINE_DATA)) | 1056 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_INLINE_DATA)) |
1035 | ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); | 1057 | ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); |
1036 | 1058 | #endif | |
1037 | ret = inode; | 1059 | ret = inode; |
1038 | err = dquot_alloc_inode(inode); | 1060 | err = dquot_alloc_inode(inode); |
1039 | if (err) | 1061 | if (err) |
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index 4b143febf21f..feb2cafbeace 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c | |||
@@ -11,11 +11,13 @@ | |||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
12 | * GNU General Public License for more details. | 12 | * GNU General Public License for more details. |
13 | */ | 13 | */ |
14 | |||
15 | #include <linux/fiemap.h> | ||
16 | |||
14 | #include "ext4_jbd2.h" | 17 | #include "ext4_jbd2.h" |
15 | #include "ext4.h" | 18 | #include "ext4.h" |
16 | #include "xattr.h" | 19 | #include "xattr.h" |
17 | #include "truncate.h" | 20 | #include "truncate.h" |
18 | #include <linux/fiemap.h> | ||
19 | 21 | ||
20 | #define EXT4_XATTR_SYSTEM_DATA "data" | 22 | #define EXT4_XATTR_SYSTEM_DATA "data" |
21 | #define EXT4_MIN_INLINE_DATA_SIZE ((sizeof(__le32) * EXT4_N_BLOCKS)) | 23 | #define EXT4_MIN_INLINE_DATA_SIZE ((sizeof(__le32) * EXT4_N_BLOCKS)) |
@@ -972,7 +974,7 @@ void ext4_show_inline_dir(struct inode *dir, struct buffer_head *bh, | |||
972 | offset = 0; | 974 | offset = 0; |
973 | while ((void *)de < dlimit) { | 975 | while ((void *)de < dlimit) { |
974 | de_len = ext4_rec_len_from_disk(de->rec_len, inline_size); | 976 | de_len = ext4_rec_len_from_disk(de->rec_len, inline_size); |
975 | trace_printk("de: off %u rlen %u name %*.s nlen %u ino %u\n", | 977 | trace_printk("de: off %u rlen %u name %.*s nlen %u ino %u\n", |
976 | offset, de_len, de->name_len, de->name, | 978 | offset, de_len, de->name_len, de->name, |
977 | de->name_len, le32_to_cpu(de->inode)); | 979 | de->name_len, le32_to_cpu(de->inode)); |
978 | if (ext4_check_dir_entry(dir, NULL, de, bh, | 980 | if (ext4_check_dir_entry(dir, NULL, de, bh, |
@@ -1014,7 +1016,8 @@ static int ext4_add_dirent_to_inline(handle_t *handle, | |||
1014 | err = ext4_journal_get_write_access(handle, iloc->bh); | 1016 | err = ext4_journal_get_write_access(handle, iloc->bh); |
1015 | if (err) | 1017 | if (err) |
1016 | return err; | 1018 | return err; |
1017 | ext4_insert_dentry(inode, de, inline_size, name, namelen); | 1019 | ext4_insert_dentry(dir, inode, de, inline_size, &dentry->d_name, |
1020 | name, namelen); | ||
1018 | 1021 | ||
1019 | ext4_show_inline_dir(dir, iloc->bh, inline_start, inline_size); | 1022 | ext4_show_inline_dir(dir, iloc->bh, inline_start, inline_size); |
1020 | 1023 | ||
@@ -1327,6 +1330,7 @@ int htree_inlinedir_to_tree(struct file *dir_file, | |||
1327 | struct ext4_iloc iloc; | 1330 | struct ext4_iloc iloc; |
1328 | void *dir_buf = NULL; | 1331 | void *dir_buf = NULL; |
1329 | struct ext4_dir_entry_2 fake; | 1332 | struct ext4_dir_entry_2 fake; |
1333 | struct ext4_str tmp_str; | ||
1330 | 1334 | ||
1331 | ret = ext4_get_inode_loc(inode, &iloc); | 1335 | ret = ext4_get_inode_loc(inode, &iloc); |
1332 | if (ret) | 1336 | if (ret) |
@@ -1398,8 +1402,10 @@ int htree_inlinedir_to_tree(struct file *dir_file, | |||
1398 | continue; | 1402 | continue; |
1399 | if (de->inode == 0) | 1403 | if (de->inode == 0) |
1400 | continue; | 1404 | continue; |
1401 | err = ext4_htree_store_dirent(dir_file, | 1405 | tmp_str.name = de->name; |
1402 | hinfo->hash, hinfo->minor_hash, de); | 1406 | tmp_str.len = de->name_len; |
1407 | err = ext4_htree_store_dirent(dir_file, hinfo->hash, | ||
1408 | hinfo->minor_hash, de, &tmp_str); | ||
1403 | if (err) { | 1409 | if (err) { |
1404 | count = err; | 1410 | count = err; |
1405 | goto out; | 1411 | goto out; |
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index b49cf6e59953..366476e71e10 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -20,7 +20,6 @@ | |||
20 | 20 | ||
21 | #include <linux/fs.h> | 21 | #include <linux/fs.h> |
22 | #include <linux/time.h> | 22 | #include <linux/time.h> |
23 | #include <linux/jbd2.h> | ||
24 | #include <linux/highuid.h> | 23 | #include <linux/highuid.h> |
25 | #include <linux/pagemap.h> | 24 | #include <linux/pagemap.h> |
26 | #include <linux/quotaops.h> | 25 | #include <linux/quotaops.h> |
@@ -36,7 +35,6 @@ | |||
36 | #include <linux/kernel.h> | 35 | #include <linux/kernel.h> |
37 | #include <linux/printk.h> | 36 | #include <linux/printk.h> |
38 | #include <linux/slab.h> | 37 | #include <linux/slab.h> |
39 | #include <linux/ratelimit.h> | ||
40 | #include <linux/bitops.h> | 38 | #include <linux/bitops.h> |
41 | 39 | ||
42 | #include "ext4_jbd2.h" | 40 | #include "ext4_jbd2.h" |
@@ -140,7 +138,7 @@ static int ext4_meta_trans_blocks(struct inode *inode, int lblocks, | |||
140 | /* | 138 | /* |
141 | * Test whether an inode is a fast symlink. | 139 | * Test whether an inode is a fast symlink. |
142 | */ | 140 | */ |
143 | static int ext4_inode_is_fast_symlink(struct inode *inode) | 141 | int ext4_inode_is_fast_symlink(struct inode *inode) |
144 | { | 142 | { |
145 | int ea_blocks = EXT4_I(inode)->i_file_acl ? | 143 | int ea_blocks = EXT4_I(inode)->i_file_acl ? |
146 | EXT4_CLUSTER_SIZE(inode->i_sb) >> 9 : 0; | 144 | EXT4_CLUSTER_SIZE(inode->i_sb) >> 9 : 0; |
@@ -887,6 +885,95 @@ int do_journal_get_write_access(handle_t *handle, | |||
887 | 885 | ||
888 | static int ext4_get_block_write_nolock(struct inode *inode, sector_t iblock, | 886 | static int ext4_get_block_write_nolock(struct inode *inode, sector_t iblock, |
889 | struct buffer_head *bh_result, int create); | 887 | struct buffer_head *bh_result, int create); |
888 | |||
889 | #ifdef CONFIG_EXT4_FS_ENCRYPTION | ||
890 | static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len, | ||
891 | get_block_t *get_block) | ||
892 | { | ||
893 | unsigned from = pos & (PAGE_CACHE_SIZE - 1); | ||
894 | unsigned to = from + len; | ||
895 | struct inode *inode = page->mapping->host; | ||
896 | unsigned block_start, block_end; | ||
897 | sector_t block; | ||
898 | int err = 0; | ||
899 | unsigned blocksize = inode->i_sb->s_blocksize; | ||
900 | unsigned bbits; | ||
901 | struct buffer_head *bh, *head, *wait[2], **wait_bh = wait; | ||
902 | bool decrypt = false; | ||
903 | |||
904 | BUG_ON(!PageLocked(page)); | ||
905 | BUG_ON(from > PAGE_CACHE_SIZE); | ||
906 | BUG_ON(to > PAGE_CACHE_SIZE); | ||
907 | BUG_ON(from > to); | ||
908 | |||
909 | if (!page_has_buffers(page)) | ||
910 | create_empty_buffers(page, blocksize, 0); | ||
911 | head = page_buffers(page); | ||
912 | bbits = ilog2(blocksize); | ||
913 | block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits); | ||
914 | |||
915 | for (bh = head, block_start = 0; bh != head || !block_start; | ||
916 | block++, block_start = block_end, bh = bh->b_this_page) { | ||
917 | block_end = block_start + blocksize; | ||
918 | if (block_end <= from || block_start >= to) { | ||
919 | if (PageUptodate(page)) { | ||
920 | if (!buffer_uptodate(bh)) | ||
921 | set_buffer_uptodate(bh); | ||
922 | } | ||
923 | continue; | ||
924 | } | ||
925 | if (buffer_new(bh)) | ||
926 | clear_buffer_new(bh); | ||
927 | if (!buffer_mapped(bh)) { | ||
928 | WARN_ON(bh->b_size != blocksize); | ||
929 | err = get_block(inode, block, bh, 1); | ||
930 | if (err) | ||
931 | break; | ||
932 | if (buffer_new(bh)) { | ||
933 | unmap_underlying_metadata(bh->b_bdev, | ||
934 | bh->b_blocknr); | ||
935 | if (PageUptodate(page)) { | ||
936 | clear_buffer_new(bh); | ||
937 | set_buffer_uptodate(bh); | ||
938 | mark_buffer_dirty(bh); | ||
939 | continue; | ||
940 | } | ||
941 | if (block_end > to || block_start < from) | ||
942 | zero_user_segments(page, to, block_end, | ||
943 | block_start, from); | ||
944 | continue; | ||
945 | } | ||
946 | } | ||
947 | if (PageUptodate(page)) { | ||
948 | if (!buffer_uptodate(bh)) | ||
949 | set_buffer_uptodate(bh); | ||
950 | continue; | ||
951 | } | ||
952 | if (!buffer_uptodate(bh) && !buffer_delay(bh) && | ||
953 | !buffer_unwritten(bh) && | ||
954 | (block_start < from || block_end > to)) { | ||
955 | ll_rw_block(READ, 1, &bh); | ||
956 | *wait_bh++ = bh; | ||
957 | decrypt = ext4_encrypted_inode(inode) && | ||
958 | S_ISREG(inode->i_mode); | ||
959 | } | ||
960 | } | ||
961 | /* | ||
962 | * If we issued read requests, let them complete. | ||
963 | */ | ||
964 | while (wait_bh > wait) { | ||
965 | wait_on_buffer(*--wait_bh); | ||
966 | if (!buffer_uptodate(*wait_bh)) | ||
967 | err = -EIO; | ||
968 | } | ||
969 | if (unlikely(err)) | ||
970 | page_zero_new_buffers(page, from, to); | ||
971 | else if (decrypt) | ||
972 | err = ext4_decrypt_one(inode, page); | ||
973 | return err; | ||
974 | } | ||
975 | #endif | ||
976 | |||
890 | static int ext4_write_begin(struct file *file, struct address_space *mapping, | 977 | static int ext4_write_begin(struct file *file, struct address_space *mapping, |
891 | loff_t pos, unsigned len, unsigned flags, | 978 | loff_t pos, unsigned len, unsigned flags, |
892 | struct page **pagep, void **fsdata) | 979 | struct page **pagep, void **fsdata) |
@@ -949,11 +1036,19 @@ retry_journal: | |||
949 | /* In case writeback began while the page was unlocked */ | 1036 | /* In case writeback began while the page was unlocked */ |
950 | wait_for_stable_page(page); | 1037 | wait_for_stable_page(page); |
951 | 1038 | ||
1039 | #ifdef CONFIG_EXT4_FS_ENCRYPTION | ||
1040 | if (ext4_should_dioread_nolock(inode)) | ||
1041 | ret = ext4_block_write_begin(page, pos, len, | ||
1042 | ext4_get_block_write); | ||
1043 | else | ||
1044 | ret = ext4_block_write_begin(page, pos, len, | ||
1045 | ext4_get_block); | ||
1046 | #else | ||
952 | if (ext4_should_dioread_nolock(inode)) | 1047 | if (ext4_should_dioread_nolock(inode)) |
953 | ret = __block_write_begin(page, pos, len, ext4_get_block_write); | 1048 | ret = __block_write_begin(page, pos, len, ext4_get_block_write); |
954 | else | 1049 | else |
955 | ret = __block_write_begin(page, pos, len, ext4_get_block); | 1050 | ret = __block_write_begin(page, pos, len, ext4_get_block); |
956 | 1051 | #endif | |
957 | if (!ret && ext4_should_journal_data(inode)) { | 1052 | if (!ret && ext4_should_journal_data(inode)) { |
958 | ret = ext4_walk_page_buffers(handle, page_buffers(page), | 1053 | ret = ext4_walk_page_buffers(handle, page_buffers(page), |
959 | from, to, NULL, | 1054 | from, to, NULL, |
@@ -2575,7 +2670,12 @@ retry_journal: | |||
2575 | /* In case writeback began while the page was unlocked */ | 2670 | /* In case writeback began while the page was unlocked */ |
2576 | wait_for_stable_page(page); | 2671 | wait_for_stable_page(page); |
2577 | 2672 | ||
2673 | #ifdef CONFIG_EXT4_FS_ENCRYPTION | ||
2674 | ret = ext4_block_write_begin(page, pos, len, | ||
2675 | ext4_da_get_block_prep); | ||
2676 | #else | ||
2578 | ret = __block_write_begin(page, pos, len, ext4_da_get_block_prep); | 2677 | ret = __block_write_begin(page, pos, len, ext4_da_get_block_prep); |
2678 | #endif | ||
2579 | if (ret < 0) { | 2679 | if (ret < 0) { |
2580 | unlock_page(page); | 2680 | unlock_page(page); |
2581 | ext4_journal_stop(handle); | 2681 | ext4_journal_stop(handle); |
@@ -2821,7 +2921,7 @@ static int ext4_readpage(struct file *file, struct page *page) | |||
2821 | ret = ext4_readpage_inline(inode, page); | 2921 | ret = ext4_readpage_inline(inode, page); |
2822 | 2922 | ||
2823 | if (ret == -EAGAIN) | 2923 | if (ret == -EAGAIN) |
2824 | return mpage_readpage(page, ext4_get_block); | 2924 | return ext4_mpage_readpages(page->mapping, NULL, page, 1); |
2825 | 2925 | ||
2826 | return ret; | 2926 | return ret; |
2827 | } | 2927 | } |
@@ -2836,7 +2936,7 @@ ext4_readpages(struct file *file, struct address_space *mapping, | |||
2836 | if (ext4_has_inline_data(inode)) | 2936 | if (ext4_has_inline_data(inode)) |
2837 | return 0; | 2937 | return 0; |
2838 | 2938 | ||
2839 | return mpage_readpages(mapping, pages, nr_pages, ext4_get_block); | 2939 | return ext4_mpage_readpages(mapping, pages, NULL, nr_pages); |
2840 | } | 2940 | } |
2841 | 2941 | ||
2842 | static void ext4_invalidatepage(struct page *page, unsigned int offset, | 2942 | static void ext4_invalidatepage(struct page *page, unsigned int offset, |
@@ -3033,6 +3133,9 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter, | |||
3033 | get_block_func = ext4_get_block_write; | 3133 | get_block_func = ext4_get_block_write; |
3034 | dio_flags = DIO_LOCKING; | 3134 | dio_flags = DIO_LOCKING; |
3035 | } | 3135 | } |
3136 | #ifdef CONFIG_EXT4_FS_ENCRYPTION | ||
3137 | BUG_ON(ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode)); | ||
3138 | #endif | ||
3036 | if (IS_DAX(inode)) | 3139 | if (IS_DAX(inode)) |
3037 | ret = dax_do_io(iocb, inode, iter, offset, get_block_func, | 3140 | ret = dax_do_io(iocb, inode, iter, offset, get_block_func, |
3038 | ext4_end_io_dio, dio_flags); | 3141 | ext4_end_io_dio, dio_flags); |
@@ -3097,6 +3200,11 @@ static ssize_t ext4_direct_IO(struct kiocb *iocb, struct iov_iter *iter, | |||
3097 | size_t count = iov_iter_count(iter); | 3200 | size_t count = iov_iter_count(iter); |
3098 | ssize_t ret; | 3201 | ssize_t ret; |
3099 | 3202 | ||
3203 | #ifdef CONFIG_EXT4_FS_ENCRYPTION | ||
3204 | if (ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode)) | ||
3205 | return 0; | ||
3206 | #endif | ||
3207 | |||
3100 | /* | 3208 | /* |
3101 | * If we are doing data journalling we don't support O_DIRECT | 3209 | * If we are doing data journalling we don't support O_DIRECT |
3102 | */ | 3210 | */ |
@@ -3261,6 +3369,13 @@ static int __ext4_block_zero_page_range(handle_t *handle, | |||
3261 | /* Uhhuh. Read error. Complain and punt. */ | 3369 | /* Uhhuh. Read error. Complain and punt. */ |
3262 | if (!buffer_uptodate(bh)) | 3370 | if (!buffer_uptodate(bh)) |
3263 | goto unlock; | 3371 | goto unlock; |
3372 | if (S_ISREG(inode->i_mode) && | ||
3373 | ext4_encrypted_inode(inode)) { | ||
3374 | /* We expect the key to be set. */ | ||
3375 | BUG_ON(!ext4_has_encryption_key(inode)); | ||
3376 | BUG_ON(blocksize != PAGE_CACHE_SIZE); | ||
3377 | WARN_ON_ONCE(ext4_decrypt_one(inode, page)); | ||
3378 | } | ||
3264 | } | 3379 | } |
3265 | if (ext4_should_journal_data(inode)) { | 3380 | if (ext4_should_journal_data(inode)) { |
3266 | BUFFER_TRACE(bh, "get write access"); | 3381 | BUFFER_TRACE(bh, "get write access"); |
@@ -4096,7 +4211,8 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
4096 | inode->i_op = &ext4_dir_inode_operations; | 4211 | inode->i_op = &ext4_dir_inode_operations; |
4097 | inode->i_fop = &ext4_dir_operations; | 4212 | inode->i_fop = &ext4_dir_operations; |
4098 | } else if (S_ISLNK(inode->i_mode)) { | 4213 | } else if (S_ISLNK(inode->i_mode)) { |
4099 | if (ext4_inode_is_fast_symlink(inode)) { | 4214 | if (ext4_inode_is_fast_symlink(inode) && |
4215 | !ext4_encrypted_inode(inode)) { | ||
4100 | inode->i_op = &ext4_fast_symlink_inode_operations; | 4216 | inode->i_op = &ext4_fast_symlink_inode_operations; |
4101 | nd_terminate_link(ei->i_data, inode->i_size, | 4217 | nd_terminate_link(ei->i_data, inode->i_size, |
4102 | sizeof(ei->i_data) - 1); | 4218 | sizeof(ei->i_data) - 1); |
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index f58a0d106726..2cb9e178d1c5 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c | |||
@@ -8,12 +8,12 @@ | |||
8 | */ | 8 | */ |
9 | 9 | ||
10 | #include <linux/fs.h> | 10 | #include <linux/fs.h> |
11 | #include <linux/jbd2.h> | ||
12 | #include <linux/capability.h> | 11 | #include <linux/capability.h> |
13 | #include <linux/time.h> | 12 | #include <linux/time.h> |
14 | #include <linux/compat.h> | 13 | #include <linux/compat.h> |
15 | #include <linux/mount.h> | 14 | #include <linux/mount.h> |
16 | #include <linux/file.h> | 15 | #include <linux/file.h> |
16 | #include <linux/random.h> | ||
17 | #include <asm/uaccess.h> | 17 | #include <asm/uaccess.h> |
18 | #include "ext4_jbd2.h" | 18 | #include "ext4_jbd2.h" |
19 | #include "ext4.h" | 19 | #include "ext4.h" |
@@ -196,6 +196,16 @@ journal_err_out: | |||
196 | return err; | 196 | return err; |
197 | } | 197 | } |
198 | 198 | ||
199 | static int uuid_is_zero(__u8 u[16]) | ||
200 | { | ||
201 | int i; | ||
202 | |||
203 | for (i = 0; i < 16; i++) | ||
204 | if (u[i]) | ||
205 | return 0; | ||
206 | return 1; | ||
207 | } | ||
208 | |||
199 | long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | 209 | long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) |
200 | { | 210 | { |
201 | struct inode *inode = file_inode(filp); | 211 | struct inode *inode = file_inode(filp); |
@@ -615,7 +625,78 @@ resizefs_out: | |||
615 | } | 625 | } |
616 | case EXT4_IOC_PRECACHE_EXTENTS: | 626 | case EXT4_IOC_PRECACHE_EXTENTS: |
617 | return ext4_ext_precache(inode); | 627 | return ext4_ext_precache(inode); |
628 | case EXT4_IOC_SET_ENCRYPTION_POLICY: { | ||
629 | #ifdef CONFIG_EXT4_FS_ENCRYPTION | ||
630 | struct ext4_encryption_policy policy; | ||
631 | int err = 0; | ||
632 | |||
633 | if (copy_from_user(&policy, | ||
634 | (struct ext4_encryption_policy __user *)arg, | ||
635 | sizeof(policy))) { | ||
636 | err = -EFAULT; | ||
637 | goto encryption_policy_out; | ||
638 | } | ||
618 | 639 | ||
640 | err = ext4_process_policy(&policy, inode); | ||
641 | encryption_policy_out: | ||
642 | return err; | ||
643 | #else | ||
644 | return -EOPNOTSUPP; | ||
645 | #endif | ||
646 | } | ||
647 | case EXT4_IOC_GET_ENCRYPTION_PWSALT: { | ||
648 | int err, err2; | ||
649 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
650 | handle_t *handle; | ||
651 | |||
652 | if (!ext4_sb_has_crypto(sb)) | ||
653 | return -EOPNOTSUPP; | ||
654 | if (uuid_is_zero(sbi->s_es->s_encrypt_pw_salt)) { | ||
655 | err = mnt_want_write_file(filp); | ||
656 | if (err) | ||
657 | return err; | ||
658 | handle = ext4_journal_start_sb(sb, EXT4_HT_MISC, 1); | ||
659 | if (IS_ERR(handle)) { | ||
660 | err = PTR_ERR(handle); | ||
661 | goto pwsalt_err_exit; | ||
662 | } | ||
663 | err = ext4_journal_get_write_access(handle, sbi->s_sbh); | ||
664 | if (err) | ||
665 | goto pwsalt_err_journal; | ||
666 | generate_random_uuid(sbi->s_es->s_encrypt_pw_salt); | ||
667 | err = ext4_handle_dirty_metadata(handle, NULL, | ||
668 | sbi->s_sbh); | ||
669 | pwsalt_err_journal: | ||
670 | err2 = ext4_journal_stop(handle); | ||
671 | if (err2 && !err) | ||
672 | err = err2; | ||
673 | pwsalt_err_exit: | ||
674 | mnt_drop_write_file(filp); | ||
675 | if (err) | ||
676 | return err; | ||
677 | } | ||
678 | if (copy_to_user((void __user *) arg, sbi->s_es->s_encrypt_pw_salt, | ||
679 | 16)) | ||
680 | return -EFAULT; | ||
681 | return 0; | ||
682 | } | ||
683 | case EXT4_IOC_GET_ENCRYPTION_POLICY: { | ||
684 | #ifdef CONFIG_EXT4_FS_ENCRYPTION | ||
685 | struct ext4_encryption_policy policy; | ||
686 | int err = 0; | ||
687 | |||
688 | if (!ext4_encrypted_inode(inode)) | ||
689 | return -ENOENT; | ||
690 | err = ext4_get_policy(inode, &policy); | ||
691 | if (err) | ||
692 | return err; | ||
693 | if (copy_to_user((void __user *)arg, &policy, sizeof(policy))) | ||
694 | return -EFAULT; | ||
695 | return 0; | ||
696 | #else | ||
697 | return -EOPNOTSUPP; | ||
698 | #endif | ||
699 | } | ||
619 | default: | 700 | default: |
620 | return -ENOTTY; | 701 | return -ENOTTY; |
621 | } | 702 | } |
@@ -680,6 +761,9 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) | |||
680 | case FITRIM: | 761 | case FITRIM: |
681 | case EXT4_IOC_RESIZE_FS: | 762 | case EXT4_IOC_RESIZE_FS: |
682 | case EXT4_IOC_PRECACHE_EXTENTS: | 763 | case EXT4_IOC_PRECACHE_EXTENTS: |
764 | case EXT4_IOC_SET_ENCRYPTION_POLICY: | ||
765 | case EXT4_IOC_GET_ENCRYPTION_PWSALT: | ||
766 | case EXT4_IOC_GET_ENCRYPTION_POLICY: | ||
683 | break; | 767 | break; |
684 | default: | 768 | default: |
685 | return -ENOIOCTLCMD; | 769 | return -ENOIOCTLCMD; |
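Taken together, the three new ioctls cover the whole userspace workflow: install a policy on a directory, read it back, and fetch the filesystem-wide password salt used for key derivation. A hedged userspace sketch, assuming the struct ext4_encryption_policy and the EXT4_IOC_*/EXT4_ENCRYPTION_MODE_* definitions are visible to the program; the helper name and the example values are illustrative, and error reporting is trimmed:

#include <fcntl.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/types.h>

static int set_and_read_policy(const char *dir, const char desc[8])
{
	struct ext4_encryption_policy policy;
	__u8 salt[16];
	int fd = open(dir, O_RDONLY);

	if (fd < 0)
		return -1;
	memset(&policy, 0, sizeof(policy));
	policy.version = 0;	/* assumed: the only accepted version */
	policy.contents_encryption_mode = EXT4_ENCRYPTION_MODE_AES_256_XTS;
	policy.filenames_encryption_mode = EXT4_ENCRYPTION_MODE_AES_256_CTS;
	memcpy(policy.master_key_descriptor, desc, EXT4_KEY_DESCRIPTOR_SIZE);

	if (ioctl(fd, EXT4_IOC_SET_ENCRYPTION_POLICY, &policy) ||
	    ioctl(fd, EXT4_IOC_GET_ENCRYPTION_POLICY, &policy) ||
	    ioctl(fd, EXT4_IOC_GET_ENCRYPTION_PWSALT, salt)) {
		close(fd);
		return -1;
	}
	close(fd);
	return 0;
}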
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 2291923dae4e..ef22cd951c0c 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c | |||
@@ -26,7 +26,6 @@ | |||
26 | 26 | ||
27 | #include <linux/fs.h> | 27 | #include <linux/fs.h> |
28 | #include <linux/pagemap.h> | 28 | #include <linux/pagemap.h> |
29 | #include <linux/jbd2.h> | ||
30 | #include <linux/time.h> | 29 | #include <linux/time.h> |
31 | #include <linux/fcntl.h> | 30 | #include <linux/fcntl.h> |
32 | #include <linux/stat.h> | 31 | #include <linux/stat.h> |
@@ -254,8 +253,9 @@ static struct dx_frame *dx_probe(const struct qstr *d_name, | |||
254 | struct dx_hash_info *hinfo, | 253 | struct dx_hash_info *hinfo, |
255 | struct dx_frame *frame); | 254 | struct dx_frame *frame); |
256 | static void dx_release(struct dx_frame *frames); | 255 | static void dx_release(struct dx_frame *frames); |
257 | static int dx_make_map(struct ext4_dir_entry_2 *de, unsigned blocksize, | 256 | static int dx_make_map(struct inode *dir, struct ext4_dir_entry_2 *de, |
258 | struct dx_hash_info *hinfo, struct dx_map_entry map[]); | 257 | unsigned blocksize, struct dx_hash_info *hinfo, |
258 | struct dx_map_entry map[]); | ||
259 | static void dx_sort_map(struct dx_map_entry *map, unsigned count); | 259 | static void dx_sort_map(struct dx_map_entry *map, unsigned count); |
260 | static struct ext4_dir_entry_2 *dx_move_dirents(char *from, char *to, | 260 | static struct ext4_dir_entry_2 *dx_move_dirents(char *from, char *to, |
261 | struct dx_map_entry *offsets, int count, unsigned blocksize); | 261 | struct dx_map_entry *offsets, int count, unsigned blocksize); |
@@ -586,8 +586,10 @@ struct stats | |||
586 | unsigned bcount; | 586 | unsigned bcount; |
587 | }; | 587 | }; |
588 | 588 | ||
589 | static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext4_dir_entry_2 *de, | 589 | static struct stats dx_show_leaf(struct inode *dir, |
590 | int size, int show_names) | 590 | struct dx_hash_info *hinfo, |
591 | struct ext4_dir_entry_2 *de, | ||
592 | int size, int show_names) | ||
591 | { | 593 | { |
592 | unsigned names = 0, space = 0; | 594 | unsigned names = 0, space = 0; |
593 | char *base = (char *) de; | 595 | char *base = (char *) de; |
@@ -600,12 +602,80 @@ static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext4_dir_ent | |||
600 | { | 602 | { |
601 | if (show_names) | 603 | if (show_names) |
602 | { | 604 | { |
605 | #ifdef CONFIG_EXT4_FS_ENCRYPTION | ||
606 | int len; | ||
607 | char *name; | ||
608 | struct ext4_str fname_crypto_str | ||
609 | = {.name = NULL, .len = 0}; | ||
610 | struct ext4_fname_crypto_ctx *ctx = NULL; | ||
611 | int res; | ||
612 | |||
613 | name = de->name; | ||
614 | len = de->name_len; | ||
615 | ctx = ext4_get_fname_crypto_ctx(dir, | ||
616 | EXT4_NAME_LEN); | ||
617 | if (IS_ERR(ctx)) { | ||
618 | printk(KERN_WARNING "Error acquiring" | ||
619 | " crypto ctxt--skipping crypto\n"); | ||
620 | ctx = NULL; | ||
621 | } | ||
622 | if (ctx == NULL) { | ||
623 | /* Directory is not encrypted */ | ||
624 | ext4fs_dirhash(de->name, | ||
625 | de->name_len, &h); | ||
626 | printk("%*.s:(U)%x.%u ", len, | ||
627 | name, h.hash, | ||
628 | (unsigned) ((char *) de | ||
629 | - base)); | ||
630 | } else { | ||
631 | /* Directory is encrypted */ | ||
632 | res = ext4_fname_crypto_alloc_buffer( | ||
633 | ctx, de->name_len, | ||
634 | &fname_crypto_str); | ||
635 | if (res < 0) { | ||
636 | printk(KERN_WARNING "Error " | ||
637 | "allocating crypto " | ||
638 | "buffer--skipping " | ||
639 | "crypto\n"); | ||
640 | ext4_put_fname_crypto_ctx(&ctx); | ||
641 | ctx = NULL; | ||
642 | } | ||
643 | res = ext4_fname_disk_to_usr(ctx, de, | ||
644 | &fname_crypto_str); | ||
645 | if (res < 0) { | ||
646 | printk(KERN_WARNING "Error " | ||
647 | "converting filename " | ||
648 | "from disk to usr" | ||
649 | "\n"); | ||
650 | name = "??"; | ||
651 | len = 2; | ||
652 | } else { | ||
653 | name = fname_crypto_str.name; | ||
654 | len = fname_crypto_str.len; | ||
655 | } | ||
656 | res = ext4_fname_disk_to_hash(ctx, de, | ||
657 | &h); | ||
658 | if (res < 0) { | ||
659 | printk(KERN_WARNING "Error " | ||
660 | "converting filename " | ||
661 | "from disk to htree" | ||
662 | "\n"); | ||
663 | h.hash = 0xDEADBEEF; | ||
664 | } | ||
665 | printk("%*.s:(E)%x.%u ", len, name, | ||
666 | h.hash, (unsigned) ((char *) de | ||
667 | - base)); | ||
668 | ext4_put_fname_crypto_ctx(&ctx); | ||
669 | ext4_fname_crypto_free_buffer( | ||
670 | &fname_crypto_str); | ||
671 | } | ||
672 | #else | ||
603 | int len = de->name_len; | 673 | int len = de->name_len; |
604 | char *name = de->name; | 674 | char *name = de->name; |
605 | while (len--) printk("%c", *name++); | ||
606 | ext4fs_dirhash(de->name, de->name_len, &h); | 675 | ext4fs_dirhash(de->name, de->name_len, &h); |
607 | printk(":%x.%u ", h.hash, | 676 | printk("%*.s:%x.%u ", len, name, h.hash, |
608 | (unsigned) ((char *) de - base)); | 677 | (unsigned) ((char *) de - base)); |
678 | #endif | ||
609 | } | 679 | } |
610 | space += EXT4_DIR_REC_LEN(de->name_len); | 680 | space += EXT4_DIR_REC_LEN(de->name_len); |
611 | names++; | 681 | names++; |
@@ -623,7 +693,6 @@ struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir, | |||
623 | unsigned count = dx_get_count(entries), names = 0, space = 0, i; | 693 | unsigned count = dx_get_count(entries), names = 0, space = 0, i; |
624 | unsigned bcount = 0; | 694 | unsigned bcount = 0; |
625 | struct buffer_head *bh; | 695 | struct buffer_head *bh; |
626 | int err; | ||
627 | printk("%i indexed blocks...\n", count); | 696 | printk("%i indexed blocks...\n", count); |
628 | for (i = 0; i < count; i++, entries++) | 697 | for (i = 0; i < count; i++, entries++) |
629 | { | 698 | { |
@@ -637,7 +706,8 @@ struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir, | |||
637 | continue; | 706 | continue; |
638 | stats = levels? | 707 | stats = levels? |
639 | dx_show_entries(hinfo, dir, ((struct dx_node *) bh->b_data)->entries, levels - 1): | 708 | dx_show_entries(hinfo, dir, ((struct dx_node *) bh->b_data)->entries, levels - 1): |
640 | dx_show_leaf(hinfo, (struct ext4_dir_entry_2 *) bh->b_data, blocksize, 0); | 709 | dx_show_leaf(dir, hinfo, (struct ext4_dir_entry_2 *) |
710 | bh->b_data, blocksize, 0); | ||
641 | names += stats.names; | 711 | names += stats.names; |
642 | space += stats.space; | 712 | space += stats.space; |
643 | bcount += stats.bcount; | 713 | bcount += stats.bcount; |
@@ -687,8 +757,28 @@ dx_probe(const struct qstr *d_name, struct inode *dir, | |||
687 | if (hinfo->hash_version <= DX_HASH_TEA) | 757 | if (hinfo->hash_version <= DX_HASH_TEA) |
688 | hinfo->hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned; | 758 | hinfo->hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned; |
689 | hinfo->seed = EXT4_SB(dir->i_sb)->s_hash_seed; | 759 | hinfo->seed = EXT4_SB(dir->i_sb)->s_hash_seed; |
760 | #ifdef CONFIG_EXT4_FS_ENCRYPTION | ||
761 | if (d_name) { | ||
762 | struct ext4_fname_crypto_ctx *ctx = NULL; | ||
763 | int res; | ||
764 | |||
765 | /* Check if the directory is encrypted */ | ||
766 | ctx = ext4_get_fname_crypto_ctx(dir, EXT4_NAME_LEN); | ||
767 | if (IS_ERR(ctx)) { | ||
768 | ret_err = ERR_PTR(PTR_ERR(ctx)); | ||
769 | goto fail; | ||
770 | } | ||
771 | res = ext4_fname_usr_to_hash(ctx, d_name, hinfo); | ||
772 | if (res < 0) { | ||
773 | ret_err = ERR_PTR(res); | ||
774 | goto fail; | ||
775 | } | ||
776 | ext4_put_fname_crypto_ctx(&ctx); | ||
777 | } | ||
778 | #else | ||
690 | if (d_name) | 779 | if (d_name) |
691 | ext4fs_dirhash(d_name->name, d_name->len, hinfo); | 780 | ext4fs_dirhash(d_name->name, d_name->len, hinfo); |
781 | #endif | ||
692 | hash = hinfo->hash; | 782 | hash = hinfo->hash; |
693 | 783 | ||
694 | if (root->info.unused_flags & 1) { | 784 | if (root->info.unused_flags & 1) { |
@@ -773,6 +863,7 @@ fail: | |||
773 | brelse(frame->bh); | 863 | brelse(frame->bh); |
774 | frame--; | 864 | frame--; |
775 | } | 865 | } |
866 | |||
776 | if (ret_err == ERR_PTR(ERR_BAD_DX_DIR)) | 867 | if (ret_err == ERR_PTR(ERR_BAD_DX_DIR)) |
777 | ext4_warning(dir->i_sb, | 868 | ext4_warning(dir->i_sb, |
778 | "Corrupt dir inode %lu, running e2fsck is " | 869 | "Corrupt dir inode %lu, running e2fsck is " |
@@ -878,6 +969,8 @@ static int htree_dirblock_to_tree(struct file *dir_file, | |||
878 | struct buffer_head *bh; | 969 | struct buffer_head *bh; |
879 | struct ext4_dir_entry_2 *de, *top; | 970 | struct ext4_dir_entry_2 *de, *top; |
880 | int err = 0, count = 0; | 971 | int err = 0, count = 0; |
972 | struct ext4_fname_crypto_ctx *ctx = NULL; | ||
973 | struct ext4_str fname_crypto_str = {.name = NULL, .len = 0}, tmp_str; | ||
881 | 974 | ||
882 | dxtrace(printk(KERN_INFO "In htree dirblock_to_tree: block %lu\n", | 975 | dxtrace(printk(KERN_INFO "In htree dirblock_to_tree: block %lu\n", |
883 | (unsigned long)block)); | 976 | (unsigned long)block)); |
@@ -889,6 +982,24 @@ static int htree_dirblock_to_tree(struct file *dir_file, | |||
889 | top = (struct ext4_dir_entry_2 *) ((char *) de + | 982 | top = (struct ext4_dir_entry_2 *) ((char *) de + |
890 | dir->i_sb->s_blocksize - | 983 | dir->i_sb->s_blocksize - |
891 | EXT4_DIR_REC_LEN(0)); | 984 | EXT4_DIR_REC_LEN(0)); |
985 | #ifdef CONFIG_EXT4_FS_ENCRYPTION | ||
986 | /* Check if the directory is encrypted */ | ||
987 | ctx = ext4_get_fname_crypto_ctx(dir, EXT4_NAME_LEN); | ||
988 | if (IS_ERR(ctx)) { | ||
989 | err = PTR_ERR(ctx); | ||
990 | brelse(bh); | ||
991 | return err; | ||
992 | } | ||
993 | if (ctx != NULL) { | ||
994 | err = ext4_fname_crypto_alloc_buffer(ctx, EXT4_NAME_LEN, | ||
995 | &fname_crypto_str); | ||
996 | if (err < 0) { | ||
997 | ext4_put_fname_crypto_ctx(&ctx); | ||
998 | brelse(bh); | ||
999 | return err; | ||
1000 | } | ||
1001 | } | ||
1002 | #endif | ||
892 | for (; de < top; de = ext4_next_entry(de, dir->i_sb->s_blocksize)) { | 1003 | for (; de < top; de = ext4_next_entry(de, dir->i_sb->s_blocksize)) { |
893 | if (ext4_check_dir_entry(dir, NULL, de, bh, | 1004 | if (ext4_check_dir_entry(dir, NULL, de, bh, |
894 | bh->b_data, bh->b_size, | 1005 | bh->b_data, bh->b_size, |
@@ -897,21 +1008,52 @@ static int htree_dirblock_to_tree(struct file *dir_file, | |||
897 | /* silently ignore the rest of the block */ | 1008 | /* silently ignore the rest of the block */ |
898 | break; | 1009 | break; |
899 | } | 1010 | } |
1011 | #ifdef CONFIG_EXT4_FS_ENCRYPTION | ||
1012 | err = ext4_fname_disk_to_hash(ctx, de, hinfo); | ||
1013 | if (err < 0) { | ||
1014 | count = err; | ||
1015 | goto errout; | ||
1016 | } | ||
1017 | #else | ||
900 | ext4fs_dirhash(de->name, de->name_len, hinfo); | 1018 | ext4fs_dirhash(de->name, de->name_len, hinfo); |
1019 | #endif | ||
901 | if ((hinfo->hash < start_hash) || | 1020 | if ((hinfo->hash < start_hash) || |
902 | ((hinfo->hash == start_hash) && | 1021 | ((hinfo->hash == start_hash) && |
903 | (hinfo->minor_hash < start_minor_hash))) | 1022 | (hinfo->minor_hash < start_minor_hash))) |
904 | continue; | 1023 | continue; |
905 | if (de->inode == 0) | 1024 | if (de->inode == 0) |
906 | continue; | 1025 | continue; |
907 | if ((err = ext4_htree_store_dirent(dir_file, | 1026 | if (ctx == NULL) { |
908 | hinfo->hash, hinfo->minor_hash, de)) != 0) { | 1027 | /* Directory is not encrypted */ |
909 | brelse(bh); | 1028 | tmp_str.name = de->name; |
910 | return err; | 1029 | tmp_str.len = de->name_len; |
1030 | err = ext4_htree_store_dirent(dir_file, | ||
1031 | hinfo->hash, hinfo->minor_hash, de, | ||
1032 | &tmp_str); | ||
1033 | } else { | ||
1034 | /* Directory is encrypted */ | ||
1035 | err = ext4_fname_disk_to_usr(ctx, de, | ||
1036 | &fname_crypto_str); | ||
1037 | if (err < 0) { | ||
1038 | count = err; | ||
1039 | goto errout; | ||
1040 | } | ||
1041 | err = ext4_htree_store_dirent(dir_file, | ||
1042 | hinfo->hash, hinfo->minor_hash, de, | ||
1043 | &fname_crypto_str); | ||
1044 | } | ||
1045 | if (err != 0) { | ||
1046 | count = err; | ||
1047 | goto errout; | ||
911 | } | 1048 | } |
912 | count++; | 1049 | count++; |
913 | } | 1050 | } |
1051 | errout: | ||
914 | brelse(bh); | 1052 | brelse(bh); |
1053 | #ifdef CONFIG_EXT4_FS_ENCRYPTION | ||
1054 | ext4_put_fname_crypto_ctx(&ctx); | ||
1055 | ext4_fname_crypto_free_buffer(&fname_crypto_str); | ||
1056 | #endif | ||
915 | return count; | 1057 | return count; |
916 | } | 1058 | } |
917 | 1059 | ||
@@ -935,6 +1077,7 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash, | |||
935 | int count = 0; | 1077 | int count = 0; |
936 | int ret, err; | 1078 | int ret, err; |
937 | __u32 hashval; | 1079 | __u32 hashval; |
1080 | struct ext4_str tmp_str; | ||
938 | 1081 | ||
939 | dxtrace(printk(KERN_DEBUG "In htree_fill_tree, start hash: %x:%x\n", | 1082 | dxtrace(printk(KERN_DEBUG "In htree_fill_tree, start hash: %x:%x\n", |
940 | start_hash, start_minor_hash)); | 1083 | start_hash, start_minor_hash)); |
@@ -970,14 +1113,22 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash, | |||
970 | /* Add '.' and '..' from the htree header */ | 1113 | /* Add '.' and '..' from the htree header */ |
971 | if (!start_hash && !start_minor_hash) { | 1114 | if (!start_hash && !start_minor_hash) { |
972 | de = (struct ext4_dir_entry_2 *) frames[0].bh->b_data; | 1115 | de = (struct ext4_dir_entry_2 *) frames[0].bh->b_data; |
973 | if ((err = ext4_htree_store_dirent(dir_file, 0, 0, de)) != 0) | 1116 | tmp_str.name = de->name; |
1117 | tmp_str.len = de->name_len; | ||
1118 | err = ext4_htree_store_dirent(dir_file, 0, 0, | ||
1119 | de, &tmp_str); | ||
1120 | if (err != 0) | ||
974 | goto errout; | 1121 | goto errout; |
975 | count++; | 1122 | count++; |
976 | } | 1123 | } |
977 | if (start_hash < 2 || (start_hash ==2 && start_minor_hash==0)) { | 1124 | if (start_hash < 2 || (start_hash ==2 && start_minor_hash==0)) { |
978 | de = (struct ext4_dir_entry_2 *) frames[0].bh->b_data; | 1125 | de = (struct ext4_dir_entry_2 *) frames[0].bh->b_data; |
979 | de = ext4_next_entry(de, dir->i_sb->s_blocksize); | 1126 | de = ext4_next_entry(de, dir->i_sb->s_blocksize); |
980 | if ((err = ext4_htree_store_dirent(dir_file, 2, 0, de)) != 0) | 1127 | tmp_str.name = de->name; |
1128 | tmp_str.len = de->name_len; | ||
1129 | err = ext4_htree_store_dirent(dir_file, 2, 0, | ||
1130 | de, &tmp_str); | ||
1131 | if (err != 0) | ||
981 | goto errout; | 1132 | goto errout; |
982 | count++; | 1133 | count++; |
983 | } | 1134 | } |
@@ -1035,17 +1186,33 @@ static inline int search_dirblock(struct buffer_head *bh, | |||
1035 | * Create map of hash values, offsets, and sizes, stored at end of block. | 1186 | * Create map of hash values, offsets, and sizes, stored at end of block. |
1036 | * Returns number of entries mapped. | 1187 | * Returns number of entries mapped. |
1037 | */ | 1188 | */ |
1038 | static int dx_make_map(struct ext4_dir_entry_2 *de, unsigned blocksize, | 1189 | static int dx_make_map(struct inode *dir, struct ext4_dir_entry_2 *de, |
1039 | struct dx_hash_info *hinfo, | 1190 | unsigned blocksize, struct dx_hash_info *hinfo, |
1040 | struct dx_map_entry *map_tail) | 1191 | struct dx_map_entry *map_tail) |
1041 | { | 1192 | { |
1042 | int count = 0; | 1193 | int count = 0; |
1043 | char *base = (char *) de; | 1194 | char *base = (char *) de; |
1044 | struct dx_hash_info h = *hinfo; | 1195 | struct dx_hash_info h = *hinfo; |
1196 | #ifdef CONFIG_EXT4_FS_ENCRYPTION | ||
1197 | struct ext4_fname_crypto_ctx *ctx = NULL; | ||
1198 | int err; | ||
1199 | |||
1200 | ctx = ext4_get_fname_crypto_ctx(dir, EXT4_NAME_LEN); | ||
1201 | if (IS_ERR(ctx)) | ||
1202 | return PTR_ERR(ctx); | ||
1203 | #endif | ||
1045 | 1204 | ||
1046 | while ((char *) de < base + blocksize) { | 1205 | while ((char *) de < base + blocksize) { |
1047 | if (de->name_len && de->inode) { | 1206 | if (de->name_len && de->inode) { |
1207 | #ifdef CONFIG_EXT4_FS_ENCRYPTION | ||
1208 | err = ext4_fname_disk_to_hash(ctx, de, &h); | ||
1209 | if (err < 0) { | ||
1210 | ext4_put_fname_crypto_ctx(&ctx); | ||
1211 | return err; | ||
1212 | } | ||
1213 | #else | ||
1048 | ext4fs_dirhash(de->name, de->name_len, &h); | 1214 | ext4fs_dirhash(de->name, de->name_len, &h); |
1215 | #endif | ||
1049 | map_tail--; | 1216 | map_tail--; |
1050 | map_tail->hash = h.hash; | 1217 | map_tail->hash = h.hash; |
1051 | map_tail->offs = ((char *) de - base)>>2; | 1218 | map_tail->offs = ((char *) de - base)>>2; |
@@ -1056,6 +1223,9 @@ static int dx_make_map(struct ext4_dir_entry_2 *de, unsigned blocksize, | |||
1056 | /* XXX: do we need to check rec_len == 0 case? -Chris */ | 1223 | /* XXX: do we need to check rec_len == 0 case? -Chris */ |
1057 | de = ext4_next_entry(de, blocksize); | 1224 | de = ext4_next_entry(de, blocksize); |
1058 | } | 1225 | } |
1226 | #ifdef CONFIG_EXT4_FS_ENCRYPTION | ||
1227 | ext4_put_fname_crypto_ctx(&ctx); | ||
1228 | #endif | ||
1059 | return count; | 1229 | return count; |
1060 | } | 1230 | } |
1061 | 1231 | ||
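dx_make_map() builds a (hash, offset, size) map that grows downward from the end of the block; the patch only changes where the hash comes from. A self-contained sketch of the map-building step, again with a toy hash and simplified entries:

#include <stdio.h>
#include <stdint.h>

struct map_entry { uint32_t hash; uint16_t offs; uint16_t size; };

int main(void)
{
	/* Toy block: name records at known byte offsets. */
	struct { uint16_t offs; uint16_t rec_len; const char *name; } de[] = {
		{ 0, 16, "a" }, { 16, 24, "bb" }, { 40, 24, "ccc" },
	};
	struct map_entry map[8], *tail = map + 8;	/* map grows downward */
	int count = 0;

	for (unsigned i = 0; i < 3; i++) {
		uint32_t h = 5381;
		for (const char *p = de[i].name; *p; p++)
			h = h * 33 + (unsigned char)*p;
		tail--;
		tail->hash = h;
		tail->offs = de[i].offs >> 2;	/* stored in 4-byte units */
		tail->size = de[i].rec_len;
		count++;
	}
	for (int i = 0; i < count; i++)
		printf("hash %08x offs %u size %u\n",
		       (unsigned)tail[i].hash,
		       (unsigned)(tail[i].offs << 2),
		       (unsigned)tail[i].size);
	return 0;
}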
@@ -1106,57 +1276,107 @@ static void dx_insert_block(struct dx_frame *frame, u32 hash, ext4_lblk_t block) | |||
1106 | * `len <= EXT4_NAME_LEN' is guaranteed by caller. | 1276 | * `len <= EXT4_NAME_LEN' is guaranteed by caller. |
1107 | * `de != NULL' is guaranteed by caller. | 1277 | * `de != NULL' is guaranteed by caller. |
1108 | */ | 1278 | */ |
1109 | static inline int ext4_match (int len, const char * const name, | 1279 | static inline int ext4_match(struct ext4_fname_crypto_ctx *ctx, |
1110 | struct ext4_dir_entry_2 * de) | 1280 | struct ext4_str *fname_crypto_str, |
1281 | int len, const char * const name, | ||
1282 | struct ext4_dir_entry_2 *de) | ||
1111 | { | 1283 | { |
1112 | if (len != de->name_len) | 1284 | int res; |
1113 | return 0; | 1285 | |
1114 | if (!de->inode) | 1286 | if (!de->inode) |
1115 | return 0; | 1287 | return 0; |
1116 | return !memcmp(name, de->name, len); | 1288 | |
1289 | #ifdef CONFIG_EXT4_FS_ENCRYPTION | ||
1290 | if (ctx) { | ||
1291 | /* Directory is encrypted */ | ||
1292 | res = ext4_fname_disk_to_usr(ctx, de, fname_crypto_str); | ||
1293 | if (res < 0) | ||
1294 | return res; | ||
1295 | if (len != res) | ||
1296 | return 0; | ||
1297 | res = memcmp(name, fname_crypto_str->name, len); | ||
1298 | return (res == 0) ? 1 : 0; | ||
1299 | } | ||
1300 | #endif | ||
1301 | if (len != de->name_len) | ||
1302 | return 0; | ||
1303 | res = memcmp(name, de->name, len); | ||
1304 | return (res == 0) ? 1 : 0; | ||
1117 | } | 1305 | } |
1118 | 1306 | ||
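ext4_match() used to be a pure boolean. With encryption the comparison may first have to decrypt the on-disk name, which can fail, so the patch moves it to a tri-state convention (negative on error, 0 on mismatch, 1 on match) that every caller now checks. A compact sketch of that convention, with a flag standing in for a decryption failure:

#include <stdio.h>
#include <string.h>

/* Mirrors the new convention: <0 error, 0 no match, 1 match. */
static int match(const char *want, size_t want_len,
		 const char *disk, size_t disk_len, int decrypt_fails)
{
	if (decrypt_fails)
		return -1;		/* e.g. ext4_fname_disk_to_usr() error */
	if (want_len != disk_len)
		return 0;
	return memcmp(want, disk, want_len) == 0 ? 1 : 0;
}

int main(void)
{
	printf("%d\n", match("foo", 3, "foo", 3, 0));	/* 1  */
	printf("%d\n", match("foo", 3, "bar", 3, 0));	/* 0  */
	printf("%d\n", match("foo", 3, "foo", 3, 1));	/* -1 */
	return 0;
}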
1119 | /* | 1307 | /* |
1120 | * Returns 0 if not found, -1 on failure, and 1 on success | 1308 | * Returns 0 if not found, -1 on failure, and 1 on success |
1121 | */ | 1309 | */ |
1122 | int search_dir(struct buffer_head *bh, | 1310 | int search_dir(struct buffer_head *bh, char *search_buf, int buf_size, |
1123 | char *search_buf, | 1311 | struct inode *dir, const struct qstr *d_name, |
1124 | int buf_size, | 1312 | unsigned int offset, struct ext4_dir_entry_2 **res_dir) |
1125 | struct inode *dir, | ||
1126 | const struct qstr *d_name, | ||
1127 | unsigned int offset, | ||
1128 | struct ext4_dir_entry_2 **res_dir) | ||
1129 | { | 1313 | { |
1130 | struct ext4_dir_entry_2 * de; | 1314 | struct ext4_dir_entry_2 * de; |
1131 | char * dlimit; | 1315 | char * dlimit; |
1132 | int de_len; | 1316 | int de_len; |
1133 | const char *name = d_name->name; | 1317 | const char *name = d_name->name; |
1134 | int namelen = d_name->len; | 1318 | int namelen = d_name->len; |
1319 | struct ext4_fname_crypto_ctx *ctx = NULL; | ||
1320 | struct ext4_str fname_crypto_str = {.name = NULL, .len = 0}; | ||
1321 | int res; | ||
1322 | |||
1323 | ctx = ext4_get_fname_crypto_ctx(dir, EXT4_NAME_LEN); | ||
1324 | if (IS_ERR(ctx)) | ||
1325 | return -1; | ||
1326 | |||
1327 | if (ctx != NULL) { | ||
1328 | /* Allocate buffer to hold maximum name length */ | ||
1329 | res = ext4_fname_crypto_alloc_buffer(ctx, EXT4_NAME_LEN, | ||
1330 | &fname_crypto_str); | ||
1331 | if (res < 0) { | ||
1332 | ext4_put_fname_crypto_ctx(&ctx); | ||
1333 | return -1; | ||
1334 | } | ||
1335 | } | ||
1135 | 1336 | ||
1136 | de = (struct ext4_dir_entry_2 *)search_buf; | 1337 | de = (struct ext4_dir_entry_2 *)search_buf; |
1137 | dlimit = search_buf + buf_size; | 1338 | dlimit = search_buf + buf_size; |
1138 | while ((char *) de < dlimit) { | 1339 | while ((char *) de < dlimit) { |
1139 | /* this code is executed quadratically often */ | 1340 | /* this code is executed quadratically often */ |
1140 | /* do minimal checking `by hand' */ | 1341 | /* do minimal checking `by hand' */ |
1342 | if ((char *) de + de->name_len <= dlimit) { | ||
1343 | res = ext4_match(ctx, &fname_crypto_str, namelen, | ||
1344 | name, de); | ||
1345 | if (res < 0) { | ||
1346 | res = -1; | ||
1347 | goto return_result; | ||
1348 | } | ||
1349 | if (res > 0) { | ||
1350 | /* found a match - just to be sure, do | ||
1351 | * a full check */ | ||
1352 | if (ext4_check_dir_entry(dir, NULL, de, bh, | ||
1353 | bh->b_data, | ||
1354 | bh->b_size, offset)) { | ||
1355 | res = -1; | ||
1356 | goto return_result; | ||
1357 | } | ||
1358 | *res_dir = de; | ||
1359 | res = 1; | ||
1360 | goto return_result; | ||
1361 | } | ||
1141 | 1362 | ||
1142 | if ((char *) de + namelen <= dlimit && | ||
1143 | ext4_match (namelen, name, de)) { | ||
1144 | /* found a match - just to be sure, do a full check */ | ||
1145 | if (ext4_check_dir_entry(dir, NULL, de, bh, bh->b_data, | ||
1146 | bh->b_size, offset)) | ||
1147 | return -1; | ||
1148 | *res_dir = de; | ||
1149 | return 1; | ||
1150 | } | 1363 | } |
1151 | /* prevent looping on a bad block */ | 1364 | /* prevent looping on a bad block */ |
1152 | de_len = ext4_rec_len_from_disk(de->rec_len, | 1365 | de_len = ext4_rec_len_from_disk(de->rec_len, |
1153 | dir->i_sb->s_blocksize); | 1366 | dir->i_sb->s_blocksize); |
1154 | if (de_len <= 0) | 1367 | if (de_len <= 0) { |
1155 | return -1; | 1368 | res = -1; |
1369 | goto return_result; | ||
1370 | } | ||
1156 | offset += de_len; | 1371 | offset += de_len; |
1157 | de = (struct ext4_dir_entry_2 *) ((char *) de + de_len); | 1372 | de = (struct ext4_dir_entry_2 *) ((char *) de + de_len); |
1158 | } | 1373 | } |
1159 | return 0; | 1374 | |
1375 | res = 0; | ||
1376 | return_result: | ||
1377 | ext4_put_fname_crypto_ctx(&ctx); | ||
1378 | ext4_fname_crypto_free_buffer(&fname_crypto_str); | ||
1379 | return res; | ||
1160 | } | 1380 | } |
1161 | 1381 | ||
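search_dir() walks raw directory entries by rec_len and now funnels every exit through return_result so the crypto context and name buffer are always released. Here is the core walk in a standalone sketch with simplified fixed-size entries, including the bad-block guard that keeps a zero rec_len from looping forever:

#include <stdio.h>
#include <stdint.h>
#include <string.h>

struct toy_de { uint16_t rec_len; uint8_t name_len; char name[13]; };

int main(void)
{
	char block[64];
	memset(block, 0, sizeof(block));
	struct toy_de *de = (struct toy_de *)block;
	de->rec_len = 16; de->name_len = 3; memcpy(de->name, "foo", 3);
	de = (struct toy_de *)(block + 16);
	de->rec_len = 48; de->name_len = 3; memcpy(de->name, "bar", 3);

	const char *want = "bar";
	char *p = block, *dlimit = block + sizeof(block);
	while (p < dlimit) {
		de = (struct toy_de *)p;
		if (p + de->name_len <= dlimit &&
		    de->name_len == strlen(want) &&
		    memcmp(de->name, want, de->name_len) == 0) {
			printf("found at offset %ld\n", (long)(p - block));
			return 0;
		}
		if (de->rec_len == 0) {	/* bad block: avoid looping forever */
			fprintf(stderr, "corrupt rec_len\n");
			return 1;
		}
		p += de->rec_len;
	}
	printf("not found\n");
	return 0;
}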
1162 | static int is_dx_internal_node(struct inode *dir, ext4_lblk_t block, | 1382 | static int is_dx_internal_node(struct inode *dir, ext4_lblk_t block, |
@@ -1345,6 +1565,9 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct q | |||
1345 | ext4_lblk_t block; | 1565 | ext4_lblk_t block; |
1346 | int retval; | 1566 | int retval; |
1347 | 1567 | ||
1568 | #ifdef CONFIG_EXT4_FS_ENCRYPTION | ||
1569 | *res_dir = NULL; | ||
1570 | #endif | ||
1348 | frame = dx_probe(d_name, dir, &hinfo, frames); | 1571 | frame = dx_probe(d_name, dir, &hinfo, frames); |
1349 | if (IS_ERR(frame)) | 1572 | if (IS_ERR(frame)) |
1350 | return (struct buffer_head *) frame; | 1573 | return (struct buffer_head *) frame; |
@@ -1417,6 +1640,18 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi | |||
1417 | ino); | 1640 | ino); |
1418 | return ERR_PTR(-EIO); | 1641 | return ERR_PTR(-EIO); |
1419 | } | 1642 | } |
1643 | if (!IS_ERR(inode) && ext4_encrypted_inode(dir) && | ||
1644 | (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || | ||
1645 | S_ISLNK(inode->i_mode)) && | ||
1646 | !ext4_is_child_context_consistent_with_parent(dir, | ||
1647 | inode)) { | ||
1648 | iput(inode); | ||
1649 | ext4_warning(inode->i_sb, | ||
1650 | "Inconsistent encryption contexts: %lu/%lu\n", | ||
1651 | (unsigned long) dir->i_ino, | ||
1652 | (unsigned long) inode->i_ino); | ||
1653 | return ERR_PTR(-EPERM); | ||
1654 | } | ||
1420 | } | 1655 | } |
1421 | return d_splice_alias(inode, dentry); | 1656 | return d_splice_alias(inode, dentry); |
1422 | } | 1657 | } |
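The new check in ext4_lookup() refuses to splice a regular file, directory, or symlink whose encryption context is inconsistent with its parent's, failing with -EPERM. A toy version of the consistency test follows; it assumes a policy is just a master-key descriptor plus mode bits, whereas the real ext4_is_child_context_consistent_with_parent() compares the full context:

#include <stdio.h>
#include <string.h>

struct toy_policy { unsigned char master_key[8]; unsigned char modes; };

/* Stand-in for ext4_is_child_context_consistent_with_parent(). */
static int consistent(const struct toy_policy *dir,
		      const struct toy_policy *child)
{
	return dir->modes == child->modes &&
	       memcmp(dir->master_key, child->master_key, 8) == 0;
}

int main(void)
{
	struct toy_policy dir   = { "KEY00001", 1 };
	struct toy_policy child = { "KEY00002", 1 };

	if (!consistent(&dir, &child)) {
		fprintf(stderr, "Inconsistent encryption contexts\n");
		return 1;	/* lookup returns -EPERM in the patch */
	}
	return 0;
}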
@@ -1541,7 +1776,7 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, | |||
1541 | 1776 | ||
1542 | /* create map in the end of data2 block */ | 1777 | /* create map in the end of data2 block */ |
1543 | map = (struct dx_map_entry *) (data2 + blocksize); | 1778 | map = (struct dx_map_entry *) (data2 + blocksize); |
1544 | count = dx_make_map((struct ext4_dir_entry_2 *) data1, | 1779 | count = dx_make_map(dir, (struct ext4_dir_entry_2 *) data1, |
1545 | blocksize, hinfo, map); | 1780 | blocksize, hinfo, map); |
1546 | map -= count; | 1781 | map -= count; |
1547 | dx_sort_map(map, count); | 1782 | dx_sort_map(map, count); |
@@ -1564,7 +1799,8 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, | |||
1564 | hash2, split, count-split)); | 1799 | hash2, split, count-split)); |
1565 | 1800 | ||
1566 | /* Fancy dance to stay within two buffers */ | 1801 | /* Fancy dance to stay within two buffers */ |
1567 | de2 = dx_move_dirents(data1, data2, map + split, count - split, blocksize); | 1802 | de2 = dx_move_dirents(data1, data2, map + split, count - split, |
1803 | blocksize); | ||
1568 | de = dx_pack_dirents(data1, blocksize); | 1804 | de = dx_pack_dirents(data1, blocksize); |
1569 | de->rec_len = ext4_rec_len_to_disk(data1 + (blocksize - csum_size) - | 1805 | de->rec_len = ext4_rec_len_to_disk(data1 + (blocksize - csum_size) - |
1570 | (char *) de, | 1806 | (char *) de, |
@@ -1580,8 +1816,10 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, | |||
1580 | initialize_dirent_tail(t, blocksize); | 1816 | initialize_dirent_tail(t, blocksize); |
1581 | } | 1817 | } |
1582 | 1818 | ||
1583 | dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data1, blocksize, 1)); | 1819 | dxtrace(dx_show_leaf(dir, hinfo, (struct ext4_dir_entry_2 *) data1, |
1584 | dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data2, blocksize, 1)); | 1820 | blocksize, 1)); |
1821 | dxtrace(dx_show_leaf(dir, hinfo, (struct ext4_dir_entry_2 *) data2, | ||
1822 | blocksize, 1)); | ||
1585 | 1823 | ||
1586 | /* Which block gets the new entry? */ | 1824 | /* Which block gets the new entry? */ |
1587 | if (hinfo->hash >= hash2) { | 1825 | if (hinfo->hash >= hash2) { |
@@ -1618,15 +1856,48 @@ int ext4_find_dest_de(struct inode *dir, struct inode *inode, | |||
1618 | int nlen, rlen; | 1856 | int nlen, rlen; |
1619 | unsigned int offset = 0; | 1857 | unsigned int offset = 0; |
1620 | char *top; | 1858 | char *top; |
1859 | struct ext4_fname_crypto_ctx *ctx = NULL; | ||
1860 | struct ext4_str fname_crypto_str = {.name = NULL, .len = 0}; | ||
1861 | int res; | ||
1862 | |||
1863 | ctx = ext4_get_fname_crypto_ctx(dir, EXT4_NAME_LEN); | ||
1864 | if (IS_ERR(ctx)) | ||
1865 | return -1; | ||
1866 | |||
1867 | if (ctx != NULL) { | ||
1868 | /* Calculate record length needed to store the entry */ | ||
1869 | res = ext4_fname_crypto_namelen_on_disk(ctx, namelen); | ||
1870 | if (res < 0) { | ||
1871 | ext4_put_fname_crypto_ctx(&ctx); | ||
1872 | return res; | ||
1873 | } | ||
1874 | reclen = EXT4_DIR_REC_LEN(res); | ||
1875 | |||
1876 | /* Allocate buffer to hold maximum name length */ | ||
1877 | res = ext4_fname_crypto_alloc_buffer(ctx, EXT4_NAME_LEN, | ||
1878 | &fname_crypto_str); | ||
1879 | if (res < 0) { | ||
1880 | ext4_put_fname_crypto_ctx(&ctx); | ||
1881 | return -1; | ||
1882 | } | ||
1883 | } | ||
1621 | 1884 | ||
1622 | de = (struct ext4_dir_entry_2 *)buf; | 1885 | de = (struct ext4_dir_entry_2 *)buf; |
1623 | top = buf + buf_size - reclen; | 1886 | top = buf + buf_size - reclen; |
1624 | while ((char *) de <= top) { | 1887 | while ((char *) de <= top) { |
1625 | if (ext4_check_dir_entry(dir, NULL, de, bh, | 1888 | if (ext4_check_dir_entry(dir, NULL, de, bh, |
1626 | buf, buf_size, offset)) | 1889 | buf, buf_size, offset)) { |
1627 | return -EIO; | 1890 | res = -EIO; |
1628 | if (ext4_match(namelen, name, de)) | 1891 | goto return_result; |
1629 | return -EEXIST; | 1892 | } |
1893 | /* Provide crypto context and crypto buffer to ext4 match */ | ||
1894 | res = ext4_match(ctx, &fname_crypto_str, namelen, name, de); | ||
1895 | if (res < 0) | ||
1896 | goto return_result; | ||
1897 | if (res > 0) { | ||
1898 | res = -EEXIST; | ||
1899 | goto return_result; | ||
1900 | } | ||
1630 | nlen = EXT4_DIR_REC_LEN(de->name_len); | 1901 | nlen = EXT4_DIR_REC_LEN(de->name_len); |
1631 | rlen = ext4_rec_len_from_disk(de->rec_len, buf_size); | 1902 | rlen = ext4_rec_len_from_disk(de->rec_len, buf_size); |
1632 | if ((de->inode ? rlen - nlen : rlen) >= reclen) | 1903 | if ((de->inode ? rlen - nlen : rlen) >= reclen) |
@@ -1634,26 +1905,62 @@ int ext4_find_dest_de(struct inode *dir, struct inode *inode, | |||
1634 | de = (struct ext4_dir_entry_2 *)((char *)de + rlen); | 1905 | de = (struct ext4_dir_entry_2 *)((char *)de + rlen); |
1635 | offset += rlen; | 1906 | offset += rlen; |
1636 | } | 1907 | } |
1637 | if ((char *) de > top) | ||
1638 | return -ENOSPC; | ||
1639 | 1908 | ||
1640 | *dest_de = de; | 1909 | if ((char *) de > top) |
1641 | return 0; | 1910 | res = -ENOSPC; |
1911 | else { | ||
1912 | *dest_de = de; | ||
1913 | res = 0; | ||
1914 | } | ||
1915 | return_result: | ||
1916 | ext4_put_fname_crypto_ctx(&ctx); | ||
1917 | ext4_fname_crypto_free_buffer(&fname_crypto_str); | ||
1918 | return res; | ||
1642 | } | 1919 | } |
1643 | 1920 | ||
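ext4_find_dest_de() sizes the needed record from the on-disk name length; for encrypted directories that comes from ext4_fname_crypto_namelen_on_disk(), which can exceed the user-visible length because of padding. The free-space test itself is unchanged. Here it is in isolation, with a toy EXT4_DIR_REC_LEN:

#include <stdio.h>

/* Toy EXT4_DIR_REC_LEN: 8 bytes of header + name, rounded up to 4. */
static int rec_len(int name_len) { return (8 + name_len + 3) & ~3; }

int main(void)
{
	/* (inode, name_len, rec_len) triples for a toy block. */
	struct { int inode, name_len, rlen; } de[] = {
		{ 11, 5, 16 },	/* in use, no slack      */
		{ 12, 4, 40 },	/* in use, 28 bytes free */
	};
	int needed = rec_len(9);	/* entry we want to insert */

	for (unsigned i = 0; i < 2; i++) {
		int nlen = rec_len(de[i].name_len);
		int avail = de[i].inode ? de[i].rlen - nlen : de[i].rlen;
		if (avail >= needed) {
			printf("fits after entry %u (%d >= %d)\n",
			       i, avail, needed);
			return 0;
		}
	}
	printf("no space\n");
	return 0;
}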
1644 | void ext4_insert_dentry(struct inode *inode, | 1921 | int ext4_insert_dentry(struct inode *dir, |
1645 | struct ext4_dir_entry_2 *de, | 1922 | struct inode *inode, |
1646 | int buf_size, | 1923 | struct ext4_dir_entry_2 *de, |
1647 | const char *name, int namelen) | 1924 | int buf_size, |
1925 | const struct qstr *iname, | ||
1926 | const char *name, int namelen) | ||
1648 | { | 1927 | { |
1649 | 1928 | ||
1650 | int nlen, rlen; | 1929 | int nlen, rlen; |
1930 | struct ext4_fname_crypto_ctx *ctx = NULL; | ||
1931 | struct ext4_str fname_crypto_str = {.name = NULL, .len = 0}; | ||
1932 | struct ext4_str tmp_str; | ||
1933 | int res; | ||
1934 | |||
1935 | ctx = ext4_get_fname_crypto_ctx(dir, EXT4_NAME_LEN); | ||
1936 | if (IS_ERR(ctx)) | ||
1937 | return -EIO; | ||
1938 | /* By default, the input name is written to the disk unchanged */ | ||
1939 | tmp_str.name = (unsigned char *)name; | ||
1940 | tmp_str.len = namelen; | ||
1941 | if (ctx != NULL) { | ||
1942 | /* Directory is encrypted */ | ||
1943 | res = ext4_fname_crypto_alloc_buffer(ctx, EXT4_NAME_LEN, | ||
1944 | &fname_crypto_str); | ||
1945 | if (res < 0) { | ||
1946 | ext4_put_fname_crypto_ctx(&ctx); | ||
1947 | return -ENOMEM; | ||
1948 | } | ||
1949 | res = ext4_fname_usr_to_disk(ctx, iname, &fname_crypto_str); | ||
1950 | if (res < 0) { | ||
1951 | ext4_put_fname_crypto_ctx(&ctx); | ||
1952 | ext4_fname_crypto_free_buffer(&fname_crypto_str); | ||
1953 | return res; | ||
1954 | } | ||
1955 | tmp_str.name = fname_crypto_str.name; | ||
1956 | tmp_str.len = fname_crypto_str.len; | ||
1957 | } | ||
1651 | 1958 | ||
1652 | nlen = EXT4_DIR_REC_LEN(de->name_len); | 1959 | nlen = EXT4_DIR_REC_LEN(de->name_len); |
1653 | rlen = ext4_rec_len_from_disk(de->rec_len, buf_size); | 1960 | rlen = ext4_rec_len_from_disk(de->rec_len, buf_size); |
1654 | if (de->inode) { | 1961 | if (de->inode) { |
1655 | struct ext4_dir_entry_2 *de1 = | 1962 | struct ext4_dir_entry_2 *de1 = |
1656 | (struct ext4_dir_entry_2 *)((char *)de + nlen); | 1963 | (struct ext4_dir_entry_2 *)((char *)de + nlen); |
1657 | de1->rec_len = ext4_rec_len_to_disk(rlen - nlen, buf_size); | 1964 | de1->rec_len = ext4_rec_len_to_disk(rlen - nlen, buf_size); |
1658 | de->rec_len = ext4_rec_len_to_disk(nlen, buf_size); | 1965 | de->rec_len = ext4_rec_len_to_disk(nlen, buf_size); |
1659 | de = de1; | 1966 | de = de1; |
@@ -1661,9 +1968,14 @@ void ext4_insert_dentry(struct inode *inode, | |||
1661 | de->file_type = EXT4_FT_UNKNOWN; | 1968 | de->file_type = EXT4_FT_UNKNOWN; |
1662 | de->inode = cpu_to_le32(inode->i_ino); | 1969 | de->inode = cpu_to_le32(inode->i_ino); |
1663 | ext4_set_de_type(inode->i_sb, de, inode->i_mode); | 1970 | ext4_set_de_type(inode->i_sb, de, inode->i_mode); |
1664 | de->name_len = namelen; | 1971 | de->name_len = tmp_str.len; |
1665 | memcpy(de->name, name, namelen); | 1972 | |
1973 | memcpy(de->name, tmp_str.name, tmp_str.len); | ||
1974 | ext4_put_fname_crypto_ctx(&ctx); | ||
1975 | ext4_fname_crypto_free_buffer(&fname_crypto_str); | ||
1976 | return 0; | ||
1666 | } | 1977 | } |
1978 | |||
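When the chosen slot is still in use, ext4_insert_dentry() carves the slack off the existing record: the old entry keeps exactly nlen bytes and a new record de1 inherits the remainder. The geometry is independent of encryption (only the name bytes copied at the end differ). A sketch of the split arithmetic:

#include <stdio.h>

static int rec_len(int name_len) { return (8 + name_len + 3) & ~3; }

int main(void)
{
	/* An in-use entry owning 40 bytes but only needing nlen of them. */
	int de_offs = 0, de_name_len = 4, de_rlen = 40;
	int nlen = rec_len(de_name_len);	/* 12: what "de" keeps   */

	/* Mirror of ext4_insert_dentry(): carve the tail into "de1". */
	int de1_offs = de_offs + nlen;
	int de1_rlen = de_rlen - nlen;		/* new entry's record    */
	de_rlen = nlen;				/* old entry shrinks     */

	printf("de:  offs %d rec_len %d\n", de_offs, de_rlen);
	printf("de1: offs %d rec_len %d\n", de1_offs, de1_rlen);
	return 0;
}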
1667 | /* | 1979 | /* |
1668 | * Add a new entry into a directory (leaf) block. If de is non-NULL, | 1980 | * Add a new entry into a directory (leaf) block. If de is non-NULL, |
1669 | * it points to a directory entry which is guaranteed to be large | 1981 | * it points to a directory entry which is guaranteed to be large |
@@ -1700,8 +2012,12 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, | |||
1700 | return err; | 2012 | return err; |
1701 | } | 2013 | } |
1702 | 2014 | ||
1703 | /* By now the buffer is marked for journaling */ | 2015 | /* By now the buffer is marked for journaling. Due to crypto operations, |
1704 | ext4_insert_dentry(inode, de, blocksize, name, namelen); | 2016 | * the following function call may fail */ |
2017 | err = ext4_insert_dentry(dir, inode, de, blocksize, &dentry->d_name, | ||
2018 | name, namelen); | ||
2019 | if (err < 0) | ||
2020 | return err; | ||
1705 | 2021 | ||
1706 | /* | 2022 | /* |
1707 | * XXX shouldn't update any times until successful | 2023 | * XXX shouldn't update any times until successful |
@@ -1733,8 +2049,13 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, | |||
1733 | struct inode *inode, struct buffer_head *bh) | 2049 | struct inode *inode, struct buffer_head *bh) |
1734 | { | 2050 | { |
1735 | struct inode *dir = dentry->d_parent->d_inode; | 2051 | struct inode *dir = dentry->d_parent->d_inode; |
2052 | #ifdef CONFIG_EXT4_FS_ENCRYPTION | ||
2053 | struct ext4_fname_crypto_ctx *ctx = NULL; | ||
2054 | int res; | ||
2055 | #else | ||
1736 | const char *name = dentry->d_name.name; | 2056 | const char *name = dentry->d_name.name; |
1737 | int namelen = dentry->d_name.len; | 2057 | int namelen = dentry->d_name.len; |
2058 | #endif | ||
1738 | struct buffer_head *bh2; | 2059 | struct buffer_head *bh2; |
1739 | struct dx_root *root; | 2060 | struct dx_root *root; |
1740 | struct dx_frame frames[2], *frame; | 2061 | struct dx_frame frames[2], *frame; |
@@ -1748,7 +2069,13 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, | |||
1748 | struct dx_hash_info hinfo; | 2069 | struct dx_hash_info hinfo; |
1749 | ext4_lblk_t block; | 2070 | ext4_lblk_t block; |
1750 | struct fake_dirent *fde; | 2071 | struct fake_dirent *fde; |
1751 | int csum_size = 0; | 2072 | int csum_size = 0; |
2073 | |||
2074 | #ifdef CONFIG_EXT4_FS_ENCRYPTION | ||
2075 | ctx = ext4_get_fname_crypto_ctx(dir, EXT4_NAME_LEN); | ||
2076 | if (IS_ERR(ctx)) | ||
2077 | return PTR_ERR(ctx); | ||
2078 | #endif | ||
1752 | 2079 | ||
1753 | if (ext4_has_metadata_csum(inode->i_sb)) | 2080 | if (ext4_has_metadata_csum(inode->i_sb)) |
1754 | csum_size = sizeof(struct ext4_dir_entry_tail); | 2081 | csum_size = sizeof(struct ext4_dir_entry_tail); |
@@ -1815,7 +2142,18 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, | |||
1815 | if (hinfo.hash_version <= DX_HASH_TEA) | 2142 | if (hinfo.hash_version <= DX_HASH_TEA) |
1816 | hinfo.hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned; | 2143 | hinfo.hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned; |
1817 | hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed; | 2144 | hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed; |
2145 | #ifdef CONFIG_EXT4_FS_ENCRYPTION | ||
2146 | res = ext4_fname_usr_to_hash(ctx, &dentry->d_name, &hinfo); | ||
2147 | if (res < 0) { | ||
2148 | ext4_put_fname_crypto_ctx(&ctx); | ||
2149 | ext4_mark_inode_dirty(handle, dir); | ||
2150 | brelse(bh); | ||
2151 | return res; | ||
2152 | } | ||
2153 | ext4_put_fname_crypto_ctx(&ctx); | ||
2154 | #else | ||
1818 | ext4fs_dirhash(name, namelen, &hinfo); | 2155 | ext4fs_dirhash(name, namelen, &hinfo); |
2156 | #endif | ||
1819 | memset(frames, 0, sizeof(frames)); | 2157 | memset(frames, 0, sizeof(frames)); |
1820 | frame = frames; | 2158 | frame = frames; |
1821 | frame->entries = entries; | 2159 | frame->entries = entries; |
@@ -1865,7 +2203,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, | |||
1865 | struct inode *inode) | 2203 | struct inode *inode) |
1866 | { | 2204 | { |
1867 | struct inode *dir = dentry->d_parent->d_inode; | 2205 | struct inode *dir = dentry->d_parent->d_inode; |
1868 | struct buffer_head *bh; | 2206 | struct buffer_head *bh = NULL; |
1869 | struct ext4_dir_entry_2 *de; | 2207 | struct ext4_dir_entry_2 *de; |
1870 | struct ext4_dir_entry_tail *t; | 2208 | struct ext4_dir_entry_tail *t; |
1871 | struct super_block *sb; | 2209 | struct super_block *sb; |
@@ -1889,14 +2227,14 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, | |||
1889 | return retval; | 2227 | return retval; |
1890 | if (retval == 1) { | 2228 | if (retval == 1) { |
1891 | retval = 0; | 2229 | retval = 0; |
1892 | return retval; | 2230 | goto out; |
1893 | } | 2231 | } |
1894 | } | 2232 | } |
1895 | 2233 | ||
1896 | if (is_dx(dir)) { | 2234 | if (is_dx(dir)) { |
1897 | retval = ext4_dx_add_entry(handle, dentry, inode); | 2235 | retval = ext4_dx_add_entry(handle, dentry, inode); |
1898 | if (!retval || (retval != ERR_BAD_DX_DIR)) | 2236 | if (!retval || (retval != ERR_BAD_DX_DIR)) |
1899 | return retval; | 2237 | goto out; |
1900 | ext4_clear_inode_flag(dir, EXT4_INODE_INDEX); | 2238 | ext4_clear_inode_flag(dir, EXT4_INODE_INDEX); |
1901 | dx_fallback++; | 2239 | dx_fallback++; |
1902 | ext4_mark_inode_dirty(handle, dir); | 2240 | ext4_mark_inode_dirty(handle, dir); |
@@ -1908,14 +2246,15 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, | |||
1908 | return PTR_ERR(bh); | 2246 | return PTR_ERR(bh); |
1909 | 2247 | ||
1910 | retval = add_dirent_to_buf(handle, dentry, inode, NULL, bh); | 2248 | retval = add_dirent_to_buf(handle, dentry, inode, NULL, bh); |
1911 | if (retval != -ENOSPC) { | 2249 | if (retval != -ENOSPC) |
1912 | brelse(bh); | 2250 | goto out; |
1913 | return retval; | ||
1914 | } | ||
1915 | 2251 | ||
1916 | if (blocks == 1 && !dx_fallback && | 2252 | if (blocks == 1 && !dx_fallback && |
1917 | EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_DIR_INDEX)) | 2253 | EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_DIR_INDEX)) { |
1918 | return make_indexed_dir(handle, dentry, inode, bh); | 2254 | retval = make_indexed_dir(handle, dentry, inode, bh); |
2255 | bh = NULL; /* make_indexed_dir releases bh */ | ||
2256 | goto out; | ||
2257 | } | ||
1919 | brelse(bh); | 2258 | brelse(bh); |
1920 | } | 2259 | } |
1921 | bh = ext4_append(handle, dir, &block); | 2260 | bh = ext4_append(handle, dir, &block); |
@@ -1931,6 +2270,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, | |||
1931 | } | 2270 | } |
1932 | 2271 | ||
1933 | retval = add_dirent_to_buf(handle, dentry, inode, de, bh); | 2272 | retval = add_dirent_to_buf(handle, dentry, inode, de, bh); |
2273 | out: | ||
1934 | brelse(bh); | 2274 | brelse(bh); |
1935 | if (retval == 0) | 2275 | if (retval == 0) |
1936 | ext4_set_inode_state(inode, EXT4_STATE_NEWENTRY); | 2276 | ext4_set_inode_state(inode, EXT4_STATE_NEWENTRY); |
@@ -2237,7 +2577,20 @@ retry: | |||
2237 | inode->i_op = &ext4_file_inode_operations; | 2577 | inode->i_op = &ext4_file_inode_operations; |
2238 | inode->i_fop = &ext4_file_operations; | 2578 | inode->i_fop = &ext4_file_operations; |
2239 | ext4_set_aops(inode); | 2579 | ext4_set_aops(inode); |
2240 | err = ext4_add_nondir(handle, dentry, inode); | 2580 | err = 0; |
2581 | #ifdef CONFIG_EXT4_FS_ENCRYPTION | ||
2582 | if (!err && (ext4_encrypted_inode(dir) || | ||
2583 | DUMMY_ENCRYPTION_ENABLED(EXT4_SB(dir->i_sb)))) { | ||
2584 | err = ext4_inherit_context(dir, inode); | ||
2585 | if (err) { | ||
2586 | clear_nlink(inode); | ||
2587 | unlock_new_inode(inode); | ||
2588 | iput(inode); | ||
2589 | } | ||
2590 | } | ||
2591 | #endif | ||
2592 | if (!err) | ||
2593 | err = ext4_add_nondir(handle, dentry, inode); | ||
2241 | if (!err && IS_DIRSYNC(dir)) | 2594 | if (!err && IS_DIRSYNC(dir)) |
2242 | ext4_handle_sync(handle); | 2595 | ext4_handle_sync(handle); |
2243 | } | 2596 | } |
@@ -2418,6 +2771,14 @@ retry: | |||
2418 | err = ext4_init_new_dir(handle, dir, inode); | 2771 | err = ext4_init_new_dir(handle, dir, inode); |
2419 | if (err) | 2772 | if (err) |
2420 | goto out_clear_inode; | 2773 | goto out_clear_inode; |
2774 | #ifdef CONFIG_EXT4_FS_ENCRYPTION | ||
2775 | if (ext4_encrypted_inode(dir) || | ||
2776 | DUMMY_ENCRYPTION_ENABLED(EXT4_SB(dir->i_sb))) { | ||
2777 | err = ext4_inherit_context(dir, inode); | ||
2778 | if (err) | ||
2779 | goto out_clear_inode; | ||
2780 | } | ||
2781 | #endif | ||
2421 | err = ext4_mark_inode_dirty(handle, inode); | 2782 | err = ext4_mark_inode_dirty(handle, inode); |
2422 | if (!err) | 2783 | if (!err) |
2423 | err = ext4_add_entry(handle, dentry, inode); | 2784 | err = ext4_add_entry(handle, dentry, inode); |
@@ -2450,7 +2811,7 @@ out_stop: | |||
2450 | /* | 2811 | /* |
2451 | * routine to check that the specified directory is empty (for rmdir) | 2812 | * routine to check that the specified directory is empty (for rmdir) |
2452 | */ | 2813 | */ |
2453 | static int empty_dir(struct inode *inode) | 2814 | int ext4_empty_dir(struct inode *inode) |
2454 | { | 2815 | { |
2455 | unsigned int offset; | 2816 | unsigned int offset; |
2456 | struct buffer_head *bh; | 2817 | struct buffer_head *bh; |
@@ -2718,7 +3079,7 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry) | |||
2718 | goto end_rmdir; | 3079 | goto end_rmdir; |
2719 | 3080 | ||
2720 | retval = -ENOTEMPTY; | 3081 | retval = -ENOTEMPTY; |
2721 | if (!empty_dir(inode)) | 3082 | if (!ext4_empty_dir(inode)) |
2722 | goto end_rmdir; | 3083 | goto end_rmdir; |
2723 | 3084 | ||
2724 | handle = ext4_journal_start(dir, EXT4_HT_DIR, | 3085 | handle = ext4_journal_start(dir, EXT4_HT_DIR, |
@@ -2828,16 +3189,25 @@ static int ext4_symlink(struct inode *dir, | |||
2828 | { | 3189 | { |
2829 | handle_t *handle; | 3190 | handle_t *handle; |
2830 | struct inode *inode; | 3191 | struct inode *inode; |
2831 | int l, err, retries = 0; | 3192 | int err, len = strlen(symname); |
2832 | int credits; | 3193 | int credits; |
2833 | 3194 | bool encryption_required; | |
2834 | l = strlen(symname)+1; | 3195 | struct ext4_str disk_link; |
2835 | if (l > dir->i_sb->s_blocksize) | 3196 | struct ext4_encrypted_symlink_data *sd = NULL; |
3197 | |||
3198 | disk_link.len = len + 1; | ||
3199 | disk_link.name = (char *) symname; | ||
3200 | |||
3201 | encryption_required = (ext4_encrypted_inode(dir) || | ||
3202 | DUMMY_ENCRYPTION_ENABLED(EXT4_SB(dir->i_sb))); | ||
3203 | if (encryption_required) | ||
3204 | disk_link.len = encrypted_symlink_data_len(len) + 1; | ||
3205 | if (disk_link.len > dir->i_sb->s_blocksize) | ||
2836 | return -ENAMETOOLONG; | 3206 | return -ENAMETOOLONG; |
2837 | 3207 | ||
2838 | dquot_initialize(dir); | 3208 | dquot_initialize(dir); |
2839 | 3209 | ||
2840 | if (l > EXT4_N_BLOCKS * 4) { | 3210 | if ((disk_link.len > EXT4_N_BLOCKS * 4)) { |
2841 | /* | 3211 | /* |
2842 | * For non-fast symlinks, we just allocate inode and put it on | 3212 | * For non-fast symlinks, we just allocate inode and put it on |
2843 | * orphan list in the first transaction => we need bitmap, | 3213 | * orphan list in the first transaction => we need bitmap, |
@@ -2856,16 +3226,49 @@ static int ext4_symlink(struct inode *dir, | |||
2856 | credits = EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + | 3226 | credits = EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + |
2857 | EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3; | 3227 | EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3; |
2858 | } | 3228 | } |
2859 | retry: | 3229 | |
2860 | inode = ext4_new_inode_start_handle(dir, S_IFLNK|S_IRWXUGO, | 3230 | inode = ext4_new_inode_start_handle(dir, S_IFLNK|S_IRWXUGO, |
2861 | &dentry->d_name, 0, NULL, | 3231 | &dentry->d_name, 0, NULL, |
2862 | EXT4_HT_DIR, credits); | 3232 | EXT4_HT_DIR, credits); |
2863 | handle = ext4_journal_current_handle(); | 3233 | handle = ext4_journal_current_handle(); |
2864 | err = PTR_ERR(inode); | 3234 | if (IS_ERR(inode)) { |
2865 | if (IS_ERR(inode)) | 3235 | if (handle) |
2866 | goto out_stop; | 3236 | ext4_journal_stop(handle); |
3237 | return PTR_ERR(inode); | ||
3238 | } | ||
3239 | |||
3240 | if (encryption_required) { | ||
3241 | struct ext4_fname_crypto_ctx *ctx = NULL; | ||
3242 | struct qstr istr; | ||
3243 | struct ext4_str ostr; | ||
3244 | |||
3245 | sd = kzalloc(disk_link.len, GFP_NOFS); | ||
3246 | if (!sd) { | ||
3247 | err = -ENOMEM; | ||
3248 | goto err_drop_inode; | ||
3249 | } | ||
3250 | err = ext4_inherit_context(dir, inode); | ||
3251 | if (err) | ||
3252 | goto err_drop_inode; | ||
3253 | ctx = ext4_get_fname_crypto_ctx(inode, | ||
3254 | inode->i_sb->s_blocksize); | ||
3255 | if (IS_ERR_OR_NULL(ctx)) { | ||
3256 | /* We just set the policy, so ctx should not be NULL */ | ||
3257 | err = (ctx == NULL) ? -EIO : PTR_ERR(ctx); | ||
3258 | goto err_drop_inode; | ||
3259 | } | ||
3260 | istr.name = (const unsigned char *) symname; | ||
3261 | istr.len = len; | ||
3262 | ostr.name = sd->encrypted_path; | ||
3263 | err = ext4_fname_usr_to_disk(ctx, &istr, &ostr); | ||
3264 | ext4_put_fname_crypto_ctx(&ctx); | ||
3265 | if (err < 0) | ||
3266 | goto err_drop_inode; | ||
3267 | sd->len = cpu_to_le16(ostr.len); | ||
3268 | disk_link.name = (char *) sd; | ||
3269 | } | ||
2867 | 3270 | ||
2868 | if (l > EXT4_N_BLOCKS * 4) { | 3271 | if ((disk_link.len > EXT4_N_BLOCKS * 4)) { |
2869 | inode->i_op = &ext4_symlink_inode_operations; | 3272 | inode->i_op = &ext4_symlink_inode_operations; |
2870 | ext4_set_aops(inode); | 3273 | ext4_set_aops(inode); |
2871 | /* | 3274 | /* |
@@ -2881,9 +3284,10 @@ retry: | |||
2881 | drop_nlink(inode); | 3284 | drop_nlink(inode); |
2882 | err = ext4_orphan_add(handle, inode); | 3285 | err = ext4_orphan_add(handle, inode); |
2883 | ext4_journal_stop(handle); | 3286 | ext4_journal_stop(handle); |
3287 | handle = NULL; | ||
2884 | if (err) | 3288 | if (err) |
2885 | goto err_drop_inode; | 3289 | goto err_drop_inode; |
2886 | err = __page_symlink(inode, symname, l, 1); | 3290 | err = __page_symlink(inode, disk_link.name, disk_link.len, 1); |
2887 | if (err) | 3291 | if (err) |
2888 | goto err_drop_inode; | 3292 | goto err_drop_inode; |
2889 | /* | 3293 | /* |
@@ -2895,34 +3299,37 @@ retry: | |||
2895 | EXT4_INDEX_EXTRA_TRANS_BLOCKS + 1); | 3299 | EXT4_INDEX_EXTRA_TRANS_BLOCKS + 1); |
2896 | if (IS_ERR(handle)) { | 3300 | if (IS_ERR(handle)) { |
2897 | err = PTR_ERR(handle); | 3301 | err = PTR_ERR(handle); |
3302 | handle = NULL; | ||
2898 | goto err_drop_inode; | 3303 | goto err_drop_inode; |
2899 | } | 3304 | } |
2900 | set_nlink(inode, 1); | 3305 | set_nlink(inode, 1); |
2901 | err = ext4_orphan_del(handle, inode); | 3306 | err = ext4_orphan_del(handle, inode); |
2902 | if (err) { | 3307 | if (err) |
2903 | ext4_journal_stop(handle); | ||
2904 | clear_nlink(inode); | ||
2905 | goto err_drop_inode; | 3308 | goto err_drop_inode; |
2906 | } | ||
2907 | } else { | 3309 | } else { |
2908 | /* clear the extent format for fast symlink */ | 3310 | /* clear the extent format for fast symlink */ |
2909 | ext4_clear_inode_flag(inode, EXT4_INODE_EXTENTS); | 3311 | ext4_clear_inode_flag(inode, EXT4_INODE_EXTENTS); |
2910 | inode->i_op = &ext4_fast_symlink_inode_operations; | 3312 | inode->i_op = encryption_required ? |
2911 | memcpy((char *)&EXT4_I(inode)->i_data, symname, l); | 3313 | &ext4_symlink_inode_operations : |
2912 | inode->i_size = l-1; | 3314 | &ext4_fast_symlink_inode_operations; |
3315 | memcpy((char *)&EXT4_I(inode)->i_data, disk_link.name, | ||
3316 | disk_link.len); | ||
3317 | inode->i_size = disk_link.len - 1; | ||
2913 | } | 3318 | } |
2914 | EXT4_I(inode)->i_disksize = inode->i_size; | 3319 | EXT4_I(inode)->i_disksize = inode->i_size; |
2915 | err = ext4_add_nondir(handle, dentry, inode); | 3320 | err = ext4_add_nondir(handle, dentry, inode); |
2916 | if (!err && IS_DIRSYNC(dir)) | 3321 | if (!err && IS_DIRSYNC(dir)) |
2917 | ext4_handle_sync(handle); | 3322 | ext4_handle_sync(handle); |
2918 | 3323 | ||
2919 | out_stop: | ||
2920 | if (handle) | 3324 | if (handle) |
2921 | ext4_journal_stop(handle); | 3325 | ext4_journal_stop(handle); |
2922 | if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries)) | 3326 | kfree(sd); |
2923 | goto retry; | ||
2924 | return err; | 3327 | return err; |
2925 | err_drop_inode: | 3328 | err_drop_inode: |
3329 | if (handle) | ||
3330 | ext4_journal_stop(handle); | ||
3331 | kfree(sd); | ||
3332 | clear_nlink(inode); | ||
2926 | unlock_new_inode(inode); | 3333 | unlock_new_inode(inode); |
2927 | iput(inode); | 3334 | iput(inode); |
2928 | return err; | 3335 | return err; |
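For encrypted symlinks the on-disk target is a small header plus ciphertext: the patch fills an ext4_encrypted_symlink_data with the encrypted path and a little-endian length, and disk_link then points at that buffer instead of the plaintext. The sketch below assembles such a payload in userspace; the struct shape mirrors the patch, but the XOR step is purely a placeholder for ext4_fname_usr_to_disk():

#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <stdlib.h>

/* Assumed shape of the on-disk payload: a little-endian length
 * followed by the ciphertext of the target path. */
struct toy_symlink_data {
	uint16_t len;
	char encrypted_path[];
};

int main(void)
{
	const char *target = "some/secret/target";	/* plaintext target  */
	size_t ct_len = strlen(target);			/* pretend 1:1 cipher */

	struct toy_symlink_data *sd = calloc(1, sizeof(*sd) + ct_len + 1);
	if (!sd)
		return 1;
	/* Placeholder for ext4_fname_usr_to_disk(): XOR "encryption". */
	for (size_t i = 0; i < ct_len; i++)
		sd->encrypted_path[i] = target[i] ^ 0x5a;
	sd->len = (uint16_t)ct_len;			/* cpu_to_le16 in ext4 */

	printf("header len %u, payload bytes on disk: %zu\n",
	       (unsigned)sd->len, sizeof(*sd) + ct_len + 1);
	free(sd);
	return 0;
}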
@@ -2937,7 +3344,9 @@ static int ext4_link(struct dentry *old_dentry, | |||
2937 | 3344 | ||
2938 | if (inode->i_nlink >= EXT4_LINK_MAX) | 3345 | if (inode->i_nlink >= EXT4_LINK_MAX) |
2939 | return -EMLINK; | 3346 | return -EMLINK; |
2940 | 3347 | if (ext4_encrypted_inode(dir) && | |
3348 | !ext4_is_child_context_consistent_with_parent(dir, inode)) | ||
3349 | return -EPERM; | ||
2941 | dquot_initialize(dir); | 3350 | dquot_initialize(dir); |
2942 | 3351 | ||
2943 | retry: | 3352 | retry: |
@@ -3238,6 +3647,14 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
3238 | if (!old.bh || le32_to_cpu(old.de->inode) != old.inode->i_ino) | 3647 | if (!old.bh || le32_to_cpu(old.de->inode) != old.inode->i_ino) |
3239 | goto end_rename; | 3648 | goto end_rename; |
3240 | 3649 | ||
3650 | if ((old.dir != new.dir) && | ||
3651 | ext4_encrypted_inode(new.dir) && | ||
3652 | !ext4_is_child_context_consistent_with_parent(new.dir, | ||
3653 | old.inode)) { | ||
3654 | retval = -EPERM; | ||
3655 | goto end_rename; | ||
3656 | } | ||
3657 | |||
3241 | new.bh = ext4_find_entry(new.dir, &new.dentry->d_name, | 3658 | new.bh = ext4_find_entry(new.dir, &new.dentry->d_name, |
3242 | &new.de, &new.inlined); | 3659 | &new.de, &new.inlined); |
3243 | if (IS_ERR(new.bh)) { | 3660 | if (IS_ERR(new.bh)) { |
@@ -3258,12 +3675,18 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
3258 | EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2); | 3675 | EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2); |
3259 | if (!(flags & RENAME_WHITEOUT)) { | 3676 | if (!(flags & RENAME_WHITEOUT)) { |
3260 | handle = ext4_journal_start(old.dir, EXT4_HT_DIR, credits); | 3677 | handle = ext4_journal_start(old.dir, EXT4_HT_DIR, credits); |
3261 | if (IS_ERR(handle)) | 3678 | if (IS_ERR(handle)) { |
3262 | return PTR_ERR(handle); | 3679 | retval = PTR_ERR(handle); |
3680 | handle = NULL; | ||
3681 | goto end_rename; | ||
3682 | } | ||
3263 | } else { | 3683 | } else { |
3264 | whiteout = ext4_whiteout_for_rename(&old, credits, &handle); | 3684 | whiteout = ext4_whiteout_for_rename(&old, credits, &handle); |
3265 | if (IS_ERR(whiteout)) | 3685 | if (IS_ERR(whiteout)) { |
3266 | return PTR_ERR(whiteout); | 3686 | retval = PTR_ERR(whiteout); |
3687 | whiteout = NULL; | ||
3688 | goto end_rename; | ||
3689 | } | ||
3267 | } | 3690 | } |
3268 | 3691 | ||
3269 | if (IS_DIRSYNC(old.dir) || IS_DIRSYNC(new.dir)) | 3692 | if (IS_DIRSYNC(old.dir) || IS_DIRSYNC(new.dir)) |
@@ -3272,7 +3695,7 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
3272 | if (S_ISDIR(old.inode->i_mode)) { | 3695 | if (S_ISDIR(old.inode->i_mode)) { |
3273 | if (new.inode) { | 3696 | if (new.inode) { |
3274 | retval = -ENOTEMPTY; | 3697 | retval = -ENOTEMPTY; |
3275 | if (!empty_dir(new.inode)) | 3698 | if (!ext4_empty_dir(new.inode)) |
3276 | goto end_rename; | 3699 | goto end_rename; |
3277 | } else { | 3700 | } else { |
3278 | retval = -EMLINK; | 3701 | retval = -EMLINK; |
@@ -3346,8 +3769,9 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
3346 | 3769 | ||
3347 | ext4_dec_count(handle, old.dir); | 3770 | ext4_dec_count(handle, old.dir); |
3348 | if (new.inode) { | 3771 | if (new.inode) { |
3349 | /* checked empty_dir above, can't have another parent, | 3772 | /* checked ext4_empty_dir above, can't have another |
3350 | * ext4_dec_count() won't work for many-linked dirs */ | 3773 | * parent, ext4_dec_count() won't work for many-linked |
3774 | * dirs */ | ||
3351 | clear_nlink(new.inode); | 3775 | clear_nlink(new.inode); |
3352 | } else { | 3776 | } else { |
3353 | ext4_inc_count(handle, new.dir); | 3777 | ext4_inc_count(handle, new.dir); |
@@ -3427,8 +3851,11 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
3427 | handle = ext4_journal_start(old.dir, EXT4_HT_DIR, | 3851 | handle = ext4_journal_start(old.dir, EXT4_HT_DIR, |
3428 | (2 * EXT4_DATA_TRANS_BLOCKS(old.dir->i_sb) + | 3852 | (2 * EXT4_DATA_TRANS_BLOCKS(old.dir->i_sb) + |
3429 | 2 * EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2)); | 3853 | 2 * EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2)); |
3430 | if (IS_ERR(handle)) | 3854 | if (IS_ERR(handle)) { |
3431 | return PTR_ERR(handle); | 3855 | retval = PTR_ERR(handle); |
3856 | handle = NULL; | ||
3857 | goto end_rename; | ||
3858 | } | ||
3432 | 3859 | ||
3433 | if (IS_DIRSYNC(old.dir) || IS_DIRSYNC(new.dir)) | 3860 | if (IS_DIRSYNC(old.dir) || IS_DIRSYNC(new.dir)) |
3434 | ext4_handle_sync(handle); | 3861 | ext4_handle_sync(handle); |
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 464984261e69..5765f88b3904 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c | |||
@@ -8,7 +8,6 @@ | |||
8 | 8 | ||
9 | #include <linux/fs.h> | 9 | #include <linux/fs.h> |
10 | #include <linux/time.h> | 10 | #include <linux/time.h> |
11 | #include <linux/jbd2.h> | ||
12 | #include <linux/highuid.h> | 11 | #include <linux/highuid.h> |
13 | #include <linux/pagemap.h> | 12 | #include <linux/pagemap.h> |
14 | #include <linux/quotaops.h> | 13 | #include <linux/quotaops.h> |
@@ -24,7 +23,6 @@ | |||
24 | #include <linux/kernel.h> | 23 | #include <linux/kernel.h> |
25 | #include <linux/slab.h> | 24 | #include <linux/slab.h> |
26 | #include <linux/mm.h> | 25 | #include <linux/mm.h> |
27 | #include <linux/ratelimit.h> | ||
28 | 26 | ||
29 | #include "ext4_jbd2.h" | 27 | #include "ext4_jbd2.h" |
30 | #include "xattr.h" | 28 | #include "xattr.h" |
@@ -68,6 +66,10 @@ static void ext4_finish_bio(struct bio *bio) | |||
68 | 66 | ||
69 | bio_for_each_segment_all(bvec, bio, i) { | 67 | bio_for_each_segment_all(bvec, bio, i) { |
70 | struct page *page = bvec->bv_page; | 68 | struct page *page = bvec->bv_page; |
69 | #ifdef CONFIG_EXT4_FS_ENCRYPTION | ||
70 | struct page *data_page = NULL; | ||
71 | struct ext4_crypto_ctx *ctx = NULL; | ||
72 | #endif | ||
71 | struct buffer_head *bh, *head; | 73 | struct buffer_head *bh, *head; |
72 | unsigned bio_start = bvec->bv_offset; | 74 | unsigned bio_start = bvec->bv_offset; |
73 | unsigned bio_end = bio_start + bvec->bv_len; | 75 | unsigned bio_end = bio_start + bvec->bv_len; |
@@ -77,6 +79,15 @@ static void ext4_finish_bio(struct bio *bio) | |||
77 | if (!page) | 79 | if (!page) |
78 | continue; | 80 | continue; |
79 | 81 | ||
82 | #ifdef CONFIG_EXT4_FS_ENCRYPTION | ||
83 | if (!page->mapping) { | ||
84 | /* The bounce data pages are unmapped. */ | ||
85 | data_page = page; | ||
86 | ctx = (struct ext4_crypto_ctx *)page_private(data_page); | ||
87 | page = ctx->control_page; | ||
88 | } | ||
89 | #endif | ||
90 | |||
80 | if (error) { | 91 | if (error) { |
81 | SetPageError(page); | 92 | SetPageError(page); |
82 | set_bit(AS_EIO, &page->mapping->flags); | 93 | set_bit(AS_EIO, &page->mapping->flags); |
@@ -101,8 +112,13 @@ static void ext4_finish_bio(struct bio *bio) | |||
101 | } while ((bh = bh->b_this_page) != head); | 112 | } while ((bh = bh->b_this_page) != head); |
102 | bit_spin_unlock(BH_Uptodate_Lock, &head->b_state); | 113 | bit_spin_unlock(BH_Uptodate_Lock, &head->b_state); |
103 | local_irq_restore(flags); | 114 | local_irq_restore(flags); |
104 | if (!under_io) | 115 | if (!under_io) { |
116 | #ifdef CONFIG_EXT4_FS_ENCRYPTION | ||
117 | if (ctx) | ||
118 | ext4_restore_control_page(data_page); | ||
119 | #endif | ||
105 | end_page_writeback(page); | 120 | end_page_writeback(page); |
121 | } | ||
106 | } | 122 | } |
107 | } | 123 | } |
108 | 124 | ||
@@ -377,6 +393,7 @@ static int io_submit_init_bio(struct ext4_io_submit *io, | |||
377 | 393 | ||
378 | static int io_submit_add_bh(struct ext4_io_submit *io, | 394 | static int io_submit_add_bh(struct ext4_io_submit *io, |
379 | struct inode *inode, | 395 | struct inode *inode, |
396 | struct page *page, | ||
380 | struct buffer_head *bh) | 397 | struct buffer_head *bh) |
381 | { | 398 | { |
382 | int ret; | 399 | int ret; |
@@ -390,7 +407,7 @@ submit_and_retry: | |||
390 | if (ret) | 407 | if (ret) |
391 | return ret; | 408 | return ret; |
392 | } | 409 | } |
393 | ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh)); | 410 | ret = bio_add_page(io->io_bio, page, bh->b_size, bh_offset(bh)); |
394 | if (ret != bh->b_size) | 411 | if (ret != bh->b_size) |
395 | goto submit_and_retry; | 412 | goto submit_and_retry; |
396 | io->io_next_block++; | 413 | io->io_next_block++; |
@@ -403,6 +420,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io, | |||
403 | struct writeback_control *wbc, | 420 | struct writeback_control *wbc, |
404 | bool keep_towrite) | 421 | bool keep_towrite) |
405 | { | 422 | { |
423 | struct page *data_page = NULL; | ||
406 | struct inode *inode = page->mapping->host; | 424 | struct inode *inode = page->mapping->host; |
407 | unsigned block_start, blocksize; | 425 | unsigned block_start, blocksize; |
408 | struct buffer_head *bh, *head; | 426 | struct buffer_head *bh, *head; |
@@ -462,19 +480,29 @@ int ext4_bio_write_page(struct ext4_io_submit *io, | |||
462 | set_buffer_async_write(bh); | 480 | set_buffer_async_write(bh); |
463 | } while ((bh = bh->b_this_page) != head); | 481 | } while ((bh = bh->b_this_page) != head); |
464 | 482 | ||
465 | /* Now submit buffers to write */ | ||
466 | bh = head = page_buffers(page); | 483 | bh = head = page_buffers(page); |
484 | |||
485 | if (ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode)) { | ||
486 | data_page = ext4_encrypt(inode, page); | ||
487 | if (IS_ERR(data_page)) { | ||
488 | ret = PTR_ERR(data_page); | ||
489 | data_page = NULL; | ||
490 | goto out; | ||
491 | } | ||
492 | } | ||
493 | |||
494 | /* Now submit buffers to write */ | ||
467 | do { | 495 | do { |
468 | if (!buffer_async_write(bh)) | 496 | if (!buffer_async_write(bh)) |
469 | continue; | 497 | continue; |
470 | ret = io_submit_add_bh(io, inode, bh); | 498 | ret = io_submit_add_bh(io, inode, |
499 | data_page ? data_page : page, bh); | ||
471 | if (ret) { | 500 | if (ret) { |
472 | /* | 501 | /* |
473 | * We only get here on ENOMEM. Not much else | 502 | * We only get here on ENOMEM. Not much else |
474 | * we can do but mark the page as dirty, and | 503 | * we can do but mark the page as dirty, and |
475 | * better luck next time. | 504 | * better luck next time. |
476 | */ | 505 | */ |
477 | redirty_page_for_writepage(wbc, page); | ||
478 | break; | 506 | break; |
479 | } | 507 | } |
480 | nr_submitted++; | 508 | nr_submitted++; |
@@ -483,6 +511,11 @@ int ext4_bio_write_page(struct ext4_io_submit *io, | |||
483 | 511 | ||
484 | /* Error stopped previous loop? Clean up buffers... */ | 512 | /* Error stopped previous loop? Clean up buffers... */ |
485 | if (ret) { | 513 | if (ret) { |
514 | out: | ||
515 | if (data_page) | ||
516 | ext4_restore_control_page(data_page); | ||
517 | printk_ratelimited(KERN_ERR "%s: ret = %d\n", __func__, ret); | ||
518 | redirty_page_for_writepage(wbc, page); | ||
486 | do { | 519 | do { |
487 | clear_buffer_async_write(bh); | 520 | clear_buffer_async_write(bh); |
488 | bh = bh->b_this_page; | 521 | bh = bh->b_this_page; |
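On the write side, ext4_bio_write_page() now encrypts each regular-file page into a bounce page and submits that page in the bio. ext4_finish_bio() recognizes an unmapped bounce page, follows page_private to the crypto context, and completes writeback on ctx->control_page, the original page. A toy model of that indirection, with plain structs standing in for struct page and ext4_crypto_ctx:

#include <stdio.h>

struct toy_page { char data[16]; void *private; };
struct toy_ctx  { struct toy_page *control_page; };

/* Stand-in for ext4_encrypt(): ciphertext goes to a bounce page
 * whose ->private points at a ctx remembering the original page. */
static struct toy_page *encrypt(struct toy_page *plain,
				struct toy_page *bounce,
				struct toy_ctx *ctx)
{
	for (int i = 0; i < 16; i++)
		bounce->data[i] = plain->data[i] ^ 0x5a;
	ctx->control_page = plain;
	bounce->private = ctx;
	return bounce;
}

int main(void)
{
	struct toy_page plain = { "hello, ext4!!!!" }, bounce;
	struct toy_ctx ctx;

	struct toy_page *io_page = encrypt(&plain, &bounce, &ctx);
	/* ... the bio would carry io_page; on completion: ... */
	struct toy_ctx *c = io_page->private;
	printf("writeback ends on original page: %s\n",
	       c->control_page == &plain ? "yes" : "no");
	return 0;
}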
diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c new file mode 100644 index 000000000000..171b9ac4b45e --- /dev/null +++ b/fs/ext4/readpage.c | |||
@@ -0,0 +1,328 @@ | |||
1 | /* | ||
2 | * linux/fs/ext4/readpage.c | ||
3 | * | ||
4 | * Copyright (C) 2002, Linus Torvalds. | ||
5 | * Copyright (C) 2015, Google, Inc. | ||
6 | * | ||
7 | * This was originally taken from fs/mpage.c | ||
8 | * | ||
9 | * The ext4_mpage_readpages() function here is intended to replace | ||
10 | * mpage_readpages() in the general case, not just for encrypted | ||
11 | * files. It has some limitations (see below), where it will fall | ||
12 | * back to block_read_full_page(), but these limitations should | ||
13 | * only be hit when page_size != block_size. | ||
14 | * | ||
15 | * This will allow us to attach a callback function to support ext4 | ||
16 | * encryption. | ||
17 | * | ||
18 | * If anything unusual happens, such as: | ||
19 | * | ||
20 | * - encountering a page which has buffers | ||
21 | * - encountering a page which has a non-hole after a hole | ||
22 | * - encountering a page with non-contiguous blocks | ||
23 | * | ||
24 | * then this code just gives up and calls the buffer_head-based read function. | ||
25 | * It does handle a page which has holes at the end - that is a common case: | ||
26 | * the end-of-file on blocksize < PAGE_CACHE_SIZE setups. | ||
27 | * | ||
28 | */ | ||
29 | |||
30 | #include <linux/kernel.h> | ||
31 | #include <linux/export.h> | ||
32 | #include <linux/mm.h> | ||
33 | #include <linux/kdev_t.h> | ||
34 | #include <linux/gfp.h> | ||
35 | #include <linux/bio.h> | ||
36 | #include <linux/fs.h> | ||
37 | #include <linux/buffer_head.h> | ||
38 | #include <linux/blkdev.h> | ||
39 | #include <linux/highmem.h> | ||
40 | #include <linux/prefetch.h> | ||
41 | #include <linux/mpage.h> | ||
42 | #include <linux/writeback.h> | ||
43 | #include <linux/backing-dev.h> | ||
44 | #include <linux/pagevec.h> | ||
45 | #include <linux/cleancache.h> | ||
46 | |||
47 | #include "ext4.h" | ||
48 | |||
49 | /* | ||
50 | * Call ext4_decrypt on every single page, reusing the encryption | ||
51 | * context. | ||
52 | */ | ||
53 | static void completion_pages(struct work_struct *work) | ||
54 | { | ||
55 | #ifdef CONFIG_EXT4_FS_ENCRYPTION | ||
56 | struct ext4_crypto_ctx *ctx = | ||
57 | container_of(work, struct ext4_crypto_ctx, work); | ||
58 | struct bio *bio = ctx->bio; | ||
59 | struct bio_vec *bv; | ||
60 | int i; | ||
61 | |||
62 | bio_for_each_segment_all(bv, bio, i) { | ||
63 | struct page *page = bv->bv_page; | ||
64 | |||
65 | int ret = ext4_decrypt(ctx, page); | ||
66 | if (ret) { | ||
67 | WARN_ON_ONCE(1); | ||
68 | SetPageError(page); | ||
69 | } else | ||
70 | SetPageUptodate(page); | ||
71 | unlock_page(page); | ||
72 | } | ||
73 | ext4_release_crypto_ctx(ctx); | ||
74 | bio_put(bio); | ||
75 | #else | ||
76 | BUG(); | ||
77 | #endif | ||
78 | } | ||
79 | |||
80 | static inline bool ext4_bio_encrypted(struct bio *bio) | ||
81 | { | ||
82 | #ifdef CONFIG_EXT4_FS_ENCRYPTION | ||
83 | return unlikely(bio->bi_private != NULL); | ||
84 | #else | ||
85 | return false; | ||
86 | #endif | ||
87 | } | ||
88 | |||
89 | /* | ||
90 | * I/O completion handler for multipage BIOs. | ||
91 | * | ||
92 | * The mpage code never puts partial pages into a BIO (except for end-of-file). | ||
93 | * If a page does not map to a contiguous run of blocks then it simply falls | ||
94 | * back to block_read_full_page(). | ||
95 | * | ||
96 | * Why is this? If a page's completion depends on a number of different BIOs | ||
97 | * which can complete in any order (or at the same time) then determining the | ||
98 | * status of that page is hard. See end_buffer_async_read() for the details. | ||
99 | * There is no point in duplicating all that complexity. | ||
100 | */ | ||
101 | static void mpage_end_io(struct bio *bio, int err) | ||
102 | { | ||
103 | struct bio_vec *bv; | ||
104 | int i; | ||
105 | |||
106 | if (ext4_bio_encrypted(bio)) { | ||
107 | struct ext4_crypto_ctx *ctx = bio->bi_private; | ||
108 | |||
109 | if (err) { | ||
110 | ext4_release_crypto_ctx(ctx); | ||
111 | } else { | ||
112 | INIT_WORK(&ctx->work, completion_pages); | ||
113 | ctx->bio = bio; | ||
114 | queue_work(ext4_read_workqueue, &ctx->work); | ||
115 | return; | ||
116 | } | ||
117 | } | ||
118 | bio_for_each_segment_all(bv, bio, i) { | ||
119 | struct page *page = bv->bv_page; | ||
120 | |||
121 | if (!err) { | ||
122 | SetPageUptodate(page); | ||
123 | } else { | ||
124 | ClearPageUptodate(page); | ||
125 | SetPageError(page); | ||
126 | } | ||
127 | unlock_page(page); | ||
128 | } | ||
129 | |||
130 | bio_put(bio); | ||
131 | } | ||
132 | |||
133 | int ext4_mpage_readpages(struct address_space *mapping, | ||
134 | struct list_head *pages, struct page *page, | ||
135 | unsigned nr_pages) | ||
136 | { | ||
137 | struct bio *bio = NULL; | ||
138 | unsigned page_idx; | ||
139 | sector_t last_block_in_bio = 0; | ||
140 | |||
141 | struct inode *inode = mapping->host; | ||
142 | const unsigned blkbits = inode->i_blkbits; | ||
143 | const unsigned blocks_per_page = PAGE_CACHE_SIZE >> blkbits; | ||
144 | const unsigned blocksize = 1 << blkbits; | ||
145 | sector_t block_in_file; | ||
146 | sector_t last_block; | ||
147 | sector_t last_block_in_file; | ||
148 | sector_t blocks[MAX_BUF_PER_PAGE]; | ||
149 | unsigned page_block; | ||
150 | struct block_device *bdev = inode->i_sb->s_bdev; | ||
151 | int length; | ||
152 | unsigned relative_block = 0; | ||
153 | struct ext4_map_blocks map; | ||
154 | |||
155 | map.m_pblk = 0; | ||
156 | map.m_lblk = 0; | ||
157 | map.m_len = 0; | ||
158 | map.m_flags = 0; | ||
159 | |||
160 | for (page_idx = 0; nr_pages; page_idx++, nr_pages--) { | ||
161 | int fully_mapped = 1; | ||
162 | unsigned first_hole = blocks_per_page; | ||
163 | |||
164 | prefetchw(&page->flags); | ||
165 | if (pages) { | ||
166 | page = list_entry(pages->prev, struct page, lru); | ||
167 | list_del(&page->lru); | ||
168 | if (add_to_page_cache_lru(page, mapping, | ||
169 | page->index, GFP_KERNEL)) | ||
170 | goto next_page; | ||
171 | } | ||
172 | |||
173 | if (page_has_buffers(page)) | ||
174 | goto confused; | ||
175 | |||
176 | block_in_file = (sector_t)page->index << (PAGE_CACHE_SHIFT - blkbits); | ||
177 | last_block = block_in_file + nr_pages * blocks_per_page; | ||
178 | last_block_in_file = (i_size_read(inode) + blocksize - 1) >> blkbits; | ||
179 | if (last_block > last_block_in_file) | ||
180 | last_block = last_block_in_file; | ||
181 | page_block = 0; | ||
182 | |||
183 | /* | ||
184 | * Map blocks using the previous result first. | ||
185 | */ | ||
186 | if ((map.m_flags & EXT4_MAP_MAPPED) && | ||
187 | block_in_file > map.m_lblk && | ||
188 | block_in_file < (map.m_lblk + map.m_len)) { | ||
189 | unsigned map_offset = block_in_file - map.m_lblk; | ||
190 | unsigned last = map.m_len - map_offset; | ||
191 | |||
192 | for (relative_block = 0; ; relative_block++) { | ||
193 | if (relative_block == last) { | ||
194 | /* needed? */ | ||
195 | map.m_flags &= ~EXT4_MAP_MAPPED; | ||
196 | break; | ||
197 | } | ||
198 | if (page_block == blocks_per_page) | ||
199 | break; | ||
200 | blocks[page_block] = map.m_pblk + map_offset + | ||
201 | relative_block; | ||
202 | page_block++; | ||
203 | block_in_file++; | ||
204 | } | ||
205 | } | ||
206 | |||
207 | /* | ||
208 | * Then do more ext4_map_blocks() calls until we are | ||
209 | * done with this page. | ||
210 | */ | ||
211 | while (page_block < blocks_per_page) { | ||
212 | if (block_in_file < last_block) { | ||
213 | map.m_lblk = block_in_file; | ||
214 | map.m_len = last_block - block_in_file; | ||
215 | |||
216 | if (ext4_map_blocks(NULL, inode, &map, 0) < 0) { | ||
217 | set_error_page: | ||
218 | SetPageError(page); | ||
219 | zero_user_segment(page, 0, | ||
220 | PAGE_CACHE_SIZE); | ||
221 | unlock_page(page); | ||
222 | goto next_page; | ||
223 | } | ||
224 | } | ||
225 | if ((map.m_flags & EXT4_MAP_MAPPED) == 0) { | ||
226 | fully_mapped = 0; | ||
227 | if (first_hole == blocks_per_page) | ||
228 | first_hole = page_block; | ||
229 | page_block++; | ||
230 | block_in_file++; | ||
231 | continue; | ||
232 | } | ||
233 | if (first_hole != blocks_per_page) | ||
234 | goto confused; /* hole -> non-hole */ | ||
235 | |||
236 | /* Contiguous blocks? */ | ||
237 | if (page_block && blocks[page_block-1] != map.m_pblk-1) | ||
238 | goto confused; | ||
239 | for (relative_block = 0; ; relative_block++) { | ||
240 | if (relative_block == map.m_len) { | ||
241 | /* needed? */ | ||
242 | map.m_flags &= ~EXT4_MAP_MAPPED; | ||
243 | break; | ||
244 | } else if (page_block == blocks_per_page) | ||
245 | break; | ||
246 | blocks[page_block] = map.m_pblk+relative_block; | ||
247 | page_block++; | ||
248 | block_in_file++; | ||
249 | } | ||
250 | } | ||
251 | if (first_hole != blocks_per_page) { | ||
252 | zero_user_segment(page, first_hole << blkbits, | ||
253 | PAGE_CACHE_SIZE); | ||
254 | if (first_hole == 0) { | ||
255 | SetPageUptodate(page); | ||
256 | unlock_page(page); | ||
257 | goto next_page; | ||
258 | } | ||
259 | } else if (fully_mapped) { | ||
260 | SetPageMappedToDisk(page); | ||
261 | } | ||
262 | if (fully_mapped && blocks_per_page == 1 && | ||
263 | !PageUptodate(page) && cleancache_get_page(page) == 0) { | ||
264 | SetPageUptodate(page); | ||
265 | goto confused; | ||
266 | } | ||
267 | |||
268 | /* | ||
269 | * This page will go to BIO. Do we need to send this | ||
270 | * BIO off first? | ||
271 | */ | ||
272 | if (bio && (last_block_in_bio != blocks[0] - 1)) { | ||
273 | submit_and_realloc: | ||
274 | submit_bio(READ, bio); | ||
275 | bio = NULL; | ||
276 | } | ||
277 | if (bio == NULL) { | ||
278 | struct ext4_crypto_ctx *ctx = NULL; | ||
279 | |||
280 | if (ext4_encrypted_inode(inode) && | ||
281 | S_ISREG(inode->i_mode)) { | ||
282 | ctx = ext4_get_crypto_ctx(inode); | ||
283 | if (IS_ERR(ctx)) | ||
284 | goto set_error_page; | ||
285 | } | ||
286 | bio = bio_alloc(GFP_KERNEL, | ||
287 | min_t(int, nr_pages, bio_get_nr_vecs(bdev))); | ||
288 | if (!bio) { | ||
289 | if (ctx) | ||
290 | ext4_release_crypto_ctx(ctx); | ||
291 | goto set_error_page; | ||
292 | } | ||
293 | bio->bi_bdev = bdev; | ||
294 | bio->bi_iter.bi_sector = blocks[0] << (blkbits - 9); | ||
295 | bio->bi_end_io = mpage_end_io; | ||
296 | bio->bi_private = ctx; | ||
297 | } | ||
298 | |||
299 | length = first_hole << blkbits; | ||
300 | if (bio_add_page(bio, page, length, 0) < length) | ||
301 | goto submit_and_realloc; | ||
302 | |||
303 | if (((map.m_flags & EXT4_MAP_BOUNDARY) && | ||
304 | (relative_block == map.m_len)) || | ||
305 | (first_hole != blocks_per_page)) { | ||
306 | submit_bio(READ, bio); | ||
307 | bio = NULL; | ||
308 | } else | ||
309 | last_block_in_bio = blocks[blocks_per_page - 1]; | ||
310 | goto next_page; | ||
311 | confused: | ||
312 | if (bio) { | ||
313 | submit_bio(READ, bio); | ||
314 | bio = NULL; | ||
315 | } | ||
316 | if (!PageUptodate(page)) | ||
317 | block_read_full_page(page, ext4_get_block); | ||
318 | else | ||
319 | unlock_page(page); | ||
320 | next_page: | ||
321 | if (pages) | ||
322 | page_cache_release(page); | ||
323 | } | ||
324 | BUG_ON(pages && !list_empty(pages)); | ||
325 | if (bio) | ||
326 | submit_bio(READ, bio); | ||
327 | return 0; | ||
328 | } | ||
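The loop above batches physically contiguous blocks for each page into a single BIO, zero-fills a trailing hole, and bails out to the buffer_head path the moment the mapping stops being contiguous. Below is a minimal user-space sketch of that per-page decision; all names are hypothetical and a hole is encoded as -1, whereas the real code works on struct page and ext4_map_blocks() results.

    #include <stdio.h>
    #include <stdbool.h>

    #define BLOCKS_PER_PAGE 4

    /* True if the page maps to one contiguous run of disk blocks with no
     * holes, i.e. it can be added to a single multi-block read BIO. */
    static bool page_is_contiguous(const long blkaddr[BLOCKS_PER_PAGE])
    {
        for (int i = 0; i < BLOCKS_PER_PAGE; i++) {
            if (blkaddr[i] < 0)
                return false;   /* hole: zero-fill instead ("first_hole") */
            if (i && blkaddr[i] != blkaddr[i - 1] + 1)
                return false;   /* discontiguous: the "confused" fallback */
        }
        return true;
    }

    int main(void)
    {
        long ok[]   = { 100, 101, 102, 103 };
        long gap[]  = { 100, 101, 200, 201 };
        long hole[] = { 100, 101, -1, -1 };

        printf("%d %d %d\n", page_is_contiguous(ok),
               page_is_contiguous(gap), page_is_contiguous(hole));
        return 0;
    }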
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index d348c7d29d80..821f22dbe825 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
@@ -21,7 +21,6 @@ | |||
21 | #include <linux/fs.h> | 21 | #include <linux/fs.h> |
22 | #include <linux/time.h> | 22 | #include <linux/time.h> |
23 | #include <linux/vmalloc.h> | 23 | #include <linux/vmalloc.h> |
24 | #include <linux/jbd2.h> | ||
25 | #include <linux/slab.h> | 24 | #include <linux/slab.h> |
26 | #include <linux/init.h> | 25 | #include <linux/init.h> |
27 | #include <linux/blkdev.h> | 26 | #include <linux/blkdev.h> |
@@ -323,22 +322,6 @@ static void save_error_info(struct super_block *sb, const char *func, | |||
323 | ext4_commit_super(sb, 1); | 322 | ext4_commit_super(sb, 1); |
324 | } | 323 | } |
325 | 324 | ||
326 | /* | ||
327 | * The del_gendisk() function uninitializes the disk-specific data | ||
328 | * structures, including the bdi structure, without telling anyone | ||
329 | * else. Once this happens, any attempt to call mark_buffer_dirty() | ||
330 | * (for example, by ext4_commit_super), will cause a kernel OOPS. | ||
331 | * This is a kludge to prevent these oops until we can put in a proper | ||
332 | * hook in del_gendisk() to inform the VFS and file system layers. | ||
333 | */ | ||
334 | static int block_device_ejected(struct super_block *sb) | ||
335 | { | ||
336 | struct inode *bd_inode = sb->s_bdev->bd_inode; | ||
337 | struct backing_dev_info *bdi = inode_to_bdi(bd_inode); | ||
338 | |||
339 | return bdi->dev == NULL; | ||
340 | } | ||
341 | |||
342 | static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn) | 325 | static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn) |
343 | { | 326 | { |
344 | struct super_block *sb = journal->j_private; | 327 | struct super_block *sb = journal->j_private; |
@@ -893,6 +876,9 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) | |||
893 | atomic_set(&ei->i_ioend_count, 0); | 876 | atomic_set(&ei->i_ioend_count, 0); |
894 | atomic_set(&ei->i_unwritten, 0); | 877 | atomic_set(&ei->i_unwritten, 0); |
895 | INIT_WORK(&ei->i_rsv_conversion_work, ext4_end_io_rsv_work); | 878 | INIT_WORK(&ei->i_rsv_conversion_work, ext4_end_io_rsv_work); |
879 | #ifdef CONFIG_EXT4_FS_ENCRYPTION | ||
880 | ei->i_encryption_key.mode = EXT4_ENCRYPTION_MODE_INVALID; | ||
881 | #endif | ||
896 | 882 | ||
897 | return &ei->vfs_inode; | 883 | return &ei->vfs_inode; |
898 | } | 884 | } |
@@ -1120,7 +1106,7 @@ enum { | |||
1120 | Opt_commit, Opt_min_batch_time, Opt_max_batch_time, Opt_journal_dev, | 1106 | Opt_commit, Opt_min_batch_time, Opt_max_batch_time, Opt_journal_dev, |
1121 | Opt_journal_path, Opt_journal_checksum, Opt_journal_async_commit, | 1107 | Opt_journal_path, Opt_journal_checksum, Opt_journal_async_commit, |
1122 | Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, | 1108 | Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, |
1123 | Opt_data_err_abort, Opt_data_err_ignore, | 1109 | Opt_data_err_abort, Opt_data_err_ignore, Opt_test_dummy_encryption, |
1124 | Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, | 1110 | Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, |
1125 | Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota, | 1111 | Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota, |
1126 | Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err, | 1112 | Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err, |
@@ -1211,6 +1197,7 @@ static const match_table_t tokens = { | |||
1211 | {Opt_init_itable, "init_itable"}, | 1197 | {Opt_init_itable, "init_itable"}, |
1212 | {Opt_noinit_itable, "noinit_itable"}, | 1198 | {Opt_noinit_itable, "noinit_itable"}, |
1213 | {Opt_max_dir_size_kb, "max_dir_size_kb=%u"}, | 1199 | {Opt_max_dir_size_kb, "max_dir_size_kb=%u"}, |
1200 | {Opt_test_dummy_encryption, "test_dummy_encryption"}, | ||
1214 | {Opt_removed, "check=none"}, /* mount option from ext2/3 */ | 1201 | {Opt_removed, "check=none"}, /* mount option from ext2/3 */ |
1215 | {Opt_removed, "nocheck"}, /* mount option from ext2/3 */ | 1202 | {Opt_removed, "nocheck"}, /* mount option from ext2/3 */ |
1216 | {Opt_removed, "reservation"}, /* mount option from ext2/3 */ | 1203 | {Opt_removed, "reservation"}, /* mount option from ext2/3 */ |
@@ -1412,6 +1399,7 @@ static const struct mount_opts { | |||
1412 | {Opt_jqfmt_vfsv0, QFMT_VFS_V0, MOPT_QFMT}, | 1399 | {Opt_jqfmt_vfsv0, QFMT_VFS_V0, MOPT_QFMT}, |
1413 | {Opt_jqfmt_vfsv1, QFMT_VFS_V1, MOPT_QFMT}, | 1400 | {Opt_jqfmt_vfsv1, QFMT_VFS_V1, MOPT_QFMT}, |
1414 | {Opt_max_dir_size_kb, 0, MOPT_GTE0}, | 1401 | {Opt_max_dir_size_kb, 0, MOPT_GTE0}, |
1402 | {Opt_test_dummy_encryption, 0, MOPT_GTE0}, | ||
1415 | {Opt_err, 0, 0} | 1403 | {Opt_err, 0, 0} |
1416 | }; | 1404 | }; |
1417 | 1405 | ||
@@ -1588,6 +1576,15 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token, | |||
1588 | } | 1576 | } |
1589 | *journal_ioprio = | 1577 | *journal_ioprio = |
1590 | IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, arg); | 1578 | IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, arg); |
1579 | } else if (token == Opt_test_dummy_encryption) { | ||
1580 | #ifdef CONFIG_EXT4_FS_ENCRYPTION | ||
1581 | sbi->s_mount_flags |= EXT4_MF_TEST_DUMMY_ENCRYPTION; | ||
1582 | ext4_msg(sb, KERN_WARNING, | ||
1583 | "Test dummy encryption mode enabled"); | ||
1584 | #else | ||
1585 | ext4_msg(sb, KERN_WARNING, | ||
1586 | "Test dummy encryption mount option ignored"); | ||
1587 | #endif | ||
1591 | } else if (m->flags & MOPT_DATAJ) { | 1588 | } else if (m->flags & MOPT_DATAJ) { |
1592 | if (is_remount) { | 1589 | if (is_remount) { |
1593 | if (!sbi->s_journal) | 1590 | if (!sbi->s_journal) |
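The new test_dummy_encryption option rides the same table-driven parser as every other mount option: the option string is split on commas and each token is matched against a pattern table. A hedged user-space sketch of that matching step follows, with strcmp() standing in for the kernel's match_token() and all names hypothetical.

    #include <stdio.h>
    #include <string.h>

    enum { Opt_test_dummy_encryption, Opt_err };

    static const struct { int token; const char *pattern; } tokens[] = {
        { Opt_test_dummy_encryption, "test_dummy_encryption" },
        { Opt_err, NULL },      /* table terminator */
    };

    static int match_token_simple(const char *opt)
    {
        for (int i = 0; tokens[i].pattern; i++)
            if (!strcmp(opt, tokens[i].pattern))
                return tokens[i].token;
        return Opt_err;
    }

    int main(void)
    {
        char opts[] = "noatime,test_dummy_encryption";

        for (char *p = strtok(opts, ","); p; p = strtok(NULL, ","))
            printf("%s -> %d\n", p, match_token_simple(p));
        return 0;
    }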
@@ -2685,11 +2682,13 @@ static struct attribute *ext4_attrs[] = { | |||
2685 | EXT4_INFO_ATTR(lazy_itable_init); | 2682 | EXT4_INFO_ATTR(lazy_itable_init); |
2686 | EXT4_INFO_ATTR(batched_discard); | 2683 | EXT4_INFO_ATTR(batched_discard); |
2687 | EXT4_INFO_ATTR(meta_bg_resize); | 2684 | EXT4_INFO_ATTR(meta_bg_resize); |
2685 | EXT4_INFO_ATTR(encryption); | ||
2688 | 2686 | ||
2689 | static struct attribute *ext4_feat_attrs[] = { | 2687 | static struct attribute *ext4_feat_attrs[] = { |
2690 | ATTR_LIST(lazy_itable_init), | 2688 | ATTR_LIST(lazy_itable_init), |
2691 | ATTR_LIST(batched_discard), | 2689 | ATTR_LIST(batched_discard), |
2692 | ATTR_LIST(meta_bg_resize), | 2690 | ATTR_LIST(meta_bg_resize), |
2691 | ATTR_LIST(encryption), | ||
2693 | NULL, | 2692 | NULL, |
2694 | }; | 2693 | }; |
2695 | 2694 | ||
@@ -3448,6 +3447,11 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
3448 | if (sb->s_bdev->bd_part) | 3447 | if (sb->s_bdev->bd_part) |
3449 | sbi->s_sectors_written_start = | 3448 | sbi->s_sectors_written_start = |
3450 | part_stat_read(sb->s_bdev->bd_part, sectors[1]); | 3449 | part_stat_read(sb->s_bdev->bd_part, sectors[1]); |
3450 | #ifdef CONFIG_EXT4_FS_ENCRYPTION | ||
3451 | /* Modes of operation for file and directory encryption. */ | ||
3452 | sbi->s_file_encryption_mode = EXT4_ENCRYPTION_MODE_AES_256_XTS; | ||
3453 | sbi->s_dir_encryption_mode = EXT4_ENCRYPTION_MODE_INVALID; | ||
3454 | #endif | ||
3451 | 3455 | ||
3452 | /* Cleanup superblock name */ | 3456 | /* Cleanup superblock name */ |
3453 | for (cp = sb->s_id; (cp = strchr(cp, '/'));) | 3457 | for (cp = sb->s_id; (cp = strchr(cp, '/'));) |
@@ -3692,6 +3696,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
3692 | } | 3696 | } |
3693 | } | 3697 | } |
3694 | 3698 | ||
3699 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_ENCRYPT) && | ||
3700 | es->s_encryption_level) { | ||
3701 | ext4_msg(sb, KERN_ERR, "Unsupported encryption level %d", | ||
3702 | es->s_encryption_level); | ||
3703 | goto failed_mount; | ||
3704 | } | ||
3705 | |||
3695 | if (sb->s_blocksize != blocksize) { | 3706 | if (sb->s_blocksize != blocksize) { |
3696 | /* Validate the filesystem blocksize */ | 3707 | /* Validate the filesystem blocksize */ |
3697 | if (!sb_set_blocksize(sb, blocksize)) { | 3708 | if (!sb_set_blocksize(sb, blocksize)) { |
@@ -4054,6 +4065,13 @@ no_journal: | |||
4054 | } | 4065 | } |
4055 | } | 4066 | } |
4056 | 4067 | ||
4068 | if (unlikely(sbi->s_mount_flags & EXT4_MF_TEST_DUMMY_ENCRYPTION) && | ||
4069 | !(sb->s_flags & MS_RDONLY) && | ||
4070 | !EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_ENCRYPT)) { | ||
4071 | EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_ENCRYPT); | ||
4072 | ext4_commit_super(sb, 1); | ||
4073 | } | ||
4074 | |||
4057 | /* | 4075 | /* |
4058 | * Get the # of file system overhead blocks from the | 4076 | * Get the # of file system overhead blocks from the |
4059 | * superblock if present. | 4077 | * superblock if present. |
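Enabling dummy encryption on a read-write mount flips the INCOMPAT_ENCRYPT feature bit in the superblock and commits it, so a later mount on a kernel that does not know the feature will refuse the filesystem. A small sketch of that test-and-set pattern; the struct and bit value here are illustrative, not the on-disk layout.

    #include <stdio.h>
    #include <stdint.h>

    #define FEATURE_INCOMPAT_ENCRYPT 0x10000u  /* illustrative bit value */

    struct fake_super { uint32_t s_feature_incompat; };

    int main(void)
    {
        struct fake_super es = { .s_feature_incompat = 0 };

        if (!(es.s_feature_incompat & FEATURE_INCOMPAT_ENCRYPT)) {
            es.s_feature_incompat |= FEATURE_INCOMPAT_ENCRYPT;
            /* ...the kernel then commits the superblock to disk */
        }
        printf("incompat = %#x\n", (unsigned)es.s_feature_incompat);
        return 0;
    }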
@@ -4570,7 +4588,7 @@ static int ext4_commit_super(struct super_block *sb, int sync) | |||
4570 | struct buffer_head *sbh = EXT4_SB(sb)->s_sbh; | 4588 | struct buffer_head *sbh = EXT4_SB(sb)->s_sbh; |
4571 | int error = 0; | 4589 | int error = 0; |
4572 | 4590 | ||
4573 | if (!sbh || block_device_ejected(sb)) | 4591 | if (!sbh) |
4574 | return error; | 4592 | return error; |
4575 | if (buffer_write_io_error(sbh)) { | 4593 | if (buffer_write_io_error(sbh)) { |
4576 | /* | 4594 | /* |
diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c index ff3711932018..136ca0e911fd 100644 --- a/fs/ext4/symlink.c +++ b/fs/ext4/symlink.c | |||
@@ -18,13 +18,101 @@ | |||
18 | */ | 18 | */ |
19 | 19 | ||
20 | #include <linux/fs.h> | 20 | #include <linux/fs.h> |
21 | #include <linux/jbd2.h> | ||
22 | #include <linux/namei.h> | 21 | #include <linux/namei.h> |
23 | #include "ext4.h" | 22 | #include "ext4.h" |
24 | #include "xattr.h" | 23 | #include "xattr.h" |
25 | 24 | ||
25 | #ifdef CONFIG_EXT4_FS_ENCRYPTION | ||
26 | static void *ext4_follow_link(struct dentry *dentry, struct nameidata *nd) | 26 | static void *ext4_follow_link(struct dentry *dentry, struct nameidata *nd) |
27 | { | 27 | { |
28 | struct page *cpage = NULL; | ||
29 | char *caddr, *paddr = NULL; | ||
30 | struct ext4_str cstr, pstr; | ||
31 | struct inode *inode = dentry->d_inode; | ||
32 | struct ext4_fname_crypto_ctx *ctx = NULL; | ||
33 | struct ext4_encrypted_symlink_data *sd; | ||
34 | loff_t size = min_t(loff_t, i_size_read(inode), PAGE_SIZE - 1); | ||
35 | int res; | ||
36 | u32 plen, max_size = inode->i_sb->s_blocksize; | ||
37 | |||
38 | if (!ext4_encrypted_inode(inode)) | ||
39 | return page_follow_link_light(dentry, nd); | ||
40 | |||
41 | ctx = ext4_get_fname_crypto_ctx(inode, inode->i_sb->s_blocksize); | ||
42 | if (IS_ERR(ctx)) | ||
43 | return ctx; | ||
44 | |||
45 | if (ext4_inode_is_fast_symlink(inode)) { | ||
46 | caddr = (char *) EXT4_I(dentry->d_inode)->i_data; | ||
47 | max_size = sizeof(EXT4_I(dentry->d_inode)->i_data); | ||
48 | } else { | ||
49 | cpage = read_mapping_page(inode->i_mapping, 0, NULL); | ||
50 | if (IS_ERR(cpage)) { | ||
51 | ext4_put_fname_crypto_ctx(&ctx); | ||
52 | return cpage; | ||
53 | } | ||
54 | caddr = kmap(cpage); | ||
55 | caddr[size] = 0; | ||
56 | } | ||
57 | |||
58 | /* Symlink is encrypted */ | ||
59 | sd = (struct ext4_encrypted_symlink_data *)caddr; | ||
60 | cstr.name = sd->encrypted_path; | ||
61 | cstr.len = le32_to_cpu(sd->len); | ||
62 | if ((cstr.len + | ||
63 | sizeof(struct ext4_encrypted_symlink_data) - 1) > | ||
64 | max_size) { | ||
65 | /* Symlink data on the disk is corrupted */ | ||
66 | res = -EIO; | ||
67 | goto errout; | ||
68 | } | ||
69 | plen = (cstr.len < EXT4_FNAME_CRYPTO_DIGEST_SIZE*2) ? | ||
70 | EXT4_FNAME_CRYPTO_DIGEST_SIZE*2 : cstr.len; | ||
71 | paddr = kmalloc(plen + 1, GFP_NOFS); | ||
72 | if (!paddr) { | ||
73 | res = -ENOMEM; | ||
74 | goto errout; | ||
75 | } | ||
76 | pstr.name = paddr; | ||
77 | res = _ext4_fname_disk_to_usr(ctx, &cstr, &pstr); | ||
78 | if (res < 0) | ||
79 | goto errout; | ||
80 | /* Null-terminate the name */ | ||
81 | if (res <= plen) | ||
82 | paddr[res] = '\0'; | ||
83 | nd_set_link(nd, paddr); | ||
84 | ext4_put_fname_crypto_ctx(&ctx); | ||
85 | if (cpage) { | ||
86 | kunmap(cpage); | ||
87 | page_cache_release(cpage); | ||
88 | } | ||
89 | return NULL; | ||
90 | errout: | ||
91 | ext4_put_fname_crypto_ctx(&ctx); | ||
92 | if (cpage) { | ||
93 | kunmap(cpage); | ||
94 | page_cache_release(cpage); | ||
95 | } | ||
96 | kfree(paddr); | ||
97 | return ERR_PTR(res); | ||
98 | } | ||
99 | |||
100 | static void ext4_put_link(struct dentry *dentry, struct nameidata *nd, | ||
101 | void *cookie) | ||
102 | { | ||
103 | struct page *page = cookie; | ||
104 | |||
105 | if (!page) { | ||
106 | kfree(nd_get_link(nd)); | ||
107 | } else { | ||
108 | kunmap(page); | ||
109 | page_cache_release(page); | ||
110 | } | ||
111 | } | ||
112 | #endif | ||
113 | |||
114 | static void *ext4_follow_fast_link(struct dentry *dentry, struct nameidata *nd) | ||
115 | { | ||
28 | struct ext4_inode_info *ei = EXT4_I(dentry->d_inode); | 116 | struct ext4_inode_info *ei = EXT4_I(dentry->d_inode); |
29 | nd_set_link(nd, (char *) ei->i_data); | 117 | nd_set_link(nd, (char *) ei->i_data); |
30 | return NULL; | 118 | return NULL; |
@@ -32,8 +120,13 @@ static void *ext4_follow_link(struct dentry *dentry, struct nameidata *nd) | |||
32 | 120 | ||
33 | const struct inode_operations ext4_symlink_inode_operations = { | 121 | const struct inode_operations ext4_symlink_inode_operations = { |
34 | .readlink = generic_readlink, | 122 | .readlink = generic_readlink, |
123 | #ifdef CONFIG_EXT4_FS_ENCRYPTION | ||
124 | .follow_link = ext4_follow_link, | ||
125 | .put_link = ext4_put_link, | ||
126 | #else | ||
35 | .follow_link = page_follow_link_light, | 127 | .follow_link = page_follow_link_light, |
36 | .put_link = page_put_link, | 128 | .put_link = page_put_link, |
129 | #endif | ||
37 | .setattr = ext4_setattr, | 130 | .setattr = ext4_setattr, |
38 | .setxattr = generic_setxattr, | 131 | .setxattr = generic_setxattr, |
39 | .getxattr = generic_getxattr, | 132 | .getxattr = generic_getxattr, |
@@ -43,7 +136,7 @@ const struct inode_operations ext4_symlink_inode_operations = { | |||
43 | 136 | ||
44 | const struct inode_operations ext4_fast_symlink_inode_operations = { | 137 | const struct inode_operations ext4_fast_symlink_inode_operations = { |
45 | .readlink = generic_readlink, | 138 | .readlink = generic_readlink, |
46 | .follow_link = ext4_follow_link, | 139 | .follow_link = ext4_follow_fast_link, |
47 | .setattr = ext4_setattr, | 140 | .setattr = ext4_setattr, |
48 | .setxattr = generic_setxattr, | 141 | .setxattr = generic_setxattr, |
49 | .getxattr = generic_getxattr, | 142 | .getxattr = generic_getxattr, |
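An encrypted symlink stores a length-prefixed ciphertext blob instead of a plain target string, and the length field must be validated against its container (the fast-symlink inode area or one block) before anything is decoded, exactly as the -EIO path above does. A user-space sketch of that bounds check, with an illustrative struct layout:

    #include <stdio.h>
    #include <stdint.h>
    #include <stddef.h>

    struct encrypted_symlink_data {
        uint32_t len;              /* length of encrypted_path */
        char encrypted_path[1];    /* payload follows the header */
    };

    static int symlink_len_ok(const struct encrypted_symlink_data *sd,
                              size_t max_size)
    {
        /* sizeof(*sd) - 1 counts the header without the payload byte,
         * mirroring the check in ext4_follow_link() above */
        return sd->len + sizeof(*sd) - 1 <= max_size;
    }

    int main(void)
    {
        struct encrypted_symlink_data sd = { .len = 40 };

        printf("fits in 64-byte area: %d\n", symlink_len_ok(&sd, 64));
        printf("fits in 16-byte area: %d\n", symlink_len_ok(&sd, 16));
        return 0;
    }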
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 1e09fc77395c..759842ff8af0 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c | |||
@@ -55,7 +55,6 @@ | |||
55 | #include <linux/slab.h> | 55 | #include <linux/slab.h> |
56 | #include <linux/mbcache.h> | 56 | #include <linux/mbcache.h> |
57 | #include <linux/quotaops.h> | 57 | #include <linux/quotaops.h> |
58 | #include <linux/rwsem.h> | ||
59 | #include "ext4_jbd2.h" | 58 | #include "ext4_jbd2.h" |
60 | #include "ext4.h" | 59 | #include "ext4.h" |
61 | #include "xattr.h" | 60 | #include "xattr.h" |
@@ -639,8 +638,7 @@ ext4_xattr_set_entry(struct ext4_xattr_info *i, struct ext4_xattr_search *s) | |||
639 | free += EXT4_XATTR_LEN(name_len); | 638 | free += EXT4_XATTR_LEN(name_len); |
640 | } | 639 | } |
641 | if (i->value) { | 640 | if (i->value) { |
642 | if (free < EXT4_XATTR_SIZE(i->value_len) || | 641 | if (free < EXT4_XATTR_LEN(name_len) + |
643 | free < EXT4_XATTR_LEN(name_len) + | ||
644 | EXT4_XATTR_SIZE(i->value_len)) | 642 | EXT4_XATTR_SIZE(i->value_len)) |
645 | return -ENOSPC; | 643 | return -ENOSPC; |
646 | } | 644 | } |
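The xattr.c change collapses a two-clause free-space test into the single sum it was meant to be: a new entry needs its padded entry header plus its padded value, together. A worked sketch follows; the macro shapes mirror the ext4 ones, but the constants (4-byte padding, 16-byte entry header) are illustrative.

    #include <stdio.h>
    #include <stddef.h>

    #define XATTR_PAD   4
    #define XATTR_ROUND (XATTR_PAD - 1)
    /* padded entry header including the name, ~ EXT4_XATTR_LEN() */
    #define XATTR_LEN(name_len) \
        (((name_len) + 16 + XATTR_ROUND) & ~XATTR_ROUND)
    /* padded value size, ~ EXT4_XATTR_SIZE() */
    #define XATTR_SIZE(size) (((size) + XATTR_ROUND) & ~XATTR_ROUND)

    int main(void)
    {
        size_t free_bytes = 64, name_len = 9, value_len = 30;
        size_t need = XATTR_LEN(name_len) + XATTR_SIZE(value_len);

        printf("need %zu, free %zu -> %s\n", need, free_bytes,
               need <= free_bytes ? "fits" : "ENOSPC");
        return 0;
    }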
diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h index 29bedf5589f6..ddc0957760ba 100644 --- a/fs/ext4/xattr.h +++ b/fs/ext4/xattr.h | |||
@@ -23,6 +23,7 @@ | |||
23 | #define EXT4_XATTR_INDEX_SECURITY 6 | 23 | #define EXT4_XATTR_INDEX_SECURITY 6 |
24 | #define EXT4_XATTR_INDEX_SYSTEM 7 | 24 | #define EXT4_XATTR_INDEX_SYSTEM 7 |
25 | #define EXT4_XATTR_INDEX_RICHACL 8 | 25 | #define EXT4_XATTR_INDEX_RICHACL 8 |
26 | #define EXT4_XATTR_INDEX_ENCRYPTION 9 | ||
26 | 27 | ||
27 | struct ext4_xattr_header { | 28 | struct ext4_xattr_header { |
28 | __le32 h_magic; /* magic number for identification */ | 29 | __le32 h_magic; /* magic number for identification */ |
@@ -98,6 +99,8 @@ extern const struct xattr_handler ext4_xattr_user_handler; | |||
98 | extern const struct xattr_handler ext4_xattr_trusted_handler; | 99 | extern const struct xattr_handler ext4_xattr_trusted_handler; |
99 | extern const struct xattr_handler ext4_xattr_security_handler; | 100 | extern const struct xattr_handler ext4_xattr_security_handler; |
100 | 101 | ||
102 | #define EXT4_XATTR_NAME_ENCRYPTION_CONTEXT "c" | ||
103 | |||
101 | extern ssize_t ext4_listxattr(struct dentry *, char *, size_t); | 104 | extern ssize_t ext4_listxattr(struct dentry *, char *, size_t); |
102 | 105 | ||
103 | extern int ext4_xattr_get(struct inode *, int, const char *, void *, size_t); | 106 | extern int ext4_xattr_get(struct inode *, int, const char *, void *, size_t); |
diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig index 94e2d2ffabe1..05f0f663f14c 100644 --- a/fs/f2fs/Kconfig +++ b/fs/f2fs/Kconfig | |||
@@ -1,5 +1,5 @@ | |||
1 | config F2FS_FS | 1 | config F2FS_FS |
2 | tristate "F2FS filesystem support (EXPERIMENTAL)" | 2 | tristate "F2FS filesystem support" |
3 | depends on BLOCK | 3 | depends on BLOCK |
4 | help | 4 | help |
5 | F2FS is based on Log-structured File System (LFS), which supports | 5 | F2FS is based on Log-structured File System (LFS), which supports |
diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c index 742202779bd5..4320ffab3495 100644 --- a/fs/f2fs/acl.c +++ b/fs/f2fs/acl.c | |||
@@ -351,13 +351,11 @@ static int f2fs_acl_create(struct inode *dir, umode_t *mode, | |||
351 | 351 | ||
352 | *acl = f2fs_acl_clone(p, GFP_NOFS); | 352 | *acl = f2fs_acl_clone(p, GFP_NOFS); |
353 | if (!*acl) | 353 | if (!*acl) |
354 | return -ENOMEM; | 354 | goto no_mem; |
355 | 355 | ||
356 | ret = f2fs_acl_create_masq(*acl, mode); | 356 | ret = f2fs_acl_create_masq(*acl, mode); |
357 | if (ret < 0) { | 357 | if (ret < 0) |
358 | posix_acl_release(*acl); | 358 | goto no_mem_clone; |
359 | return -ENOMEM; | ||
360 | } | ||
361 | 359 | ||
362 | if (ret == 0) { | 360 | if (ret == 0) { |
363 | posix_acl_release(*acl); | 361 | posix_acl_release(*acl); |
@@ -378,6 +376,12 @@ no_acl: | |||
378 | *default_acl = NULL; | 376 | *default_acl = NULL; |
379 | *acl = NULL; | 377 | *acl = NULL; |
380 | return 0; | 378 | return 0; |
379 | |||
380 | no_mem_clone: | ||
381 | posix_acl_release(*acl); | ||
382 | no_mem: | ||
383 | posix_acl_release(p); | ||
384 | return -ENOMEM; | ||
381 | } | 385 | } |
382 | 386 | ||
383 | int f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *ipage, | 387 | int f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *ipage, |
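The acl.c rework funnels both failure paths through labels so the clone and the original ACL are each released exactly once. A minimal sketch of that goto-based unwind idiom, with plain malloc/free standing in for the posix_acl refcounting:

    #include <stdio.h>
    #include <stdlib.h>

    static int create_pair(int **a, int **b)
    {
        *a = malloc(sizeof(int));
        if (!*a)
            goto no_mem;
        *b = malloc(sizeof(int));
        if (!*b)
            goto no_mem_first;
        return 0;

    no_mem_first:
        free(*a);       /* undo step 1, then fall through */
    no_mem:
        return -1;      /* ~ -ENOMEM */
    }

    int main(void)
    {
        int *a, *b;

        if (!create_pair(&a, &b)) {
            puts("allocated");
            free(a);
            free(b);
        }
        return 0;
    }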
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 7f794b72b3b7..a5e17a2a0781 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c | |||
@@ -276,7 +276,7 @@ continue_unlock: | |||
276 | if (!clear_page_dirty_for_io(page)) | 276 | if (!clear_page_dirty_for_io(page)) |
277 | goto continue_unlock; | 277 | goto continue_unlock; |
278 | 278 | ||
279 | if (f2fs_write_meta_page(page, &wbc)) { | 279 | if (mapping->a_ops->writepage(page, &wbc)) { |
280 | unlock_page(page); | 280 | unlock_page(page); |
281 | break; | 281 | break; |
282 | } | 282 | } |
@@ -464,20 +464,19 @@ static void recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) | |||
464 | 464 | ||
465 | void recover_orphan_inodes(struct f2fs_sb_info *sbi) | 465 | void recover_orphan_inodes(struct f2fs_sb_info *sbi) |
466 | { | 466 | { |
467 | block_t start_blk, orphan_blkaddr, i, j; | 467 | block_t start_blk, orphan_blocks, i, j; |
468 | 468 | ||
469 | if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG)) | 469 | if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG)) |
470 | return; | 470 | return; |
471 | 471 | ||
472 | set_sbi_flag(sbi, SBI_POR_DOING); | 472 | set_sbi_flag(sbi, SBI_POR_DOING); |
473 | 473 | ||
474 | start_blk = __start_cp_addr(sbi) + 1 + | 474 | start_blk = __start_cp_addr(sbi) + 1 + __cp_payload(sbi); |
475 | le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload); | 475 | orphan_blocks = __start_sum_addr(sbi) - 1 - __cp_payload(sbi); |
476 | orphan_blkaddr = __start_sum_addr(sbi) - 1; | ||
477 | 476 | ||
478 | ra_meta_pages(sbi, start_blk, orphan_blkaddr, META_CP); | 477 | ra_meta_pages(sbi, start_blk, orphan_blocks, META_CP); |
479 | 478 | ||
480 | for (i = 0; i < orphan_blkaddr; i++) { | 479 | for (i = 0; i < orphan_blocks; i++) { |
481 | struct page *page = get_meta_page(sbi, start_blk + i); | 480 | struct page *page = get_meta_page(sbi, start_blk + i); |
482 | struct f2fs_orphan_block *orphan_blk; | 481 | struct f2fs_orphan_block *orphan_blk; |
483 | 482 | ||
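The fix reads orphan blocks from the correct window of the checkpoint pack: cp_payload blocks sit between the header block and the orphan area, so both the start offset and the block count must account for them. A small arithmetic sketch of that layout with illustrative numbers:

    #include <stdio.h>

    int main(void)
    {
        unsigned cp_addr = 512;          /* start of the checkpoint pack */
        unsigned cp_payload = 2;         /* blocks after the cp header */
        unsigned start_sum_offset = 8;   /* offset of first summary block */

        /* [cp header][cp_payload blocks][orphan blocks][summaries...] */
        unsigned start_blk = cp_addr + 1 + cp_payload;
        unsigned orphan_blocks = start_sum_offset - 1 - cp_payload;

        printf("orphans at block %u, %u block(s)\n",
               start_blk, orphan_blocks);
        return 0;
    }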
@@ -615,7 +614,7 @@ int get_valid_checkpoint(struct f2fs_sb_info *sbi) | |||
615 | unsigned long blk_size = sbi->blocksize; | 614 | unsigned long blk_size = sbi->blocksize; |
616 | unsigned long long cp1_version = 0, cp2_version = 0; | 615 | unsigned long long cp1_version = 0, cp2_version = 0; |
617 | unsigned long long cp_start_blk_no; | 616 | unsigned long long cp_start_blk_no; |
618 | unsigned int cp_blks = 1 + le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload); | 617 | unsigned int cp_blks = 1 + __cp_payload(sbi); |
619 | block_t cp_blk_no; | 618 | block_t cp_blk_no; |
620 | int i; | 619 | int i; |
621 | 620 | ||
@@ -796,6 +795,7 @@ retry: | |||
796 | * writeback dentry pages in the freeing inode. | 795 | * writeback dentry pages in the freeing inode. |
797 | */ | 796 | */ |
798 | f2fs_submit_merged_bio(sbi, DATA, WRITE); | 797 | f2fs_submit_merged_bio(sbi, DATA, WRITE); |
798 | cond_resched(); | ||
799 | } | 799 | } |
800 | goto retry; | 800 | goto retry; |
801 | } | 801 | } |
@@ -884,7 +884,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) | |||
884 | __u32 crc32 = 0; | 884 | __u32 crc32 = 0; |
885 | void *kaddr; | 885 | void *kaddr; |
886 | int i; | 886 | int i; |
887 | int cp_payload_blks = le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload); | 887 | int cp_payload_blks = __cp_payload(sbi); |
888 | 888 | ||
889 | /* | 889 | /* |
890 | * This avoids performing wrong roll-forward operations and uses | 890 | * This avoids performing wrong roll-forward operations and uses |
@@ -1048,17 +1048,18 @@ void write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) | |||
1048 | struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); | 1048 | struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); |
1049 | unsigned long long ckpt_ver; | 1049 | unsigned long long ckpt_ver; |
1050 | 1050 | ||
1051 | trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "start block_ops"); | ||
1052 | |||
1053 | mutex_lock(&sbi->cp_mutex); | 1051 | mutex_lock(&sbi->cp_mutex); |
1054 | 1052 | ||
1055 | if (!is_sbi_flag_set(sbi, SBI_IS_DIRTY) && | 1053 | if (!is_sbi_flag_set(sbi, SBI_IS_DIRTY) && |
1056 | cpc->reason != CP_DISCARD && cpc->reason != CP_UMOUNT) | 1054 | (cpc->reason == CP_FASTBOOT || cpc->reason == CP_SYNC)) |
1057 | goto out; | 1055 | goto out; |
1058 | if (unlikely(f2fs_cp_error(sbi))) | 1056 | if (unlikely(f2fs_cp_error(sbi))) |
1059 | goto out; | 1057 | goto out; |
1060 | if (f2fs_readonly(sbi->sb)) | 1058 | if (f2fs_readonly(sbi->sb)) |
1061 | goto out; | 1059 | goto out; |
1060 | |||
1061 | trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "start block_ops"); | ||
1062 | |||
1062 | if (block_operations(sbi)) | 1063 | if (block_operations(sbi)) |
1063 | goto out; | 1064 | goto out; |
1064 | 1065 | ||
@@ -1085,6 +1086,10 @@ void write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) | |||
1085 | 1086 | ||
1086 | unblock_operations(sbi); | 1087 | unblock_operations(sbi); |
1087 | stat_inc_cp_count(sbi->stat_info); | 1088 | stat_inc_cp_count(sbi->stat_info); |
1089 | |||
1090 | if (cpc->reason == CP_RECOVERY) | ||
1091 | f2fs_msg(sbi->sb, KERN_NOTICE, | ||
1092 | "checkpoint: version = %llx", ckpt_ver); | ||
1088 | out: | 1093 | out: |
1089 | mutex_unlock(&sbi->cp_mutex); | 1094 | mutex_unlock(&sbi->cp_mutex); |
1090 | trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish checkpoint"); | 1095 | trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish checkpoint"); |
@@ -1103,14 +1108,9 @@ void init_ino_entry_info(struct f2fs_sb_info *sbi) | |||
1103 | im->ino_num = 0; | 1108 | im->ino_num = 0; |
1104 | } | 1109 | } |
1105 | 1110 | ||
1106 | /* | ||
1107 | * considering 512 blocks in a segment 8 blocks are needed for cp | ||
1108 | * and log segment summaries. Remaining blocks are used to keep | ||
1109 | * orphan entries with the limitation one reserved segment | ||
1110 | * for cp pack we can have max 1020*504 orphan entries | ||
1111 | */ | ||
1112 | sbi->max_orphans = (sbi->blocks_per_seg - F2FS_CP_PACKS - | 1111 | sbi->max_orphans = (sbi->blocks_per_seg - F2FS_CP_PACKS - |
1113 | NR_CURSEG_TYPE) * F2FS_ORPHANS_PER_BLOCK; | 1112 | NR_CURSEG_TYPE - __cp_payload(sbi)) * |
1113 | F2FS_ORPHANS_PER_BLOCK; | ||
1114 | } | 1114 | } |
1115 | 1115 | ||
1116 | int __init create_checkpoint_caches(void) | 1116 | int __init create_checkpoint_caches(void) |
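With __cp_payload() folded into the budget, the numbers from the removed comment still serve as a sanity check: 512 blocks per segment minus 2 checkpoint packs and 6 curseg summaries leaves 504 orphan blocks, i.e. 1020*504 entries when the payload is zero. A one-loop check of the new formula, assuming those same constants:

    #include <stdio.h>

    int main(void)
    {
        int blocks_per_seg = 512, cp_packs = 2, cursegs = 6;
        int orphans_per_block = 1020;

        for (int cp_payload = 0; cp_payload <= 2; cp_payload++)
            printf("cp_payload=%d -> max_orphans=%d\n", cp_payload,
                   (blocks_per_seg - cp_packs - cursegs - cp_payload) *
                   orphans_per_block);
        return 0;
    }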
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 319eda511c4f..b91b0e10678e 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c | |||
@@ -25,6 +25,9 @@ | |||
25 | #include "trace.h" | 25 | #include "trace.h" |
26 | #include <trace/events/f2fs.h> | 26 | #include <trace/events/f2fs.h> |
27 | 27 | ||
28 | static struct kmem_cache *extent_tree_slab; | ||
29 | static struct kmem_cache *extent_node_slab; | ||
30 | |||
28 | static void f2fs_read_end_io(struct bio *bio, int err) | 31 | static void f2fs_read_end_io(struct bio *bio, int err) |
29 | { | 32 | { |
30 | struct bio_vec *bvec; | 33 | struct bio_vec *bvec; |
@@ -197,7 +200,7 @@ alloc_new: | |||
197 | * ->node_page | 200 | * ->node_page |
198 | * update block addresses in the node page | 201 | * update block addresses in the node page |
199 | */ | 202 | */ |
200 | static void __set_data_blkaddr(struct dnode_of_data *dn) | 203 | void set_data_blkaddr(struct dnode_of_data *dn) |
201 | { | 204 | { |
202 | struct f2fs_node *rn; | 205 | struct f2fs_node *rn; |
203 | __le32 *addr_array; | 206 | __le32 *addr_array; |
@@ -226,7 +229,7 @@ int reserve_new_block(struct dnode_of_data *dn) | |||
226 | trace_f2fs_reserve_new_block(dn->inode, dn->nid, dn->ofs_in_node); | 229 | trace_f2fs_reserve_new_block(dn->inode, dn->nid, dn->ofs_in_node); |
227 | 230 | ||
228 | dn->data_blkaddr = NEW_ADDR; | 231 | dn->data_blkaddr = NEW_ADDR; |
229 | __set_data_blkaddr(dn); | 232 | set_data_blkaddr(dn); |
230 | mark_inode_dirty(dn->inode); | 233 | mark_inode_dirty(dn->inode); |
231 | sync_inode_page(dn); | 234 | sync_inode_page(dn); |
232 | return 0; | 235 | return 0; |
@@ -248,73 +251,62 @@ int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index) | |||
248 | return err; | 251 | return err; |
249 | } | 252 | } |
250 | 253 | ||
251 | static int check_extent_cache(struct inode *inode, pgoff_t pgofs, | 254 | static void f2fs_map_bh(struct super_block *sb, pgoff_t pgofs, |
252 | struct buffer_head *bh_result) | 255 | struct extent_info *ei, struct buffer_head *bh_result) |
256 | { | ||
257 | unsigned int blkbits = sb->s_blocksize_bits; | ||
258 | size_t max_size = bh_result->b_size; | ||
259 | size_t mapped_size; | ||
260 | |||
261 | clear_buffer_new(bh_result); | ||
262 | map_bh(bh_result, sb, ei->blk + pgofs - ei->fofs); | ||
263 | mapped_size = (ei->fofs + ei->len - pgofs) << blkbits; | ||
264 | bh_result->b_size = min(max_size, mapped_size); | ||
265 | } | ||
266 | |||
267 | static bool lookup_extent_info(struct inode *inode, pgoff_t pgofs, | ||
268 | struct extent_info *ei) | ||
253 | { | 269 | { |
254 | struct f2fs_inode_info *fi = F2FS_I(inode); | 270 | struct f2fs_inode_info *fi = F2FS_I(inode); |
255 | pgoff_t start_fofs, end_fofs; | 271 | pgoff_t start_fofs, end_fofs; |
256 | block_t start_blkaddr; | 272 | block_t start_blkaddr; |
257 | 273 | ||
258 | if (is_inode_flag_set(fi, FI_NO_EXTENT)) | 274 | read_lock(&fi->ext_lock); |
259 | return 0; | ||
260 | |||
261 | read_lock(&fi->ext.ext_lock); | ||
262 | if (fi->ext.len == 0) { | 275 | if (fi->ext.len == 0) { |
263 | read_unlock(&fi->ext.ext_lock); | 276 | read_unlock(&fi->ext_lock); |
264 | return 0; | 277 | return false; |
265 | } | 278 | } |
266 | 279 | ||
267 | stat_inc_total_hit(inode->i_sb); | 280 | stat_inc_total_hit(inode->i_sb); |
268 | 281 | ||
269 | start_fofs = fi->ext.fofs; | 282 | start_fofs = fi->ext.fofs; |
270 | end_fofs = fi->ext.fofs + fi->ext.len - 1; | 283 | end_fofs = fi->ext.fofs + fi->ext.len - 1; |
271 | start_blkaddr = fi->ext.blk_addr; | 284 | start_blkaddr = fi->ext.blk; |
272 | 285 | ||
273 | if (pgofs >= start_fofs && pgofs <= end_fofs) { | 286 | if (pgofs >= start_fofs && pgofs <= end_fofs) { |
274 | unsigned int blkbits = inode->i_sb->s_blocksize_bits; | 287 | *ei = fi->ext; |
275 | size_t count; | ||
276 | |||
277 | set_buffer_new(bh_result); | ||
278 | map_bh(bh_result, inode->i_sb, | ||
279 | start_blkaddr + pgofs - start_fofs); | ||
280 | count = end_fofs - pgofs + 1; | ||
281 | if (count < (UINT_MAX >> blkbits)) | ||
282 | bh_result->b_size = (count << blkbits); | ||
283 | else | ||
284 | bh_result->b_size = UINT_MAX; | ||
285 | |||
286 | stat_inc_read_hit(inode->i_sb); | 288 | stat_inc_read_hit(inode->i_sb); |
287 | read_unlock(&fi->ext.ext_lock); | 289 | read_unlock(&fi->ext_lock); |
288 | return 1; | 290 | return true; |
289 | } | 291 | } |
290 | read_unlock(&fi->ext.ext_lock); | 292 | read_unlock(&fi->ext_lock); |
291 | return 0; | 293 | return false; |
292 | } | 294 | } |
293 | 295 | ||
294 | void update_extent_cache(struct dnode_of_data *dn) | 296 | static bool update_extent_info(struct inode *inode, pgoff_t fofs, |
297 | block_t blkaddr) | ||
295 | { | 298 | { |
296 | struct f2fs_inode_info *fi = F2FS_I(dn->inode); | 299 | struct f2fs_inode_info *fi = F2FS_I(inode); |
297 | pgoff_t fofs, start_fofs, end_fofs; | 300 | pgoff_t start_fofs, end_fofs; |
298 | block_t start_blkaddr, end_blkaddr; | 301 | block_t start_blkaddr, end_blkaddr; |
299 | int need_update = true; | 302 | int need_update = true; |
300 | 303 | ||
301 | f2fs_bug_on(F2FS_I_SB(dn->inode), dn->data_blkaddr == NEW_ADDR); | 304 | write_lock(&fi->ext_lock); |
302 | |||
303 | /* Update the page address in the parent node */ | ||
304 | __set_data_blkaddr(dn); | ||
305 | |||
306 | if (is_inode_flag_set(fi, FI_NO_EXTENT)) | ||
307 | return; | ||
308 | |||
309 | fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) + | ||
310 | dn->ofs_in_node; | ||
311 | |||
312 | write_lock(&fi->ext.ext_lock); | ||
313 | 305 | ||
314 | start_fofs = fi->ext.fofs; | 306 | start_fofs = fi->ext.fofs; |
315 | end_fofs = fi->ext.fofs + fi->ext.len - 1; | 307 | end_fofs = fi->ext.fofs + fi->ext.len - 1; |
316 | start_blkaddr = fi->ext.blk_addr; | 308 | start_blkaddr = fi->ext.blk; |
317 | end_blkaddr = fi->ext.blk_addr + fi->ext.len - 1; | 309 | end_blkaddr = fi->ext.blk + fi->ext.len - 1; |
318 | 310 | ||
319 | /* Drop and initialize the matched extent */ | 311 | /* Drop and initialize the matched extent */ |
320 | if (fi->ext.len == 1 && fofs == start_fofs) | 312 | if (fi->ext.len == 1 && fofs == start_fofs) |
@@ -322,24 +314,24 @@ void update_extent_cache(struct dnode_of_data *dn) | |||
322 | 314 | ||
323 | /* Initial extent */ | 315 | /* Initial extent */ |
324 | if (fi->ext.len == 0) { | 316 | if (fi->ext.len == 0) { |
325 | if (dn->data_blkaddr != NULL_ADDR) { | 317 | if (blkaddr != NULL_ADDR) { |
326 | fi->ext.fofs = fofs; | 318 | fi->ext.fofs = fofs; |
327 | fi->ext.blk_addr = dn->data_blkaddr; | 319 | fi->ext.blk = blkaddr; |
328 | fi->ext.len = 1; | 320 | fi->ext.len = 1; |
329 | } | 321 | } |
330 | goto end_update; | 322 | goto end_update; |
331 | } | 323 | } |
332 | 324 | ||
333 | /* Front merge */ | 325 | /* Front merge */ |
334 | if (fofs == start_fofs - 1 && dn->data_blkaddr == start_blkaddr - 1) { | 326 | if (fofs == start_fofs - 1 && blkaddr == start_blkaddr - 1) { |
335 | fi->ext.fofs--; | 327 | fi->ext.fofs--; |
336 | fi->ext.blk_addr--; | 328 | fi->ext.blk--; |
337 | fi->ext.len++; | 329 | fi->ext.len++; |
338 | goto end_update; | 330 | goto end_update; |
339 | } | 331 | } |
340 | 332 | ||
341 | /* Back merge */ | 333 | /* Back merge */ |
342 | if (fofs == end_fofs + 1 && dn->data_blkaddr == end_blkaddr + 1) { | 334 | if (fofs == end_fofs + 1 && blkaddr == end_blkaddr + 1) { |
343 | fi->ext.len++; | 335 | fi->ext.len++; |
344 | goto end_update; | 336 | goto end_update; |
345 | } | 337 | } |
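update_extent_info() keeps a single cached extent per inode and grows it by one block when both the file offset and the disk address are adjacent at either end; otherwise the extent is split or reset. A compilable sketch of just the front/back merge rules shown above:

    #include <stdio.h>
    #include <stdbool.h>

    struct extent { unsigned fofs, blk, len; };

    static bool try_merge(struct extent *e, unsigned fofs, unsigned blk)
    {
        if (fofs + 1 == e->fofs && blk + 1 == e->blk) {   /* front merge */
            e->fofs--; e->blk--; e->len++;
            return true;
        }
        if (fofs == e->fofs + e->len && blk == e->blk + e->len) {
            e->len++;                                     /* back merge */
            return true;
        }
        return false;
    }

    int main(void)
    {
        struct extent e = { .fofs = 10, .blk = 100, .len = 4 };

        printf("back:  %d\n", try_merge(&e, 14, 104));  /* len -> 5 */
        printf("front: %d\n", try_merge(&e, 9, 99));    /* len -> 6 */
        printf("miss:  %d\n", try_merge(&e, 40, 400));
        return 0;
    }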
@@ -351,8 +343,7 @@ void update_extent_cache(struct dnode_of_data *dn) | |||
351 | fi->ext.len = fofs - start_fofs; | 343 | fi->ext.len = fofs - start_fofs; |
352 | } else { | 344 | } else { |
353 | fi->ext.fofs = fofs + 1; | 345 | fi->ext.fofs = fofs + 1; |
354 | fi->ext.blk_addr = start_blkaddr + | 346 | fi->ext.blk = start_blkaddr + fofs - start_fofs + 1; |
355 | fofs - start_fofs + 1; | ||
356 | fi->ext.len -= fofs - start_fofs + 1; | 347 | fi->ext.len -= fofs - start_fofs + 1; |
357 | } | 348 | } |
358 | } else { | 349 | } else { |
@@ -366,27 +357,583 @@ void update_extent_cache(struct dnode_of_data *dn) | |||
366 | need_update = true; | 357 | need_update = true; |
367 | } | 358 | } |
368 | end_update: | 359 | end_update: |
369 | write_unlock(&fi->ext.ext_lock); | 360 | write_unlock(&fi->ext_lock); |
370 | if (need_update) | 361 | return need_update; |
371 | sync_inode_page(dn); | 362 | } |
363 | |||
364 | static struct extent_node *__attach_extent_node(struct f2fs_sb_info *sbi, | ||
365 | struct extent_tree *et, struct extent_info *ei, | ||
366 | struct rb_node *parent, struct rb_node **p) | ||
367 | { | ||
368 | struct extent_node *en; | ||
369 | |||
370 | en = kmem_cache_alloc(extent_node_slab, GFP_ATOMIC); | ||
371 | if (!en) | ||
372 | return NULL; | ||
373 | |||
374 | en->ei = *ei; | ||
375 | INIT_LIST_HEAD(&en->list); | ||
376 | |||
377 | rb_link_node(&en->rb_node, parent, p); | ||
378 | rb_insert_color(&en->rb_node, &et->root); | ||
379 | et->count++; | ||
380 | atomic_inc(&sbi->total_ext_node); | ||
381 | return en; | ||
382 | } | ||
383 | |||
384 | static void __detach_extent_node(struct f2fs_sb_info *sbi, | ||
385 | struct extent_tree *et, struct extent_node *en) | ||
386 | { | ||
387 | rb_erase(&en->rb_node, &et->root); | ||
388 | et->count--; | ||
389 | atomic_dec(&sbi->total_ext_node); | ||
390 | |||
391 | if (et->cached_en == en) | ||
392 | et->cached_en = NULL; | ||
393 | } | ||
394 | |||
395 | static struct extent_tree *__find_extent_tree(struct f2fs_sb_info *sbi, | ||
396 | nid_t ino) | ||
397 | { | ||
398 | struct extent_tree *et; | ||
399 | |||
400 | down_read(&sbi->extent_tree_lock); | ||
401 | et = radix_tree_lookup(&sbi->extent_tree_root, ino); | ||
402 | if (!et) { | ||
403 | up_read(&sbi->extent_tree_lock); | ||
404 | return NULL; | ||
405 | } | ||
406 | atomic_inc(&et->refcount); | ||
407 | up_read(&sbi->extent_tree_lock); | ||
408 | |||
409 | return et; | ||
410 | } | ||
411 | |||
412 | static struct extent_tree *__grab_extent_tree(struct inode *inode) | ||
413 | { | ||
414 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); | ||
415 | struct extent_tree *et; | ||
416 | nid_t ino = inode->i_ino; | ||
417 | |||
418 | down_write(&sbi->extent_tree_lock); | ||
419 | et = radix_tree_lookup(&sbi->extent_tree_root, ino); | ||
420 | if (!et) { | ||
421 | et = f2fs_kmem_cache_alloc(extent_tree_slab, GFP_NOFS); | ||
422 | f2fs_radix_tree_insert(&sbi->extent_tree_root, ino, et); | ||
423 | memset(et, 0, sizeof(struct extent_tree)); | ||
424 | et->ino = ino; | ||
425 | et->root = RB_ROOT; | ||
426 | et->cached_en = NULL; | ||
427 | rwlock_init(&et->lock); | ||
428 | atomic_set(&et->refcount, 0); | ||
429 | et->count = 0; | ||
430 | sbi->total_ext_tree++; | ||
431 | } | ||
432 | atomic_inc(&et->refcount); | ||
433 | up_write(&sbi->extent_tree_lock); | ||
434 | |||
435 | return et; | ||
436 | } | ||
437 | |||
438 | static struct extent_node *__lookup_extent_tree(struct extent_tree *et, | ||
439 | unsigned int fofs) | ||
440 | { | ||
441 | struct rb_node *node = et->root.rb_node; | ||
442 | struct extent_node *en; | ||
443 | |||
444 | if (et->cached_en) { | ||
445 | struct extent_info *cei = &et->cached_en->ei; | ||
446 | |||
447 | if (cei->fofs <= fofs && cei->fofs + cei->len > fofs) | ||
448 | return et->cached_en; | ||
449 | } | ||
450 | |||
451 | while (node) { | ||
452 | en = rb_entry(node, struct extent_node, rb_node); | ||
453 | |||
454 | if (fofs < en->ei.fofs) { | ||
455 | node = node->rb_left; | ||
456 | } else if (fofs >= en->ei.fofs + en->ei.len) { | ||
457 | node = node->rb_right; | ||
458 | } else { | ||
459 | et->cached_en = en; | ||
460 | return en; | ||
461 | } | ||
462 | } | ||
463 | return NULL; | ||
464 | } | ||
465 | |||
466 | static struct extent_node *__try_back_merge(struct f2fs_sb_info *sbi, | ||
467 | struct extent_tree *et, struct extent_node *en) | ||
468 | { | ||
469 | struct extent_node *prev; | ||
470 | struct rb_node *node; | ||
471 | |||
472 | node = rb_prev(&en->rb_node); | ||
473 | if (!node) | ||
474 | return NULL; | ||
475 | |||
476 | prev = rb_entry(node, struct extent_node, rb_node); | ||
477 | if (__is_back_mergeable(&en->ei, &prev->ei)) { | ||
478 | en->ei.fofs = prev->ei.fofs; | ||
479 | en->ei.blk = prev->ei.blk; | ||
480 | en->ei.len += prev->ei.len; | ||
481 | __detach_extent_node(sbi, et, prev); | ||
482 | return prev; | ||
483 | } | ||
484 | return NULL; | ||
485 | } | ||
486 | |||
487 | static struct extent_node *__try_front_merge(struct f2fs_sb_info *sbi, | ||
488 | struct extent_tree *et, struct extent_node *en) | ||
489 | { | ||
490 | struct extent_node *next; | ||
491 | struct rb_node *node; | ||
492 | |||
493 | node = rb_next(&en->rb_node); | ||
494 | if (!node) | ||
495 | return NULL; | ||
496 | |||
497 | next = rb_entry(node, struct extent_node, rb_node); | ||
498 | if (__is_front_mergeable(&en->ei, &next->ei)) { | ||
499 | en->ei.len += next->ei.len; | ||
500 | __detach_extent_node(sbi, et, next); | ||
501 | return next; | ||
502 | } | ||
503 | return NULL; | ||
504 | } | ||
505 | |||
506 | static struct extent_node *__insert_extent_tree(struct f2fs_sb_info *sbi, | ||
507 | struct extent_tree *et, struct extent_info *ei, | ||
508 | struct extent_node **den) | ||
509 | { | ||
510 | struct rb_node **p = &et->root.rb_node; | ||
511 | struct rb_node *parent = NULL; | ||
512 | struct extent_node *en; | ||
513 | |||
514 | while (*p) { | ||
515 | parent = *p; | ||
516 | en = rb_entry(parent, struct extent_node, rb_node); | ||
517 | |||
518 | if (ei->fofs < en->ei.fofs) { | ||
519 | if (__is_front_mergeable(ei, &en->ei)) { | ||
520 | f2fs_bug_on(sbi, !den); | ||
521 | en->ei.fofs = ei->fofs; | ||
522 | en->ei.blk = ei->blk; | ||
523 | en->ei.len += ei->len; | ||
524 | *den = __try_back_merge(sbi, et, en); | ||
525 | return en; | ||
526 | } | ||
527 | p = &(*p)->rb_left; | ||
528 | } else if (ei->fofs >= en->ei.fofs + en->ei.len) { | ||
529 | if (__is_back_mergeable(ei, &en->ei)) { | ||
530 | f2fs_bug_on(sbi, !den); | ||
531 | en->ei.len += ei->len; | ||
532 | *den = __try_front_merge(sbi, et, en); | ||
533 | return en; | ||
534 | } | ||
535 | p = &(*p)->rb_right; | ||
536 | } else { | ||
537 | f2fs_bug_on(sbi, 1); | ||
538 | } | ||
539 | } | ||
540 | |||
541 | return __attach_extent_node(sbi, et, ei, parent, p); | ||
542 | } | ||
543 | |||
544 | static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi, | ||
545 | struct extent_tree *et, bool free_all) | ||
546 | { | ||
547 | struct rb_node *node, *next; | ||
548 | struct extent_node *en; | ||
549 | unsigned int count = et->count; | ||
550 | |||
551 | node = rb_first(&et->root); | ||
552 | while (node) { | ||
553 | next = rb_next(node); | ||
554 | en = rb_entry(node, struct extent_node, rb_node); | ||
555 | |||
556 | if (free_all) { | ||
557 | spin_lock(&sbi->extent_lock); | ||
558 | if (!list_empty(&en->list)) | ||
559 | list_del_init(&en->list); | ||
560 | spin_unlock(&sbi->extent_lock); | ||
561 | } | ||
562 | |||
563 | if (free_all || list_empty(&en->list)) { | ||
564 | __detach_extent_node(sbi, et, en); | ||
565 | kmem_cache_free(extent_node_slab, en); | ||
566 | } | ||
567 | node = next; | ||
568 | } | ||
569 | |||
570 | return count - et->count; | ||
571 | } | ||
572 | |||
573 | static void f2fs_init_extent_tree(struct inode *inode, | ||
574 | struct f2fs_extent *i_ext) | ||
575 | { | ||
576 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); | ||
577 | struct extent_tree *et; | ||
578 | struct extent_node *en; | ||
579 | struct extent_info ei; | ||
580 | |||
581 | if (le32_to_cpu(i_ext->len) < F2FS_MIN_EXTENT_LEN) | ||
582 | return; | ||
583 | |||
584 | et = __grab_extent_tree(inode); | ||
585 | |||
586 | write_lock(&et->lock); | ||
587 | if (et->count) | ||
588 | goto out; | ||
589 | |||
590 | set_extent_info(&ei, le32_to_cpu(i_ext->fofs), | ||
591 | le32_to_cpu(i_ext->blk), le32_to_cpu(i_ext->len)); | ||
592 | |||
593 | en = __insert_extent_tree(sbi, et, &ei, NULL); | ||
594 | if (en) { | ||
595 | et->cached_en = en; | ||
596 | |||
597 | spin_lock(&sbi->extent_lock); | ||
598 | list_add_tail(&en->list, &sbi->extent_list); | ||
599 | spin_unlock(&sbi->extent_lock); | ||
600 | } | ||
601 | out: | ||
602 | write_unlock(&et->lock); | ||
603 | atomic_dec(&et->refcount); | ||
604 | } | ||
605 | |||
606 | static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs, | ||
607 | struct extent_info *ei) | ||
608 | { | ||
609 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); | ||
610 | struct extent_tree *et; | ||
611 | struct extent_node *en; | ||
612 | |||
613 | trace_f2fs_lookup_extent_tree_start(inode, pgofs); | ||
614 | |||
615 | et = __find_extent_tree(sbi, inode->i_ino); | ||
616 | if (!et) | ||
617 | return false; | ||
618 | |||
619 | read_lock(&et->lock); | ||
620 | en = __lookup_extent_tree(et, pgofs); | ||
621 | if (en) { | ||
622 | *ei = en->ei; | ||
623 | spin_lock(&sbi->extent_lock); | ||
624 | if (!list_empty(&en->list)) | ||
625 | list_move_tail(&en->list, &sbi->extent_list); | ||
626 | spin_unlock(&sbi->extent_lock); | ||
627 | stat_inc_read_hit(sbi->sb); | ||
628 | } | ||
629 | stat_inc_total_hit(sbi->sb); | ||
630 | read_unlock(&et->lock); | ||
631 | |||
632 | trace_f2fs_lookup_extent_tree_end(inode, pgofs, en); | ||
633 | |||
634 | atomic_dec(&et->refcount); | ||
635 | return en ? true : false; | ||
636 | } | ||
637 | |||
638 | static void f2fs_update_extent_tree(struct inode *inode, pgoff_t fofs, | ||
639 | block_t blkaddr) | ||
640 | { | ||
641 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); | ||
642 | struct extent_tree *et; | ||
643 | struct extent_node *en = NULL, *en1 = NULL, *en2 = NULL, *en3 = NULL; | ||
644 | struct extent_node *den = NULL; | ||
645 | struct extent_info ei, dei; | ||
646 | unsigned int endofs; | ||
647 | |||
648 | trace_f2fs_update_extent_tree(inode, fofs, blkaddr); | ||
649 | |||
650 | et = __grab_extent_tree(inode); | ||
651 | |||
652 | write_lock(&et->lock); | ||
653 | |||
654 | /* 1. lookup and remove existing extent info in cache */ | ||
655 | en = __lookup_extent_tree(et, fofs); | ||
656 | if (!en) | ||
657 | goto update_extent; | ||
658 | |||
659 | dei = en->ei; | ||
660 | __detach_extent_node(sbi, et, en); | ||
661 | |||
662 | /* 2. if extent can be split more, split and insert the left part */ | ||
663 | if (dei.len > 1) { | ||
664 | /* insert left part of split extent into cache */ | ||
665 | if (fofs - dei.fofs >= F2FS_MIN_EXTENT_LEN) { | ||
666 | set_extent_info(&ei, dei.fofs, dei.blk, | ||
667 | fofs - dei.fofs); | ||
668 | en1 = __insert_extent_tree(sbi, et, &ei, NULL); | ||
669 | } | ||
670 | |||
671 | /* insert right part of split extent into cache */ | ||
672 | endofs = dei.fofs + dei.len - 1; | ||
673 | if (endofs - fofs >= F2FS_MIN_EXTENT_LEN) { | ||
674 | set_extent_info(&ei, fofs + 1, | ||
675 | fofs - dei.fofs + dei.blk, endofs - fofs); | ||
676 | en2 = __insert_extent_tree(sbi, et, &ei, NULL); | ||
677 | } | ||
678 | } | ||
679 | |||
680 | update_extent: | ||
681 | /* 3. update extent in extent cache */ | ||
682 | if (blkaddr) { | ||
683 | set_extent_info(&ei, fofs, blkaddr, 1); | ||
684 | en3 = __insert_extent_tree(sbi, et, &ei, &den); | ||
685 | } | ||
686 | |||
687 | /* 4. update in global extent list */ | ||
688 | spin_lock(&sbi->extent_lock); | ||
689 | if (en && !list_empty(&en->list)) | ||
690 | list_del(&en->list); | ||
691 | /* | ||
692 | * en1 and en2 were split from en; they become smaller and smaller | ||
693 | * fragments after several splits. So if their length is smaller | ||
694 | * than F2FS_MIN_EXTENT_LEN, we do not add them into the extent tree. | ||
695 | */ | ||
696 | if (en1) | ||
697 | list_add_tail(&en1->list, &sbi->extent_list); | ||
698 | if (en2) | ||
699 | list_add_tail(&en2->list, &sbi->extent_list); | ||
700 | if (en3) { | ||
701 | if (list_empty(&en3->list)) | ||
702 | list_add_tail(&en3->list, &sbi->extent_list); | ||
703 | else | ||
704 | list_move_tail(&en3->list, &sbi->extent_list); | ||
705 | } | ||
706 | if (den && !list_empty(&den->list)) | ||
707 | list_del(&den->list); | ||
708 | spin_unlock(&sbi->extent_lock); | ||
709 | |||
710 | /* 5. release extent node */ | ||
711 | if (en) | ||
712 | kmem_cache_free(extent_node_slab, en); | ||
713 | if (den) | ||
714 | kmem_cache_free(extent_node_slab, den); | ||
715 | |||
716 | write_unlock(&et->lock); | ||
717 | atomic_dec(&et->refcount); | ||
718 | } | ||
719 | |||
720 | void f2fs_preserve_extent_tree(struct inode *inode) | ||
721 | { | ||
722 | struct extent_tree *et; | ||
723 | struct extent_info *ext = &F2FS_I(inode)->ext; | ||
724 | bool sync = false; | ||
725 | |||
726 | if (!test_opt(F2FS_I_SB(inode), EXTENT_CACHE)) | ||
727 | return; | ||
728 | |||
729 | et = __find_extent_tree(F2FS_I_SB(inode), inode->i_ino); | ||
730 | if (!et) { | ||
731 | if (ext->len) { | ||
732 | ext->len = 0; | ||
733 | update_inode_page(inode); | ||
734 | } | ||
735 | return; | ||
736 | } | ||
737 | |||
738 | read_lock(&et->lock); | ||
739 | if (et->count) { | ||
740 | struct extent_node *en; | ||
741 | |||
742 | if (et->cached_en) { | ||
743 | en = et->cached_en; | ||
744 | } else { | ||
745 | struct rb_node *node = rb_first(&et->root); | ||
746 | |||
747 | if (!node) | ||
748 | node = rb_last(&et->root); | ||
749 | en = rb_entry(node, struct extent_node, rb_node); | ||
750 | } | ||
751 | |||
752 | if (__is_extent_same(ext, &en->ei)) | ||
753 | goto out; | ||
754 | |||
755 | *ext = en->ei; | ||
756 | sync = true; | ||
757 | } else if (ext->len) { | ||
758 | ext->len = 0; | ||
759 | sync = true; | ||
760 | } | ||
761 | out: | ||
762 | read_unlock(&et->lock); | ||
763 | atomic_dec(&et->refcount); | ||
764 | |||
765 | if (sync) | ||
766 | update_inode_page(inode); | ||
767 | } | ||
768 | |||
769 | void f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink) | ||
770 | { | ||
771 | struct extent_tree *treevec[EXT_TREE_VEC_SIZE]; | ||
772 | struct extent_node *en, *tmp; | ||
773 | unsigned long ino = F2FS_ROOT_INO(sbi); | ||
774 | struct radix_tree_iter iter; | ||
775 | void **slot; | ||
776 | unsigned int found; | ||
777 | unsigned int node_cnt = 0, tree_cnt = 0; | ||
778 | |||
779 | if (!test_opt(sbi, EXTENT_CACHE)) | ||
780 | return; | ||
781 | |||
782 | if (available_free_memory(sbi, EXTENT_CACHE)) | ||
783 | return; | ||
784 | |||
785 | spin_lock(&sbi->extent_lock); | ||
786 | list_for_each_entry_safe(en, tmp, &sbi->extent_list, list) { | ||
787 | if (!nr_shrink--) | ||
788 | break; | ||
789 | list_del_init(&en->list); | ||
790 | } | ||
791 | spin_unlock(&sbi->extent_lock); | ||
792 | |||
793 | down_read(&sbi->extent_tree_lock); | ||
794 | while ((found = radix_tree_gang_lookup(&sbi->extent_tree_root, | ||
795 | (void **)treevec, ino, EXT_TREE_VEC_SIZE))) { | ||
796 | unsigned i; | ||
797 | |||
798 | ino = treevec[found - 1]->ino + 1; | ||
799 | for (i = 0; i < found; i++) { | ||
800 | struct extent_tree *et = treevec[i]; | ||
801 | |||
802 | atomic_inc(&et->refcount); | ||
803 | write_lock(&et->lock); | ||
804 | node_cnt += __free_extent_tree(sbi, et, false); | ||
805 | write_unlock(&et->lock); | ||
806 | atomic_dec(&et->refcount); | ||
807 | } | ||
808 | } | ||
809 | up_read(&sbi->extent_tree_lock); | ||
810 | |||
811 | down_write(&sbi->extent_tree_lock); | ||
812 | radix_tree_for_each_slot(slot, &sbi->extent_tree_root, &iter, | ||
813 | F2FS_ROOT_INO(sbi)) { | ||
814 | struct extent_tree *et = (struct extent_tree *)*slot; | ||
815 | |||
816 | if (!atomic_read(&et->refcount) && !et->count) { | ||
817 | radix_tree_delete(&sbi->extent_tree_root, et->ino); | ||
818 | kmem_cache_free(extent_tree_slab, et); | ||
819 | sbi->total_ext_tree--; | ||
820 | tree_cnt++; | ||
821 | } | ||
822 | } | ||
823 | up_write(&sbi->extent_tree_lock); | ||
824 | |||
825 | trace_f2fs_shrink_extent_tree(sbi, node_cnt, tree_cnt); | ||
826 | } | ||
827 | |||
828 | void f2fs_destroy_extent_tree(struct inode *inode) | ||
829 | { | ||
830 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); | ||
831 | struct extent_tree *et; | ||
832 | unsigned int node_cnt = 0; | ||
833 | |||
834 | if (!test_opt(sbi, EXTENT_CACHE)) | ||
835 | return; | ||
836 | |||
837 | et = __find_extent_tree(sbi, inode->i_ino); | ||
838 | if (!et) | ||
839 | goto out; | ||
840 | |||
841 | /* free all extent info belonging to this extent tree */ | ||
842 | write_lock(&et->lock); | ||
843 | node_cnt = __free_extent_tree(sbi, et, true); | ||
844 | write_unlock(&et->lock); | ||
845 | |||
846 | atomic_dec(&et->refcount); | ||
847 | |||
848 | /* try to find and delete extent tree entry in radix tree */ | ||
849 | down_write(&sbi->extent_tree_lock); | ||
850 | et = radix_tree_lookup(&sbi->extent_tree_root, inode->i_ino); | ||
851 | if (!et) { | ||
852 | up_write(&sbi->extent_tree_lock); | ||
853 | goto out; | ||
854 | } | ||
855 | f2fs_bug_on(sbi, atomic_read(&et->refcount) || et->count); | ||
856 | radix_tree_delete(&sbi->extent_tree_root, inode->i_ino); | ||
857 | kmem_cache_free(extent_tree_slab, et); | ||
858 | sbi->total_ext_tree--; | ||
859 | up_write(&sbi->extent_tree_lock); | ||
860 | out: | ||
861 | trace_f2fs_destroy_extent_tree(inode, node_cnt); | ||
372 | return; | 862 | return; |
373 | } | 863 | } |
374 | 864 | ||
865 | void f2fs_init_extent_cache(struct inode *inode, struct f2fs_extent *i_ext) | ||
866 | { | ||
867 | if (test_opt(F2FS_I_SB(inode), EXTENT_CACHE)) | ||
868 | f2fs_init_extent_tree(inode, i_ext); | ||
869 | |||
870 | write_lock(&F2FS_I(inode)->ext_lock); | ||
871 | get_extent_info(&F2FS_I(inode)->ext, *i_ext); | ||
872 | write_unlock(&F2FS_I(inode)->ext_lock); | ||
873 | } | ||
874 | |||
875 | static bool f2fs_lookup_extent_cache(struct inode *inode, pgoff_t pgofs, | ||
876 | struct extent_info *ei) | ||
877 | { | ||
878 | if (is_inode_flag_set(F2FS_I(inode), FI_NO_EXTENT)) | ||
879 | return false; | ||
880 | |||
881 | if (test_opt(F2FS_I_SB(inode), EXTENT_CACHE)) | ||
882 | return f2fs_lookup_extent_tree(inode, pgofs, ei); | ||
883 | |||
884 | return lookup_extent_info(inode, pgofs, ei); | ||
885 | } | ||
886 | |||
887 | void f2fs_update_extent_cache(struct dnode_of_data *dn) | ||
888 | { | ||
889 | struct f2fs_inode_info *fi = F2FS_I(dn->inode); | ||
890 | pgoff_t fofs; | ||
891 | |||
892 | f2fs_bug_on(F2FS_I_SB(dn->inode), dn->data_blkaddr == NEW_ADDR); | ||
893 | |||
894 | if (is_inode_flag_set(fi, FI_NO_EXTENT)) | ||
895 | return; | ||
896 | |||
897 | fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) + | ||
898 | dn->ofs_in_node; | ||
899 | |||
900 | if (test_opt(F2FS_I_SB(dn->inode), EXTENT_CACHE)) | ||
901 | return f2fs_update_extent_tree(dn->inode, fofs, | ||
902 | dn->data_blkaddr); | ||
903 | |||
904 | if (update_extent_info(dn->inode, fofs, dn->data_blkaddr)) | ||
905 | sync_inode_page(dn); | ||
906 | } | ||
907 | |||
375 | struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync) | 908 | struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync) |
376 | { | 909 | { |
377 | struct address_space *mapping = inode->i_mapping; | 910 | struct address_space *mapping = inode->i_mapping; |
378 | struct dnode_of_data dn; | 911 | struct dnode_of_data dn; |
379 | struct page *page; | 912 | struct page *page; |
913 | struct extent_info ei; | ||
380 | int err; | 914 | int err; |
381 | struct f2fs_io_info fio = { | 915 | struct f2fs_io_info fio = { |
382 | .type = DATA, | 916 | .type = DATA, |
383 | .rw = sync ? READ_SYNC : READA, | 917 | .rw = sync ? READ_SYNC : READA, |
384 | }; | 918 | }; |
385 | 919 | ||
920 | /* | ||
921 | * If sync is false, it needs to check its block allocation. | ||
922 | * This is need and triggered by two flows: | ||
923 | * gc and truncate_partial_data_page. | ||
924 | */ | ||
925 | if (!sync) | ||
926 | goto search; | ||
927 | |||
386 | page = find_get_page(mapping, index); | 928 | page = find_get_page(mapping, index); |
387 | if (page && PageUptodate(page)) | 929 | if (page && PageUptodate(page)) |
388 | return page; | 930 | return page; |
389 | f2fs_put_page(page, 0); | 931 | f2fs_put_page(page, 0); |
932 | search: | ||
933 | if (f2fs_lookup_extent_cache(inode, index, &ei)) { | ||
934 | dn.data_blkaddr = ei.blk + index - ei.fofs; | ||
935 | goto got_it; | ||
936 | } | ||
390 | 937 | ||
391 | set_new_dnode(&dn, inode, NULL, NULL, 0); | 938 | set_new_dnode(&dn, inode, NULL, NULL, 0); |
392 | err = get_dnode_of_data(&dn, index, LOOKUP_NODE); | 939 | err = get_dnode_of_data(&dn, index, LOOKUP_NODE); |
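__lookup_extent_tree() above tries the per-tree cached node first and only falls back to walking the rbtree, caching whatever it finds for the next call. A user-space sketch of that search order, with a sorted array and binary search standing in for the rbtree (purely illustrative):

    #include <stdio.h>
    #include <stddef.h>

    struct extent { unsigned fofs, blk, len; };

    static const struct extent *lookup(const struct extent *v, size_t n,
                                       const struct extent **cached,
                                       unsigned fofs)
    {
        const struct extent *c = *cached;

        if (c && c->fofs <= fofs && fofs < c->fofs + c->len)
            return c;                      /* cached_en fast path */

        size_t lo = 0, hi = n;
        while (lo < hi) {                  /* ordered walk ~ rb_node loop */
            size_t mid = (lo + hi) / 2;
            if (fofs < v[mid].fofs)
                hi = mid;
            else if (fofs >= v[mid].fofs + v[mid].len)
                lo = mid + 1;
            else {
                *cached = &v[mid];         /* remember for next time */
                return &v[mid];
            }
        }
        return NULL;
    }

    int main(void)
    {
        static const struct extent v[] = {
            { 0, 100, 4 }, { 8, 300, 2 }, { 20, 900, 5 },
        };
        const struct extent *cached = NULL;

        printf("%s\n", lookup(v, 3, &cached, 21) ? "hit" : "miss");
        printf("%s\n", lookup(v, 3, &cached, 21) == cached ? "cached" : "?");
        return 0;
    }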
@@ -401,6 +948,7 @@ struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync) | |||
401 | if (unlikely(dn.data_blkaddr == NEW_ADDR)) | 948 | if (unlikely(dn.data_blkaddr == NEW_ADDR)) |
402 | return ERR_PTR(-EINVAL); | 949 | return ERR_PTR(-EINVAL); |
403 | 950 | ||
951 | got_it: | ||
404 | page = grab_cache_page(mapping, index); | 952 | page = grab_cache_page(mapping, index); |
405 | if (!page) | 953 | if (!page) |
406 | return ERR_PTR(-ENOMEM); | 954 | return ERR_PTR(-ENOMEM); |
@@ -435,6 +983,7 @@ struct page *get_lock_data_page(struct inode *inode, pgoff_t index) | |||
435 | struct address_space *mapping = inode->i_mapping; | 983 | struct address_space *mapping = inode->i_mapping; |
436 | struct dnode_of_data dn; | 984 | struct dnode_of_data dn; |
437 | struct page *page; | 985 | struct page *page; |
986 | struct extent_info ei; | ||
438 | int err; | 987 | int err; |
439 | struct f2fs_io_info fio = { | 988 | struct f2fs_io_info fio = { |
440 | .type = DATA, | 989 | .type = DATA, |
@@ -445,6 +994,11 @@ repeat: | |||
445 | if (!page) | 994 | if (!page) |
446 | return ERR_PTR(-ENOMEM); | 995 | return ERR_PTR(-ENOMEM); |
447 | 996 | ||
997 | if (f2fs_lookup_extent_cache(inode, index, &ei)) { | ||
998 | dn.data_blkaddr = ei.blk + index - ei.fofs; | ||
999 | goto got_it; | ||
1000 | } | ||
1001 | |||
448 | set_new_dnode(&dn, inode, NULL, NULL, 0); | 1002 | set_new_dnode(&dn, inode, NULL, NULL, 0); |
449 | err = get_dnode_of_data(&dn, index, LOOKUP_NODE); | 1003 | err = get_dnode_of_data(&dn, index, LOOKUP_NODE); |
450 | if (err) { | 1004 | if (err) { |
@@ -458,6 +1012,7 @@ repeat: | |||
458 | return ERR_PTR(-ENOENT); | 1012 | return ERR_PTR(-ENOENT); |
459 | } | 1013 | } |
460 | 1014 | ||
1015 | got_it: | ||
461 | if (PageUptodate(page)) | 1016 | if (PageUptodate(page)) |
462 | return page; | 1017 | return page; |
463 | 1018 | ||
@@ -569,19 +1124,26 @@ static int __allocate_data_block(struct dnode_of_data *dn) | |||
569 | 1124 | ||
570 | if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))) | 1125 | if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))) |
571 | return -EPERM; | 1126 | return -EPERM; |
1127 | |||
1128 | dn->data_blkaddr = datablock_addr(dn->node_page, dn->ofs_in_node); | ||
1129 | if (dn->data_blkaddr == NEW_ADDR) | ||
1130 | goto alloc; | ||
1131 | |||
572 | if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1))) | 1132 | if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1))) |
573 | return -ENOSPC; | 1133 | return -ENOSPC; |
574 | 1134 | ||
1135 | alloc: | ||
575 | get_node_info(sbi, dn->nid, &ni); | 1136 | get_node_info(sbi, dn->nid, &ni); |
576 | set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version); | 1137 | set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version); |
577 | 1138 | ||
578 | if (dn->ofs_in_node == 0 && dn->inode_page == dn->node_page) | 1139 | if (dn->ofs_in_node == 0 && dn->inode_page == dn->node_page) |
579 | seg = CURSEG_DIRECT_IO; | 1140 | seg = CURSEG_DIRECT_IO; |
580 | 1141 | ||
581 | allocate_data_block(sbi, NULL, NULL_ADDR, &dn->data_blkaddr, &sum, seg); | 1142 | allocate_data_block(sbi, NULL, dn->data_blkaddr, &dn->data_blkaddr, |
1143 | &sum, seg); | ||
582 | 1144 | ||
583 | /* for performance, direct IO doesn't go through the extent cache */ | 1145 |
584 | __set_data_blkaddr(dn); | 1146 | set_data_blkaddr(dn); |
585 | 1147 | ||
586 | /* update i_size */ | 1148 | /* update i_size */ |
587 | fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) + | 1149 | fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) + |
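The NEW_ADDR test added above prevents double-charging the valid-block count: a slot that was reserved earlier already paid inc_valid_block_count() at reservation time, so only a brand-new NULL_ADDR slot is charged before allocation. A one-line predicate capturing the rule (illustrative only):

```c
/* NEW_ADDR was charged when reserved; only NULL_ADDR still owes quota. */
static bool needs_quota_charge(block_t blkaddr)
{
	return blkaddr != NEW_ADDR;
}
```

Passing the previous dn->data_blkaddr into allocate_data_block(), instead of the fixed NULL_ADDR, also lets the segment layer retire the stale address being replaced.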
@@ -615,7 +1177,10 @@ static void __allocate_data_blocks(struct inode *inode, loff_t offset, | |||
615 | end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode)); | 1177 | end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode)); |
616 | 1178 | ||
617 | while (dn.ofs_in_node < end_offset && len) { | 1179 | while (dn.ofs_in_node < end_offset && len) { |
618 | if (dn.data_blkaddr == NULL_ADDR) { | 1180 | block_t blkaddr; |
1181 | |||
1182 | blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node); | ||
1183 | if (blkaddr == NULL_ADDR || blkaddr == NEW_ADDR) { | ||
619 | if (__allocate_data_block(&dn)) | 1184 | if (__allocate_data_block(&dn)) |
620 | goto sync_out; | 1185 | goto sync_out; |
621 | allocated = true; | 1186 | allocated = true; |
@@ -659,13 +1224,16 @@ static int __get_data_block(struct inode *inode, sector_t iblock, | |||
659 | int mode = create ? ALLOC_NODE : LOOKUP_NODE_RA; | 1224 | int mode = create ? ALLOC_NODE : LOOKUP_NODE_RA; |
660 | pgoff_t pgofs, end_offset; | 1225 | pgoff_t pgofs, end_offset; |
661 | int err = 0, ofs = 1; | 1226 | int err = 0, ofs = 1; |
1227 | struct extent_info ei; | ||
662 | bool allocated = false; | 1228 | bool allocated = false; |
663 | 1229 | ||
664 | /* Get the page offset from the block offset (iblock) */ | 1230 |
665 | pgofs = (pgoff_t)(iblock >> (PAGE_CACHE_SHIFT - blkbits)); | 1231 | pgofs = (pgoff_t)(iblock >> (PAGE_CACHE_SHIFT - blkbits)); |
666 | 1232 | ||
667 | if (check_extent_cache(inode, pgofs, bh_result)) | 1233 | if (f2fs_lookup_extent_cache(inode, pgofs, &ei)) { |
1234 | f2fs_map_bh(inode->i_sb, pgofs, &ei, bh_result); | ||
668 | goto out; | 1235 | goto out; |
1236 | } | ||
669 | 1237 | ||
670 | if (create) | 1238 | if (create) |
671 | f2fs_lock_op(F2FS_I_SB(inode)); | 1239 | f2fs_lock_op(F2FS_I_SB(inode)); |
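f2fs_map_bh() replaces the old check_extent_cache() here and is defined elsewhere in this series, so the sketch below is an assumption reconstructed from the call site: map bh_result at the block backing pgofs and clamp b_size to what remains of the cached extent.

```c
/* Hedged sketch of f2fs_map_bh(); the real body is added elsewhere in
 * this patch series -- treat this as an assumption, not the source. */
static void f2fs_map_bh_sketch(struct super_block *sb, pgoff_t pgofs,
			       struct extent_info *ei,
			       struct buffer_head *bh_result)
{
	unsigned int blkbits = sb->s_blocksize_bits;
	pgoff_t ofs_in_ext = pgofs - ei->fofs;
	size_t remaining = (size_t)(ei->len - ofs_in_ext) << blkbits;

	map_bh(bh_result, sb, ei->blk + ofs_in_ext);
	bh_result->b_size = min(bh_result->b_size, remaining);
}
```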
@@ -682,7 +1250,7 @@ static int __get_data_block(struct inode *inode, sector_t iblock, | |||
682 | goto put_out; | 1250 | goto put_out; |
683 | 1251 | ||
684 | if (dn.data_blkaddr != NULL_ADDR) { | 1252 | if (dn.data_blkaddr != NULL_ADDR) { |
685 | set_buffer_new(bh_result); | 1253 | clear_buffer_new(bh_result); |
686 | map_bh(bh_result, inode->i_sb, dn.data_blkaddr); | 1254 | map_bh(bh_result, inode->i_sb, dn.data_blkaddr); |
687 | } else if (create) { | 1255 | } else if (create) { |
688 | err = __allocate_data_block(&dn); | 1256 | err = __allocate_data_block(&dn); |
@@ -727,6 +1295,7 @@ get_next: | |||
727 | if (err) | 1295 | if (err) |
728 | goto sync_out; | 1296 | goto sync_out; |
729 | allocated = true; | 1297 | allocated = true; |
1298 | set_buffer_new(bh_result); | ||
730 | blkaddr = dn.data_blkaddr; | 1299 | blkaddr = dn.data_blkaddr; |
731 | } | 1300 | } |
732 | /* Provide more consecutive addresses to help readahead */ | 1301 |
@@ -813,8 +1382,10 @@ int do_write_data_page(struct page *page, struct f2fs_io_info *fio) | |||
813 | fio->blk_addr = dn.data_blkaddr; | 1382 | fio->blk_addr = dn.data_blkaddr; |
814 | 1383 | ||
815 | /* This page is already truncated */ | 1384 | /* This page is already truncated */ |
816 | if (fio->blk_addr == NULL_ADDR) | 1385 | if (fio->blk_addr == NULL_ADDR) { |
1386 | ClearPageUptodate(page); | ||
817 | goto out_writepage; | 1387 | goto out_writepage; |
1388 | } | ||
818 | 1389 | ||
819 | set_page_writeback(page); | 1390 | set_page_writeback(page); |
820 | 1391 | ||
@@ -827,10 +1398,15 @@ int do_write_data_page(struct page *page, struct f2fs_io_info *fio) | |||
827 | need_inplace_update(inode))) { | 1398 | need_inplace_update(inode))) { |
828 | rewrite_data_page(page, fio); | 1399 | rewrite_data_page(page, fio); |
829 | set_inode_flag(F2FS_I(inode), FI_UPDATE_WRITE); | 1400 | set_inode_flag(F2FS_I(inode), FI_UPDATE_WRITE); |
1401 | trace_f2fs_do_write_data_page(page, IPU); | ||
830 | } else { | 1402 | } else { |
831 | write_data_page(page, &dn, fio); | 1403 | write_data_page(page, &dn, fio); |
832 | update_extent_cache(&dn); | 1404 | set_data_blkaddr(&dn); |
1405 | f2fs_update_extent_cache(&dn); | ||
1406 | trace_f2fs_do_write_data_page(page, OPU); | ||
833 | set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE); | 1407 | set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE); |
1408 | if (page->index == 0) | ||
1409 | set_inode_flag(F2FS_I(inode), FI_FIRST_BLOCK_WRITTEN); | ||
834 | } | 1410 | } |
835 | out_writepage: | 1411 | out_writepage: |
836 | f2fs_put_dnode(&dn); | 1412 | f2fs_put_dnode(&dn); |
@@ -909,6 +1485,8 @@ done: | |||
909 | clear_cold_data(page); | 1485 | clear_cold_data(page); |
910 | out: | 1486 | out: |
911 | inode_dec_dirty_pages(inode); | 1487 | inode_dec_dirty_pages(inode); |
1488 | if (err) | ||
1489 | ClearPageUptodate(page); | ||
912 | unlock_page(page); | 1490 | unlock_page(page); |
913 | if (need_balance_fs) | 1491 | if (need_balance_fs) |
914 | f2fs_balance_fs(sbi); | 1492 | f2fs_balance_fs(sbi); |
@@ -935,7 +1513,6 @@ static int f2fs_write_data_pages(struct address_space *mapping, | |||
935 | { | 1513 | { |
936 | struct inode *inode = mapping->host; | 1514 | struct inode *inode = mapping->host; |
937 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); | 1515 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); |
938 | bool locked = false; | ||
939 | int ret; | 1516 | int ret; |
940 | long diff; | 1517 | long diff; |
941 | 1518 | ||
@@ -950,15 +1527,13 @@ static int f2fs_write_data_pages(struct address_space *mapping, | |||
950 | available_free_memory(sbi, DIRTY_DENTS)) | 1527 | available_free_memory(sbi, DIRTY_DENTS)) |
951 | goto skip_write; | 1528 | goto skip_write; |
952 | 1529 | ||
1530 | /* during POR (power-on recovery), there is no need to trigger writepage at all. */ | ||
1531 | if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) | ||
1532 | goto skip_write; | ||
1533 | |||
953 | diff = nr_pages_to_write(sbi, DATA, wbc); | 1534 | diff = nr_pages_to_write(sbi, DATA, wbc); |
954 | 1535 | ||
955 | if (!S_ISDIR(inode->i_mode)) { | ||
956 | mutex_lock(&sbi->writepages); | ||
957 | locked = true; | ||
958 | } | ||
959 | ret = write_cache_pages(mapping, wbc, __f2fs_writepage, mapping); | 1536 | ret = write_cache_pages(mapping, wbc, __f2fs_writepage, mapping); |
960 | if (locked) | ||
961 | mutex_unlock(&sbi->writepages); | ||
962 | 1537 | ||
963 | f2fs_submit_merged_bio(sbi, DATA, WRITE); | 1538 | f2fs_submit_merged_bio(sbi, DATA, WRITE); |
964 | 1539 | ||
@@ -1236,6 +1811,37 @@ static sector_t f2fs_bmap(struct address_space *mapping, sector_t block) | |||
1236 | return generic_block_bmap(mapping, block, get_data_block); | 1811 | return generic_block_bmap(mapping, block, get_data_block); |
1237 | } | 1812 | } |
1238 | 1813 | ||
1814 | void init_extent_cache_info(struct f2fs_sb_info *sbi) | ||
1815 | { | ||
1816 | INIT_RADIX_TREE(&sbi->extent_tree_root, GFP_NOIO); | ||
1817 | init_rwsem(&sbi->extent_tree_lock); | ||
1818 | INIT_LIST_HEAD(&sbi->extent_list); | ||
1819 | spin_lock_init(&sbi->extent_lock); | ||
1820 | sbi->total_ext_tree = 0; | ||
1821 | atomic_set(&sbi->total_ext_node, 0); | ||
1822 | } | ||
1823 | |||
1824 | int __init create_extent_cache(void) | ||
1825 | { | ||
1826 | extent_tree_slab = f2fs_kmem_cache_create("f2fs_extent_tree", | ||
1827 | sizeof(struct extent_tree)); | ||
1828 | if (!extent_tree_slab) | ||
1829 | return -ENOMEM; | ||
1830 | extent_node_slab = f2fs_kmem_cache_create("f2fs_extent_node", | ||
1831 | sizeof(struct extent_node)); | ||
1832 | if (!extent_node_slab) { | ||
1833 | kmem_cache_destroy(extent_tree_slab); | ||
1834 | return -ENOMEM; | ||
1835 | } | ||
1836 | return 0; | ||
1837 | } | ||
1838 | |||
1839 | void destroy_extent_cache(void) | ||
1840 | { | ||
1841 | kmem_cache_destroy(extent_node_slab); | ||
1842 | kmem_cache_destroy(extent_tree_slab); | ||
1843 | } | ||
1844 | |||
1239 | const struct address_space_operations f2fs_dblock_aops = { | 1845 | const struct address_space_operations f2fs_dblock_aops = { |
1240 | .readpage = f2fs_read_data_page, | 1846 | .readpage = f2fs_read_data_page, |
1241 | .readpages = f2fs_read_data_pages, | 1847 | .readpages = f2fs_read_data_pages, |
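create_extent_cache() and destroy_extent_cache() follow the usual paired slab lifecycle, with the error path above unwinding the tree slab when the node slab fails. A minimal pairing sketch (function names are placeholders; in the real module this runs from the f2fs init/exit path):

```c
static int __init example_init(void)
{
	int err = create_extent_cache();

	if (err)
		return err;	/* both slabs exist, or neither does */
	/* ... create the remaining caches, register the filesystem ... */
	return 0;
}

static void __exit example_exit(void)
{
	/* node slab is destroyed before the tree slab that indexes it */
	destroy_extent_cache();
}
```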
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index e671373cc8ab..f5388f37217e 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c | |||
@@ -35,6 +35,8 @@ static void update_general_status(struct f2fs_sb_info *sbi) | |||
35 | /* validation check of the segment numbers */ | 35 | /* validation check of the segment numbers */ |
36 | si->hit_ext = sbi->read_hit_ext; | 36 | si->hit_ext = sbi->read_hit_ext; |
37 | si->total_ext = sbi->total_hit_ext; | 37 | si->total_ext = sbi->total_hit_ext; |
38 | si->ext_tree = sbi->total_ext_tree; | ||
39 | si->ext_node = atomic_read(&sbi->total_ext_node); | ||
38 | si->ndirty_node = get_pages(sbi, F2FS_DIRTY_NODES); | 40 | si->ndirty_node = get_pages(sbi, F2FS_DIRTY_NODES); |
39 | si->ndirty_dent = get_pages(sbi, F2FS_DIRTY_DENTS); | 41 | si->ndirty_dent = get_pages(sbi, F2FS_DIRTY_DENTS); |
40 | si->ndirty_dirs = sbi->n_dirty_dirs; | 42 | si->ndirty_dirs = sbi->n_dirty_dirs; |
@@ -185,6 +187,9 @@ get_cache: | |||
185 | si->cache_mem += sbi->n_dirty_dirs * sizeof(struct inode_entry); | 187 | si->cache_mem += sbi->n_dirty_dirs * sizeof(struct inode_entry); |
186 | for (i = 0; i <= UPDATE_INO; i++) | 188 | for (i = 0; i <= UPDATE_INO; i++) |
187 | si->cache_mem += sbi->im[i].ino_num * sizeof(struct ino_entry); | 189 | si->cache_mem += sbi->im[i].ino_num * sizeof(struct ino_entry); |
190 | si->cache_mem += sbi->total_ext_tree * sizeof(struct extent_tree); | ||
191 | si->cache_mem += atomic_read(&sbi->total_ext_node) * | ||
192 | sizeof(struct extent_node); | ||
188 | 193 | ||
189 | si->page_mem = 0; | 194 | si->page_mem = 0; |
190 | npages = NODE_MAPPING(sbi)->nrpages; | 195 | npages = NODE_MAPPING(sbi)->nrpages; |
@@ -260,13 +265,20 @@ static int stat_show(struct seq_file *s, void *v) | |||
260 | seq_printf(s, "CP calls: %d\n", si->cp_count); | 265 | seq_printf(s, "CP calls: %d\n", si->cp_count); |
261 | seq_printf(s, "GC calls: %d (BG: %d)\n", | 266 | seq_printf(s, "GC calls: %d (BG: %d)\n", |
262 | si->call_count, si->bg_gc); | 267 | si->call_count, si->bg_gc); |
263 | seq_printf(s, " - data segments : %d\n", si->data_segs); | 268 | seq_printf(s, " - data segments : %d (%d)\n", |
264 | seq_printf(s, " - node segments : %d\n", si->node_segs); | 269 | si->data_segs, si->bg_data_segs); |
265 | seq_printf(s, "Try to move %d blocks\n", si->tot_blks); | 270 | seq_printf(s, " - node segments : %d (%d)\n", |
266 | seq_printf(s, " - data blocks : %d\n", si->data_blks); | 271 | si->node_segs, si->bg_node_segs); |
267 | seq_printf(s, " - node blocks : %d\n", si->node_blks); | 272 | seq_printf(s, "Try to move %d blocks (BG: %d)\n", si->tot_blks, |
273 | si->bg_data_blks + si->bg_node_blks); | ||
274 | seq_printf(s, " - data blocks : %d (%d)\n", si->data_blks, | ||
275 | si->bg_data_blks); | ||
276 | seq_printf(s, " - node blocks : %d (%d)\n", si->node_blks, | ||
277 | si->bg_node_blks); | ||
268 | seq_printf(s, "\nExtent Hit Ratio: %d / %d\n", | 278 | seq_printf(s, "\nExtent Hit Ratio: %d / %d\n", |
269 | si->hit_ext, si->total_ext); | 279 | si->hit_ext, si->total_ext); |
280 | seq_printf(s, "\nExtent Tree Count: %d\n", si->ext_tree); | ||
281 | seq_printf(s, "\nExtent Node Count: %d\n", si->ext_node); | ||
270 | seq_puts(s, "\nBalancing F2FS Async:\n"); | 282 | seq_puts(s, "\nBalancing F2FS Async:\n"); |
271 | seq_printf(s, " - inmem: %4d, wb: %4d\n", | 283 | seq_printf(s, " - inmem: %4d, wb: %4d\n", |
272 | si->inmem_pages, si->wb_pages); | 284 | si->inmem_pages, si->wb_pages); |
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index b74097a7f6d9..3a3302ab7871 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c | |||
@@ -59,9 +59,8 @@ static unsigned char f2fs_type_by_mode[S_IFMT >> S_SHIFT] = { | |||
59 | [S_IFLNK >> S_SHIFT] = F2FS_FT_SYMLINK, | 59 | [S_IFLNK >> S_SHIFT] = F2FS_FT_SYMLINK, |
60 | }; | 60 | }; |
61 | 61 | ||
62 | void set_de_type(struct f2fs_dir_entry *de, struct inode *inode) | 62 | void set_de_type(struct f2fs_dir_entry *de, umode_t mode) |
63 | { | 63 | { |
64 | umode_t mode = inode->i_mode; | ||
65 | de->file_type = f2fs_type_by_mode[(mode & S_IFMT) >> S_SHIFT]; | 64 | de->file_type = f2fs_type_by_mode[(mode & S_IFMT) >> S_SHIFT]; |
66 | } | 65 | } |
67 | 66 | ||
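Taking a umode_t instead of a whole inode lets callers stamp a dentry's file type when no in-memory inode exists, which the inode-less __f2fs_add_link() path below depends on. It also enables the fix in do_make_empty_dir() further down, where ".." is now typed from the parent's mode instead of the child's. A usage sketch (helper name invented):

```c
/* '.' takes the new directory's mode, '..' must take the parent's --
 * the old inode-based signature could not express the latter cleanly. */
static void stamp_dot_entries(struct f2fs_dir_entry *dot,
			      struct f2fs_dir_entry *dotdot,
			      struct inode *inode, struct inode *parent)
{
	set_de_type(dot, inode->i_mode);
	set_de_type(dotdot, parent->i_mode);
}
```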
@@ -127,22 +126,19 @@ struct f2fs_dir_entry *find_target_dentry(struct qstr *name, int *max_slots, | |||
127 | *max_slots = 0; | 126 | *max_slots = 0; |
128 | while (bit_pos < d->max) { | 127 | while (bit_pos < d->max) { |
129 | if (!test_bit_le(bit_pos, d->bitmap)) { | 128 | if (!test_bit_le(bit_pos, d->bitmap)) { |
130 | if (bit_pos == 0) | ||
131 | max_len = 1; | ||
132 | else if (!test_bit_le(bit_pos - 1, d->bitmap)) | ||
133 | max_len++; | ||
134 | bit_pos++; | 129 | bit_pos++; |
130 | max_len++; | ||
135 | continue; | 131 | continue; |
136 | } | 132 | } |
133 | |||
137 | de = &d->dentry[bit_pos]; | 134 | de = &d->dentry[bit_pos]; |
138 | if (early_match_name(name->len, namehash, de) && | 135 | if (early_match_name(name->len, namehash, de) && |
139 | !memcmp(d->filename[bit_pos], name->name, name->len)) | 136 | !memcmp(d->filename[bit_pos], name->name, name->len)) |
140 | goto found; | 137 | goto found; |
141 | 138 | ||
142 | if (max_slots && *max_slots >= 0 && max_len > *max_slots) { | 139 | if (max_slots && max_len > *max_slots) |
143 | *max_slots = max_len; | 140 | *max_slots = max_len; |
144 | max_len = 0; | 141 | max_len = 0; |
145 | } | ||
146 | 142 | ||
147 | /* a zero name_len here indicates a remaining bug */ | 143 |
148 | if (unlikely(!de->name_len)) | 144 | if (unlikely(!de->name_len)) |
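The rewritten loop above collapses the old three-way max_len bookkeeping into one increment per clear bit, publishing the run length into *max_slots only when a set bit ends the run. A compressed model of the scan (simplified: the real code advances past a used dentry by its slot count and re-checks max_len once more after the loop):

```c
/* Simplified model of the free-slot scan in find_target_dentry(). */
static void track_longest_free_run(const unsigned long *bitmap, int max,
				   int *max_slots)
{
	int bit_pos = 0, max_len = 0;

	*max_slots = 0;
	while (bit_pos < max) {
		if (!test_bit_le(bit_pos, bitmap)) {
			bit_pos++;
			max_len++;		/* extend the current free run */
			continue;
		}
		if (max_len > *max_slots)	/* a set bit ends the run */
			*max_slots = max_len;
		max_len = 0;
		bit_pos++;	/* simplified: real code skips dentry slots */
	}
	if (max_len > *max_slots)		/* account a trailing run */
		*max_slots = max_len;
}
```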
@@ -219,14 +215,14 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir, | |||
219 | unsigned int max_depth; | 215 | unsigned int max_depth; |
220 | unsigned int level; | 216 | unsigned int level; |
221 | 217 | ||
218 | *res_page = NULL; | ||
219 | |||
222 | if (f2fs_has_inline_dentry(dir)) | 220 | if (f2fs_has_inline_dentry(dir)) |
223 | return find_in_inline_dir(dir, child, res_page); | 221 | return find_in_inline_dir(dir, child, res_page); |
224 | 222 | ||
225 | if (npages == 0) | 223 | if (npages == 0) |
226 | return NULL; | 224 | return NULL; |
227 | 225 | ||
228 | *res_page = NULL; | ||
229 | |||
230 | name_hash = f2fs_dentry_hash(child); | 226 | name_hash = f2fs_dentry_hash(child); |
231 | max_depth = F2FS_I(dir)->i_current_depth; | 227 | max_depth = F2FS_I(dir)->i_current_depth; |
232 | 228 | ||
@@ -285,7 +281,7 @@ void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de, | |||
285 | lock_page(page); | 281 | lock_page(page); |
286 | f2fs_wait_on_page_writeback(page, type); | 282 | f2fs_wait_on_page_writeback(page, type); |
287 | de->ino = cpu_to_le32(inode->i_ino); | 283 | de->ino = cpu_to_le32(inode->i_ino); |
288 | set_de_type(de, inode); | 284 | set_de_type(de, inode->i_mode); |
289 | f2fs_dentry_kunmap(dir, page); | 285 | f2fs_dentry_kunmap(dir, page); |
290 | set_page_dirty(page); | 286 | set_page_dirty(page); |
291 | dir->i_mtime = dir->i_ctime = CURRENT_TIME; | 287 | dir->i_mtime = dir->i_ctime = CURRENT_TIME; |
@@ -331,14 +327,14 @@ void do_make_empty_dir(struct inode *inode, struct inode *parent, | |||
331 | de->hash_code = 0; | 327 | de->hash_code = 0; |
332 | de->ino = cpu_to_le32(inode->i_ino); | 328 | de->ino = cpu_to_le32(inode->i_ino); |
333 | memcpy(d->filename[0], ".", 1); | 329 | memcpy(d->filename[0], ".", 1); |
334 | set_de_type(de, inode); | 330 | set_de_type(de, inode->i_mode); |
335 | 331 | ||
336 | de = &d->dentry[1]; | 332 | de = &d->dentry[1]; |
337 | de->hash_code = 0; | 333 | de->hash_code = 0; |
338 | de->name_len = cpu_to_le16(2); | 334 | de->name_len = cpu_to_le16(2); |
339 | de->ino = cpu_to_le32(parent->i_ino); | 335 | de->ino = cpu_to_le32(parent->i_ino); |
340 | memcpy(d->filename[1], "..", 2); | 336 | memcpy(d->filename[1], "..", 2); |
341 | set_de_type(de, inode); | 337 | set_de_type(de, parent->i_mode); |
342 | 338 | ||
343 | test_and_set_bit_le(0, (void *)d->bitmap); | 339 | test_and_set_bit_le(0, (void *)d->bitmap); |
344 | test_and_set_bit_le(1, (void *)d->bitmap); | 340 | test_and_set_bit_le(1, (void *)d->bitmap); |
@@ -435,7 +431,7 @@ error: | |||
435 | void update_parent_metadata(struct inode *dir, struct inode *inode, | 431 | void update_parent_metadata(struct inode *dir, struct inode *inode, |
436 | unsigned int current_depth) | 432 | unsigned int current_depth) |
437 | { | 433 | { |
438 | if (is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) { | 434 | if (inode && is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) { |
439 | if (S_ISDIR(inode->i_mode)) { | 435 | if (S_ISDIR(inode->i_mode)) { |
440 | inc_nlink(dir); | 436 | inc_nlink(dir); |
441 | set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); | 437 | set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); |
@@ -450,7 +446,7 @@ void update_parent_metadata(struct inode *dir, struct inode *inode, | |||
450 | set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); | 446 | set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); |
451 | } | 447 | } |
452 | 448 | ||
453 | if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK)) | 449 | if (inode && is_inode_flag_set(F2FS_I(inode), FI_INC_LINK)) |
454 | clear_inode_flag(F2FS_I(inode), FI_INC_LINK); | 450 | clear_inode_flag(F2FS_I(inode), FI_INC_LINK); |
455 | } | 451 | } |
456 | 452 | ||
@@ -474,30 +470,47 @@ next: | |||
474 | goto next; | 470 | goto next; |
475 | } | 471 | } |
476 | 472 | ||
473 | void f2fs_update_dentry(nid_t ino, umode_t mode, struct f2fs_dentry_ptr *d, | ||
474 | const struct qstr *name, f2fs_hash_t name_hash, | ||
475 | unsigned int bit_pos) | ||
476 | { | ||
477 | struct f2fs_dir_entry *de; | ||
478 | int slots = GET_DENTRY_SLOTS(name->len); | ||
479 | int i; | ||
480 | |||
481 | de = &d->dentry[bit_pos]; | ||
482 | de->hash_code = name_hash; | ||
483 | de->name_len = cpu_to_le16(name->len); | ||
484 | memcpy(d->filename[bit_pos], name->name, name->len); | ||
485 | de->ino = cpu_to_le32(ino); | ||
486 | set_de_type(de, mode); | ||
487 | for (i = 0; i < slots; i++) | ||
488 | test_and_set_bit_le(bit_pos + i, (void *)d->bitmap); | ||
489 | } | ||
490 | |||
477 | /* | 491 | /* |
478 | * Caller should grab and release a rwsem by calling f2fs_lock_op() and | 492 | * Caller should grab and release a rwsem by calling f2fs_lock_op() and |
479 | * f2fs_unlock_op(). | 493 | * f2fs_unlock_op(). |
480 | */ | 494 | */ |
481 | int __f2fs_add_link(struct inode *dir, const struct qstr *name, | 495 | int __f2fs_add_link(struct inode *dir, const struct qstr *name, |
482 | struct inode *inode) | 496 | struct inode *inode, nid_t ino, umode_t mode) |
483 | { | 497 | { |
484 | unsigned int bit_pos; | 498 | unsigned int bit_pos; |
485 | unsigned int level; | 499 | unsigned int level; |
486 | unsigned int current_depth; | 500 | unsigned int current_depth; |
487 | unsigned long bidx, block; | 501 | unsigned long bidx, block; |
488 | f2fs_hash_t dentry_hash; | 502 | f2fs_hash_t dentry_hash; |
489 | struct f2fs_dir_entry *de; | ||
490 | unsigned int nbucket, nblock; | 503 | unsigned int nbucket, nblock; |
491 | size_t namelen = name->len; | 504 | size_t namelen = name->len; |
492 | struct page *dentry_page = NULL; | 505 | struct page *dentry_page = NULL; |
493 | struct f2fs_dentry_block *dentry_blk = NULL; | 506 | struct f2fs_dentry_block *dentry_blk = NULL; |
507 | struct f2fs_dentry_ptr d; | ||
494 | int slots = GET_DENTRY_SLOTS(namelen); | 508 | int slots = GET_DENTRY_SLOTS(namelen); |
495 | struct page *page; | 509 | struct page *page = NULL; |
496 | int err = 0; | 510 | int err = 0; |
497 | int i; | ||
498 | 511 | ||
499 | if (f2fs_has_inline_dentry(dir)) { | 512 | if (f2fs_has_inline_dentry(dir)) { |
500 | err = f2fs_add_inline_entry(dir, name, inode); | 513 | err = f2fs_add_inline_entry(dir, name, inode, ino, mode); |
501 | if (!err || err != -EAGAIN) | 514 | if (!err || err != -EAGAIN) |
502 | return err; | 515 | return err; |
503 | else | 516 | else |
@@ -547,30 +560,31 @@ start: | |||
547 | add_dentry: | 560 | add_dentry: |
548 | f2fs_wait_on_page_writeback(dentry_page, DATA); | 561 | f2fs_wait_on_page_writeback(dentry_page, DATA); |
549 | 562 | ||
550 | down_write(&F2FS_I(inode)->i_sem); | 563 | if (inode) { |
551 | page = init_inode_metadata(inode, dir, name, NULL); | 564 | down_write(&F2FS_I(inode)->i_sem); |
552 | if (IS_ERR(page)) { | 565 | page = init_inode_metadata(inode, dir, name, NULL); |
553 | err = PTR_ERR(page); | 566 | if (IS_ERR(page)) { |
554 | goto fail; | 567 | err = PTR_ERR(page); |
568 | goto fail; | ||
569 | } | ||
555 | } | 570 | } |
556 | de = &dentry_blk->dentry[bit_pos]; | 571 | |
557 | de->hash_code = dentry_hash; | 572 | make_dentry_ptr(&d, (void *)dentry_blk, 1); |
558 | de->name_len = cpu_to_le16(namelen); | 573 | f2fs_update_dentry(ino, mode, &d, name, dentry_hash, bit_pos); |
559 | memcpy(dentry_blk->filename[bit_pos], name->name, name->len); | 574 | |
560 | de->ino = cpu_to_le32(inode->i_ino); | ||
561 | set_de_type(de, inode); | ||
562 | for (i = 0; i < slots; i++) | ||
563 | test_and_set_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap); | ||
564 | set_page_dirty(dentry_page); | 575 | set_page_dirty(dentry_page); |
565 | 576 | ||
566 | /* we don't need to mark_inode_dirty now */ | 577 | if (inode) { |
567 | F2FS_I(inode)->i_pino = dir->i_ino; | 578 | /* we don't need to mark_inode_dirty now */ |
568 | update_inode(inode, page); | 579 | F2FS_I(inode)->i_pino = dir->i_ino; |
569 | f2fs_put_page(page, 1); | 580 | update_inode(inode, page); |
581 | f2fs_put_page(page, 1); | ||
582 | } | ||
570 | 583 | ||
571 | update_parent_metadata(dir, inode, current_depth); | 584 | update_parent_metadata(dir, inode, current_depth); |
572 | fail: | 585 | fail: |
573 | up_write(&F2FS_I(inode)->i_sem); | 586 | if (inode) |
587 | up_write(&F2FS_I(inode)->i_sem); | ||
574 | 588 | ||
575 | if (is_inode_flag_set(F2FS_I(dir), FI_UPDATE_DIR)) { | 589 | if (is_inode_flag_set(F2FS_I(dir), FI_UPDATE_DIR)) { |
576 | update_inode_page(dir); | 590 | update_inode_page(dir); |
@@ -669,6 +683,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, | |||
669 | if (bit_pos == NR_DENTRY_IN_BLOCK) { | 683 | if (bit_pos == NR_DENTRY_IN_BLOCK) { |
670 | truncate_hole(dir, page->index, page->index + 1); | 684 | truncate_hole(dir, page->index, page->index + 1); |
671 | clear_page_dirty_for_io(page); | 685 | clear_page_dirty_for_io(page); |
686 | ClearPagePrivate(page); | ||
672 | ClearPageUptodate(page); | 687 | ClearPageUptodate(page); |
673 | inode_dec_dirty_pages(dir); | 688 | inode_dec_dirty_pages(dir); |
674 | } | 689 | } |
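With (ino, mode) passed explicitly, __f2fs_add_link() can now add an entry with no in-memory inode at all: the i_sem, init_inode_metadata() and update_inode() steps are simply skipped when inode is NULL. The two call shapes side by side (illustrative wrapper; the inode-less form is what later inline-dots callers in this series use):

```c
static int add_link_examples(struct inode *dir, struct inode *inode,
			     const struct qstr *name,
			     nid_t ino, umode_t mode)
{
	int err;

	/* classic path: f2fs_add_link() forwards the inode's own identity */
	err = __f2fs_add_link(dir, name, inode, inode->i_ino, inode->i_mode);
	if (err)
		return err;

	/* inode-less path: only the raw (ino, mode) pair is recorded */
	return __f2fs_add_link(dir, name, NULL, ino, mode);
}
```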
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 7fa3313ab0e2..c06a25e5cec3 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h | |||
@@ -50,6 +50,7 @@ | |||
50 | #define F2FS_MOUNT_FLUSH_MERGE 0x00000400 | 50 | #define F2FS_MOUNT_FLUSH_MERGE 0x00000400 |
51 | #define F2FS_MOUNT_NOBARRIER 0x00000800 | 51 | #define F2FS_MOUNT_NOBARRIER 0x00000800 |
52 | #define F2FS_MOUNT_FASTBOOT 0x00001000 | 52 | #define F2FS_MOUNT_FASTBOOT 0x00001000 |
53 | #define F2FS_MOUNT_EXTENT_CACHE 0x00002000 | ||
53 | 54 | ||
54 | #define clear_opt(sbi, option) (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option) | 55 | #define clear_opt(sbi, option) (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option) |
55 | #define set_opt(sbi, option) (sbi->mount_opt.opt |= F2FS_MOUNT_##option) | 56 | #define set_opt(sbi, option) (sbi->mount_opt.opt |= F2FS_MOUNT_##option) |
@@ -102,6 +103,7 @@ enum { | |||
102 | CP_UMOUNT, | 103 | CP_UMOUNT, |
103 | CP_FASTBOOT, | 104 | CP_FASTBOOT, |
104 | CP_SYNC, | 105 | CP_SYNC, |
106 | CP_RECOVERY, | ||
105 | CP_DISCARD, | 107 | CP_DISCARD, |
106 | }; | 108 | }; |
107 | 109 | ||
@@ -216,6 +218,15 @@ static inline bool __has_cursum_space(struct f2fs_summary_block *sum, int size, | |||
216 | #define F2FS_IOC_RELEASE_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 4) | 218 | #define F2FS_IOC_RELEASE_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 4) |
217 | #define F2FS_IOC_ABORT_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 5) | 219 | #define F2FS_IOC_ABORT_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 5) |
218 | 220 | ||
221 | /* | ||
222 | * Should be the same as XFS_IOC_GOINGDOWN. | ||
223 | * Flags for the going-down operation used by F2FS_IOC_SHUTDOWN. | ||
224 | */ | ||
225 | #define F2FS_IOC_SHUTDOWN _IOR('X', 125, __u32) /* Shutdown */ | ||
226 | #define F2FS_GOING_DOWN_FULLSYNC 0x0 /* going down with full sync */ | ||
227 | #define F2FS_GOING_DOWN_METASYNC 0x1 /* going down with a metadata checkpoint only */ | ||
228 | #define F2FS_GOING_DOWN_NOSYNC 0x2 /* going down without any sync */ | ||
229 | |||
219 | #if defined(__KERNEL__) && defined(CONFIG_COMPAT) | 230 | #if defined(__KERNEL__) && defined(CONFIG_COMPAT) |
220 | /* | 231 | /* |
221 | * ioctl commands in 32 bit emulation | 232 | * ioctl commands in 32 bit emulation |
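The new F2FS_IOC_SHUTDOWN ioctl is driven from userspace with a single __u32 flag word; the handler in file.c below requires CAP_SYS_ADMIN. A runnable example (the mount-point path is a placeholder):

```c
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/types.h>

#define F2FS_IOC_SHUTDOWN		_IOR('X', 125, __u32)
#define F2FS_GOING_DOWN_FULLSYNC	0x0	/* freeze bdev, then stop */
#define F2FS_GOING_DOWN_METASYNC	0x1	/* checkpoint, then stop */
#define F2FS_GOING_DOWN_NOSYNC		0x2	/* stop immediately */

int main(void)
{
	__u32 flags = F2FS_GOING_DOWN_METASYNC;
	int fd = open("/mnt/f2fs", O_RDONLY);	/* placeholder mount point */

	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (ioctl(fd, F2FS_IOC_SHUTDOWN, &flags) < 0) {
		perror("F2FS_IOC_SHUTDOWN");
		close(fd);
		return 1;
	}
	close(fd);
	return 0;
}
```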
@@ -273,14 +284,34 @@ enum { | |||
273 | 284 | ||
274 | #define MAX_DIR_RA_PAGES 4 /* maximum ra pages of dir */ | 285 | #define MAX_DIR_RA_PAGES 4 /* maximum ra pages of dir */ |
275 | 286 | ||
287 | /* vector size for gang look-up in the radix-tree-based extent cache */ | ||
288 | #define EXT_TREE_VEC_SIZE 64 | ||
289 | |||
276 | /* for in-memory extent cache entry */ | 290 | /* for in-memory extent cache entry */ |
277 | #define F2FS_MIN_EXTENT_LEN 16 /* minimum extent length */ | 291 | #define F2FS_MIN_EXTENT_LEN 64 /* minimum extent length */ |
292 | |||
293 | /* number of extent infos to shrink from the extent cache at once */ | ||
294 | #define EXTENT_CACHE_SHRINK_NUMBER 128 | ||
278 | 295 | ||
279 | struct extent_info { | 296 | struct extent_info { |
280 | rwlock_t ext_lock; /* rwlock for consistency */ | 297 | unsigned int fofs; /* start offset in a file */ |
281 | unsigned int fofs; /* start offset in a file */ | 298 | u32 blk; /* start block address of the extent */ |
282 | u32 blk_addr; /* start block address of the extent */ | 299 | unsigned int len; /* length of the extent */ |
283 | unsigned int len; /* length of the extent */ | 300 | }; |
301 | |||
302 | struct extent_node { | ||
303 | struct rb_node rb_node; /* rb node located in rb-tree */ | ||
304 | struct list_head list; /* node in global extent list of sbi */ | ||
305 | struct extent_info ei; /* extent info */ | ||
306 | }; | ||
307 | |||
308 | struct extent_tree { | ||
309 | nid_t ino; /* inode number */ | ||
310 | struct rb_root root; /* root of extent info rb-tree */ | ||
311 | struct extent_node *cached_en; /* recently accessed extent node */ | ||
312 | rwlock_t lock; /* protect extent info rb-tree */ | ||
313 | atomic_t refcount; /* reference count of rb-tree */ | ||
314 | unsigned int count; /* # of extent nodes in the rb-tree */ | ||
284 | }; | 315 | }; |
285 | 316 | ||
286 | /* | 317 | /* |
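The per-inode single extent_info above is now complemented by the per-sb rb-tree cache: each extent_node carries one extent_info, and extent_tree indexes the nodes by file offset, with cached_en as a one-entry fast path. A hedged lookup sketch consistent with these structures (the real lookup is added elsewhere in this patch, so the helper below is an assumption):

```c
static struct extent_node *et_lookup_sketch(struct extent_tree *et,
					    unsigned int fofs)
{
	struct rb_node *node = et->root.rb_node;
	struct extent_node *en = et->cached_en;

	/* fast path: the most recently hit node */
	if (en && en->ei.fofs <= fofs && fofs < en->ei.fofs + en->ei.len)
		return en;

	while (node) {	/* keyed by the [fofs, fofs + len) interval */
		en = rb_entry(node, struct extent_node, rb_node);
		if (fofs < en->ei.fofs)
			node = node->rb_left;
		else if (fofs >= en->ei.fofs + en->ei.len)
			node = node->rb_right;
		else
			return en;	/* fofs falls inside this extent */
	}
	return NULL;
}
```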
@@ -309,6 +340,7 @@ struct f2fs_inode_info { | |||
309 | nid_t i_xattr_nid; /* node id that contains xattrs */ | 340 | nid_t i_xattr_nid; /* node id that contains xattrs */ |
310 | unsigned long long xattr_ver; /* cp version of xattr modification */ | 341 | unsigned long long xattr_ver; /* cp version of xattr modification */ |
311 | struct extent_info ext; /* in-memory extent cache entry */ | 342 | struct extent_info ext; /* in-memory extent cache entry */ |
343 | rwlock_t ext_lock; /* rwlock for single extent cache */ | ||
312 | struct inode_entry *dirty_dir; /* the pointer of dirty dir */ | 344 | struct inode_entry *dirty_dir; /* the pointer of dirty dir */ |
313 | 345 | ||
314 | struct radix_tree_root inmem_root; /* radix tree for inmem pages */ | 346 | struct radix_tree_root inmem_root; /* radix tree for inmem pages */ |
@@ -319,21 +351,51 @@ struct f2fs_inode_info { | |||
319 | static inline void get_extent_info(struct extent_info *ext, | 351 | static inline void get_extent_info(struct extent_info *ext, |
320 | struct f2fs_extent i_ext) | 352 | struct f2fs_extent i_ext) |
321 | { | 353 | { |
322 | write_lock(&ext->ext_lock); | ||
323 | ext->fofs = le32_to_cpu(i_ext.fofs); | 354 | ext->fofs = le32_to_cpu(i_ext.fofs); |
324 | ext->blk_addr = le32_to_cpu(i_ext.blk_addr); | 355 | ext->blk = le32_to_cpu(i_ext.blk); |
325 | ext->len = le32_to_cpu(i_ext.len); | 356 | ext->len = le32_to_cpu(i_ext.len); |
326 | write_unlock(&ext->ext_lock); | ||
327 | } | 357 | } |
328 | 358 | ||
329 | static inline void set_raw_extent(struct extent_info *ext, | 359 | static inline void set_raw_extent(struct extent_info *ext, |
330 | struct f2fs_extent *i_ext) | 360 | struct f2fs_extent *i_ext) |
331 | { | 361 | { |
332 | read_lock(&ext->ext_lock); | ||
333 | i_ext->fofs = cpu_to_le32(ext->fofs); | 362 | i_ext->fofs = cpu_to_le32(ext->fofs); |
334 | i_ext->blk_addr = cpu_to_le32(ext->blk_addr); | 363 | i_ext->blk = cpu_to_le32(ext->blk); |
335 | i_ext->len = cpu_to_le32(ext->len); | 364 | i_ext->len = cpu_to_le32(ext->len); |
336 | read_unlock(&ext->ext_lock); | 365 | } |
366 | |||
367 | static inline void set_extent_info(struct extent_info *ei, unsigned int fofs, | ||
368 | u32 blk, unsigned int len) | ||
369 | { | ||
370 | ei->fofs = fofs; | ||
371 | ei->blk = blk; | ||
372 | ei->len = len; | ||
373 | } | ||
374 | |||
375 | static inline bool __is_extent_same(struct extent_info *ei1, | ||
376 | struct extent_info *ei2) | ||
377 | { | ||
378 | return (ei1->fofs == ei2->fofs && ei1->blk == ei2->blk && | ||
379 | ei1->len == ei2->len); | ||
380 | } | ||
381 | |||
382 | static inline bool __is_extent_mergeable(struct extent_info *back, | ||
383 | struct extent_info *front) | ||
384 | { | ||
385 | return (back->fofs + back->len == front->fofs && | ||
386 | back->blk + back->len == front->blk); | ||
387 | } | ||
388 | |||
389 | static inline bool __is_back_mergeable(struct extent_info *cur, | ||
390 | struct extent_info *back) | ||
391 | { | ||
392 | return __is_extent_mergeable(back, cur); | ||
393 | } | ||
394 | |||
395 | static inline bool __is_front_mergeable(struct extent_info *cur, | ||
396 | struct extent_info *front) | ||
397 | { | ||
398 | return __is_extent_mergeable(cur, front); | ||
337 | } | 399 | } |
338 | 400 | ||
339 | struct f2fs_nm_info { | 401 | struct f2fs_nm_info { |
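A worked example for the merge predicates above (all values invented): back = {fofs = 100, blk = 5000, len = 4} covers pages 100..103 and front = {fofs = 104, blk = 5004, len = 8} covers 104..111; they are contiguous both in the file and on disk, so __is_extent_mergeable(&back, &front) holds and the pair collapses into {fofs = 100, blk = 5000, len = 12}.

```c
/* Minimal merge step built on the predicate; 'front' is dropped after. */
static void merge_in_place(struct extent_info *back,
			   struct extent_info *front)
{
	if (__is_extent_mergeable(back, front))
		back->len += front->len;
}
```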
@@ -502,6 +564,10 @@ enum page_type { | |||
502 | META, | 564 | META, |
503 | NR_PAGE_TYPE, | 565 | NR_PAGE_TYPE, |
504 | META_FLUSH, | 566 | META_FLUSH, |
567 | INMEM, /* the types below are used by tracepoints only. */ | ||
568 | INMEM_DROP, | ||
569 | IPU, | ||
570 | OPU, | ||
505 | }; | 571 | }; |
506 | 572 | ||
507 | struct f2fs_io_info { | 573 | struct f2fs_io_info { |
@@ -559,7 +625,6 @@ struct f2fs_sb_info { | |||
559 | struct mutex cp_mutex; /* checkpoint procedure lock */ | 625 | struct mutex cp_mutex; /* checkpoint procedure lock */ |
560 | struct rw_semaphore cp_rwsem; /* blocking FS operations */ | 626 | struct rw_semaphore cp_rwsem; /* blocking FS operations */ |
561 | struct rw_semaphore node_write; /* locking node writes */ | 627 | struct rw_semaphore node_write; /* locking node writes */ |
562 | struct mutex writepages; /* mutex for writepages() */ | ||
563 | wait_queue_head_t cp_wait; | 628 | wait_queue_head_t cp_wait; |
564 | 629 | ||
565 | struct inode_management im[MAX_INO_ENTRY]; /* manage inode cache */ | 630 | struct inode_management im[MAX_INO_ENTRY]; /* manage inode cache */ |
@@ -571,6 +636,14 @@ struct f2fs_sb_info { | |||
571 | struct list_head dir_inode_list; /* dir inode list */ | 636 | struct list_head dir_inode_list; /* dir inode list */ |
572 | spinlock_t dir_inode_lock; /* for dir inode list lock */ | 637 | spinlock_t dir_inode_lock; /* for dir inode list lock */ |
573 | 638 | ||
639 | /* for extent tree cache */ | ||
640 | struct radix_tree_root extent_tree_root;/* cache extent cache entries */ | ||
641 | struct rw_semaphore extent_tree_lock; /* locking extent radix tree */ | ||
642 | struct list_head extent_list; /* lru list for shrinker */ | ||
643 | spinlock_t extent_lock; /* locking extent lru list */ | ||
644 | int total_ext_tree; /* extent tree count */ | ||
645 | atomic_t total_ext_node; /* extent info count */ | ||
646 | |||
574 | /* basic filesystem units */ | 647 | /* basic filesystem units */ |
575 | unsigned int log_sectors_per_block; /* log2 sectors per block */ | 648 | unsigned int log_sectors_per_block; /* log2 sectors per block */ |
576 | unsigned int log_blocksize; /* log2 block size */ | 649 | unsigned int log_blocksize; /* log2 block size */ |
@@ -920,12 +993,17 @@ static inline unsigned long __bitmap_size(struct f2fs_sb_info *sbi, int flag) | |||
920 | return 0; | 993 | return 0; |
921 | } | 994 | } |
922 | 995 | ||
996 | static inline block_t __cp_payload(struct f2fs_sb_info *sbi) | ||
997 | { | ||
998 | return le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload); | ||
999 | } | ||
1000 | |||
923 | static inline void *__bitmap_ptr(struct f2fs_sb_info *sbi, int flag) | 1001 | static inline void *__bitmap_ptr(struct f2fs_sb_info *sbi, int flag) |
924 | { | 1002 | { |
925 | struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); | 1003 | struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); |
926 | int offset; | 1004 | int offset; |
927 | 1005 | ||
928 | if (le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload) > 0) { | 1006 | if (__cp_payload(sbi) > 0) { |
929 | if (flag == NAT_BITMAP) | 1007 | if (flag == NAT_BITMAP) |
930 | return &ckpt->sit_nat_version_bitmap; | 1008 | return &ckpt->sit_nat_version_bitmap; |
931 | else | 1009 | else |
@@ -1166,8 +1244,10 @@ enum { | |||
1166 | FI_NEED_IPU, /* used for ipu per file */ | 1244 | FI_NEED_IPU, /* used for ipu per file */ |
1167 | FI_ATOMIC_FILE, /* indicate atomic file */ | 1245 | FI_ATOMIC_FILE, /* indicate atomic file */ |
1168 | FI_VOLATILE_FILE, /* indicate volatile file */ | 1246 | FI_VOLATILE_FILE, /* indicate volatile file */ |
1247 | FI_FIRST_BLOCK_WRITTEN, /* indicate #0 data block was written */ | ||
1169 | FI_DROP_CACHE, /* drop dirty page cache */ | 1248 | FI_DROP_CACHE, /* drop dirty page cache */ |
1170 | FI_DATA_EXIST, /* indicate data exists */ | 1249 | FI_DATA_EXIST, /* indicate data exists */ |
1250 | FI_INLINE_DOTS, /* indicate inline dot dentries */ | ||
1171 | }; | 1251 | }; |
1172 | 1252 | ||
1173 | static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag) | 1253 | static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag) |
@@ -1204,6 +1284,8 @@ static inline void get_inline_info(struct f2fs_inode_info *fi, | |||
1204 | set_inode_flag(fi, FI_INLINE_DENTRY); | 1284 | set_inode_flag(fi, FI_INLINE_DENTRY); |
1205 | if (ri->i_inline & F2FS_DATA_EXIST) | 1285 | if (ri->i_inline & F2FS_DATA_EXIST) |
1206 | set_inode_flag(fi, FI_DATA_EXIST); | 1286 | set_inode_flag(fi, FI_DATA_EXIST); |
1287 | if (ri->i_inline & F2FS_INLINE_DOTS) | ||
1288 | set_inode_flag(fi, FI_INLINE_DOTS); | ||
1207 | } | 1289 | } |
1208 | 1290 | ||
1209 | static inline void set_raw_inline(struct f2fs_inode_info *fi, | 1291 | static inline void set_raw_inline(struct f2fs_inode_info *fi, |
@@ -1219,6 +1301,8 @@ static inline void set_raw_inline(struct f2fs_inode_info *fi, | |||
1219 | ri->i_inline |= F2FS_INLINE_DENTRY; | 1301 | ri->i_inline |= F2FS_INLINE_DENTRY; |
1220 | if (is_inode_flag_set(fi, FI_DATA_EXIST)) | 1302 | if (is_inode_flag_set(fi, FI_DATA_EXIST)) |
1221 | ri->i_inline |= F2FS_DATA_EXIST; | 1303 | ri->i_inline |= F2FS_DATA_EXIST; |
1304 | if (is_inode_flag_set(fi, FI_INLINE_DOTS)) | ||
1305 | ri->i_inline |= F2FS_INLINE_DOTS; | ||
1222 | } | 1306 | } |
1223 | 1307 | ||
1224 | static inline int f2fs_has_inline_xattr(struct inode *inode) | 1308 | static inline int f2fs_has_inline_xattr(struct inode *inode) |
@@ -1264,6 +1348,11 @@ static inline int f2fs_exist_data(struct inode *inode) | |||
1264 | return is_inode_flag_set(F2FS_I(inode), FI_DATA_EXIST); | 1348 | return is_inode_flag_set(F2FS_I(inode), FI_DATA_EXIST); |
1265 | } | 1349 | } |
1266 | 1350 | ||
1351 | static inline int f2fs_has_inline_dots(struct inode *inode) | ||
1352 | { | ||
1353 | return is_inode_flag_set(F2FS_I(inode), FI_INLINE_DOTS); | ||
1354 | } | ||
1355 | |||
1267 | static inline bool f2fs_is_atomic_file(struct inode *inode) | 1356 | static inline bool f2fs_is_atomic_file(struct inode *inode) |
1268 | { | 1357 | { |
1269 | return is_inode_flag_set(F2FS_I(inode), FI_ATOMIC_FILE); | 1358 | return is_inode_flag_set(F2FS_I(inode), FI_ATOMIC_FILE); |
@@ -1274,6 +1363,11 @@ static inline bool f2fs_is_volatile_file(struct inode *inode) | |||
1274 | return is_inode_flag_set(F2FS_I(inode), FI_VOLATILE_FILE); | 1363 | return is_inode_flag_set(F2FS_I(inode), FI_VOLATILE_FILE); |
1275 | } | 1364 | } |
1276 | 1365 | ||
1366 | static inline bool f2fs_is_first_block_written(struct inode *inode) | ||
1367 | { | ||
1368 | return is_inode_flag_set(F2FS_I(inode), FI_FIRST_BLOCK_WRITTEN); | ||
1369 | } | ||
1370 | |||
1277 | static inline bool f2fs_is_drop_cache(struct inode *inode) | 1371 | static inline bool f2fs_is_drop_cache(struct inode *inode) |
1278 | { | 1372 | { |
1279 | return is_inode_flag_set(F2FS_I(inode), FI_DROP_CACHE); | 1373 | return is_inode_flag_set(F2FS_I(inode), FI_DROP_CACHE); |
@@ -1290,12 +1384,6 @@ static inline int f2fs_has_inline_dentry(struct inode *inode) | |||
1290 | return is_inode_flag_set(F2FS_I(inode), FI_INLINE_DENTRY); | 1384 | return is_inode_flag_set(F2FS_I(inode), FI_INLINE_DENTRY); |
1291 | } | 1385 | } |
1292 | 1386 | ||
1293 | static inline void *inline_dentry_addr(struct page *page) | ||
1294 | { | ||
1295 | struct f2fs_inode *ri = F2FS_INODE(page); | ||
1296 | return (void *)&(ri->i_addr[1]); | ||
1297 | } | ||
1298 | |||
1299 | static inline void f2fs_dentry_kunmap(struct inode *dir, struct page *page) | 1387 | static inline void f2fs_dentry_kunmap(struct inode *dir, struct page *page) |
1300 | { | 1388 | { |
1301 | if (!f2fs_has_inline_dentry(dir)) | 1389 | if (!f2fs_has_inline_dentry(dir)) |
@@ -1363,7 +1451,7 @@ struct dentry *f2fs_get_parent(struct dentry *child); | |||
1363 | * dir.c | 1451 | * dir.c |
1364 | */ | 1452 | */ |
1365 | extern unsigned char f2fs_filetype_table[F2FS_FT_MAX]; | 1453 | extern unsigned char f2fs_filetype_table[F2FS_FT_MAX]; |
1366 | void set_de_type(struct f2fs_dir_entry *, struct inode *); | 1454 | void set_de_type(struct f2fs_dir_entry *, umode_t); |
1367 | struct f2fs_dir_entry *find_target_dentry(struct qstr *, int *, | 1455 | struct f2fs_dir_entry *find_target_dentry(struct qstr *, int *, |
1368 | struct f2fs_dentry_ptr *); | 1456 | struct f2fs_dentry_ptr *); |
1369 | bool f2fs_fill_dentries(struct dir_context *, struct f2fs_dentry_ptr *, | 1457 | bool f2fs_fill_dentries(struct dir_context *, struct f2fs_dentry_ptr *, |
@@ -1382,7 +1470,10 @@ ino_t f2fs_inode_by_name(struct inode *, struct qstr *); | |||
1382 | void f2fs_set_link(struct inode *, struct f2fs_dir_entry *, | 1470 | void f2fs_set_link(struct inode *, struct f2fs_dir_entry *, |
1383 | struct page *, struct inode *); | 1471 | struct page *, struct inode *); |
1384 | int update_dent_inode(struct inode *, const struct qstr *); | 1472 | int update_dent_inode(struct inode *, const struct qstr *); |
1385 | int __f2fs_add_link(struct inode *, const struct qstr *, struct inode *); | 1473 | void f2fs_update_dentry(nid_t ino, umode_t mode, struct f2fs_dentry_ptr *, |
1474 | const struct qstr *, f2fs_hash_t, unsigned int); | ||
1475 | int __f2fs_add_link(struct inode *, const struct qstr *, struct inode *, nid_t, | ||
1476 | umode_t); | ||
1386 | void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct inode *, | 1477 | void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct inode *, |
1387 | struct inode *); | 1478 | struct inode *); |
1388 | int f2fs_do_tmpfile(struct inode *, struct inode *); | 1479 | int f2fs_do_tmpfile(struct inode *, struct inode *); |
@@ -1392,7 +1483,7 @@ bool f2fs_empty_dir(struct inode *); | |||
1392 | static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode) | 1483 | static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode) |
1393 | { | 1484 | { |
1394 | return __f2fs_add_link(dentry->d_parent->d_inode, &dentry->d_name, | 1485 | return __f2fs_add_link(dentry->d_parent->d_inode, &dentry->d_name, |
1395 | inode); | 1486 | inode, inode->i_ino, inode->i_mode); |
1396 | } | 1487 | } |
1397 | 1488 | ||
1398 | /* | 1489 | /* |
@@ -1519,14 +1610,22 @@ int f2fs_submit_page_bio(struct f2fs_sb_info *, struct page *, | |||
1519 | struct f2fs_io_info *); | 1610 | struct f2fs_io_info *); |
1520 | void f2fs_submit_page_mbio(struct f2fs_sb_info *, struct page *, | 1611 | void f2fs_submit_page_mbio(struct f2fs_sb_info *, struct page *, |
1521 | struct f2fs_io_info *); | 1612 | struct f2fs_io_info *); |
1613 | void set_data_blkaddr(struct dnode_of_data *); | ||
1522 | int reserve_new_block(struct dnode_of_data *); | 1614 | int reserve_new_block(struct dnode_of_data *); |
1523 | int f2fs_reserve_block(struct dnode_of_data *, pgoff_t); | 1615 | int f2fs_reserve_block(struct dnode_of_data *, pgoff_t); |
1524 | void update_extent_cache(struct dnode_of_data *); | 1616 | void f2fs_shrink_extent_tree(struct f2fs_sb_info *, int); |
1617 | void f2fs_destroy_extent_tree(struct inode *); | ||
1618 | void f2fs_init_extent_cache(struct inode *, struct f2fs_extent *); | ||
1619 | void f2fs_update_extent_cache(struct dnode_of_data *); | ||
1620 | void f2fs_preserve_extent_tree(struct inode *); | ||
1525 | struct page *find_data_page(struct inode *, pgoff_t, bool); | 1621 | struct page *find_data_page(struct inode *, pgoff_t, bool); |
1526 | struct page *get_lock_data_page(struct inode *, pgoff_t); | 1622 | struct page *get_lock_data_page(struct inode *, pgoff_t); |
1527 | struct page *get_new_data_page(struct inode *, struct page *, pgoff_t, bool); | 1623 | struct page *get_new_data_page(struct inode *, struct page *, pgoff_t, bool); |
1528 | int do_write_data_page(struct page *, struct f2fs_io_info *); | 1624 | int do_write_data_page(struct page *, struct f2fs_io_info *); |
1529 | int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *, u64, u64); | 1625 | int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *, u64, u64); |
1626 | void init_extent_cache_info(struct f2fs_sb_info *); | ||
1627 | int __init create_extent_cache(void); | ||
1628 | void destroy_extent_cache(void); | ||
1530 | void f2fs_invalidate_page(struct page *, unsigned int, unsigned int); | 1629 | void f2fs_invalidate_page(struct page *, unsigned int, unsigned int); |
1531 | int f2fs_release_page(struct page *, gfp_t); | 1630 | int f2fs_release_page(struct page *, gfp_t); |
1532 | 1631 | ||
@@ -1554,7 +1653,7 @@ struct f2fs_stat_info { | |||
1554 | struct f2fs_sb_info *sbi; | 1653 | struct f2fs_sb_info *sbi; |
1555 | int all_area_segs, sit_area_segs, nat_area_segs, ssa_area_segs; | 1654 | int all_area_segs, sit_area_segs, nat_area_segs, ssa_area_segs; |
1556 | int main_area_segs, main_area_sections, main_area_zones; | 1655 | int main_area_segs, main_area_sections, main_area_zones; |
1557 | int hit_ext, total_ext; | 1656 | int hit_ext, total_ext, ext_tree, ext_node; |
1558 | int ndirty_node, ndirty_dent, ndirty_dirs, ndirty_meta; | 1657 | int ndirty_node, ndirty_dent, ndirty_dirs, ndirty_meta; |
1559 | int nats, dirty_nats, sits, dirty_sits, fnids; | 1658 | int nats, dirty_nats, sits, dirty_sits, fnids; |
1560 | int total_count, utilization; | 1659 | int total_count, utilization; |
@@ -1566,7 +1665,9 @@ struct f2fs_stat_info { | |||
1566 | int dirty_count, node_pages, meta_pages; | 1665 | int dirty_count, node_pages, meta_pages; |
1567 | int prefree_count, call_count, cp_count; | 1666 | int prefree_count, call_count, cp_count; |
1568 | int tot_segs, node_segs, data_segs, free_segs, free_secs; | 1667 | int tot_segs, node_segs, data_segs, free_segs, free_secs; |
1668 | int bg_node_segs, bg_data_segs; | ||
1569 | int tot_blks, data_blks, node_blks; | 1669 | int tot_blks, data_blks, node_blks; |
1670 | int bg_data_blks, bg_node_blks; | ||
1570 | int curseg[NR_CURSEG_TYPE]; | 1671 | int curseg[NR_CURSEG_TYPE]; |
1571 | int cursec[NR_CURSEG_TYPE]; | 1672 | int cursec[NR_CURSEG_TYPE]; |
1572 | int curzone[NR_CURSEG_TYPE]; | 1673 | int curzone[NR_CURSEG_TYPE]; |
@@ -1615,31 +1716,36 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) | |||
1615 | ((sbi)->block_count[(curseg)->alloc_type]++) | 1716 | ((sbi)->block_count[(curseg)->alloc_type]++) |
1616 | #define stat_inc_inplace_blocks(sbi) \ | 1717 | #define stat_inc_inplace_blocks(sbi) \ |
1617 | (atomic_inc(&(sbi)->inplace_count)) | 1718 | (atomic_inc(&(sbi)->inplace_count)) |
1618 | #define stat_inc_seg_count(sbi, type) \ | 1719 | #define stat_inc_seg_count(sbi, type, gc_type) \ |
1619 | do { \ | 1720 | do { \ |
1620 | struct f2fs_stat_info *si = F2FS_STAT(sbi); \ | 1721 | struct f2fs_stat_info *si = F2FS_STAT(sbi); \ |
1621 | (si)->tot_segs++; \ | 1722 | (si)->tot_segs++; \ |
1622 | if (type == SUM_TYPE_DATA) \ | 1723 | if (type == SUM_TYPE_DATA) { \ |
1623 | si->data_segs++; \ | 1724 | si->data_segs++; \ |
1624 | else \ | 1725 | si->bg_data_segs += (gc_type == BG_GC) ? 1 : 0; \ |
1726 | } else { \ | ||
1625 | si->node_segs++; \ | 1727 | si->node_segs++; \ |
1728 | si->bg_node_segs += (gc_type == BG_GC) ? 1 : 0; \ | ||
1729 | } \ | ||
1626 | } while (0) | 1730 | } while (0) |
1627 | 1731 | ||
1628 | #define stat_inc_tot_blk_count(si, blks) \ | 1732 | #define stat_inc_tot_blk_count(si, blks) \ |
1629 | (si->tot_blks += (blks)) | 1733 | (si->tot_blks += (blks)) |
1630 | 1734 | ||
1631 | #define stat_inc_data_blk_count(sbi, blks) \ | 1735 | #define stat_inc_data_blk_count(sbi, blks, gc_type) \ |
1632 | do { \ | 1736 | do { \ |
1633 | struct f2fs_stat_info *si = F2FS_STAT(sbi); \ | 1737 | struct f2fs_stat_info *si = F2FS_STAT(sbi); \ |
1634 | stat_inc_tot_blk_count(si, blks); \ | 1738 | stat_inc_tot_blk_count(si, blks); \ |
1635 | si->data_blks += (blks); \ | 1739 | si->data_blks += (blks); \ |
1740 | si->bg_data_blks += (gc_type == BG_GC) ? (blks) : 0; \ | ||
1636 | } while (0) | 1741 | } while (0) |
1637 | 1742 | ||
1638 | #define stat_inc_node_blk_count(sbi, blks) \ | 1743 | #define stat_inc_node_blk_count(sbi, blks, gc_type) \ |
1639 | do { \ | 1744 | do { \ |
1640 | struct f2fs_stat_info *si = F2FS_STAT(sbi); \ | 1745 | struct f2fs_stat_info *si = F2FS_STAT(sbi); \ |
1641 | stat_inc_tot_blk_count(si, blks); \ | 1746 | stat_inc_tot_blk_count(si, blks); \ |
1642 | si->node_blks += (blks); \ | 1747 | si->node_blks += (blks); \ |
1748 | si->bg_node_blks += (gc_type == BG_GC) ? (blks) : 0; \ | ||
1643 | } while (0) | 1749 | } while (0) |
1644 | 1750 | ||
1645 | int f2fs_build_stats(struct f2fs_sb_info *); | 1751 | int f2fs_build_stats(struct f2fs_sb_info *); |
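With gc_type threaded through, background GC additionally feeds the new bg_* counters that debug.c prints in parentheses, while foreground GC only bumps the plain totals. Call-site shape, matching the gc.c hunks below (wrapper name invented):

```c
/* After migrating 'blks' data blocks in one GC pass: adds to tot_blks
 * and data_blks always, and to bg_data_blks only when gc_type == BG_GC. */
static void account_gc_data(struct f2fs_sb_info *sbi, int gc_type, int blks)
{
	stat_inc_data_blk_count(sbi, blks, gc_type);
}
```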
@@ -1661,10 +1767,10 @@ void f2fs_destroy_root_stats(void); | |||
1661 | #define stat_inc_seg_type(sbi, curseg) | 1767 | #define stat_inc_seg_type(sbi, curseg) |
1662 | #define stat_inc_block_count(sbi, curseg) | 1768 | #define stat_inc_block_count(sbi, curseg) |
1663 | #define stat_inc_inplace_blocks(sbi) | 1769 | #define stat_inc_inplace_blocks(sbi) |
1664 | #define stat_inc_seg_count(si, type) | 1770 | #define stat_inc_seg_count(sbi, type, gc_type) |
1665 | #define stat_inc_tot_blk_count(si, blks) | 1771 | #define stat_inc_tot_blk_count(si, blks) |
1666 | #define stat_inc_data_blk_count(si, blks) | 1772 | #define stat_inc_data_blk_count(sbi, blks, gc_type) |
1667 | #define stat_inc_node_blk_count(sbi, blks) | 1773 | #define stat_inc_node_blk_count(sbi, blks, gc_type) |
1668 | 1774 | ||
1669 | static inline int f2fs_build_stats(struct f2fs_sb_info *sbi) { return 0; } | 1775 | static inline int f2fs_build_stats(struct f2fs_sb_info *sbi) { return 0; } |
1670 | static inline void f2fs_destroy_stats(struct f2fs_sb_info *sbi) { } | 1776 | static inline void f2fs_destroy_stats(struct f2fs_sb_info *sbi) { } |
@@ -1688,6 +1794,7 @@ extern struct kmem_cache *inode_entry_slab; | |||
1688 | */ | 1794 | */ |
1689 | bool f2fs_may_inline(struct inode *); | 1795 | bool f2fs_may_inline(struct inode *); |
1690 | void read_inline_data(struct page *, struct page *); | 1796 | void read_inline_data(struct page *, struct page *); |
1797 | bool truncate_inline_inode(struct page *, u64); | ||
1691 | int f2fs_read_inline_data(struct inode *, struct page *); | 1798 | int f2fs_read_inline_data(struct inode *, struct page *); |
1692 | int f2fs_convert_inline_page(struct dnode_of_data *, struct page *); | 1799 | int f2fs_convert_inline_page(struct dnode_of_data *, struct page *); |
1693 | int f2fs_convert_inline_inode(struct inode *); | 1800 | int f2fs_convert_inline_inode(struct inode *); |
@@ -1697,7 +1804,8 @@ struct f2fs_dir_entry *find_in_inline_dir(struct inode *, struct qstr *, | |||
1697 | struct page **); | 1804 | struct page **); |
1698 | struct f2fs_dir_entry *f2fs_parent_inline_dir(struct inode *, struct page **); | 1805 | struct f2fs_dir_entry *f2fs_parent_inline_dir(struct inode *, struct page **); |
1699 | int make_empty_inline_dir(struct inode *inode, struct inode *, struct page *); | 1806 | int make_empty_inline_dir(struct inode *inode, struct inode *, struct page *); |
1700 | int f2fs_add_inline_entry(struct inode *, const struct qstr *, struct inode *); | 1807 | int f2fs_add_inline_entry(struct inode *, const struct qstr *, struct inode *, |
1808 | nid_t, umode_t); | ||
1701 | void f2fs_delete_inline_entry(struct f2fs_dir_entry *, struct page *, | 1809 | void f2fs_delete_inline_entry(struct f2fs_dir_entry *, struct page *, |
1702 | struct inode *, struct inode *); | 1810 | struct inode *, struct inode *); |
1703 | bool f2fs_empty_inline_dir(struct inode *); | 1811 | bool f2fs_empty_inline_dir(struct inode *); |
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index df6a0596eccf..a6f3f6186588 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c | |||
@@ -241,6 +241,8 @@ go_write: | |||
241 | * will be used only for fsynced inodes after checkpoint. | 241 | * will be used only for fsynced inodes after checkpoint. |
242 | */ | 242 | */ |
243 | try_to_fix_pino(inode); | 243 | try_to_fix_pino(inode); |
244 | clear_inode_flag(fi, FI_APPEND_WRITE); | ||
245 | clear_inode_flag(fi, FI_UPDATE_WRITE); | ||
244 | goto out; | 246 | goto out; |
245 | } | 247 | } |
246 | sync_nodes: | 248 | sync_nodes: |
@@ -433,8 +435,12 @@ int truncate_data_blocks_range(struct dnode_of_data *dn, int count) | |||
433 | continue; | 435 | continue; |
434 | 436 | ||
435 | dn->data_blkaddr = NULL_ADDR; | 437 | dn->data_blkaddr = NULL_ADDR; |
436 | update_extent_cache(dn); | 438 | set_data_blkaddr(dn); |
439 | f2fs_update_extent_cache(dn); | ||
437 | invalidate_blocks(sbi, blkaddr); | 440 | invalidate_blocks(sbi, blkaddr); |
441 | if (dn->ofs_in_node == 0 && IS_INODE(dn->node_page)) | ||
442 | clear_inode_flag(F2FS_I(dn->inode), | ||
443 | FI_FIRST_BLOCK_WRITTEN); | ||
438 | nr_free++; | 444 | nr_free++; |
439 | } | 445 | } |
440 | if (nr_free) { | 446 | if (nr_free) { |
@@ -454,15 +460,16 @@ void truncate_data_blocks(struct dnode_of_data *dn) | |||
454 | truncate_data_blocks_range(dn, ADDRS_PER_BLOCK); | 460 | truncate_data_blocks_range(dn, ADDRS_PER_BLOCK); |
455 | } | 461 | } |
456 | 462 | ||
457 | static int truncate_partial_data_page(struct inode *inode, u64 from) | 463 | static int truncate_partial_data_page(struct inode *inode, u64 from, |
464 | bool force) | ||
458 | { | 465 | { |
459 | unsigned offset = from & (PAGE_CACHE_SIZE - 1); | 466 | unsigned offset = from & (PAGE_CACHE_SIZE - 1); |
460 | struct page *page; | 467 | struct page *page; |
461 | 468 | ||
462 | if (!offset) | 469 | if (!offset && !force) |
463 | return 0; | 470 | return 0; |
464 | 471 | ||
465 | page = find_data_page(inode, from >> PAGE_CACHE_SHIFT, false); | 472 | page = find_data_page(inode, from >> PAGE_CACHE_SHIFT, force); |
466 | if (IS_ERR(page)) | 473 | if (IS_ERR(page)) |
467 | return 0; | 474 | return 0; |
468 | 475 | ||
@@ -473,7 +480,8 @@ static int truncate_partial_data_page(struct inode *inode, u64 from) | |||
473 | 480 | ||
474 | f2fs_wait_on_page_writeback(page, DATA); | 481 | f2fs_wait_on_page_writeback(page, DATA); |
475 | zero_user(page, offset, PAGE_CACHE_SIZE - offset); | 482 | zero_user(page, offset, PAGE_CACHE_SIZE - offset); |
476 | set_page_dirty(page); | 483 | if (!force) |
484 | set_page_dirty(page); | ||
477 | out: | 485 | out: |
478 | f2fs_put_page(page, 1); | 486 | f2fs_put_page(page, 1); |
479 | return 0; | 487 | return 0; |
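The new force flag exists for the inline-data case handled in truncate_blocks() below: once truncate_inline_inode() has cut the on-disk copy, the cached page must be zeroed even when from is page-aligned, and must not be re-dirtied since nothing is left to write back. Call shape (illustrative wrapper; the function itself is static to file.c):

```c
/* 'had_inline' mirrors 'truncate_page' in truncate_blocks(): force a
 * zeroing-only pass for an inode whose inline data was just truncated. */
static int zero_partial_page(struct inode *inode, u64 from, bool had_inline)
{
	return truncate_partial_data_page(inode, from, had_inline);
}
```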
@@ -487,6 +495,7 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock) | |||
487 | pgoff_t free_from; | 495 | pgoff_t free_from; |
488 | int count = 0, err = 0; | 496 | int count = 0, err = 0; |
489 | struct page *ipage; | 497 | struct page *ipage; |
498 | bool truncate_page = false; | ||
490 | 499 | ||
491 | trace_f2fs_truncate_blocks_enter(inode, from); | 500 | trace_f2fs_truncate_blocks_enter(inode, from); |
492 | 501 | ||
@@ -502,7 +511,10 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock) | |||
502 | } | 511 | } |
503 | 512 | ||
504 | if (f2fs_has_inline_data(inode)) { | 513 | if (f2fs_has_inline_data(inode)) { |
514 | if (truncate_inline_inode(ipage, from)) | ||
515 | set_page_dirty(ipage); | ||
505 | f2fs_put_page(ipage, 1); | 516 | f2fs_put_page(ipage, 1); |
517 | truncate_page = true; | ||
506 | goto out; | 518 | goto out; |
507 | } | 519 | } |
508 | 520 | ||
@@ -533,7 +545,7 @@ out: | |||
533 | 545 | ||
534 | /* lastly zero out the first data page */ | 546 | /* lastly zero out the first data page */ |
535 | if (!err) | 547 | if (!err) |
536 | err = truncate_partial_data_page(inode, from); | 548 | err = truncate_partial_data_page(inode, from, truncate_page); |
537 | 549 | ||
538 | trace_f2fs_truncate_blocks_exit(inode, err); | 550 | trace_f2fs_truncate_blocks_exit(inode, err); |
539 | return err; | 551 | return err; |
@@ -997,6 +1009,9 @@ static int f2fs_ioc_release_volatile_write(struct file *filp) | |||
997 | if (!f2fs_is_volatile_file(inode)) | 1009 | if (!f2fs_is_volatile_file(inode)) |
998 | return 0; | 1010 | return 0; |
999 | 1011 | ||
1012 | if (!f2fs_is_first_block_written(inode)) | ||
1013 | return truncate_partial_data_page(inode, 0, true); | ||
1014 | |||
1000 | punch_hole(inode, 0, F2FS_BLKSIZE); | 1015 | punch_hole(inode, 0, F2FS_BLKSIZE); |
1001 | return 0; | 1016 | return 0; |
1002 | } | 1017 | } |
@@ -1029,6 +1044,41 @@ static int f2fs_ioc_abort_volatile_write(struct file *filp) | |||
1029 | return ret; | 1044 | return ret; |
1030 | } | 1045 | } |
1031 | 1046 | ||
1047 | static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg) | ||
1048 | { | ||
1049 | struct inode *inode = file_inode(filp); | ||
1050 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); | ||
1051 | struct super_block *sb = sbi->sb; | ||
1052 | __u32 in; | ||
1053 | |||
1054 | if (!capable(CAP_SYS_ADMIN)) | ||
1055 | return -EPERM; | ||
1056 | |||
1057 | if (get_user(in, (__u32 __user *)arg)) | ||
1058 | return -EFAULT; | ||
1059 | |||
1060 | switch (in) { | ||
1061 | case F2FS_GOING_DOWN_FULLSYNC: | ||
1062 | sb = freeze_bdev(sb->s_bdev); | ||
1063 | if (sb && !IS_ERR(sb)) { | ||
1064 | f2fs_stop_checkpoint(sbi); | ||
1065 | thaw_bdev(sb->s_bdev, sb); | ||
1066 | } | ||
1067 | break; | ||
1068 | case F2FS_GOING_DOWN_METASYNC: | ||
1069 | /* do checkpoint only */ | ||
1070 | f2fs_sync_fs(sb, 1); | ||
1071 | f2fs_stop_checkpoint(sbi); | ||
1072 | break; | ||
1073 | case F2FS_GOING_DOWN_NOSYNC: | ||
1074 | f2fs_stop_checkpoint(sbi); | ||
1075 | break; | ||
1076 | default: | ||
1077 | return -EINVAL; | ||
1078 | } | ||
1079 | return 0; | ||
1080 | } | ||
1081 | |||
1032 | static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg) | 1082 | static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg) |
1033 | { | 1083 | { |
1034 | struct inode *inode = file_inode(filp); | 1084 | struct inode *inode = file_inode(filp); |
@@ -1078,6 +1128,8 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
1078 | return f2fs_ioc_release_volatile_write(filp); | 1128 | return f2fs_ioc_release_volatile_write(filp); |
1079 | case F2FS_IOC_ABORT_VOLATILE_WRITE: | 1129 | case F2FS_IOC_ABORT_VOLATILE_WRITE: |
1080 | return f2fs_ioc_abort_volatile_write(filp); | 1130 | return f2fs_ioc_abort_volatile_write(filp); |
1131 | case F2FS_IOC_SHUTDOWN: | ||
1132 | return f2fs_ioc_shutdown(filp, arg); | ||
1081 | case FITRIM: | 1133 | case FITRIM: |
1082 | return f2fs_ioc_fitrim(filp, arg); | 1134 | return f2fs_ioc_fitrim(filp, arg); |
1083 | default: | 1135 | default: |
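F2FS_IOC_SHUTDOWN mirrors XFS_IOC_GOINGDOWN: a CAP_SYS_ADMIN caller picks one of three sync levels before checkpointing stops. A minimal userspace sketch; the macro values are assumed to match fs/f2fs/f2fs.h of this series and should be taken from the real headers rather than from this copy.

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/types.h>

/* assumed to match fs/f2fs/f2fs.h of this series */
#define F2FS_IOC_SHUTDOWN		_IOR('X', 125, __u32)
#define F2FS_GOING_DOWN_FULLSYNC	0x0	/* freeze bdev, checkpoint, thaw */
#define F2FS_GOING_DOWN_METASYNC	0x1	/* checkpoint, then stop */
#define F2FS_GOING_DOWN_NOSYNC		0x2	/* stop immediately */

int main(int argc, char **argv)
{
	__u32 how = F2FS_GOING_DOWN_METASYNC;
	int fd;

	if (argc < 2) {
		fprintf(stderr, "usage: %s <file-on-f2fs>\n", argv[0]);
		return 1;
	}
	fd = open(argv[1], O_RDONLY);
	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* needs CAP_SYS_ADMIN; the filesystem stops checkpointing */
	if (ioctl(fd, F2FS_IOC_SHUTDOWN, &how) < 0) {
		perror("F2FS_IOC_SHUTDOWN");
		close(fd);
		return 1;
	}
	close(fd);
	return 0;
}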
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 76adbc3641f1..ed58211fe79b 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c | |||
@@ -435,7 +435,7 @@ next_step: | |||
435 | set_page_dirty(node_page); | 435 | set_page_dirty(node_page); |
436 | } | 436 | } |
437 | f2fs_put_page(node_page, 1); | 437 | f2fs_put_page(node_page, 1); |
438 | stat_inc_node_blk_count(sbi, 1); | 438 | stat_inc_node_blk_count(sbi, 1, gc_type); |
439 | } | 439 | } |
440 | 440 | ||
441 | if (initial) { | 441 | if (initial) { |
@@ -622,7 +622,7 @@ next_step: | |||
622 | if (IS_ERR(data_page)) | 622 | if (IS_ERR(data_page)) |
623 | continue; | 623 | continue; |
624 | move_data_page(inode, data_page, gc_type); | 624 | move_data_page(inode, data_page, gc_type); |
625 | stat_inc_data_blk_count(sbi, 1); | 625 | stat_inc_data_blk_count(sbi, 1, gc_type); |
626 | } | 626 | } |
627 | } | 627 | } |
628 | 628 | ||
@@ -680,7 +680,7 @@ static void do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno, | |||
680 | } | 680 | } |
681 | blk_finish_plug(&plug); | 681 | blk_finish_plug(&plug); |
682 | 682 | ||
683 | stat_inc_seg_count(sbi, GET_SUM_TYPE((&sum->footer))); | 683 | stat_inc_seg_count(sbi, GET_SUM_TYPE((&sum->footer)), gc_type); |
684 | stat_inc_call_count(sbi->stat_info); | 684 | stat_inc_call_count(sbi->stat_info); |
685 | 685 | ||
686 | f2fs_put_page(sum_page, 1); | 686 | f2fs_put_page(sum_page, 1); |
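The gc.c hunks only add a gc_type argument to the stat helpers; the payoff is on the debugfs side, where moved blocks can now be attributed to background versus foreground GC. A hedged sketch of what the reworked macro plausibly looks like (the real definitions live behind CONFIG_F2FS_STAT_FS and the field names here are assumptions):

#define stat_inc_data_blk_count(sbi, blks, gc_type)			\
	do {								\
		struct f2fs_stat_info *si = F2FS_STAT(sbi);		\
		si->data_blks += (blks);				\
		/* attribute the blocks to background GC when applicable */ \
		si->bg_data_blks += ((gc_type) == BG_GC) ? (blks) : 0;	\
	} while (0)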
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 1484c00133cd..8140e4f0e538 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c | |||
@@ -21,7 +21,7 @@ bool f2fs_may_inline(struct inode *inode) | |||
21 | if (f2fs_is_atomic_file(inode)) | 21 | if (f2fs_is_atomic_file(inode)) |
22 | return false; | 22 | return false; |
23 | 23 | ||
24 | if (!S_ISREG(inode->i_mode)) | 24 | if (!S_ISREG(inode->i_mode) && !S_ISLNK(inode->i_mode)) |
25 | return false; | 25 | return false; |
26 | 26 | ||
27 | if (i_size_read(inode) > MAX_INLINE_DATA) | 27 | if (i_size_read(inode) > MAX_INLINE_DATA) |
@@ -50,10 +50,19 @@ void read_inline_data(struct page *page, struct page *ipage) | |||
50 | SetPageUptodate(page); | 50 | SetPageUptodate(page); |
51 | } | 51 | } |
52 | 52 | ||
53 | static void truncate_inline_data(struct page *ipage) | 53 | bool truncate_inline_inode(struct page *ipage, u64 from) |
54 | { | 54 | { |
55 | void *addr; | ||
56 | |||
57 | if (from >= MAX_INLINE_DATA) | ||
58 | return false; | ||
59 | |||
60 | addr = inline_data_addr(ipage); | ||
61 | |||
55 | f2fs_wait_on_page_writeback(ipage, NODE); | 62 | f2fs_wait_on_page_writeback(ipage, NODE); |
56 | memset(inline_data_addr(ipage), 0, MAX_INLINE_DATA); | 63 | memset(addr + from, 0, MAX_INLINE_DATA - from); |
64 | |||
65 | return true; | ||
57 | } | 66 | } |
58 | 67 | ||
59 | int f2fs_read_inline_data(struct inode *inode, struct page *page) | 68 | int f2fs_read_inline_data(struct inode *inode, struct page *page) |
@@ -122,7 +131,8 @@ no_update: | |||
122 | set_page_writeback(page); | 131 | set_page_writeback(page); |
123 | fio.blk_addr = dn->data_blkaddr; | 132 | fio.blk_addr = dn->data_blkaddr; |
124 | write_data_page(page, dn, &fio); | 133 | write_data_page(page, dn, &fio); |
125 | update_extent_cache(dn); | 134 | set_data_blkaddr(dn); |
135 | f2fs_update_extent_cache(dn); | ||
126 | f2fs_wait_on_page_writeback(page, DATA); | 136 | f2fs_wait_on_page_writeback(page, DATA); |
127 | if (dirty) | 137 | if (dirty) |
128 | inode_dec_dirty_pages(dn->inode); | 138 | inode_dec_dirty_pages(dn->inode); |
@@ -131,7 +141,7 @@ no_update: | |||
131 | set_inode_flag(F2FS_I(dn->inode), FI_APPEND_WRITE); | 141 | set_inode_flag(F2FS_I(dn->inode), FI_APPEND_WRITE); |
132 | 142 | ||
133 | /* clear inline data and flag after data writeback */ | 143 | /* clear inline data and flag after data writeback */ |
134 | truncate_inline_data(dn->inode_page); | 144 | truncate_inline_inode(dn->inode_page, 0); |
135 | clear_out: | 145 | clear_out: |
136 | stat_dec_inline_inode(dn->inode); | 146 | stat_dec_inline_inode(dn->inode); |
137 | f2fs_clear_inline_inode(dn->inode); | 147 | f2fs_clear_inline_inode(dn->inode); |
@@ -245,7 +255,7 @@ process_inline: | |||
245 | if (f2fs_has_inline_data(inode)) { | 255 | if (f2fs_has_inline_data(inode)) { |
246 | ipage = get_node_page(sbi, inode->i_ino); | 256 | ipage = get_node_page(sbi, inode->i_ino); |
247 | f2fs_bug_on(sbi, IS_ERR(ipage)); | 257 | f2fs_bug_on(sbi, IS_ERR(ipage)); |
248 | truncate_inline_data(ipage); | 258 | truncate_inline_inode(ipage, 0); |
249 | f2fs_clear_inline_inode(inode); | 259 | f2fs_clear_inline_inode(inode); |
250 | update_inode(inode, ipage); | 260 | update_inode(inode, ipage); |
251 | f2fs_put_page(ipage, 1); | 261 | f2fs_put_page(ipage, 1); |
@@ -363,7 +373,7 @@ static int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage, | |||
363 | set_page_dirty(page); | 373 | set_page_dirty(page); |
364 | 374 | ||
365 | /* clear inline dir and flag after data writeback */ | 375 | /* clear inline dir and flag after data writeback */ |
366 | truncate_inline_data(ipage); | 376 | truncate_inline_inode(ipage, 0); |
367 | 377 | ||
368 | stat_dec_inline_dir(dir); | 378 | stat_dec_inline_dir(dir); |
369 | clear_inode_flag(F2FS_I(dir), FI_INLINE_DENTRY); | 379 | clear_inode_flag(F2FS_I(dir), FI_INLINE_DENTRY); |
@@ -380,21 +390,18 @@ out: | |||
380 | } | 390 | } |
381 | 391 | ||
382 | int f2fs_add_inline_entry(struct inode *dir, const struct qstr *name, | 392 | int f2fs_add_inline_entry(struct inode *dir, const struct qstr *name, |
383 | struct inode *inode) | 393 | struct inode *inode, nid_t ino, umode_t mode) |
384 | { | 394 | { |
385 | struct f2fs_sb_info *sbi = F2FS_I_SB(dir); | 395 | struct f2fs_sb_info *sbi = F2FS_I_SB(dir); |
386 | struct page *ipage; | 396 | struct page *ipage; |
387 | unsigned int bit_pos; | 397 | unsigned int bit_pos; |
388 | f2fs_hash_t name_hash; | 398 | f2fs_hash_t name_hash; |
389 | struct f2fs_dir_entry *de; | ||
390 | size_t namelen = name->len; | 399 | size_t namelen = name->len; |
391 | struct f2fs_inline_dentry *dentry_blk = NULL; | 400 | struct f2fs_inline_dentry *dentry_blk = NULL; |
401 | struct f2fs_dentry_ptr d; | ||
392 | int slots = GET_DENTRY_SLOTS(namelen); | 402 | int slots = GET_DENTRY_SLOTS(namelen); |
393 | struct page *page; | 403 | struct page *page = NULL; |
394 | int err = 0; | 404 | int err = 0; |
395 | int i; | ||
396 | |||
397 | name_hash = f2fs_dentry_hash(name); | ||
398 | 405 | ||
399 | ipage = get_node_page(sbi, dir->i_ino); | 406 | ipage = get_node_page(sbi, dir->i_ino); |
400 | if (IS_ERR(ipage)) | 407 | if (IS_ERR(ipage)) |
@@ -410,32 +417,34 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *name, | |||
410 | goto out; | 417 | goto out; |
411 | } | 418 | } |
412 | 419 | ||
413 | down_write(&F2FS_I(inode)->i_sem); | 420 | if (inode) { |
414 | page = init_inode_metadata(inode, dir, name, ipage); | 421 | down_write(&F2FS_I(inode)->i_sem); |
415 | if (IS_ERR(page)) { | 422 | page = init_inode_metadata(inode, dir, name, ipage); |
416 | err = PTR_ERR(page); | 423 | if (IS_ERR(page)) { |
417 | goto fail; | 424 | err = PTR_ERR(page); |
425 | goto fail; | ||
426 | } | ||
418 | } | 427 | } |
419 | 428 | ||
420 | f2fs_wait_on_page_writeback(ipage, NODE); | 429 | f2fs_wait_on_page_writeback(ipage, NODE); |
421 | de = &dentry_blk->dentry[bit_pos]; | 430 | |
422 | de->hash_code = name_hash; | 431 | name_hash = f2fs_dentry_hash(name); |
423 | de->name_len = cpu_to_le16(namelen); | 432 | make_dentry_ptr(&d, (void *)dentry_blk, 2); |
424 | memcpy(dentry_blk->filename[bit_pos], name->name, name->len); | 433 | f2fs_update_dentry(ino, mode, &d, name, name_hash, bit_pos); |
425 | de->ino = cpu_to_le32(inode->i_ino); | 434 | |
426 | set_de_type(de, inode); | ||
427 | for (i = 0; i < slots; i++) | ||
428 | test_and_set_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap); | ||
429 | set_page_dirty(ipage); | 435 | set_page_dirty(ipage); |
430 | 436 | ||
431 | /* we don't need to mark_inode_dirty now */ | 437 | /* we don't need to mark_inode_dirty now */ |
432 | F2FS_I(inode)->i_pino = dir->i_ino; | 438 | if (inode) { |
433 | update_inode(inode, page); | 439 | F2FS_I(inode)->i_pino = dir->i_ino; |
434 | f2fs_put_page(page, 1); | 440 | update_inode(inode, page); |
441 | f2fs_put_page(page, 1); | ||
442 | } | ||
435 | 443 | ||
436 | update_parent_metadata(dir, inode, 0); | 444 | update_parent_metadata(dir, inode, 0); |
437 | fail: | 445 | fail: |
438 | up_write(&F2FS_I(inode)->i_sem); | 446 | if (inode) |
447 | up_write(&F2FS_I(inode)->i_sem); | ||
439 | 448 | ||
440 | if (is_inode_flag_set(F2FS_I(dir), FI_UPDATE_DIR)) { | 449 | if (is_inode_flag_set(F2FS_I(dir), FI_UPDATE_DIR)) { |
441 | update_inode(dir, ipage); | 450 | update_inode(dir, ipage); |
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 2d002e3738a7..e622ec95409e 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c | |||
@@ -51,6 +51,15 @@ static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri) | |||
51 | } | 51 | } |
52 | } | 52 | } |
53 | 53 | ||
54 | static bool __written_first_block(struct f2fs_inode *ri) | ||
55 | { | ||
56 | block_t addr = le32_to_cpu(ri->i_addr[0]); | ||
57 | |||
58 | if (addr != NEW_ADDR && addr != NULL_ADDR) | ||
59 | return true; | ||
60 | return false; | ||
61 | } | ||
62 | |||
54 | static void __set_inode_rdev(struct inode *inode, struct f2fs_inode *ri) | 63 | static void __set_inode_rdev(struct inode *inode, struct f2fs_inode *ri) |
55 | { | 64 | { |
56 | if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) { | 65 | if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) { |
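The flag set from __written_first_block() at read-in time is what the volatile-write release path in the file.c hunks above consults. A one-line sketch of the accessor this implies (the real definition would sit in f2fs.h; treat it as an assumption):

static inline bool f2fs_is_first_block_written(struct inode *inode)
{
	return is_inode_flag_set(F2FS_I(inode), FI_FIRST_BLOCK_WRITTEN);
}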
@@ -130,7 +139,8 @@ static int do_read_inode(struct inode *inode) | |||
130 | fi->i_pino = le32_to_cpu(ri->i_pino); | 139 | fi->i_pino = le32_to_cpu(ri->i_pino); |
131 | fi->i_dir_level = ri->i_dir_level; | 140 | fi->i_dir_level = ri->i_dir_level; |
132 | 141 | ||
133 | get_extent_info(&fi->ext, ri->i_ext); | 142 | f2fs_init_extent_cache(inode, &ri->i_ext); |
143 | |||
134 | get_inline_info(fi, ri); | 144 | get_inline_info(fi, ri); |
135 | 145 | ||
136 | /* check data exist */ | 146 | /* check data exist */ |
@@ -140,6 +150,9 @@ static int do_read_inode(struct inode *inode) | |||
140 | /* get rdev by using inline_info */ | 150 | /* get rdev by using inline_info */ |
141 | __get_inode_rdev(inode, ri); | 151 | __get_inode_rdev(inode, ri); |
142 | 152 | ||
153 | if (__written_first_block(ri)) | ||
154 | set_inode_flag(F2FS_I(inode), FI_FIRST_BLOCK_WRITTEN); | ||
155 | |||
143 | f2fs_put_page(node_page, 1); | 156 | f2fs_put_page(node_page, 1); |
144 | 157 | ||
145 | stat_inc_inline_inode(inode); | 158 | stat_inc_inline_inode(inode); |
@@ -220,7 +233,11 @@ void update_inode(struct inode *inode, struct page *node_page) | |||
220 | ri->i_links = cpu_to_le32(inode->i_nlink); | 233 | ri->i_links = cpu_to_le32(inode->i_nlink); |
221 | ri->i_size = cpu_to_le64(i_size_read(inode)); | 234 | ri->i_size = cpu_to_le64(i_size_read(inode)); |
222 | ri->i_blocks = cpu_to_le64(inode->i_blocks); | 235 | ri->i_blocks = cpu_to_le64(inode->i_blocks); |
236 | |||
237 | read_lock(&F2FS_I(inode)->ext_lock); | ||
223 | set_raw_extent(&F2FS_I(inode)->ext, &ri->i_ext); | 238 | set_raw_extent(&F2FS_I(inode)->ext, &ri->i_ext); |
239 | read_unlock(&F2FS_I(inode)->ext_lock); | ||
240 | |||
224 | set_raw_inline(F2FS_I(inode), ri); | 241 | set_raw_inline(F2FS_I(inode), ri); |
225 | 242 | ||
226 | ri->i_atime = cpu_to_le64(inode->i_atime.tv_sec); | 243 | ri->i_atime = cpu_to_le64(inode->i_atime.tv_sec); |
@@ -328,6 +345,12 @@ void f2fs_evict_inode(struct inode *inode) | |||
328 | no_delete: | 345 | no_delete: |
329 | stat_dec_inline_dir(inode); | 346 | stat_dec_inline_dir(inode); |
330 | stat_dec_inline_inode(inode); | 347 | stat_dec_inline_inode(inode); |
348 | |||
349 | /* update extent info in inode */ | ||
350 | if (inode->i_nlink) | ||
351 | f2fs_preserve_extent_tree(inode); | ||
352 | f2fs_destroy_extent_tree(inode); | ||
353 | |||
331 | invalidate_mapping_pages(NODE_MAPPING(sbi), inode->i_ino, inode->i_ino); | 354 | invalidate_mapping_pages(NODE_MAPPING(sbi), inode->i_ino, inode->i_ino); |
332 | if (xnid) | 355 | if (xnid) |
333 | invalidate_mapping_pages(NODE_MAPPING(sbi), xnid, xnid); | 356 | invalidate_mapping_pages(NODE_MAPPING(sbi), xnid, xnid); |
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index e79639a9787a..407dde3d7a92 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c | |||
@@ -14,6 +14,7 @@ | |||
14 | #include <linux/sched.h> | 14 | #include <linux/sched.h> |
15 | #include <linux/ctype.h> | 15 | #include <linux/ctype.h> |
16 | #include <linux/dcache.h> | 16 | #include <linux/dcache.h> |
17 | #include <linux/namei.h> | ||
17 | 18 | ||
18 | #include "f2fs.h" | 19 | #include "f2fs.h" |
19 | #include "node.h" | 20 | #include "node.h" |
@@ -187,6 +188,44 @@ struct dentry *f2fs_get_parent(struct dentry *child) | |||
187 | return d_obtain_alias(f2fs_iget(child->d_inode->i_sb, ino)); | 188 | return d_obtain_alias(f2fs_iget(child->d_inode->i_sb, ino)); |
188 | } | 189 | } |
189 | 190 | ||
191 | static int __recover_dot_dentries(struct inode *dir, nid_t pino) | ||
192 | { | ||
193 | struct f2fs_sb_info *sbi = F2FS_I_SB(dir); | ||
194 | struct qstr dot = QSTR_INIT(".", 1); | ||
195 | struct qstr dotdot = QSTR_INIT("..", 2); | ||
196 | struct f2fs_dir_entry *de; | ||
197 | struct page *page; | ||
198 | int err = 0; | ||
199 | |||
200 | f2fs_lock_op(sbi); | ||
201 | |||
202 | de = f2fs_find_entry(dir, &dot, &page); | ||
203 | if (de) { | ||
204 | f2fs_dentry_kunmap(dir, page); | ||
205 | f2fs_put_page(page, 0); | ||
206 | } else { | ||
207 | err = __f2fs_add_link(dir, &dot, NULL, dir->i_ino, S_IFDIR); | ||
208 | if (err) | ||
209 | goto out; | ||
210 | } | ||
211 | |||
212 | de = f2fs_find_entry(dir, &dotdot, &page); | ||
213 | if (de) { | ||
214 | f2fs_dentry_kunmap(dir, page); | ||
215 | f2fs_put_page(page, 0); | ||
216 | } else { | ||
217 | err = __f2fs_add_link(dir, &dotdot, NULL, pino, S_IFDIR); | ||
218 | } | ||
219 | out: | ||
220 | if (!err) { | ||
221 | clear_inode_flag(F2FS_I(dir), FI_INLINE_DOTS); | ||
222 | mark_inode_dirty(dir); | ||
223 | } | ||
224 | |||
225 | f2fs_unlock_op(sbi); | ||
226 | return err; | ||
227 | } | ||
228 | |||
190 | static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, | 229 | static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, |
191 | unsigned int flags) | 230 | unsigned int flags) |
192 | { | 231 | { |
@@ -206,6 +245,16 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, | |||
206 | inode = f2fs_iget(dir->i_sb, ino); | 245 | inode = f2fs_iget(dir->i_sb, ino); |
207 | if (IS_ERR(inode)) | 246 | if (IS_ERR(inode)) |
208 | return ERR_CAST(inode); | 247 | return ERR_CAST(inode); |
248 | |||
249 | if (f2fs_has_inline_dots(inode)) { | ||
250 | int err; | ||
251 | |||
252 | err = __recover_dot_dentries(inode, dir->i_ino); | ||
253 | if (err) { | ||
254 | iget_failed(inode); | ||
255 | return ERR_PTR(err); | ||
256 | } | ||
257 | } | ||
209 | } | 258 | } |
210 | 259 | ||
211 | return d_splice_alias(inode, dentry); | 260 | return d_splice_alias(inode, dentry); |
@@ -247,6 +296,23 @@ fail: | |||
247 | return err; | 296 | return err; |
248 | } | 297 | } |
249 | 298 | ||
299 | static void *f2fs_follow_link(struct dentry *dentry, struct nameidata *nd) | ||
300 | { | ||
301 | struct page *page; | ||
302 | |||
303 | page = page_follow_link_light(dentry, nd); | ||
304 | if (IS_ERR(page)) | ||
305 | return page; | ||
306 | |||
307 | /* this is the broken symlink case */ | ||
308 | if (*nd_get_link(nd) == 0) { | ||
309 | kunmap(page); | ||
310 | page_cache_release(page); | ||
311 | return ERR_PTR(-ENOENT); | ||
312 | } | ||
313 | return page; | ||
314 | } | ||
315 | |||
250 | static int f2fs_symlink(struct inode *dir, struct dentry *dentry, | 316 | static int f2fs_symlink(struct inode *dir, struct dentry *dentry, |
251 | const char *symname) | 317 | const char *symname) |
252 | { | 318 | { |
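With plain page_follow_link_light(), a symlink whose data block was lost reads back as a zero-length target; the wrapper turns that into -ENOENT at follow time instead of letting the walk continue on an empty path. A userspace illustration (the path is hypothetical):

#include <stdio.h>
#include <errno.h>
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	/* hypothetical symlink whose target block never reached disk */
	int fd = open("/mnt/f2fs/broken-link", O_RDONLY);

	if (fd < 0 && errno == ENOENT) {
		puts("zero-length symlink target now fails with ENOENT");
		return 0;
	}
	if (fd >= 0)
		close(fd);
	return 0;
}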
@@ -276,6 +342,17 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry, | |||
276 | d_instantiate(dentry, inode); | 342 | d_instantiate(dentry, inode); |
277 | unlock_new_inode(inode); | 343 | unlock_new_inode(inode); |
278 | 344 | ||
345 | /* | ||
346 | * Let's flush symlink data in order to avoid a broken symlink as much | ||
347 | * as possible. An fsync would be more reliable, but there is no | ||
348 | * way to get a file descriptor in order to flush that. | ||
349 | * | ||
350 | * Note that it needs a dir->fsync to make this recoverable. | ||
351 | * If the symlink path is stored into inline_data, there is no | ||
352 | * performance regression. | ||
353 | */ | ||
354 | filemap_write_and_wait_range(inode->i_mapping, 0, symlen - 1); | ||
355 | |||
279 | if (IS_DIRSYNC(dir)) | 356 | if (IS_DIRSYNC(dir)) |
280 | f2fs_sync_fs(sbi->sb, 1); | 357 | f2fs_sync_fs(sbi->sb, 1); |
281 | return err; | 358 | return err; |
@@ -693,6 +770,8 @@ static int f2fs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) | |||
693 | f2fs_unlock_op(sbi); | 770 | f2fs_unlock_op(sbi); |
694 | 771 | ||
695 | alloc_nid_done(sbi, inode->i_ino); | 772 | alloc_nid_done(sbi, inode->i_ino); |
773 | |||
774 | stat_inc_inline_inode(inode); | ||
696 | d_tmpfile(dentry, inode); | 775 | d_tmpfile(dentry, inode); |
697 | unlock_new_inode(inode); | 776 | unlock_new_inode(inode); |
698 | return 0; | 777 | return 0; |
@@ -729,7 +808,7 @@ const struct inode_operations f2fs_dir_inode_operations = { | |||
729 | 808 | ||
730 | const struct inode_operations f2fs_symlink_inode_operations = { | 809 | const struct inode_operations f2fs_symlink_inode_operations = { |
731 | .readlink = generic_readlink, | 810 | .readlink = generic_readlink, |
732 | .follow_link = page_follow_link_light, | 811 | .follow_link = f2fs_follow_link, |
733 | .put_link = page_put_link, | 812 | .put_link = page_put_link, |
734 | .getattr = f2fs_getattr, | 813 | .getattr = f2fs_getattr, |
735 | .setattr = f2fs_setattr, | 814 | .setattr = f2fs_setattr, |
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 97bd9d3db882..8ab0cf1930bd 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c | |||
@@ -41,7 +41,9 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type) | |||
41 | /* only uses low memory */ | 41 | /* only uses low memory */ |
42 | avail_ram = val.totalram - val.totalhigh; | 42 | avail_ram = val.totalram - val.totalhigh; |
43 | 43 | ||
44 | /* give 25%, 25%, 50%, 50% memory for each components respectively */ | 44 | /* |
45 | * give 25%, 25%, 50%, 50%, 50% memory for each component respectively | ||
46 | */ | ||
45 | if (type == FREE_NIDS) { | 47 | if (type == FREE_NIDS) { |
46 | mem_size = (nm_i->fcnt * sizeof(struct free_nid)) >> | 48 | mem_size = (nm_i->fcnt * sizeof(struct free_nid)) >> |
47 | PAGE_CACHE_SHIFT; | 49 | PAGE_CACHE_SHIFT; |
@@ -62,6 +64,11 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type) | |||
62 | mem_size += (sbi->im[i].ino_num * | 64 | mem_size += (sbi->im[i].ino_num * |
63 | sizeof(struct ino_entry)) >> PAGE_CACHE_SHIFT; | 65 | sizeof(struct ino_entry)) >> PAGE_CACHE_SHIFT; |
64 | res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1); | 66 | res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1); |
67 | } else if (type == EXTENT_CACHE) { | ||
68 | mem_size = (sbi->total_ext_tree * sizeof(struct extent_tree) + | ||
69 | atomic_read(&sbi->total_ext_node) * | ||
70 | sizeof(struct extent_node)) >> PAGE_CACHE_SHIFT; | ||
71 | res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1); | ||
65 | } else { | 72 | } else { |
66 | if (sbi->sb->s_bdi->dirty_exceeded) | 73 | if (sbi->sb->s_bdi->dirty_exceeded) |
67 | return false; | 74 | return false; |
@@ -494,7 +501,7 @@ int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) | |||
494 | 501 | ||
495 | /* if inline_data is set, should not report any block indices */ | 502 | /* if inline_data is set, should not report any block indices */ |
496 | if (f2fs_has_inline_data(dn->inode) && index) { | 503 | if (f2fs_has_inline_data(dn->inode) && index) { |
497 | err = -EINVAL; | 504 | err = -ENOENT; |
498 | f2fs_put_page(npage[0], 1); | 505 | f2fs_put_page(npage[0], 1); |
499 | goto release_out; | 506 | goto release_out; |
500 | } | 507 | } |
@@ -995,6 +1002,7 @@ static int read_node_page(struct page *page, int rw) | |||
995 | get_node_info(sbi, page->index, &ni); | 1002 | get_node_info(sbi, page->index, &ni); |
996 | 1003 | ||
997 | if (unlikely(ni.blk_addr == NULL_ADDR)) { | 1004 | if (unlikely(ni.blk_addr == NULL_ADDR)) { |
1005 | ClearPageUptodate(page); | ||
998 | f2fs_put_page(page, 1); | 1006 | f2fs_put_page(page, 1); |
999 | return -ENOENT; | 1007 | return -ENOENT; |
1000 | } | 1008 | } |
@@ -1306,6 +1314,7 @@ static int f2fs_write_node_page(struct page *page, | |||
1306 | 1314 | ||
1307 | /* This page is already truncated */ | 1315 | /* This page is already truncated */ |
1308 | if (unlikely(ni.blk_addr == NULL_ADDR)) { | 1316 | if (unlikely(ni.blk_addr == NULL_ADDR)) { |
1317 | ClearPageUptodate(page); | ||
1309 | dec_page_count(sbi, F2FS_DIRTY_NODES); | 1318 | dec_page_count(sbi, F2FS_DIRTY_NODES); |
1310 | unlock_page(page); | 1319 | unlock_page(page); |
1311 | return 0; | 1320 | return 0; |
@@ -1821,6 +1830,7 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi, | |||
1821 | struct f2fs_nat_block *nat_blk; | 1830 | struct f2fs_nat_block *nat_blk; |
1822 | struct nat_entry *ne, *cur; | 1831 | struct nat_entry *ne, *cur; |
1823 | struct page *page = NULL; | 1832 | struct page *page = NULL; |
1833 | struct f2fs_nm_info *nm_i = NM_I(sbi); | ||
1824 | 1834 | ||
1825 | /* | 1835 | /* |
1826 | * there are two steps to flush nat entries: | 1836 | * there are two steps to flush nat entries: |
@@ -1874,7 +1884,9 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi, | |||
1874 | 1884 | ||
1875 | f2fs_bug_on(sbi, set->entry_cnt); | 1885 | f2fs_bug_on(sbi, set->entry_cnt); |
1876 | 1886 | ||
1887 | down_write(&nm_i->nat_tree_lock); | ||
1877 | radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set); | 1888 | radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set); |
1889 | up_write(&nm_i->nat_tree_lock); | ||
1878 | kmem_cache_free(nat_entry_set_slab, set); | 1890 | kmem_cache_free(nat_entry_set_slab, set); |
1879 | } | 1891 | } |
1880 | 1892 | ||
@@ -1902,6 +1914,7 @@ void flush_nat_entries(struct f2fs_sb_info *sbi) | |||
1902 | if (!__has_cursum_space(sum, nm_i->dirty_nat_cnt, NAT_JOURNAL)) | 1914 | if (!__has_cursum_space(sum, nm_i->dirty_nat_cnt, NAT_JOURNAL)) |
1903 | remove_nats_in_journal(sbi); | 1915 | remove_nats_in_journal(sbi); |
1904 | 1916 | ||
1917 | down_write(&nm_i->nat_tree_lock); | ||
1905 | while ((found = __gang_lookup_nat_set(nm_i, | 1918 | while ((found = __gang_lookup_nat_set(nm_i, |
1906 | set_idx, SETVEC_SIZE, setvec))) { | 1919 | set_idx, SETVEC_SIZE, setvec))) { |
1907 | unsigned idx; | 1920 | unsigned idx; |
@@ -1910,6 +1923,7 @@ void flush_nat_entries(struct f2fs_sb_info *sbi) | |||
1910 | __adjust_nat_entry_set(setvec[idx], &sets, | 1923 | __adjust_nat_entry_set(setvec[idx], &sets, |
1911 | MAX_NAT_JENTRIES(sum)); | 1924 | MAX_NAT_JENTRIES(sum)); |
1912 | } | 1925 | } |
1926 | up_write(&nm_i->nat_tree_lock); | ||
1913 | 1927 | ||
1914 | /* flush dirty nats in nat entry set */ | 1928 | /* flush dirty nats in nat entry set */ |
1915 | list_for_each_entry_safe(set, tmp, &sets, set_list) | 1929 | list_for_each_entry_safe(set, tmp, &sets, set_list) |
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index f405bbf2435a..c56026f1725c 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h | |||
@@ -120,6 +120,7 @@ enum mem_type { | |||
120 | NAT_ENTRIES, /* indicates the cached nat entry */ | 120 | NAT_ENTRIES, /* indicates the cached nat entry */ |
121 | DIRTY_DENTS, /* indicates dirty dentry pages */ | 121 | DIRTY_DENTS, /* indicates dirty dentry pages */ |
122 | INO_ENTRIES, /* indicates inode entries */ | 122 | INO_ENTRIES, /* indicates inode entries */ |
123 | EXTENT_CACHE, /* indicates extent cache */ | ||
123 | BASE_CHECK, /* check kernel status */ | 124 | BASE_CHECK, /* check kernel status */ |
124 | }; | 125 | }; |
125 | 126 | ||
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 41afb9534bbd..8d8ea99f2156 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c | |||
@@ -93,10 +93,9 @@ static int recover_dentry(struct inode *inode, struct page *ipage) | |||
93 | } | 93 | } |
94 | retry: | 94 | retry: |
95 | de = f2fs_find_entry(dir, &name, &page); | 95 | de = f2fs_find_entry(dir, &name, &page); |
96 | if (de && inode->i_ino == le32_to_cpu(de->ino)) { | 96 | if (de && inode->i_ino == le32_to_cpu(de->ino)) |
97 | clear_inode_flag(F2FS_I(inode), FI_INC_LINK); | ||
98 | goto out_unmap_put; | 97 | goto out_unmap_put; |
99 | } | 98 | |
100 | if (de) { | 99 | if (de) { |
101 | einode = f2fs_iget(inode->i_sb, le32_to_cpu(de->ino)); | 100 | einode = f2fs_iget(inode->i_sb, le32_to_cpu(de->ino)); |
102 | if (IS_ERR(einode)) { | 101 | if (IS_ERR(einode)) { |
@@ -115,7 +114,7 @@ retry: | |||
115 | iput(einode); | 114 | iput(einode); |
116 | goto retry; | 115 | goto retry; |
117 | } | 116 | } |
118 | err = __f2fs_add_link(dir, &name, inode); | 117 | err = __f2fs_add_link(dir, &name, inode, inode->i_ino, inode->i_mode); |
119 | if (err) | 118 | if (err) |
120 | goto out_err; | 119 | goto out_err; |
121 | 120 | ||
@@ -187,11 +186,7 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) | |||
187 | goto next; | 186 | goto next; |
188 | 187 | ||
189 | entry = get_fsync_inode(head, ino_of_node(page)); | 188 | entry = get_fsync_inode(head, ino_of_node(page)); |
190 | if (entry) { | 189 | if (!entry) { |
191 | if (IS_INODE(page) && is_dent_dnode(page)) | ||
192 | set_inode_flag(F2FS_I(entry->inode), | ||
193 | FI_INC_LINK); | ||
194 | } else { | ||
195 | if (IS_INODE(page) && is_dent_dnode(page)) { | 190 | if (IS_INODE(page) && is_dent_dnode(page)) { |
196 | err = recover_inode_page(sbi, page); | 191 | err = recover_inode_page(sbi, page); |
197 | if (err) | 192 | if (err) |
@@ -212,8 +207,10 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) | |||
212 | if (IS_ERR(entry->inode)) { | 207 | if (IS_ERR(entry->inode)) { |
213 | err = PTR_ERR(entry->inode); | 208 | err = PTR_ERR(entry->inode); |
214 | kmem_cache_free(fsync_entry_slab, entry); | 209 | kmem_cache_free(fsync_entry_slab, entry); |
215 | if (err == -ENOENT) | 210 | if (err == -ENOENT) { |
211 | err = 0; | ||
216 | goto next; | 212 | goto next; |
213 | } | ||
217 | break; | 214 | break; |
218 | } | 215 | } |
219 | list_add_tail(&entry->list, head); | 216 | list_add_tail(&entry->list, head); |
@@ -256,6 +253,7 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi, | |||
256 | struct f2fs_summary_block *sum_node; | 253 | struct f2fs_summary_block *sum_node; |
257 | struct f2fs_summary sum; | 254 | struct f2fs_summary sum; |
258 | struct page *sum_page, *node_page; | 255 | struct page *sum_page, *node_page; |
256 | struct dnode_of_data tdn = *dn; | ||
259 | nid_t ino, nid; | 257 | nid_t ino, nid; |
260 | struct inode *inode; | 258 | struct inode *inode; |
261 | unsigned int offset; | 259 | unsigned int offset; |
@@ -283,17 +281,15 @@ got_it: | |||
283 | /* Use the locked dnode page and inode */ | 281 | /* Use the locked dnode page and inode */ |
284 | nid = le32_to_cpu(sum.nid); | 282 | nid = le32_to_cpu(sum.nid); |
285 | if (dn->inode->i_ino == nid) { | 283 | if (dn->inode->i_ino == nid) { |
286 | struct dnode_of_data tdn = *dn; | ||
287 | tdn.nid = nid; | 284 | tdn.nid = nid; |
285 | if (!dn->inode_page_locked) | ||
286 | lock_page(dn->inode_page); | ||
288 | tdn.node_page = dn->inode_page; | 287 | tdn.node_page = dn->inode_page; |
289 | tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node); | 288 | tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node); |
290 | truncate_data_blocks_range(&tdn, 1); | 289 | goto truncate_out; |
291 | return 0; | ||
292 | } else if (dn->nid == nid) { | 290 | } else if (dn->nid == nid) { |
293 | struct dnode_of_data tdn = *dn; | ||
294 | tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node); | 291 | tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node); |
295 | truncate_data_blocks_range(&tdn, 1); | 292 | goto truncate_out; |
296 | return 0; | ||
297 | } | 293 | } |
298 | 294 | ||
299 | /* Get the node page */ | 295 | /* Get the node page */ |
@@ -317,18 +313,33 @@ got_it: | |||
317 | bidx = start_bidx_of_node(offset, F2FS_I(inode)) + | 313 | bidx = start_bidx_of_node(offset, F2FS_I(inode)) + |
318 | le16_to_cpu(sum.ofs_in_node); | 314 | le16_to_cpu(sum.ofs_in_node); |
319 | 315 | ||
320 | if (ino != dn->inode->i_ino) { | 316 | /* |
321 | truncate_hole(inode, bidx, bidx + 1); | 317 | * if inode page is locked, unlock temporarily, but its reference |
318 | * count remains held. | ||
319 | */ | ||
320 | if (ino == dn->inode->i_ino && dn->inode_page_locked) | ||
321 | unlock_page(dn->inode_page); | ||
322 | |||
323 | set_new_dnode(&tdn, inode, NULL, NULL, 0); | ||
324 | if (get_dnode_of_data(&tdn, bidx, LOOKUP_NODE)) | ||
325 | goto out; | ||
326 | |||
327 | if (tdn.data_blkaddr == blkaddr) | ||
328 | truncate_data_blocks_range(&tdn, 1); | ||
329 | |||
330 | f2fs_put_dnode(&tdn); | ||
331 | out: | ||
332 | if (ino != dn->inode->i_ino) | ||
322 | iput(inode); | 333 | iput(inode); |
323 | } else { | 334 | else if (dn->inode_page_locked) |
324 | struct dnode_of_data tdn; | 335 | lock_page(dn->inode_page); |
325 | set_new_dnode(&tdn, inode, dn->inode_page, NULL, 0); | 336 | return 0; |
326 | if (get_dnode_of_data(&tdn, bidx, LOOKUP_NODE)) | 337 | |
327 | return 0; | 338 | truncate_out: |
328 | if (tdn.data_blkaddr != NULL_ADDR) | 339 | if (datablock_addr(tdn.node_page, tdn.ofs_in_node) == blkaddr) |
329 | truncate_data_blocks_range(&tdn, 1); | 340 | truncate_data_blocks_range(&tdn, 1); |
330 | f2fs_put_page(tdn.node_page, 1); | 341 | if (dn->inode->i_ino == nid && !dn->inode_page_locked) |
331 | } | 342 | unlock_page(dn->inode_page); |
332 | return 0; | 343 | return 0; |
333 | } | 344 | } |
334 | 345 | ||
@@ -384,7 +395,9 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, | |||
384 | src = datablock_addr(dn.node_page, dn.ofs_in_node); | 395 | src = datablock_addr(dn.node_page, dn.ofs_in_node); |
385 | dest = datablock_addr(page, dn.ofs_in_node); | 396 | dest = datablock_addr(page, dn.ofs_in_node); |
386 | 397 | ||
387 | if (src != dest && dest != NEW_ADDR && dest != NULL_ADDR) { | 398 | if (src != dest && dest != NEW_ADDR && dest != NULL_ADDR && |
399 | dest >= MAIN_BLKADDR(sbi) && dest < MAX_BLKADDR(sbi)) { | ||
400 | |||
388 | if (src == NULL_ADDR) { | 401 | if (src == NULL_ADDR) { |
389 | err = reserve_new_block(&dn); | 402 | err = reserve_new_block(&dn); |
390 | /* We should not get -ENOSPC */ | 403 | /* We should not get -ENOSPC */ |
@@ -401,14 +414,13 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, | |||
401 | /* write dummy data page */ | 414 | /* write dummy data page */ |
402 | recover_data_page(sbi, NULL, &sum, src, dest); | 415 | recover_data_page(sbi, NULL, &sum, src, dest); |
403 | dn.data_blkaddr = dest; | 416 | dn.data_blkaddr = dest; |
404 | update_extent_cache(&dn); | 417 | set_data_blkaddr(&dn); |
418 | f2fs_update_extent_cache(&dn); | ||
405 | recovered++; | 419 | recovered++; |
406 | } | 420 | } |
407 | dn.ofs_in_node++; | 421 | dn.ofs_in_node++; |
408 | } | 422 | } |
409 | 423 | ||
410 | /* write node page in place */ | ||
411 | set_summary(&sum, dn.nid, 0, 0); | ||
412 | if (IS_INODE(dn.node_page)) | 424 | if (IS_INODE(dn.node_page)) |
413 | sync_inode_page(&dn); | 425 | sync_inode_page(&dn); |
414 | 426 | ||
@@ -552,7 +564,7 @@ out: | |||
552 | mutex_unlock(&sbi->cp_mutex); | 564 | mutex_unlock(&sbi->cp_mutex); |
553 | } else if (need_writecp) { | 565 | } else if (need_writecp) { |
554 | struct cp_control cpc = { | 566 | struct cp_control cpc = { |
555 | .reason = CP_SYNC, | 567 | .reason = CP_RECOVERY, |
556 | }; | 568 | }; |
557 | mutex_unlock(&sbi->cp_mutex); | 569 | mutex_unlock(&sbi->cp_mutex); |
558 | write_checkpoint(sbi, &cpc); | 570 | write_checkpoint(sbi, &cpc); |
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index daee4ab913da..f939660941bb 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c | |||
@@ -205,6 +205,8 @@ retry: | |||
205 | list_add_tail(&new->list, &fi->inmem_pages); | 205 | list_add_tail(&new->list, &fi->inmem_pages); |
206 | inc_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES); | 206 | inc_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES); |
207 | mutex_unlock(&fi->inmem_lock); | 207 | mutex_unlock(&fi->inmem_lock); |
208 | |||
209 | trace_f2fs_register_inmem_page(page, INMEM); | ||
208 | } | 210 | } |
209 | 211 | ||
210 | void commit_inmem_pages(struct inode *inode, bool abort) | 212 | void commit_inmem_pages(struct inode *inode, bool abort) |
@@ -238,11 +240,13 @@ void commit_inmem_pages(struct inode *inode, bool abort) | |||
238 | f2fs_wait_on_page_writeback(cur->page, DATA); | 240 | f2fs_wait_on_page_writeback(cur->page, DATA); |
239 | if (clear_page_dirty_for_io(cur->page)) | 241 | if (clear_page_dirty_for_io(cur->page)) |
240 | inode_dec_dirty_pages(inode); | 242 | inode_dec_dirty_pages(inode); |
243 | trace_f2fs_commit_inmem_page(cur->page, INMEM); | ||
241 | do_write_data_page(cur->page, &fio); | 244 | do_write_data_page(cur->page, &fio); |
242 | submit_bio = true; | 245 | submit_bio = true; |
243 | } | 246 | } |
244 | f2fs_put_page(cur->page, 1); | 247 | f2fs_put_page(cur->page, 1); |
245 | } else { | 248 | } else { |
249 | trace_f2fs_commit_inmem_page(cur->page, INMEM_DROP); | ||
246 | put_page(cur->page); | 250 | put_page(cur->page); |
247 | } | 251 | } |
248 | radix_tree_delete(&fi->inmem_root, cur->page->index); | 252 | radix_tree_delete(&fi->inmem_root, cur->page->index); |
@@ -277,6 +281,9 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi) | |||
277 | 281 | ||
278 | void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi) | 282 | void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi) |
279 | { | 283 | { |
284 | /* try to shrink extent cache when there is not enough memory */ | ||
285 | f2fs_shrink_extent_tree(sbi, EXTENT_CACHE_SHRINK_NUMBER); | ||
286 | |||
280 | /* check the # of cached NAT entries and prefree segments */ | 287 | /* check the # of cached NAT entries and prefree segments */ |
281 | if (try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK) || | 288 | if (try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK) || |
282 | excess_prefree_segs(sbi) || | 289 | excess_prefree_segs(sbi) || |
@@ -549,7 +556,7 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc) | |||
549 | 556 | ||
550 | end = __find_rev_next_zero_bit(dmap, max_blocks, start + 1); | 557 | end = __find_rev_next_zero_bit(dmap, max_blocks, start + 1); |
551 | 558 | ||
552 | if (end - start < cpc->trim_minlen) | 559 | if (force && end - start < cpc->trim_minlen) |
553 | continue; | 560 | continue; |
554 | 561 | ||
555 | __add_discard_entry(sbi, cpc, start, end); | 562 | __add_discard_entry(sbi, cpc, start, end); |
@@ -1164,6 +1171,7 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, | |||
1164 | curseg = CURSEG_I(sbi, type); | 1171 | curseg = CURSEG_I(sbi, type); |
1165 | 1172 | ||
1166 | mutex_lock(&curseg->curseg_mutex); | 1173 | mutex_lock(&curseg->curseg_mutex); |
1174 | mutex_lock(&sit_i->sentry_lock); | ||
1167 | 1175 | ||
1168 | /* direct_io'ed data is aligned to the segment for better performance */ | 1176 | /* direct_io'ed data is aligned to the segment for better performance */ |
1169 | if (direct_io && curseg->next_blkoff) | 1177 | if (direct_io && curseg->next_blkoff) |
@@ -1178,7 +1186,6 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, | |||
1178 | */ | 1186 | */ |
1179 | __add_sum_entry(sbi, type, sum); | 1187 | __add_sum_entry(sbi, type, sum); |
1180 | 1188 | ||
1181 | mutex_lock(&sit_i->sentry_lock); | ||
1182 | __refresh_next_blkoff(sbi, curseg); | 1189 | __refresh_next_blkoff(sbi, curseg); |
1183 | 1190 | ||
1184 | stat_inc_block_count(sbi, curseg); | 1191 | stat_inc_block_count(sbi, curseg); |
@@ -1730,6 +1737,9 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) | |||
1730 | mutex_lock(&curseg->curseg_mutex); | 1737 | mutex_lock(&curseg->curseg_mutex); |
1731 | mutex_lock(&sit_i->sentry_lock); | 1738 | mutex_lock(&sit_i->sentry_lock); |
1732 | 1739 | ||
1740 | if (!sit_i->dirty_sentries) | ||
1741 | goto out; | ||
1742 | |||
1733 | /* | 1743 | /* |
1734 | * add and account sit entries of dirty bitmap in sit entry | 1744 | * add and account sit entries of dirty bitmap in sit entry |
1735 | * set temporarily | 1745 | * set temporarily |
@@ -1744,9 +1754,6 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) | |||
1744 | if (!__has_cursum_space(sum, sit_i->dirty_sentries, SIT_JOURNAL)) | 1754 | if (!__has_cursum_space(sum, sit_i->dirty_sentries, SIT_JOURNAL)) |
1745 | remove_sits_in_journal(sbi); | 1755 | remove_sits_in_journal(sbi); |
1746 | 1756 | ||
1747 | if (!sit_i->dirty_sentries) | ||
1748 | goto out; | ||
1749 | |||
1750 | /* | 1757 | /* |
1751 | * there are two steps to flush sit entries: | 1758 | * there are two steps to flush sit entries: |
1752 | * #1, flush sit entries to journal in current cold data summary block. | 1759 | * #1, flush sit entries to journal in current cold data summary block. |
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 7fd35111cf62..85d7fa7514b2 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h | |||
@@ -336,7 +336,8 @@ static inline void __set_free(struct f2fs_sb_info *sbi, unsigned int segno) | |||
336 | clear_bit(segno, free_i->free_segmap); | 336 | clear_bit(segno, free_i->free_segmap); |
337 | free_i->free_segments++; | 337 | free_i->free_segments++; |
338 | 338 | ||
339 | next = find_next_bit(free_i->free_segmap, MAIN_SEGS(sbi), start_segno); | 339 | next = find_next_bit(free_i->free_segmap, |
340 | start_segno + sbi->segs_per_sec, start_segno); | ||
340 | if (next >= start_segno + sbi->segs_per_sec) { | 341 | if (next >= start_segno + sbi->segs_per_sec) { |
341 | clear_bit(secno, free_i->free_secmap); | 342 | clear_bit(secno, free_i->free_secmap); |
342 | free_i->free_sections++; | 343 | free_i->free_sections++; |
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index f2fe666a6ea9..160b88346b24 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c | |||
@@ -57,6 +57,8 @@ enum { | |||
57 | Opt_flush_merge, | 57 | Opt_flush_merge, |
58 | Opt_nobarrier, | 58 | Opt_nobarrier, |
59 | Opt_fastboot, | 59 | Opt_fastboot, |
60 | Opt_extent_cache, | ||
61 | Opt_noinline_data, | ||
60 | Opt_err, | 62 | Opt_err, |
61 | }; | 63 | }; |
62 | 64 | ||
@@ -78,6 +80,8 @@ static match_table_t f2fs_tokens = { | |||
78 | {Opt_flush_merge, "flush_merge"}, | 80 | {Opt_flush_merge, "flush_merge"}, |
79 | {Opt_nobarrier, "nobarrier"}, | 81 | {Opt_nobarrier, "nobarrier"}, |
80 | {Opt_fastboot, "fastboot"}, | 82 | {Opt_fastboot, "fastboot"}, |
83 | {Opt_extent_cache, "extent_cache"}, | ||
84 | {Opt_noinline_data, "noinline_data"}, | ||
81 | {Opt_err, NULL}, | 85 | {Opt_err, NULL}, |
82 | }; | 86 | }; |
83 | 87 | ||
@@ -367,6 +371,12 @@ static int parse_options(struct super_block *sb, char *options) | |||
367 | case Opt_fastboot: | 371 | case Opt_fastboot: |
368 | set_opt(sbi, FASTBOOT); | 372 | set_opt(sbi, FASTBOOT); |
369 | break; | 373 | break; |
374 | case Opt_extent_cache: | ||
375 | set_opt(sbi, EXTENT_CACHE); | ||
376 | break; | ||
377 | case Opt_noinline_data: | ||
378 | clear_opt(sbi, INLINE_DATA); | ||
379 | break; | ||
370 | default: | 380 | default: |
371 | f2fs_msg(sb, KERN_ERR, | 381 | f2fs_msg(sb, KERN_ERR, |
372 | "Unrecognized mount option \"%s\" or missing value", | 382 | "Unrecognized mount option \"%s\" or missing value", |
@@ -392,7 +402,7 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb) | |||
392 | atomic_set(&fi->dirty_pages, 0); | 402 | atomic_set(&fi->dirty_pages, 0); |
393 | fi->i_current_depth = 1; | 403 | fi->i_current_depth = 1; |
394 | fi->i_advise = 0; | 404 | fi->i_advise = 0; |
395 | rwlock_init(&fi->ext.ext_lock); | 405 | rwlock_init(&fi->ext_lock); |
396 | init_rwsem(&fi->i_sem); | 406 | init_rwsem(&fi->i_sem); |
397 | INIT_RADIX_TREE(&fi->inmem_root, GFP_NOFS); | 407 | INIT_RADIX_TREE(&fi->inmem_root, GFP_NOFS); |
398 | INIT_LIST_HEAD(&fi->inmem_pages); | 408 | INIT_LIST_HEAD(&fi->inmem_pages); |
@@ -591,6 +601,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) | |||
591 | seq_puts(seq, ",disable_ext_identify"); | 601 | seq_puts(seq, ",disable_ext_identify"); |
592 | if (test_opt(sbi, INLINE_DATA)) | 602 | if (test_opt(sbi, INLINE_DATA)) |
593 | seq_puts(seq, ",inline_data"); | 603 | seq_puts(seq, ",inline_data"); |
604 | else | ||
605 | seq_puts(seq, ",noinline_data"); | ||
594 | if (test_opt(sbi, INLINE_DENTRY)) | 606 | if (test_opt(sbi, INLINE_DENTRY)) |
595 | seq_puts(seq, ",inline_dentry"); | 607 | seq_puts(seq, ",inline_dentry"); |
596 | if (!f2fs_readonly(sbi->sb) && test_opt(sbi, FLUSH_MERGE)) | 608 | if (!f2fs_readonly(sbi->sb) && test_opt(sbi, FLUSH_MERGE)) |
@@ -599,6 +611,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) | |||
599 | seq_puts(seq, ",nobarrier"); | 611 | seq_puts(seq, ",nobarrier"); |
600 | if (test_opt(sbi, FASTBOOT)) | 612 | if (test_opt(sbi, FASTBOOT)) |
601 | seq_puts(seq, ",fastboot"); | 613 | seq_puts(seq, ",fastboot"); |
614 | if (test_opt(sbi, EXTENT_CACHE)) | ||
615 | seq_puts(seq, ",extent_cache"); | ||
602 | seq_printf(seq, ",active_logs=%u", sbi->active_logs); | 616 | seq_printf(seq, ",active_logs=%u", sbi->active_logs); |
603 | 617 | ||
604 | return 0; | 618 | return 0; |
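Both new options are mount-time switches: extent_cache opts into the rbtree extent cache, and noinline_data turns off the inline_data default set later in this file. A small sketch of selecting them from userspace (device and mountpoint are hypothetical):

#include <stdio.h>
#include <sys/mount.h>

int main(void)
{
	if (mount("/dev/vdb1", "/mnt/f2fs", "f2fs", 0,
			"extent_cache,noinline_data") != 0) {
		perror("mount");
		return 1;
	}
	return 0;
}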
@@ -959,7 +973,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) | |||
959 | struct buffer_head *raw_super_buf; | 973 | struct buffer_head *raw_super_buf; |
960 | struct inode *root; | 974 | struct inode *root; |
961 | long err = -EINVAL; | 975 | long err = -EINVAL; |
962 | bool retry = true; | 976 | bool retry = true, need_fsck = false; |
963 | char *options = NULL; | 977 | char *options = NULL; |
964 | int i; | 978 | int i; |
965 | 979 | ||
@@ -984,6 +998,7 @@ try_onemore: | |||
984 | sbi->active_logs = NR_CURSEG_TYPE; | 998 | sbi->active_logs = NR_CURSEG_TYPE; |
985 | 999 | ||
986 | set_opt(sbi, BG_GC); | 1000 | set_opt(sbi, BG_GC); |
1001 | set_opt(sbi, INLINE_DATA); | ||
987 | 1002 | ||
988 | #ifdef CONFIG_F2FS_FS_XATTR | 1003 | #ifdef CONFIG_F2FS_FS_XATTR |
989 | set_opt(sbi, XATTR_USER); | 1004 | set_opt(sbi, XATTR_USER); |
@@ -1020,7 +1035,6 @@ try_onemore: | |||
1020 | sbi->raw_super = raw_super; | 1035 | sbi->raw_super = raw_super; |
1021 | sbi->raw_super_buf = raw_super_buf; | 1036 | sbi->raw_super_buf = raw_super_buf; |
1022 | mutex_init(&sbi->gc_mutex); | 1037 | mutex_init(&sbi->gc_mutex); |
1023 | mutex_init(&sbi->writepages); | ||
1024 | mutex_init(&sbi->cp_mutex); | 1038 | mutex_init(&sbi->cp_mutex); |
1025 | init_rwsem(&sbi->node_write); | 1039 | init_rwsem(&sbi->node_write); |
1026 | clear_sbi_flag(sbi, SBI_POR_DOING); | 1040 | clear_sbi_flag(sbi, SBI_POR_DOING); |
@@ -1072,6 +1086,8 @@ try_onemore: | |||
1072 | INIT_LIST_HEAD(&sbi->dir_inode_list); | 1086 | INIT_LIST_HEAD(&sbi->dir_inode_list); |
1073 | spin_lock_init(&sbi->dir_inode_lock); | 1087 | spin_lock_init(&sbi->dir_inode_lock); |
1074 | 1088 | ||
1089 | init_extent_cache_info(sbi); | ||
1090 | |||
1075 | init_ino_entry_info(sbi); | 1091 | init_ino_entry_info(sbi); |
1076 | 1092 | ||
1077 | /* setup f2fs internal modules */ | 1093 | /* setup f2fs internal modules */ |
@@ -1146,9 +1162,6 @@ try_onemore: | |||
1146 | if (err) | 1162 | if (err) |
1147 | goto free_proc; | 1163 | goto free_proc; |
1148 | 1164 | ||
1149 | if (!retry) | ||
1150 | set_sbi_flag(sbi, SBI_NEED_FSCK); | ||
1151 | |||
1152 | /* recover fsynced data */ | 1165 | /* recover fsynced data */ |
1153 | if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) { | 1166 | if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) { |
1154 | /* | 1167 | /* |
@@ -1160,8 +1173,13 @@ try_onemore: | |||
1160 | err = -EROFS; | 1173 | err = -EROFS; |
1161 | goto free_kobj; | 1174 | goto free_kobj; |
1162 | } | 1175 | } |
1176 | |||
1177 | if (need_fsck) | ||
1178 | set_sbi_flag(sbi, SBI_NEED_FSCK); | ||
1179 | |||
1163 | err = recover_fsync_data(sbi); | 1180 | err = recover_fsync_data(sbi); |
1164 | if (err) { | 1181 | if (err) { |
1182 | need_fsck = true; | ||
1165 | f2fs_msg(sb, KERN_ERR, | 1183 | f2fs_msg(sb, KERN_ERR, |
1166 | "Cannot recover all fsync data errno=%ld", err); | 1184 | "Cannot recover all fsync data errno=%ld", err); |
1167 | goto free_kobj; | 1185 | goto free_kobj; |
@@ -1212,7 +1230,7 @@ free_sbi: | |||
1212 | 1230 | ||
1213 | /* give only one another chance */ | 1231 | /* give only one another chance */ |
1214 | if (retry) { | 1232 | if (retry) { |
1215 | retry = 0; | 1233 | retry = false; |
1216 | shrink_dcache_sb(sb); | 1234 | shrink_dcache_sb(sb); |
1217 | goto try_onemore; | 1235 | goto try_onemore; |
1218 | } | 1236 | } |
@@ -1278,10 +1296,13 @@ static int __init init_f2fs_fs(void) | |||
1278 | err = create_checkpoint_caches(); | 1296 | err = create_checkpoint_caches(); |
1279 | if (err) | 1297 | if (err) |
1280 | goto free_segment_manager_caches; | 1298 | goto free_segment_manager_caches; |
1299 | err = create_extent_cache(); | ||
1300 | if (err) | ||
1301 | goto free_checkpoint_caches; | ||
1281 | f2fs_kset = kset_create_and_add("f2fs", NULL, fs_kobj); | 1302 | f2fs_kset = kset_create_and_add("f2fs", NULL, fs_kobj); |
1282 | if (!f2fs_kset) { | 1303 | if (!f2fs_kset) { |
1283 | err = -ENOMEM; | 1304 | err = -ENOMEM; |
1284 | goto free_checkpoint_caches; | 1305 | goto free_extent_cache; |
1285 | } | 1306 | } |
1286 | err = register_filesystem(&f2fs_fs_type); | 1307 | err = register_filesystem(&f2fs_fs_type); |
1287 | if (err) | 1308 | if (err) |
@@ -1292,6 +1313,8 @@ static int __init init_f2fs_fs(void) | |||
1292 | 1313 | ||
1293 | free_kset: | 1314 | free_kset: |
1294 | kset_unregister(f2fs_kset); | 1315 | kset_unregister(f2fs_kset); |
1316 | free_extent_cache: | ||
1317 | destroy_extent_cache(); | ||
1295 | free_checkpoint_caches: | 1318 | free_checkpoint_caches: |
1296 | destroy_checkpoint_caches(); | 1319 | destroy_checkpoint_caches(); |
1297 | free_segment_manager_caches: | 1320 | free_segment_manager_caches: |
@@ -1309,6 +1332,7 @@ static void __exit exit_f2fs_fs(void) | |||
1309 | remove_proc_entry("fs/f2fs", NULL); | 1332 | remove_proc_entry("fs/f2fs", NULL); |
1310 | f2fs_destroy_root_stats(); | 1333 | f2fs_destroy_root_stats(); |
1311 | unregister_filesystem(&f2fs_fs_type); | 1334 | unregister_filesystem(&f2fs_fs_type); |
1335 | destroy_extent_cache(); | ||
1312 | destroy_checkpoint_caches(); | 1336 | destroy_checkpoint_caches(); |
1313 | destroy_segment_manager_caches(); | 1337 | destroy_segment_manager_caches(); |
1314 | destroy_node_manager_caches(); | 1338 | destroy_node_manager_caches(); |
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 5072bf9ae0ef..b0fd2f2d0716 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c | |||
@@ -135,7 +135,8 @@ static int f2fs_xattr_advise_get(struct dentry *dentry, const char *name, | |||
135 | if (strcmp(name, "") != 0) | 135 | if (strcmp(name, "") != 0) |
136 | return -EINVAL; | 136 | return -EINVAL; |
137 | 137 | ||
138 | *((char *)buffer) = F2FS_I(inode)->i_advise; | 138 | if (buffer) |
139 | *((char *)buffer) = F2FS_I(inode)->i_advise; | ||
139 | return sizeof(char); | 140 | return sizeof(char); |
140 | } | 141 | } |
141 | 142 | ||
@@ -152,6 +153,7 @@ static int f2fs_xattr_advise_set(struct dentry *dentry, const char *name, | |||
152 | return -EINVAL; | 153 | return -EINVAL; |
153 | 154 | ||
154 | F2FS_I(inode)->i_advise |= *(char *)value; | 155 | F2FS_I(inode)->i_advise |= *(char *)value; |
156 | mark_inode_dirty(inode); | ||
155 | return 0; | 157 | return 0; |
156 | } | 158 | } |
157 | 159 | ||
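The NULL-buffer check in f2fs_xattr_advise_get() covers exactly the path getxattr(2) takes when userspace passes size 0 to query the value length before allocating. A small sketch (the path is hypothetical, and "system.advise" is assumed to be the advise attribute's full name):

#include <stdio.h>
#include <sys/xattr.h>

int main(void)
{
	/* size 0 asks only for the length; the handler sees buffer == NULL */
	ssize_t len = getxattr("/mnt/f2fs/file", "system.advise", NULL, 0);

	if (len < 0) {
		perror("getxattr");
		return 1;
	}
	printf("advise value is %zd byte(s)\n", len);
	return 0;
}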
diff --git a/fs/fs_pin.c b/fs/fs_pin.c index b06c98796afb..611b5408f6ec 100644 --- a/fs/fs_pin.c +++ b/fs/fs_pin.c | |||
@@ -9,8 +9,8 @@ static DEFINE_SPINLOCK(pin_lock); | |||
9 | void pin_remove(struct fs_pin *pin) | 9 | void pin_remove(struct fs_pin *pin) |
10 | { | 10 | { |
11 | spin_lock(&pin_lock); | 11 | spin_lock(&pin_lock); |
12 | hlist_del(&pin->m_list); | 12 | hlist_del_init(&pin->m_list); |
13 | hlist_del(&pin->s_list); | 13 | hlist_del_init(&pin->s_list); |
14 | spin_unlock(&pin_lock); | 14 | spin_unlock(&pin_lock); |
15 | spin_lock_irq(&pin->wait.lock); | 15 | spin_lock_irq(&pin->wait.lock); |
16 | pin->done = 1; | 16 | pin->done = 1; |
diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c index 762c7a3cf43d..2eac55379239 100644 --- a/fs/jffs2/xattr.c +++ b/fs/jffs2/xattr.c | |||
@@ -1266,7 +1266,6 @@ int jffs2_garbage_collect_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_ | |||
1266 | if (rc) { | 1266 | if (rc) { |
1267 | JFFS2_WARNING("%s: jffs2_reserve_space_gc() = %d, request = %u\n", | 1267 | JFFS2_WARNING("%s: jffs2_reserve_space_gc() = %d, request = %u\n", |
1268 | __func__, rc, totlen); | 1268 | __func__, rc, totlen); |
1269 | rc = rc ? rc : -EBADFD; | ||
1270 | goto out; | 1269 | goto out; |
1271 | } | 1270 | } |
1272 | rc = save_xattr_ref(c, ref); | 1271 | rc = save_xattr_ref(c, ref); |
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c index 665ef5a05183..a563ddbc19e6 100644 --- a/fs/lockd/svcsubs.c +++ b/fs/lockd/svcsubs.c | |||
@@ -31,7 +31,7 @@ | |||
31 | static struct hlist_head nlm_files[FILE_NRHASH]; | 31 | static struct hlist_head nlm_files[FILE_NRHASH]; |
32 | static DEFINE_MUTEX(nlm_file_mutex); | 32 | static DEFINE_MUTEX(nlm_file_mutex); |
33 | 33 | ||
34 | #ifdef NFSD_DEBUG | 34 | #ifdef CONFIG_SUNRPC_DEBUG |
35 | static inline void nlm_debug_print_fh(char *msg, struct nfs_fh *f) | 35 | static inline void nlm_debug_print_fh(char *msg, struct nfs_fh *f) |
36 | { | 36 | { |
37 | u32 *fhp = (u32*)f->data; | 37 | u32 *fhp = (u32*)f->data; |
diff --git a/fs/namespace.c b/fs/namespace.c index 82ef1405260e..1f4f9dac6e5a 100644 --- a/fs/namespace.c +++ b/fs/namespace.c | |||
@@ -632,14 +632,17 @@ struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry) | |||
632 | */ | 632 | */ |
633 | struct mount *__lookup_mnt_last(struct vfsmount *mnt, struct dentry *dentry) | 633 | struct mount *__lookup_mnt_last(struct vfsmount *mnt, struct dentry *dentry) |
634 | { | 634 | { |
635 | struct mount *p, *res; | 635 | struct mount *p, *res = NULL; |
636 | res = p = __lookup_mnt(mnt, dentry); | 636 | p = __lookup_mnt(mnt, dentry); |
637 | if (!p) | 637 | if (!p) |
638 | goto out; | 638 | goto out; |
639 | if (!(p->mnt.mnt_flags & MNT_UMOUNT)) | ||
640 | res = p; | ||
639 | hlist_for_each_entry_continue(p, mnt_hash) { | 641 | hlist_for_each_entry_continue(p, mnt_hash) { |
640 | if (&p->mnt_parent->mnt != mnt || p->mnt_mountpoint != dentry) | 642 | if (&p->mnt_parent->mnt != mnt || p->mnt_mountpoint != dentry) |
641 | break; | 643 | break; |
642 | res = p; | 644 | if (!(p->mnt.mnt_flags & MNT_UMOUNT)) |
645 | res = p; | ||
643 | } | 646 | } |
644 | out: | 647 | out: |
645 | return res; | 648 | return res; |
@@ -795,10 +798,8 @@ static void __touch_mnt_namespace(struct mnt_namespace *ns) | |||
795 | /* | 798 | /* |
796 | * vfsmount lock must be held for write | 799 | * vfsmount lock must be held for write |
797 | */ | 800 | */ |
798 | static void detach_mnt(struct mount *mnt, struct path *old_path) | 801 | static void unhash_mnt(struct mount *mnt) |
799 | { | 802 | { |
800 | old_path->dentry = mnt->mnt_mountpoint; | ||
801 | old_path->mnt = &mnt->mnt_parent->mnt; | ||
802 | mnt->mnt_parent = mnt; | 803 | mnt->mnt_parent = mnt; |
803 | mnt->mnt_mountpoint = mnt->mnt.mnt_root; | 804 | mnt->mnt_mountpoint = mnt->mnt.mnt_root; |
804 | list_del_init(&mnt->mnt_child); | 805 | list_del_init(&mnt->mnt_child); |
@@ -811,6 +812,26 @@ static void detach_mnt(struct mount *mnt, struct path *old_path) | |||
811 | /* | 812 | /* |
812 | * vfsmount lock must be held for write | 813 | * vfsmount lock must be held for write |
813 | */ | 814 | */ |
815 | static void detach_mnt(struct mount *mnt, struct path *old_path) | ||
816 | { | ||
817 | old_path->dentry = mnt->mnt_mountpoint; | ||
818 | old_path->mnt = &mnt->mnt_parent->mnt; | ||
819 | unhash_mnt(mnt); | ||
820 | } | ||
821 | |||
822 | /* | ||
823 | * vfsmount lock must be held for write | ||
824 | */ | ||
825 | static void umount_mnt(struct mount *mnt) | ||
826 | { | ||
827 | /* old mountpoint will be dropped when we can do that */ | ||
828 | mnt->mnt_ex_mountpoint = mnt->mnt_mountpoint; | ||
829 | unhash_mnt(mnt); | ||
830 | } | ||
831 | |||
832 | /* | ||
833 | * vfsmount lock must be held for write | ||
834 | */ | ||
814 | void mnt_set_mountpoint(struct mount *mnt, | 835 | void mnt_set_mountpoint(struct mount *mnt, |
815 | struct mountpoint *mp, | 836 | struct mountpoint *mp, |
816 | struct mount *child_mnt) | 837 | struct mount *child_mnt) |
@@ -1078,6 +1099,13 @@ static void mntput_no_expire(struct mount *mnt) | |||
1078 | rcu_read_unlock(); | 1099 | rcu_read_unlock(); |
1079 | 1100 | ||
1080 | list_del(&mnt->mnt_instance); | 1101 | list_del(&mnt->mnt_instance); |
1102 | |||
1103 | if (unlikely(!list_empty(&mnt->mnt_mounts))) { | ||
1104 | struct mount *p, *tmp; | ||
1105 | list_for_each_entry_safe(p, tmp, &mnt->mnt_mounts, mnt_child) { | ||
1106 | umount_mnt(p); | ||
1107 | } | ||
1108 | } | ||
1081 | unlock_mount_hash(); | 1109 | unlock_mount_hash(); |
1082 | 1110 | ||
1083 | if (likely(!(mnt->mnt.mnt_flags & MNT_INTERNAL))) { | 1111 | if (likely(!(mnt->mnt.mnt_flags & MNT_INTERNAL))) { |
@@ -1298,17 +1326,15 @@ static HLIST_HEAD(unmounted); /* protected by namespace_sem */ | |||
1298 | 1326 | ||
1299 | static void namespace_unlock(void) | 1327 | static void namespace_unlock(void) |
1300 | { | 1328 | { |
1301 | struct hlist_head head = unmounted; | 1329 | struct hlist_head head; |
1302 | 1330 | ||
1303 | if (likely(hlist_empty(&head))) { | 1331 | hlist_move_list(&unmounted, &head); |
1304 | up_write(&namespace_sem); | ||
1305 | return; | ||
1306 | } | ||
1307 | 1332 | ||
1308 | head.first->pprev = &head.first; | ||
1309 | INIT_HLIST_HEAD(&unmounted); | ||
1310 | up_write(&namespace_sem); | 1333 | up_write(&namespace_sem); |
1311 | 1334 | ||
1335 | if (likely(hlist_empty(&head))) | ||
1336 | return; | ||
1337 | |||
1312 | synchronize_rcu(); | 1338 | synchronize_rcu(); |
1313 | 1339 | ||
1314 | group_pin_kill(&head); | 1340 | group_pin_kill(&head); |
@@ -1319,49 +1345,63 @@ static inline void namespace_lock(void) | |||
1319 | down_write(&namespace_sem); | 1345 | down_write(&namespace_sem); |
1320 | } | 1346 | } |
1321 | 1347 | ||
1348 | enum umount_tree_flags { | ||
1349 | UMOUNT_SYNC = 1, | ||
1350 | UMOUNT_PROPAGATE = 2, | ||
1351 | UMOUNT_CONNECTED = 4, | ||
1352 | }; | ||
1322 | /* | 1353 | /* |
1323 | * mount_lock must be held | 1354 | * mount_lock must be held |
1324 | * namespace_sem must be held for write | 1355 | * namespace_sem must be held for write |
1325 | * how = 0 => just this tree, don't propagate | ||
1326 | * how = 1 => propagate; we know that nobody else has reference to any victims | ||
1327 | * how = 2 => lazy umount | ||
1328 | */ | 1356 | */ |
1329 | void umount_tree(struct mount *mnt, int how) | 1357 | static void umount_tree(struct mount *mnt, enum umount_tree_flags how) |
1330 | { | 1358 | { |
1331 | HLIST_HEAD(tmp_list); | 1359 | LIST_HEAD(tmp_list); |
1332 | struct mount *p; | 1360 | struct mount *p; |
1333 | 1361 | ||
1362 | if (how & UMOUNT_PROPAGATE) | ||
1363 | propagate_mount_unlock(mnt); | ||
1364 | |||
1365 | /* Gather the mounts to umount */ | ||
1334 | for (p = mnt; p; p = next_mnt(p, mnt)) { | 1366 | for (p = mnt; p; p = next_mnt(p, mnt)) { |
1335 | hlist_del_init_rcu(&p->mnt_hash); | 1367 | p->mnt.mnt_flags |= MNT_UMOUNT; |
1336 | hlist_add_head(&p->mnt_hash, &tmp_list); | 1368 | list_move(&p->mnt_list, &tmp_list); |
1337 | } | 1369 | } |
1338 | 1370 | ||
1339 | hlist_for_each_entry(p, &tmp_list, mnt_hash) | 1371 | /* Hide the mounts from mnt_mounts */ |
1372 | list_for_each_entry(p, &tmp_list, mnt_list) { | ||
1340 | list_del_init(&p->mnt_child); | 1373 | list_del_init(&p->mnt_child); |
1374 | } | ||
1341 | 1375 | ||
1342 | if (how) | 1376 | /* Add propagated mounts to the tmp_list */ | 
1377 | if (how & UMOUNT_PROPAGATE) | ||
1343 | propagate_umount(&tmp_list); | 1378 | propagate_umount(&tmp_list); |
1344 | 1379 | ||
1345 | while (!hlist_empty(&tmp_list)) { | 1380 | while (!list_empty(&tmp_list)) { |
1346 | p = hlist_entry(tmp_list.first, struct mount, mnt_hash); | 1381 | bool disconnect; |
1347 | hlist_del_init_rcu(&p->mnt_hash); | 1382 | p = list_first_entry(&tmp_list, struct mount, mnt_list); |
1348 | list_del_init(&p->mnt_expire); | 1383 | list_del_init(&p->mnt_expire); |
1349 | list_del_init(&p->mnt_list); | 1384 | list_del_init(&p->mnt_list); |
1350 | __touch_mnt_namespace(p->mnt_ns); | 1385 | __touch_mnt_namespace(p->mnt_ns); |
1351 | p->mnt_ns = NULL; | 1386 | p->mnt_ns = NULL; |
1352 | if (how < 2) | 1387 | if (how & UMOUNT_SYNC) |
1353 | p->mnt.mnt_flags |= MNT_SYNC_UMOUNT; | 1388 | p->mnt.mnt_flags |= MNT_SYNC_UMOUNT; |
1354 | 1389 | ||
1355 | pin_insert_group(&p->mnt_umount, &p->mnt_parent->mnt, &unmounted); | 1390 | disconnect = !(((how & UMOUNT_CONNECTED) && |
1391 | mnt_has_parent(p) && | ||
1392 | (p->mnt_parent->mnt.mnt_flags & MNT_UMOUNT)) || | ||
1393 | IS_MNT_LOCKED_AND_LAZY(p)); | ||
1394 | |||
1395 | pin_insert_group(&p->mnt_umount, &p->mnt_parent->mnt, | ||
1396 | disconnect ? &unmounted : NULL); | ||
1356 | if (mnt_has_parent(p)) { | 1397 | if (mnt_has_parent(p)) { |
1357 | hlist_del_init(&p->mnt_mp_list); | ||
1358 | put_mountpoint(p->mnt_mp); | ||
1359 | mnt_add_count(p->mnt_parent, -1); | 1398 | mnt_add_count(p->mnt_parent, -1); |
1360 | /* old mountpoint will be dropped when we can do that */ | 1399 | if (!disconnect) { |
1361 | p->mnt_ex_mountpoint = p->mnt_mountpoint; | 1400 | /* Don't forget about p */ |
1362 | p->mnt_mountpoint = p->mnt.mnt_root; | 1401 | list_add_tail(&p->mnt_child, &p->mnt_parent->mnt_mounts); |
1363 | p->mnt_parent = p; | 1402 | } else { |
1364 | p->mnt_mp = NULL; | 1403 | umount_mnt(p); |
1404 | } | ||
1365 | } | 1405 | } |
1366 | change_mnt_propagation(p, MS_PRIVATE); | 1406 | change_mnt_propagation(p, MS_PRIVATE); |
1367 | } | 1407 | } |
@@ -1447,14 +1487,14 @@ static int do_umount(struct mount *mnt, int flags) | |||
1447 | 1487 | ||
1448 | if (flags & MNT_DETACH) { | 1488 | if (flags & MNT_DETACH) { |
1449 | if (!list_empty(&mnt->mnt_list)) | 1489 | if (!list_empty(&mnt->mnt_list)) |
1450 | umount_tree(mnt, 2); | 1490 | umount_tree(mnt, UMOUNT_PROPAGATE); |
1451 | retval = 0; | 1491 | retval = 0; |
1452 | } else { | 1492 | } else { |
1453 | shrink_submounts(mnt); | 1493 | shrink_submounts(mnt); |
1454 | retval = -EBUSY; | 1494 | retval = -EBUSY; |
1455 | if (!propagate_mount_busy(mnt, 2)) { | 1495 | if (!propagate_mount_busy(mnt, 2)) { |
1456 | if (!list_empty(&mnt->mnt_list)) | 1496 | if (!list_empty(&mnt->mnt_list)) |
1457 | umount_tree(mnt, 1); | 1497 | umount_tree(mnt, UMOUNT_PROPAGATE|UMOUNT_SYNC); |
1458 | retval = 0; | 1498 | retval = 0; |
1459 | } | 1499 | } |
1460 | } | 1500 | } |
@@ -1480,13 +1520,20 @@ void __detach_mounts(struct dentry *dentry) | |||
1480 | 1520 | ||
1481 | namespace_lock(); | 1521 | namespace_lock(); |
1482 | mp = lookup_mountpoint(dentry); | 1522 | mp = lookup_mountpoint(dentry); |
1483 | if (!mp) | 1523 | if (IS_ERR_OR_NULL(mp)) |
1484 | goto out_unlock; | 1524 | goto out_unlock; |
1485 | 1525 | ||
1486 | lock_mount_hash(); | 1526 | lock_mount_hash(); |
1487 | while (!hlist_empty(&mp->m_list)) { | 1527 | while (!hlist_empty(&mp->m_list)) { |
1488 | mnt = hlist_entry(mp->m_list.first, struct mount, mnt_mp_list); | 1528 | mnt = hlist_entry(mp->m_list.first, struct mount, mnt_mp_list); |
1489 | umount_tree(mnt, 2); | 1529 | if (mnt->mnt.mnt_flags & MNT_UMOUNT) { |
1530 | struct mount *p, *tmp; | ||
1531 | list_for_each_entry_safe(p, tmp, &mnt->mnt_mounts, mnt_child) { | ||
1532 | hlist_add_head(&p->mnt_umount.s_list, &unmounted); | ||
1533 | umount_mnt(p); | ||
1534 | } | ||
1535 | } | ||
1536 | else umount_tree(mnt, UMOUNT_CONNECTED); | ||
1490 | } | 1537 | } |
1491 | unlock_mount_hash(); | 1538 | unlock_mount_hash(); |
1492 | put_mountpoint(mp); | 1539 | put_mountpoint(mp); |
@@ -1648,7 +1695,7 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry, | |||
1648 | out: | 1695 | out: |
1649 | if (res) { | 1696 | if (res) { |
1650 | lock_mount_hash(); | 1697 | lock_mount_hash(); |
1651 | umount_tree(res, 0); | 1698 | umount_tree(res, UMOUNT_SYNC); |
1652 | unlock_mount_hash(); | 1699 | unlock_mount_hash(); |
1653 | } | 1700 | } |
1654 | return q; | 1701 | return q; |
@@ -1660,8 +1707,11 @@ struct vfsmount *collect_mounts(struct path *path) | |||
1660 | { | 1707 | { |
1661 | struct mount *tree; | 1708 | struct mount *tree; |
1662 | namespace_lock(); | 1709 | namespace_lock(); |
1663 | tree = copy_tree(real_mount(path->mnt), path->dentry, | 1710 | if (!check_mnt(real_mount(path->mnt))) |
1664 | CL_COPY_ALL | CL_PRIVATE); | 1711 | tree = ERR_PTR(-EINVAL); |
1712 | else | ||
1713 | tree = copy_tree(real_mount(path->mnt), path->dentry, | ||
1714 | CL_COPY_ALL | CL_PRIVATE); | ||
1665 | namespace_unlock(); | 1715 | namespace_unlock(); |
1666 | if (IS_ERR(tree)) | 1716 | if (IS_ERR(tree)) |
1667 | return ERR_CAST(tree); | 1717 | return ERR_CAST(tree); |
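[Annotation] collect_mounts() now refuses a tree that is not attached to the caller's namespace, handing back ERR_PTR(-EINVAL) instead of copying it. A toy of the error-pointer idiom used here (macros modeled on <linux/err.h>, not the real header):

#include <stdio.h>

#define MAX_ERRNO	4095
#define ERR_PTR(err)	((void *)(long)(err))
#define PTR_ERR(ptr)	((long)(ptr))
#define IS_ERR(ptr)	((unsigned long)(ptr) >= (unsigned long)-MAX_ERRNO)

static void *collect_demo(int attached)
{
	if (!attached)
		return ERR_PTR(-22);	/* -EINVAL, as in the hunk */
	return "copied tree";		/* stand-in for the real copy */
}

int main(void)
{
	void *tree = collect_demo(0);

	if (IS_ERR(tree))
		printf("collect failed: %ld\n", PTR_ERR(tree));
	return 0;
}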
@@ -1672,7 +1722,7 @@ void drop_collected_mounts(struct vfsmount *mnt) | |||
1672 | { | 1722 | { |
1673 | namespace_lock(); | 1723 | namespace_lock(); |
1674 | lock_mount_hash(); | 1724 | lock_mount_hash(); |
1675 | umount_tree(real_mount(mnt), 0); | 1725 | umount_tree(real_mount(mnt), UMOUNT_SYNC); |
1676 | unlock_mount_hash(); | 1726 | unlock_mount_hash(); |
1677 | namespace_unlock(); | 1727 | namespace_unlock(); |
1678 | } | 1728 | } |
@@ -1855,7 +1905,7 @@ static int attach_recursive_mnt(struct mount *source_mnt, | |||
1855 | out_cleanup_ids: | 1905 | out_cleanup_ids: |
1856 | while (!hlist_empty(&tree_list)) { | 1906 | while (!hlist_empty(&tree_list)) { |
1857 | child = hlist_entry(tree_list.first, struct mount, mnt_hash); | 1907 | child = hlist_entry(tree_list.first, struct mount, mnt_hash); |
1858 | umount_tree(child, 0); | 1908 | umount_tree(child, UMOUNT_SYNC); |
1859 | } | 1909 | } |
1860 | unlock_mount_hash(); | 1910 | unlock_mount_hash(); |
1861 | cleanup_group_ids(source_mnt, NULL); | 1911 | cleanup_group_ids(source_mnt, NULL); |
@@ -2035,7 +2085,7 @@ static int do_loopback(struct path *path, const char *old_name, | |||
2035 | err = graft_tree(mnt, parent, mp); | 2085 | err = graft_tree(mnt, parent, mp); |
2036 | if (err) { | 2086 | if (err) { |
2037 | lock_mount_hash(); | 2087 | lock_mount_hash(); |
2038 | umount_tree(mnt, 0); | 2088 | umount_tree(mnt, UMOUNT_SYNC); |
2039 | unlock_mount_hash(); | 2089 | unlock_mount_hash(); |
2040 | } | 2090 | } |
2041 | out2: | 2091 | out2: |
@@ -2406,7 +2456,7 @@ void mark_mounts_for_expiry(struct list_head *mounts) | |||
2406 | while (!list_empty(&graveyard)) { | 2456 | while (!list_empty(&graveyard)) { |
2407 | mnt = list_first_entry(&graveyard, struct mount, mnt_expire); | 2457 | mnt = list_first_entry(&graveyard, struct mount, mnt_expire); |
2408 | touch_mnt_namespace(mnt->mnt_ns); | 2458 | touch_mnt_namespace(mnt->mnt_ns); |
2409 | umount_tree(mnt, 1); | 2459 | umount_tree(mnt, UMOUNT_PROPAGATE|UMOUNT_SYNC); |
2410 | } | 2460 | } |
2411 | unlock_mount_hash(); | 2461 | unlock_mount_hash(); |
2412 | namespace_unlock(); | 2462 | namespace_unlock(); |
@@ -2477,7 +2527,7 @@ static void shrink_submounts(struct mount *mnt) | |||
2477 | m = list_first_entry(&graveyard, struct mount, | 2527 | m = list_first_entry(&graveyard, struct mount, |
2478 | mnt_expire); | 2528 | mnt_expire); |
2479 | touch_mnt_namespace(m->mnt_ns); | 2529 | touch_mnt_namespace(m->mnt_ns); |
2480 | umount_tree(m, 1); | 2530 | umount_tree(m, UMOUNT_PROPAGATE|UMOUNT_SYNC); |
2481 | } | 2531 | } |
2482 | } | 2532 | } |
2483 | } | 2533 | } |
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig index fc2d108f5272..a0b77fc1bd39 100644 --- a/fs/nfsd/Kconfig +++ b/fs/nfsd/Kconfig | |||
@@ -108,7 +108,7 @@ config NFSD_V4_SECURITY_LABEL | |||
108 | 108 | ||
109 | config NFSD_FAULT_INJECTION | 109 | config NFSD_FAULT_INJECTION |
110 | bool "NFS server manual fault injection" | 110 | bool "NFS server manual fault injection" |
111 | depends on NFSD_V4 && DEBUG_KERNEL | 111 | depends on NFSD_V4 && DEBUG_KERNEL && DEBUG_FS |
112 | help | 112 | help |
113 | This option enables support for manually injecting faults | 113 | This option enables support for manually injecting faults |
114 | into the NFS server. This is intended to be used for | 114 | into the NFS server. This is intended to be used for |
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index c3e3b6e55ae2..900c3ae94adc 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c | |||
@@ -691,8 +691,7 @@ static int svc_export_match(struct cache_head *a, struct cache_head *b) | |||
691 | struct svc_export *orig = container_of(a, struct svc_export, h); | 691 | struct svc_export *orig = container_of(a, struct svc_export, h); |
692 | struct svc_export *new = container_of(b, struct svc_export, h); | 692 | struct svc_export *new = container_of(b, struct svc_export, h); |
693 | return orig->ex_client == new->ex_client && | 693 | return orig->ex_client == new->ex_client && |
694 | orig->ex_path.dentry == new->ex_path.dentry && | 694 | path_equal(&orig->ex_path, &new->ex_path); |
695 | orig->ex_path.mnt == new->ex_path.mnt; | ||
696 | } | 695 | } |
697 | 696 | ||
698 | static void svc_export_init(struct cache_head *cnew, struct cache_head *citem) | 697 | static void svc_export_init(struct cache_head *cnew, struct cache_head *citem) |
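[Annotation] The two-field comparison in svc_export_match() folds into path_equal(), which encapsulates exactly that test: same vfsmount, same dentry. A sketch of the helper's shape, with opaque stand-in types:

#include <stdbool.h>
#include <stdio.h>

struct vfsmount { int dummy; };
struct dentry { int dummy; };
struct path {
	struct vfsmount *mnt;
	struct dentry *dentry;
};

/* mirrors the kernel helper: two paths are equal iff both members match */
static bool path_equal(const struct path *p1, const struct path *p2)
{
	return p1->mnt == p2->mnt && p1->dentry == p2->dentry;
}

int main(void)
{
	struct vfsmount m; struct dentry d;
	struct path a = { &m, &d }, b = { &m, &d };

	printf("equal: %d\n", path_equal(&a, &b));
	return 0;
}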
@@ -1159,6 +1158,7 @@ static struct flags { | |||
1159 | { NFSEXP_NOSUBTREECHECK, {"no_subtree_check", ""}}, | 1158 | { NFSEXP_NOSUBTREECHECK, {"no_subtree_check", ""}}, |
1160 | { NFSEXP_NOAUTHNLM, {"insecure_locks", ""}}, | 1159 | { NFSEXP_NOAUTHNLM, {"insecure_locks", ""}}, |
1161 | { NFSEXP_V4ROOT, {"v4root", ""}}, | 1160 | { NFSEXP_V4ROOT, {"v4root", ""}}, |
1161 | { NFSEXP_PNFS, {"pnfs", ""}}, | ||
1162 | { 0, {"", ""}} | 1162 | { 0, {"", ""}} |
1163 | }; | 1163 | }; |
1164 | 1164 | ||
diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c index 59fd76651781..eaf4605a4b9e 100644 --- a/fs/nfsd/nfs4acl.c +++ b/fs/nfsd/nfs4acl.c | |||
@@ -499,43 +499,13 @@ static inline void add_to_mask(struct posix_acl_state *state, struct posix_ace_s | |||
499 | state->mask.allow |= astate->allow; | 499 | state->mask.allow |= astate->allow; |
500 | } | 500 | } |
501 | 501 | ||
502 | /* | ||
503 | * Certain bits (SYNCHRONIZE, DELETE, WRITE_OWNER, READ/WRITE_NAMED_ATTRS, | ||
504 | * READ_ATTRIBUTES, READ_ACL) are currently unenforceable and don't translate | ||
505 | * to traditional read/write/execute permissions. | ||
506 | * | ||
507 | * It's problematic to reject acls that use certain mode bits, because it | ||
508 | * places the burden on users to learn the rules about which bits one | ||
509 | * particular server sets, without giving the user a lot of help--we return an | ||
510 | * error that could mean any number of different things. To make matters | ||
511 | * worse, the problematic bits might be introduced by some application that's | ||
512 | * automatically mapping from some other acl model. | ||
513 | * | ||
514 | * So wherever possible we accept anything, possibly erring on the side of | ||
515 | * denying more permissions than necessary. | ||
516 | * | ||
517 | * However we do reject *explicit* DENY's of a few bits representing | ||
518 | * permissions we could never deny: | ||
519 | */ | ||
520 | |||
521 | static inline int check_deny(u32 mask, int isowner) | ||
522 | { | ||
523 | if (mask & (NFS4_ACE_READ_ATTRIBUTES | NFS4_ACE_READ_ACL)) | ||
524 | return -EINVAL; | ||
525 | if (!isowner) | ||
526 | return 0; | ||
527 | if (mask & (NFS4_ACE_WRITE_ATTRIBUTES | NFS4_ACE_WRITE_ACL)) | ||
528 | return -EINVAL; | ||
529 | return 0; | ||
530 | } | ||
531 | |||
532 | static struct posix_acl * | 502 | static struct posix_acl * |
533 | posix_state_to_acl(struct posix_acl_state *state, unsigned int flags) | 503 | posix_state_to_acl(struct posix_acl_state *state, unsigned int flags) |
534 | { | 504 | { |
535 | struct posix_acl_entry *pace; | 505 | struct posix_acl_entry *pace; |
536 | struct posix_acl *pacl; | 506 | struct posix_acl *pacl; |
537 | int nace; | 507 | int nace; |
538 | int i, error = 0; | 508 | int i; |
539 | 509 | ||
540 | /* | 510 | /* |
541 | * ACLs with no ACEs are treated differently in the inheritable | 511 | * ACLs with no ACEs are treated differently in the inheritable |
@@ -560,17 +530,11 @@ posix_state_to_acl(struct posix_acl_state *state, unsigned int flags) | |||
560 | 530 | ||
561 | pace = pacl->a_entries; | 531 | pace = pacl->a_entries; |
562 | pace->e_tag = ACL_USER_OBJ; | 532 | pace->e_tag = ACL_USER_OBJ; |
563 | error = check_deny(state->owner.deny, 1); | ||
564 | if (error) | ||
565 | goto out_err; | ||
566 | low_mode_from_nfs4(state->owner.allow, &pace->e_perm, flags); | 533 | low_mode_from_nfs4(state->owner.allow, &pace->e_perm, flags); |
567 | 534 | ||
568 | for (i=0; i < state->users->n; i++) { | 535 | for (i=0; i < state->users->n; i++) { |
569 | pace++; | 536 | pace++; |
570 | pace->e_tag = ACL_USER; | 537 | pace->e_tag = ACL_USER; |
571 | error = check_deny(state->users->aces[i].perms.deny, 0); | ||
572 | if (error) | ||
573 | goto out_err; | ||
574 | low_mode_from_nfs4(state->users->aces[i].perms.allow, | 538 | low_mode_from_nfs4(state->users->aces[i].perms.allow, |
575 | &pace->e_perm, flags); | 539 | &pace->e_perm, flags); |
576 | pace->e_uid = state->users->aces[i].uid; | 540 | pace->e_uid = state->users->aces[i].uid; |
@@ -579,18 +543,12 @@ posix_state_to_acl(struct posix_acl_state *state, unsigned int flags) | |||
579 | 543 | ||
580 | pace++; | 544 | pace++; |
581 | pace->e_tag = ACL_GROUP_OBJ; | 545 | pace->e_tag = ACL_GROUP_OBJ; |
582 | error = check_deny(state->group.deny, 0); | ||
583 | if (error) | ||
584 | goto out_err; | ||
585 | low_mode_from_nfs4(state->group.allow, &pace->e_perm, flags); | 546 | low_mode_from_nfs4(state->group.allow, &pace->e_perm, flags); |
586 | add_to_mask(state, &state->group); | 547 | add_to_mask(state, &state->group); |
587 | 548 | ||
588 | for (i=0; i < state->groups->n; i++) { | 549 | for (i=0; i < state->groups->n; i++) { |
589 | pace++; | 550 | pace++; |
590 | pace->e_tag = ACL_GROUP; | 551 | pace->e_tag = ACL_GROUP; |
591 | error = check_deny(state->groups->aces[i].perms.deny, 0); | ||
592 | if (error) | ||
593 | goto out_err; | ||
594 | low_mode_from_nfs4(state->groups->aces[i].perms.allow, | 552 | low_mode_from_nfs4(state->groups->aces[i].perms.allow, |
595 | &pace->e_perm, flags); | 553 | &pace->e_perm, flags); |
596 | pace->e_gid = state->groups->aces[i].gid; | 554 | pace->e_gid = state->groups->aces[i].gid; |
@@ -605,15 +563,9 @@ posix_state_to_acl(struct posix_acl_state *state, unsigned int flags) | |||
605 | 563 | ||
606 | pace++; | 564 | pace++; |
607 | pace->e_tag = ACL_OTHER; | 565 | pace->e_tag = ACL_OTHER; |
608 | error = check_deny(state->other.deny, 0); | ||
609 | if (error) | ||
610 | goto out_err; | ||
611 | low_mode_from_nfs4(state->other.allow, &pace->e_perm, flags); | 566 | low_mode_from_nfs4(state->other.allow, &pace->e_perm, flags); |
612 | 567 | ||
613 | return pacl; | 568 | return pacl; |
614 | out_err: | ||
615 | posix_acl_release(pacl); | ||
616 | return ERR_PTR(error); | ||
617 | } | 569 | } |
618 | 570 | ||
619 | static inline void allow_bits(struct posix_ace_state *astate, u32 mask) | 571 | static inline void allow_bits(struct posix_ace_state *astate, u32 mask) |
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 92b9d97aff4f..4a8314f08a0e 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c | |||
@@ -470,7 +470,7 @@ out: | |||
470 | fh_put(resfh); | 470 | fh_put(resfh); |
471 | kfree(resfh); | 471 | kfree(resfh); |
472 | } | 472 | } |
473 | nfsd4_cleanup_open_state(cstate, open, status); | 473 | nfsd4_cleanup_open_state(cstate, open); |
474 | nfsd4_bump_seqid(cstate, status); | 474 | nfsd4_bump_seqid(cstate, status); |
475 | return status; | 475 | return status; |
476 | } | 476 | } |
@@ -1030,6 +1030,8 @@ nfsd4_fallocate(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
1030 | dprintk("NFSD: nfsd4_fallocate: couldn't process stateid!\n"); | 1030 | dprintk("NFSD: nfsd4_fallocate: couldn't process stateid!\n"); |
1031 | return status; | 1031 | return status; |
1032 | } | 1032 | } |
1033 | if (!file) | ||
1034 | return nfserr_bad_stateid; | ||
1033 | 1035 | ||
1034 | status = nfsd4_vfs_fallocate(rqstp, &cstate->current_fh, file, | 1036 | status = nfsd4_vfs_fallocate(rqstp, &cstate->current_fh, file, |
1035 | fallocate->falloc_offset, | 1037 | fallocate->falloc_offset, |
@@ -1069,6 +1071,8 @@ nfsd4_seek(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
1069 | dprintk("NFSD: nfsd4_seek: couldn't process stateid!\n"); | 1071 | dprintk("NFSD: nfsd4_seek: couldn't process stateid!\n"); |
1070 | return status; | 1072 | return status; |
1071 | } | 1073 | } |
1074 | if (!file) | ||
1075 | return nfserr_bad_stateid; | ||
1072 | 1076 | ||
1073 | switch (seek->seek_whence) { | 1077 | switch (seek->seek_whence) { |
1074 | case NFS4_CONTENT_DATA: | 1078 | case NFS4_CONTENT_DATA: |
@@ -1815,7 +1819,7 @@ static inline u32 nfsd4_getattr_rsize(struct svc_rqst *rqstp, | |||
1815 | bmap0 &= ~FATTR4_WORD0_FILEHANDLE; | 1819 | bmap0 &= ~FATTR4_WORD0_FILEHANDLE; |
1816 | } | 1820 | } |
1817 | if (bmap2 & FATTR4_WORD2_SECURITY_LABEL) { | 1821 | if (bmap2 & FATTR4_WORD2_SECURITY_LABEL) { |
1818 | ret += NFSD4_MAX_SEC_LABEL_LEN + 12; | 1822 | ret += NFS4_MAXLABELLEN + 12; |
1819 | bmap2 &= ~FATTR4_WORD2_SECURITY_LABEL; | 1823 | bmap2 &= ~FATTR4_WORD2_SECURITY_LABEL; |
1820 | } | 1824 | } |
1821 | /* | 1825 | /* |
@@ -2282,13 +2286,13 @@ static struct nfsd4_operation nfsd4_ops[] = { | |||
2282 | .op_func = (nfsd4op_func)nfsd4_allocate, | 2286 | .op_func = (nfsd4op_func)nfsd4_allocate, |
2283 | .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME, | 2287 | .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME, |
2284 | .op_name = "OP_ALLOCATE", | 2288 | .op_name = "OP_ALLOCATE", |
2285 | .op_rsize_bop = (nfsd4op_rsize)nfsd4_write_rsize, | 2289 | .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, |
2286 | }, | 2290 | }, |
2287 | [OP_DEALLOCATE] = { | 2291 | [OP_DEALLOCATE] = { |
2288 | .op_func = (nfsd4op_func)nfsd4_deallocate, | 2292 | .op_func = (nfsd4op_func)nfsd4_deallocate, |
2289 | .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME, | 2293 | .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME, |
2290 | .op_name = "OP_DEALLOCATE", | 2294 | .op_name = "OP_DEALLOCATE", |
2291 | .op_rsize_bop = (nfsd4op_rsize)nfsd4_write_rsize, | 2295 | .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, |
2292 | }, | 2296 | }, |
2293 | [OP_SEEK] = { | 2297 | [OP_SEEK] = { |
2294 | .op_func = (nfsd4op_func)nfsd4_seek, | 2298 | .op_func = (nfsd4op_func)nfsd4_seek, |
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 326a545ea7b2..d42786ee39af 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c | |||
@@ -1139,7 +1139,7 @@ hash_sessionid(struct nfs4_sessionid *sessionid) | |||
1139 | return sid->sequence % SESSION_HASH_SIZE; | 1139 | return sid->sequence % SESSION_HASH_SIZE; |
1140 | } | 1140 | } |
1141 | 1141 | ||
1142 | #ifdef NFSD_DEBUG | 1142 | #ifdef CONFIG_SUNRPC_DEBUG |
1143 | static inline void | 1143 | static inline void |
1144 | dump_sessionid(const char *fn, struct nfs4_sessionid *sessionid) | 1144 | dump_sessionid(const char *fn, struct nfs4_sessionid *sessionid) |
1145 | { | 1145 | { |
@@ -4049,7 +4049,6 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf | |||
4049 | status = nfserr_bad_stateid; | 4049 | status = nfserr_bad_stateid; |
4050 | if (nfsd4_is_deleg_cur(open)) | 4050 | if (nfsd4_is_deleg_cur(open)) |
4051 | goto out; | 4051 | goto out; |
4052 | status = nfserr_jukebox; | ||
4053 | } | 4052 | } |
4054 | 4053 | ||
4055 | /* | 4054 | /* |
@@ -4118,7 +4117,7 @@ out: | |||
4118 | } | 4117 | } |
4119 | 4118 | ||
4120 | void nfsd4_cleanup_open_state(struct nfsd4_compound_state *cstate, | 4119 | void nfsd4_cleanup_open_state(struct nfsd4_compound_state *cstate, |
4121 | struct nfsd4_open *open, __be32 status) | 4120 | struct nfsd4_open *open) |
4122 | { | 4121 | { |
4123 | if (open->op_openowner) { | 4122 | if (open->op_openowner) { |
4124 | struct nfs4_stateowner *so = &open->op_openowner->oo_owner; | 4123 | struct nfs4_stateowner *so = &open->op_openowner->oo_owner; |
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 5fb7e78169a6..a45032ce7b80 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c | |||
@@ -424,7 +424,7 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, | |||
424 | len += 4; | 424 | len += 4; |
425 | dummy32 = be32_to_cpup(p++); | 425 | dummy32 = be32_to_cpup(p++); |
426 | READ_BUF(dummy32); | 426 | READ_BUF(dummy32); |
427 | if (dummy32 > NFSD4_MAX_SEC_LABEL_LEN) | 427 | if (dummy32 > NFS4_MAXLABELLEN) |
428 | return nfserr_badlabel; | 428 | return nfserr_badlabel; |
429 | len += (XDR_QUADLEN(dummy32) << 2); | 429 | len += (XDR_QUADLEN(dummy32) << 2); |
430 | READMEM(buf, dummy32); | 430 | READMEM(buf, dummy32); |
@@ -2020,7 +2020,7 @@ static __be32 nfsd4_encode_path(struct xdr_stream *xdr, | |||
2020 | * dentries/path components in an array. | 2020 | * dentries/path components in an array. |
2021 | */ | 2021 | */ |
2022 | for (;;) { | 2022 | for (;;) { |
2023 | if (cur.dentry == root->dentry && cur.mnt == root->mnt) | 2023 | if (path_equal(&cur, root)) |
2024 | break; | 2024 | break; |
2025 | if (cur.dentry == cur.mnt->mnt_root) { | 2025 | if (cur.dentry == cur.mnt->mnt_root) { |
2026 | if (follow_up(&cur)) | 2026 | if (follow_up(&cur)) |
@@ -3422,6 +3422,7 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, | |||
3422 | unsigned long maxcount; | 3422 | unsigned long maxcount; |
3423 | struct xdr_stream *xdr = &resp->xdr; | 3423 | struct xdr_stream *xdr = &resp->xdr; |
3424 | struct file *file = read->rd_filp; | 3424 | struct file *file = read->rd_filp; |
3425 | struct svc_fh *fhp = read->rd_fhp; | ||
3425 | int starting_len = xdr->buf->len; | 3426 | int starting_len = xdr->buf->len; |
3426 | struct raparms *ra; | 3427 | struct raparms *ra; |
3427 | __be32 *p; | 3428 | __be32 *p; |
@@ -3445,12 +3446,15 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, | |||
3445 | maxcount = min_t(unsigned long, maxcount, (xdr->buf->buflen - xdr->buf->len)); | 3446 | maxcount = min_t(unsigned long, maxcount, (xdr->buf->buflen - xdr->buf->len)); |
3446 | maxcount = min_t(unsigned long, maxcount, read->rd_length); | 3447 | maxcount = min_t(unsigned long, maxcount, read->rd_length); |
3447 | 3448 | ||
3448 | if (!read->rd_filp) { | 3449 | if (read->rd_filp) |
3450 | err = nfsd_permission(resp->rqstp, fhp->fh_export, | ||
3451 | fhp->fh_dentry, | ||
3452 | NFSD_MAY_READ|NFSD_MAY_OWNER_OVERRIDE); | ||
3453 | else | ||
3449 | err = nfsd_get_tmp_read_open(resp->rqstp, read->rd_fhp, | 3454 | err = nfsd_get_tmp_read_open(resp->rqstp, read->rd_fhp, |
3450 | &file, &ra); | 3455 | &file, &ra); |
3451 | if (err) | 3456 | if (err) |
3452 | goto err_truncate; | 3457 | goto err_truncate; |
3453 | } | ||
3454 | 3458 | ||
3455 | if (file->f_op->splice_read && test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags)) | 3459 | if (file->f_op->splice_read && test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags)) |
3456 | err = nfsd4_encode_splice_read(resp, read, file, maxcount); | 3460 | err = nfsd4_encode_splice_read(resp, read, file, maxcount); |
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index aa47d75ddb26..9690cb4dd588 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c | |||
@@ -1250,15 +1250,15 @@ static int __init init_nfsd(void) | |||
1250 | int retval; | 1250 | int retval; |
1251 | printk(KERN_INFO "Installing knfsd (copyright (C) 1996 okir@monad.swb.de).\n"); | 1251 | printk(KERN_INFO "Installing knfsd (copyright (C) 1996 okir@monad.swb.de).\n"); |
1252 | 1252 | ||
1253 | retval = register_cld_notifier(); | ||
1254 | if (retval) | ||
1255 | return retval; | ||
1256 | retval = register_pernet_subsys(&nfsd_net_ops); | 1253 | retval = register_pernet_subsys(&nfsd_net_ops); |
1257 | if (retval < 0) | 1254 | if (retval < 0) |
1258 | goto out_unregister_notifier; | 1255 | return retval; |
1259 | retval = nfsd4_init_slabs(); | 1256 | retval = register_cld_notifier(); |
1260 | if (retval) | 1257 | if (retval) |
1261 | goto out_unregister_pernet; | 1258 | goto out_unregister_pernet; |
1259 | retval = nfsd4_init_slabs(); | ||
1260 | if (retval) | ||
1261 | goto out_unregister_notifier; | ||
1262 | retval = nfsd4_init_pnfs(); | 1262 | retval = nfsd4_init_pnfs(); |
1263 | if (retval) | 1263 | if (retval) |
1264 | goto out_free_slabs; | 1264 | goto out_free_slabs; |
@@ -1290,10 +1290,10 @@ out_exit_pnfs: | |||
1290 | nfsd4_exit_pnfs(); | 1290 | nfsd4_exit_pnfs(); |
1291 | out_free_slabs: | 1291 | out_free_slabs: |
1292 | nfsd4_free_slabs(); | 1292 | nfsd4_free_slabs(); |
1293 | out_unregister_pernet: | ||
1294 | unregister_pernet_subsys(&nfsd_net_ops); | ||
1295 | out_unregister_notifier: | 1293 | out_unregister_notifier: |
1296 | unregister_cld_notifier(); | 1294 | unregister_cld_notifier(); |
1295 | out_unregister_pernet: | ||
1296 | unregister_pernet_subsys(&nfsd_net_ops); | ||
1297 | return retval; | 1297 | return retval; |
1298 | } | 1298 | } |
1299 | 1299 | ||
@@ -1308,8 +1308,8 @@ static void __exit exit_nfsd(void) | |||
1308 | nfsd4_exit_pnfs(); | 1308 | nfsd4_exit_pnfs(); |
1309 | nfsd_fault_inject_cleanup(); | 1309 | nfsd_fault_inject_cleanup(); |
1310 | unregister_filesystem(&nfsd_fs_type); | 1310 | unregister_filesystem(&nfsd_fs_type); |
1311 | unregister_pernet_subsys(&nfsd_net_ops); | ||
1312 | unregister_cld_notifier(); | 1311 | unregister_cld_notifier(); |
1312 | unregister_pernet_subsys(&nfsd_net_ops); | ||
1313 | } | 1313 | } |
1314 | 1314 | ||
1315 | MODULE_AUTHOR("Olaf Kirch <okir@monad.swb.de>"); | 1315 | MODULE_AUTHOR("Olaf Kirch <okir@monad.swb.de>"); |
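[Annotation] The reorder makes init_nfsd()'s failure path a strict mirror of its setup order: the pernet subsystem is now registered first, so it is unregistered last, and exit_nfsd() is flipped to match. A generic sketch of the goto-unwind idiom the function relies on:

#include <stdio.h>

static int setup_a(void) { return 0; }
static void teardown_a(void) { }
static int setup_b(void) { return -1; }	/* pretend this step fails */

static int init_demo(void)
{
	int err;

	err = setup_a();
	if (err)
		return err;
	err = setup_b();
	if (err)
		goto out_teardown_a;	/* unwind only what succeeded, in reverse */
	return 0;

out_teardown_a:
	teardown_a();
	return err;
}

int main(void)
{
	printf("init_demo() = %d\n", init_demo());
	return 0;
}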
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 565c4da1a9eb..cf980523898b 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h | |||
@@ -24,7 +24,7 @@ | |||
24 | #include "export.h" | 24 | #include "export.h" |
25 | 25 | ||
26 | #undef ifdebug | 26 | #undef ifdebug |
27 | #ifdef NFSD_DEBUG | 27 | #ifdef CONFIG_SUNRPC_DEBUG |
28 | # define ifdebug(flag) if (nfsd_debug & NFSDDBG_##flag) | 28 | # define ifdebug(flag) if (nfsd_debug & NFSDDBG_##flag) |
29 | #else | 29 | #else |
30 | # define ifdebug(flag) if (0) | 30 | # define ifdebug(flag) if (0) |
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 0bda93e58e1b..556ce2e47555 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h | |||
@@ -40,7 +40,6 @@ | |||
40 | #include "state.h" | 40 | #include "state.h" |
41 | #include "nfsd.h" | 41 | #include "nfsd.h" |
42 | 42 | ||
43 | #define NFSD4_MAX_SEC_LABEL_LEN 2048 | ||
44 | #define NFSD4_MAX_TAGLEN 128 | 43 | #define NFSD4_MAX_TAGLEN 128 |
45 | #define XDR_LEN(n) (((n) + 3) & ~3) | 44 | #define XDR_LEN(n) (((n) + 3) & ~3) |
46 | 45 | ||
@@ -683,7 +682,7 @@ extern __be32 nfsd4_process_open2(struct svc_rqst *rqstp, | |||
683 | struct svc_fh *current_fh, struct nfsd4_open *open); | 682 | struct svc_fh *current_fh, struct nfsd4_open *open); |
684 | extern void nfsd4_cstate_clear_replay(struct nfsd4_compound_state *cstate); | 683 | extern void nfsd4_cstate_clear_replay(struct nfsd4_compound_state *cstate); |
685 | extern void nfsd4_cleanup_open_state(struct nfsd4_compound_state *cstate, | 684 | extern void nfsd4_cleanup_open_state(struct nfsd4_compound_state *cstate, |
686 | struct nfsd4_open *open, __be32 status); | 685 | struct nfsd4_open *open); |
687 | extern __be32 nfsd4_open_confirm(struct svc_rqst *rqstp, | 686 | extern __be32 nfsd4_open_confirm(struct svc_rqst *rqstp, |
688 | struct nfsd4_compound_state *, struct nfsd4_open_confirm *oc); | 687 | struct nfsd4_compound_state *, struct nfsd4_open_confirm *oc); |
689 | extern __be32 nfsd4_close(struct svc_rqst *rqstp, | 688 | extern __be32 nfsd4_close(struct svc_rqst *rqstp, |
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 8e19b9d7aba8..16eff45727ee 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c | |||
@@ -1312,9 +1312,7 @@ static int o2hb_debug_init(void) | |||
1312 | int ret = -ENOMEM; | 1312 | int ret = -ENOMEM; |
1313 | 1313 | ||
1314 | o2hb_debug_dir = debugfs_create_dir(O2HB_DEBUG_DIR, NULL); | 1314 | o2hb_debug_dir = debugfs_create_dir(O2HB_DEBUG_DIR, NULL); |
1315 | if (IS_ERR_OR_NULL(o2hb_debug_dir)) { | 1315 | if (!o2hb_debug_dir) { |
1316 | ret = o2hb_debug_dir ? | ||
1317 | PTR_ERR(o2hb_debug_dir) : -ENOMEM; | ||
1318 | mlog_errno(ret); | 1316 | mlog_errno(ret); |
1319 | goto bail; | 1317 | goto bail; |
1320 | } | 1318 | } |
@@ -1327,9 +1325,7 @@ static int o2hb_debug_init(void) | |||
1327 | sizeof(o2hb_live_node_bitmap), | 1325 | sizeof(o2hb_live_node_bitmap), |
1328 | O2NM_MAX_NODES, | 1326 | O2NM_MAX_NODES, |
1329 | o2hb_live_node_bitmap); | 1327 | o2hb_live_node_bitmap); |
1330 | if (IS_ERR_OR_NULL(o2hb_debug_livenodes)) { | 1328 | if (!o2hb_debug_livenodes) { |
1331 | ret = o2hb_debug_livenodes ? | ||
1332 | PTR_ERR(o2hb_debug_livenodes) : -ENOMEM; | ||
1333 | mlog_errno(ret); | 1329 | mlog_errno(ret); |
1334 | goto bail; | 1330 | goto bail; |
1335 | } | 1331 | } |
@@ -1342,9 +1338,7 @@ static int o2hb_debug_init(void) | |||
1342 | sizeof(o2hb_live_region_bitmap), | 1338 | sizeof(o2hb_live_region_bitmap), |
1343 | O2NM_MAX_REGIONS, | 1339 | O2NM_MAX_REGIONS, |
1344 | o2hb_live_region_bitmap); | 1340 | o2hb_live_region_bitmap); |
1345 | if (IS_ERR_OR_NULL(o2hb_debug_liveregions)) { | 1341 | if (!o2hb_debug_liveregions) { |
1346 | ret = o2hb_debug_liveregions ? | ||
1347 | PTR_ERR(o2hb_debug_liveregions) : -ENOMEM; | ||
1348 | mlog_errno(ret); | 1342 | mlog_errno(ret); |
1349 | goto bail; | 1343 | goto bail; |
1350 | } | 1344 | } |
@@ -1358,9 +1352,7 @@ static int o2hb_debug_init(void) | |||
1358 | sizeof(o2hb_quorum_region_bitmap), | 1352 | sizeof(o2hb_quorum_region_bitmap), |
1359 | O2NM_MAX_REGIONS, | 1353 | O2NM_MAX_REGIONS, |
1360 | o2hb_quorum_region_bitmap); | 1354 | o2hb_quorum_region_bitmap); |
1361 | if (IS_ERR_OR_NULL(o2hb_debug_quorumregions)) { | 1355 | if (!o2hb_debug_quorumregions) { |
1362 | ret = o2hb_debug_quorumregions ? | ||
1363 | PTR_ERR(o2hb_debug_quorumregions) : -ENOMEM; | ||
1364 | mlog_errno(ret); | 1356 | mlog_errno(ret); |
1365 | goto bail; | 1357 | goto bail; |
1366 | } | 1358 | } |
@@ -1374,9 +1366,7 @@ static int o2hb_debug_init(void) | |||
1374 | sizeof(o2hb_failed_region_bitmap), | 1366 | sizeof(o2hb_failed_region_bitmap), |
1375 | O2NM_MAX_REGIONS, | 1367 | O2NM_MAX_REGIONS, |
1376 | o2hb_failed_region_bitmap); | 1368 | o2hb_failed_region_bitmap); |
1377 | if (IS_ERR_OR_NULL(o2hb_debug_failedregions)) { | 1369 | if (!o2hb_debug_failedregions) { |
1378 | ret = o2hb_debug_failedregions ? | ||
1379 | PTR_ERR(o2hb_debug_failedregions) : -ENOMEM; | ||
1380 | mlog_errno(ret); | 1370 | mlog_errno(ret); |
1381 | goto bail; | 1371 | goto bail; |
1382 | } | 1372 | } |
@@ -2010,8 +2000,7 @@ static int o2hb_debug_region_init(struct o2hb_region *reg, struct dentry *dir) | |||
2010 | 2000 | ||
2011 | reg->hr_debug_dir = | 2001 | reg->hr_debug_dir = |
2012 | debugfs_create_dir(config_item_name(®->hr_item), dir); | 2002 | debugfs_create_dir(config_item_name(®->hr_item), dir); |
2013 | if (IS_ERR_OR_NULL(reg->hr_debug_dir)) { | 2003 | if (!reg->hr_debug_dir) { |
2014 | ret = reg->hr_debug_dir ? PTR_ERR(reg->hr_debug_dir) : -ENOMEM; | ||
2015 | mlog_errno(ret); | 2004 | mlog_errno(ret); |
2016 | goto bail; | 2005 | goto bail; |
2017 | } | 2006 | } |
@@ -2024,9 +2013,7 @@ static int o2hb_debug_region_init(struct o2hb_region *reg, struct dentry *dir) | |||
2024 | O2HB_DB_TYPE_REGION_LIVENODES, | 2013 | O2HB_DB_TYPE_REGION_LIVENODES, |
2025 | sizeof(reg->hr_live_node_bitmap), | 2014 | sizeof(reg->hr_live_node_bitmap), |
2026 | O2NM_MAX_NODES, reg); | 2015 | O2NM_MAX_NODES, reg); |
2027 | if (IS_ERR_OR_NULL(reg->hr_debug_livenodes)) { | 2016 | if (!reg->hr_debug_livenodes) { |
2028 | ret = reg->hr_debug_livenodes ? | ||
2029 | PTR_ERR(reg->hr_debug_livenodes) : -ENOMEM; | ||
2030 | mlog_errno(ret); | 2017 | mlog_errno(ret); |
2031 | goto bail; | 2018 | goto bail; |
2032 | } | 2019 | } |
@@ -2038,9 +2025,7 @@ static int o2hb_debug_region_init(struct o2hb_region *reg, struct dentry *dir) | |||
2038 | sizeof(*(reg->hr_db_regnum)), | 2025 | sizeof(*(reg->hr_db_regnum)), |
2039 | O2HB_DB_TYPE_REGION_NUMBER, | 2026 | O2HB_DB_TYPE_REGION_NUMBER, |
2040 | 0, O2NM_MAX_NODES, reg); | 2027 | 0, O2NM_MAX_NODES, reg); |
2041 | if (IS_ERR_OR_NULL(reg->hr_debug_regnum)) { | 2028 | if (!reg->hr_debug_regnum) { |
2042 | ret = reg->hr_debug_regnum ? | ||
2043 | PTR_ERR(reg->hr_debug_regnum) : -ENOMEM; | ||
2044 | mlog_errno(ret); | 2029 | mlog_errno(ret); |
2045 | goto bail; | 2030 | goto bail; |
2046 | } | 2031 | } |
@@ -2052,9 +2037,7 @@ static int o2hb_debug_region_init(struct o2hb_region *reg, struct dentry *dir) | |||
2052 | sizeof(*(reg->hr_db_elapsed_time)), | 2037 | sizeof(*(reg->hr_db_elapsed_time)), |
2053 | O2HB_DB_TYPE_REGION_ELAPSED_TIME, | 2038 | O2HB_DB_TYPE_REGION_ELAPSED_TIME, |
2054 | 0, 0, reg); | 2039 | 0, 0, reg); |
2055 | if (IS_ERR_OR_NULL(reg->hr_debug_elapsed_time)) { | 2040 | if (!reg->hr_debug_elapsed_time) { |
2056 | ret = reg->hr_debug_elapsed_time ? | ||
2057 | PTR_ERR(reg->hr_debug_elapsed_time) : -ENOMEM; | ||
2058 | mlog_errno(ret); | 2041 | mlog_errno(ret); |
2059 | goto bail; | 2042 | goto bail; |
2060 | } | 2043 | } |
@@ -2066,16 +2049,13 @@ static int o2hb_debug_region_init(struct o2hb_region *reg, struct dentry *dir) | |||
2066 | sizeof(*(reg->hr_db_pinned)), | 2049 | sizeof(*(reg->hr_db_pinned)), |
2067 | O2HB_DB_TYPE_REGION_PINNED, | 2050 | O2HB_DB_TYPE_REGION_PINNED, |
2068 | 0, 0, reg); | 2051 | 0, 0, reg); |
2069 | if (IS_ERR_OR_NULL(reg->hr_debug_pinned)) { | 2052 | if (!reg->hr_debug_pinned) { |
2070 | ret = reg->hr_debug_pinned ? | ||
2071 | PTR_ERR(reg->hr_debug_pinned) : -ENOMEM; | ||
2072 | mlog_errno(ret); | 2053 | mlog_errno(ret); |
2073 | goto bail; | 2054 | goto bail; |
2074 | } | 2055 | } |
2075 | 2056 | ||
2076 | return 0; | 2057 | ret = 0; |
2077 | bail: | 2058 | bail: |
2078 | debugfs_remove_recursive(reg->hr_debug_dir); | ||
2079 | return ret; | 2059 | return ret; |
2080 | } | 2060 | } |
2081 | 2061 | ||
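[Annotation] All of these ocfs2 hunks revert IS_ERR_OR_NULL() checks back to plain NULL tests. At this point in the tree, debugfs_create_dir()/debugfs_create_file() returned NULL on failure, while the CONFIG_DEBUG_FS=n stubs returned ERR_PTR(-ENODEV); with the stricter check, a debugfs-less kernel would fail these init paths outright, which is the likely motivation for the revert. A toy demo of why the two tests disagree (macros modeled on <linux/err.h>, not the real header):

#include <stdio.h>

#define MAX_ERRNO	4095
#define ERR_PTR(err)	((void *)(long)(err))
#define IS_ERR(ptr)	((unsigned long)(ptr) >= (unsigned long)-MAX_ERRNO)
#define IS_ERR_OR_NULL(ptr) (!(ptr) || IS_ERR(ptr))

int main(void)
{
	void *stub = ERR_PTR(-19);	/* -ENODEV from a disabled-debugfs stub */

	printf("!ptr fails?           %d\n", !stub);		   /* 0: treated as success */
	printf("IS_ERR_OR_NULL fails? %d\n", IS_ERR_OR_NULL(stub)); /* 1: would abort init */
	return 0;
}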
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 956edf67be20..8b23aa2f52dd 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c | |||
@@ -2959,7 +2959,7 @@ static int ocfs2_dlm_init_debug(struct ocfs2_super *osb) | |||
2959 | osb->osb_debug_root, | 2959 | osb->osb_debug_root, |
2960 | osb, | 2960 | osb, |
2961 | &ocfs2_dlm_debug_fops); | 2961 | &ocfs2_dlm_debug_fops); |
2962 | if (IS_ERR_OR_NULL(dlm_debug->d_locking_state)) { | 2962 | if (!dlm_debug->d_locking_state) { |
2963 | ret = -EINVAL; | 2963 | ret = -EINVAL; |
2964 | mlog(ML_ERROR, | 2964 | mlog(ML_ERROR, |
2965 | "Unable to create locking state debugfs file.\n"); | 2965 | "Unable to create locking state debugfs file.\n"); |
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 837ddce4b659..403c5660b306 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c | |||
@@ -1112,7 +1112,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) | |||
1112 | 1112 | ||
1113 | osb->osb_debug_root = debugfs_create_dir(osb->uuid_str, | 1113 | osb->osb_debug_root = debugfs_create_dir(osb->uuid_str, |
1114 | ocfs2_debugfs_root); | 1114 | ocfs2_debugfs_root); |
1115 | if (IS_ERR_OR_NULL(osb->osb_debug_root)) { | 1115 | if (!osb->osb_debug_root) { |
1116 | status = -EINVAL; | 1116 | status = -EINVAL; |
1117 | mlog(ML_ERROR, "Unable to create per-mount debugfs root.\n"); | 1117 | mlog(ML_ERROR, "Unable to create per-mount debugfs root.\n"); |
1118 | goto read_super_error; | 1118 | goto read_super_error; |
@@ -1122,7 +1122,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) | |||
1122 | osb->osb_debug_root, | 1122 | osb->osb_debug_root, |
1123 | osb, | 1123 | osb, |
1124 | &ocfs2_osb_debug_fops); | 1124 | &ocfs2_osb_debug_fops); |
1125 | if (IS_ERR_OR_NULL(osb->osb_ctxt)) { | 1125 | if (!osb->osb_ctxt) { |
1126 | status = -EINVAL; | 1126 | status = -EINVAL; |
1127 | mlog_errno(status); | 1127 | mlog_errno(status); |
1128 | goto read_super_error; | 1128 | goto read_super_error; |
@@ -1606,9 +1606,8 @@ static int __init ocfs2_init(void) | |||
1606 | } | 1606 | } |
1607 | 1607 | ||
1608 | ocfs2_debugfs_root = debugfs_create_dir("ocfs2", NULL); | 1608 | ocfs2_debugfs_root = debugfs_create_dir("ocfs2", NULL); |
1609 | if (IS_ERR_OR_NULL(ocfs2_debugfs_root)) { | 1609 | if (!ocfs2_debugfs_root) { |
1610 | status = ocfs2_debugfs_root ? | 1610 | status = -ENOMEM; |
1611 | PTR_ERR(ocfs2_debugfs_root) : -ENOMEM; | ||
1612 | mlog(ML_ERROR, "Unable to create ocfs2 debugfs root.\n"); | 1611 | mlog(ML_ERROR, "Unable to create ocfs2 debugfs root.\n"); |
1613 | goto out4; | 1612 | goto out4; |
1614 | } | 1613 | } |
diff --git a/fs/open.c b/fs/open.c --- a/fs/open.c +++ b/fs/open.c | |||
@@ -231,8 +231,7 @@ int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len) | |||
231 | return -EINVAL; | 231 | return -EINVAL; |
232 | 232 | ||
233 | /* Return error if mode is not supported */ | 233 | /* Return error if mode is not supported */ |
234 | if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | | 234 | if (mode & ~FALLOC_FL_SUPPORTED_MASK) |
235 | FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE)) | ||
236 | return -EOPNOTSUPP; | 235 | return -EOPNOTSUPP; |
237 | 236 | ||
238 | /* Punch hole and zero range are mutually exclusive */ | 237 | /* Punch hole and zero range are mutually exclusive */ |
@@ -250,6 +249,11 @@ int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len) | |||
250 | (mode & ~FALLOC_FL_COLLAPSE_RANGE)) | 249 | (mode & ~FALLOC_FL_COLLAPSE_RANGE)) |
251 | return -EINVAL; | 250 | return -EINVAL; |
252 | 251 | ||
252 | /* Insert range should only be used exclusively. */ | ||
253 | if ((mode & FALLOC_FL_INSERT_RANGE) && | ||
254 | (mode & ~FALLOC_FL_INSERT_RANGE)) | ||
255 | return -EINVAL; | ||
256 | |||
253 | if (!(file->f_mode & FMODE_WRITE)) | 257 | if (!(file->f_mode & FMODE_WRITE)) |
254 | return -EBADF; | 258 | return -EBADF; |
255 | 259 | ||
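[Annotation] The new check keeps FALLOC_FL_INSERT_RANGE mutually exclusive with every other mode bit, mirroring the existing collapse-range rule, while the supported-mode test is centralized behind FALLOC_FL_SUPPORTED_MASK. A user-space sketch of the validation shape (flag values are illustrative, not the uapi <linux/falloc.h> constants):

#include <stdio.h>

#define FALLOC_FL_KEEP_SIZE	0x01	/* illustrative values */
#define FALLOC_FL_PUNCH_HOLE	0x02
#define FALLOC_FL_COLLAPSE_RANGE 0x08
#define FALLOC_FL_ZERO_RANGE	0x10
#define FALLOC_FL_INSERT_RANGE	0x20
#define FALLOC_FL_SUPPORTED_MASK (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | \
	FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE | FALLOC_FL_INSERT_RANGE)

static int check_mode(int mode)
{
	if (mode & ~FALLOC_FL_SUPPORTED_MASK)
		return -95;		/* -EOPNOTSUPP: unknown bit set */
	if ((mode & FALLOC_FL_INSERT_RANGE) && (mode & ~FALLOC_FL_INSERT_RANGE))
		return -22;		/* -EINVAL: insert range must be used alone */
	return 0;
}

int main(void)
{
	printf("%d\n", check_mode(FALLOC_FL_INSERT_RANGE));			   /* 0 */
	printf("%d\n", check_mode(FALLOC_FL_INSERT_RANGE | FALLOC_FL_KEEP_SIZE)); /* -22 */
	return 0;
}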
diff --git a/fs/pnode.c b/fs/pnode.c index 260ac8f898a4..6367e1e435c6 100644 --- a/fs/pnode.c +++ b/fs/pnode.c | |||
@@ -362,6 +362,46 @@ int propagate_mount_busy(struct mount *mnt, int refcnt) | |||
362 | } | 362 | } |
363 | 363 | ||
364 | /* | 364 | /* |
365 | * Clear MNT_LOCKED when it can be shown to be safe. | ||
366 | * | ||
367 | * mount_lock lock must be held for write | ||
368 | */ | ||
369 | void propagate_mount_unlock(struct mount *mnt) | ||
370 | { | ||
371 | struct mount *parent = mnt->mnt_parent; | ||
372 | struct mount *m, *child; | ||
373 | |||
374 | BUG_ON(parent == mnt); | ||
375 | |||
376 | for (m = propagation_next(parent, parent); m; | ||
377 | m = propagation_next(m, parent)) { | ||
378 | child = __lookup_mnt_last(&m->mnt, mnt->mnt_mountpoint); | ||
379 | if (child) | ||
380 | child->mnt.mnt_flags &= ~MNT_LOCKED; | ||
381 | } | ||
382 | } | ||
383 | |||
384 | /* | ||
385 | * Mark all mounts that the MNT_LOCKED logic will allow to be unmounted. | ||
386 | */ | ||
387 | static void mark_umount_candidates(struct mount *mnt) | ||
388 | { | ||
389 | struct mount *parent = mnt->mnt_parent; | ||
390 | struct mount *m; | ||
391 | |||
392 | BUG_ON(parent == mnt); | ||
393 | |||
394 | for (m = propagation_next(parent, parent); m; | ||
395 | m = propagation_next(m, parent)) { | ||
396 | struct mount *child = __lookup_mnt_last(&m->mnt, | ||
397 | mnt->mnt_mountpoint); | ||
398 | if (child && (!IS_MNT_LOCKED(child) || IS_MNT_MARKED(m))) { | ||
399 | SET_MNT_MARK(child); | ||
400 | } | ||
401 | } | ||
402 | } | ||
403 | |||
404 | /* | ||
365 | * NOTE: unmounting 'mnt' naturally propagates to all other mounts its | 405 | * NOTE: unmounting 'mnt' naturally propagates to all other mounts its |
366 | * parent propagates to. | 406 | * parent propagates to. |
367 | */ | 407 | */ |
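[Annotation] The two new helpers split the MNT_LOCKED policy in half: propagate_mount_unlock() clears MNT_LOCKED on every propagated peer of the mount being torn down, and mark_umount_candidates() marks the peers that policy allows to go. Both lean on the mark helpers added to pnode.h further down; a toy of that flag discipline (flag values illustrative, structures are not struct mount):

#include <stdio.h>

#define MNT_MARKED	0x01	/* illustrative bit positions */
#define MNT_LOCKED	0x02

struct mount { int mnt_flags; };

#define IS_MNT_MARKED(m)   ((m)->mnt_flags & MNT_MARKED)
#define SET_MNT_MARK(m)    ((m)->mnt_flags |= MNT_MARKED)
#define CLEAR_MNT_MARK(m)  ((m)->mnt_flags &= ~MNT_MARKED)

int main(void)
{
	struct mount m = { .mnt_flags = 0 };

	SET_MNT_MARK(&m);	/* mark_umount_candidates() pass */
	printf("marked: %d\n", IS_MNT_MARKED(&m) != 0);
	CLEAR_MNT_MARK(&m);	/* consumed later by __propagate_umount() */
	printf("marked: %d\n", IS_MNT_MARKED(&m) != 0);
	return 0;
}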
@@ -378,13 +418,16 @@ static void __propagate_umount(struct mount *mnt) | |||
378 | struct mount *child = __lookup_mnt_last(&m->mnt, | 418 | struct mount *child = __lookup_mnt_last(&m->mnt, |
379 | mnt->mnt_mountpoint); | 419 | mnt->mnt_mountpoint); |
380 | /* | 420 | /* |
381 | * umount the child only if the child has no | 421 | * umount the child only if the child has no children |
382 | * other children | 422 | * and the child is marked safe to unmount. |
383 | */ | 423 | */ |
384 | if (child && list_empty(&child->mnt_mounts)) { | 424 | if (!child || !IS_MNT_MARKED(child)) |
425 | continue; | ||
426 | CLEAR_MNT_MARK(child); | ||
427 | if (list_empty(&child->mnt_mounts)) { | ||
385 | list_del_init(&child->mnt_child); | 428 | list_del_init(&child->mnt_child); |
386 | hlist_del_init_rcu(&child->mnt_hash); | 429 | child->mnt.mnt_flags |= MNT_UMOUNT; |
387 | hlist_add_before_rcu(&child->mnt_hash, &mnt->mnt_hash); | 430 | list_move_tail(&child->mnt_list, &mnt->mnt_list); |
388 | } | 431 | } |
389 | } | 432 | } |
390 | } | 433 | } |
@@ -396,11 +439,14 @@ static void __propagate_umount(struct mount *mnt) | |||
396 | * | 439 | * |
397 | * vfsmount lock must be held for write | 440 | * vfsmount lock must be held for write |
398 | */ | 441 | */ |
399 | int propagate_umount(struct hlist_head *list) | 442 | int propagate_umount(struct list_head *list) |
400 | { | 443 | { |
401 | struct mount *mnt; | 444 | struct mount *mnt; |
402 | 445 | ||
403 | hlist_for_each_entry(mnt, list, mnt_hash) | 446 | list_for_each_entry_reverse(mnt, list, mnt_list) |
447 | mark_umount_candidates(mnt); | ||
448 | |||
449 | list_for_each_entry(mnt, list, mnt_list) | ||
404 | __propagate_umount(mnt); | 450 | __propagate_umount(mnt); |
405 | return 0; | 451 | return 0; |
406 | } | 452 | } |
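[Annotation] propagate_umount() is now a two-pass mark-and-sweep over an ordinary list_head: a reverse walk marks the victims that are safe to take, then a forward walk unmounts only what was marked, so a locked child cannot be yanked out from under a user who may not see past it. The shape of the two passes, on toy data rather than struct mount:

#include <stdbool.h>
#include <stdio.h>

struct node { bool locked; bool marked; };

static void mark_candidates(struct node *v, int n)
{
	for (int i = n - 1; i >= 0; i--)	/* reverse pass, as in the hunk */
		if (!v[i].locked)
			v[i].marked = true;
}

static void sweep(struct node *v, int n)
{
	for (int i = 0; i < n; i++)
		if (v[i].marked) {
			v[i].marked = false;	/* CLEAR_MNT_MARK analogue */
			printf("unmount node %d\n", i);
		}
}

int main(void)
{
	struct node v[3] = { { .locked = false }, { .locked = true }, { .locked = false } };

	mark_candidates(v, 3);
	sweep(v, 3);	/* node 1 survives: it was locked, never marked */
	return 0;
}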
diff --git a/fs/pnode.h b/fs/pnode.h index 4a246358b031..7114ce6e6b9e 100644 --- a/fs/pnode.h +++ b/fs/pnode.h | |||
@@ -19,6 +19,9 @@ | |||
19 | #define IS_MNT_MARKED(m) ((m)->mnt.mnt_flags & MNT_MARKED) | 19 | #define IS_MNT_MARKED(m) ((m)->mnt.mnt_flags & MNT_MARKED) |
20 | #define SET_MNT_MARK(m) ((m)->mnt.mnt_flags |= MNT_MARKED) | 20 | #define SET_MNT_MARK(m) ((m)->mnt.mnt_flags |= MNT_MARKED) |
21 | #define CLEAR_MNT_MARK(m) ((m)->mnt.mnt_flags &= ~MNT_MARKED) | 21 | #define CLEAR_MNT_MARK(m) ((m)->mnt.mnt_flags &= ~MNT_MARKED) |
22 | #define IS_MNT_LOCKED(m) ((m)->mnt.mnt_flags & MNT_LOCKED) | ||
23 | #define IS_MNT_LOCKED_AND_LAZY(m) \ | ||
24 | (((m)->mnt.mnt_flags & (MNT_LOCKED|MNT_SYNC_UMOUNT)) == MNT_LOCKED) | ||
22 | 25 | ||
23 | #define CL_EXPIRE 0x01 | 26 | #define CL_EXPIRE 0x01 |
24 | #define CL_SLAVE 0x02 | 27 | #define CL_SLAVE 0x02 |
@@ -40,14 +43,14 @@ static inline void set_mnt_shared(struct mount *mnt) | |||
40 | void change_mnt_propagation(struct mount *, int); | 43 | void change_mnt_propagation(struct mount *, int); |
41 | int propagate_mnt(struct mount *, struct mountpoint *, struct mount *, | 44 | int propagate_mnt(struct mount *, struct mountpoint *, struct mount *, |
42 | struct hlist_head *); | 45 | struct hlist_head *); |
43 | int propagate_umount(struct hlist_head *); | 46 | int propagate_umount(struct list_head *); |
44 | int propagate_mount_busy(struct mount *, int); | 47 | int propagate_mount_busy(struct mount *, int); |
48 | void propagate_mount_unlock(struct mount *); | ||
45 | void mnt_release_group_id(struct mount *); | 49 | void mnt_release_group_id(struct mount *); |
46 | int get_dominating_id(struct mount *mnt, const struct path *root); | 50 | int get_dominating_id(struct mount *mnt, const struct path *root); |
47 | unsigned int mnt_get_count(struct mount *mnt); | 51 | unsigned int mnt_get_count(struct mount *mnt); |
48 | void mnt_set_mountpoint(struct mount *, struct mountpoint *, | 52 | void mnt_set_mountpoint(struct mount *, struct mountpoint *, |
49 | struct mount *); | 53 | struct mount *); |
50 | void umount_tree(struct mount *, int); | ||
51 | struct mount *copy_tree(struct mount *, struct dentry *, int); | 54 | struct mount *copy_tree(struct mount *, struct dentry *, int); |
52 | bool is_path_reachable(struct mount *, struct dentry *, | 55 | bool is_path_reachable(struct mount *, struct dentry *, |
53 | const struct path *root); | 56 | const struct path *root); |
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index a6fbf4472017..516162be1398 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c | |||
@@ -260,6 +260,7 @@ xfs_alloc_fix_len( | |||
260 | rlen = rlen - (k - args->mod); | 260 | rlen = rlen - (k - args->mod); |
261 | else | 261 | else |
262 | rlen = rlen - args->prod + (args->mod - k); | 262 | rlen = rlen - args->prod + (args->mod - k); |
263 | /* casts to (int) catch length underflows */ | ||
263 | if ((int)rlen < (int)args->minlen) | 264 | if ((int)rlen < (int)args->minlen) |
264 | return; | 265 | return; |
265 | ASSERT(rlen >= args->minlen && rlen <= args->maxlen); | 266 | ASSERT(rlen >= args->minlen && rlen <= args->maxlen); |
@@ -286,7 +287,8 @@ xfs_alloc_fix_minleft( | |||
286 | if (diff >= 0) | 287 | if (diff >= 0) |
287 | return 1; | 288 | return 1; |
288 | args->len += diff; /* shrink the allocated space */ | 289 | args->len += diff; /* shrink the allocated space */ |
289 | if (args->len >= args->minlen) | 290 | /* casts to (int) catch length underflows */ |
291 | if ((int)args->len >= (int)args->minlen) | ||
290 | return 1; | 292 | return 1; |
291 | args->agbno = NULLAGBLOCK; | 293 | args->agbno = NULLAGBLOCK; |
292 | return 0; | 294 | return 0; |
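[Annotation] The new comments document a subtle trick: xfs_extlen_t is a 32-bit unsigned type, so a length that underflowed during the adjustments above wraps to a huge value and would sail past an unsigned comparison; casting both sides to (int) makes the wrapped value negative so the minlen test catches it. Minimal demo:

#include <stdio.h>

typedef unsigned int xfs_extlen_t;	/* matches the kernel typedef width */

int main(void)
{
	xfs_extlen_t len = 2, minlen = 5;

	len -= 4;	/* underflow: len is now 0xFFFFFFFE */
	printf("unsigned test passes:   %d\n", len >= minlen);		/* 1: bogus */
	printf("signed test catches it: %d\n", (int)len < (int)minlen);	/* 1: rejected */
	return 0;
}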
@@ -315,6 +317,9 @@ xfs_alloc_fixup_trees( | |||
315 | xfs_agblock_t nfbno2; /* second new free startblock */ | 317 | xfs_agblock_t nfbno2; /* second new free startblock */ |
316 | xfs_extlen_t nflen1=0; /* first new free length */ | 318 | xfs_extlen_t nflen1=0; /* first new free length */ |
317 | xfs_extlen_t nflen2=0; /* second new free length */ | 319 | xfs_extlen_t nflen2=0; /* second new free length */ |
320 | struct xfs_mount *mp; | ||
321 | |||
322 | mp = cnt_cur->bc_mp; | ||
318 | 323 | ||
319 | /* | 324 | /* |
320 | * Look up the record in the by-size tree if necessary. | 325 | * Look up the record in the by-size tree if necessary. |
@@ -323,13 +328,13 @@ xfs_alloc_fixup_trees( | |||
323 | #ifdef DEBUG | 328 | #ifdef DEBUG |
324 | if ((error = xfs_alloc_get_rec(cnt_cur, &nfbno1, &nflen1, &i))) | 329 | if ((error = xfs_alloc_get_rec(cnt_cur, &nfbno1, &nflen1, &i))) |
325 | return error; | 330 | return error; |
326 | XFS_WANT_CORRUPTED_RETURN( | 331 | XFS_WANT_CORRUPTED_RETURN(mp, |
327 | i == 1 && nfbno1 == fbno && nflen1 == flen); | 332 | i == 1 && nfbno1 == fbno && nflen1 == flen); |
328 | #endif | 333 | #endif |
329 | } else { | 334 | } else { |
330 | if ((error = xfs_alloc_lookup_eq(cnt_cur, fbno, flen, &i))) | 335 | if ((error = xfs_alloc_lookup_eq(cnt_cur, fbno, flen, &i))) |
331 | return error; | 336 | return error; |
332 | XFS_WANT_CORRUPTED_RETURN(i == 1); | 337 | XFS_WANT_CORRUPTED_RETURN(mp, i == 1); |
333 | } | 338 | } |
334 | /* | 339 | /* |
335 | * Look up the record in the by-block tree if necessary. | 340 | * Look up the record in the by-block tree if necessary. |
@@ -338,13 +343,13 @@ xfs_alloc_fixup_trees( | |||
338 | #ifdef DEBUG | 343 | #ifdef DEBUG |
339 | if ((error = xfs_alloc_get_rec(bno_cur, &nfbno1, &nflen1, &i))) | 344 | if ((error = xfs_alloc_get_rec(bno_cur, &nfbno1, &nflen1, &i))) |
340 | return error; | 345 | return error; |
341 | XFS_WANT_CORRUPTED_RETURN( | 346 | XFS_WANT_CORRUPTED_RETURN(mp, |
342 | i == 1 && nfbno1 == fbno && nflen1 == flen); | 347 | i == 1 && nfbno1 == fbno && nflen1 == flen); |
343 | #endif | 348 | #endif |
344 | } else { | 349 | } else { |
345 | if ((error = xfs_alloc_lookup_eq(bno_cur, fbno, flen, &i))) | 350 | if ((error = xfs_alloc_lookup_eq(bno_cur, fbno, flen, &i))) |
346 | return error; | 351 | return error; |
347 | XFS_WANT_CORRUPTED_RETURN(i == 1); | 352 | XFS_WANT_CORRUPTED_RETURN(mp, i == 1); |
348 | } | 353 | } |
349 | 354 | ||
350 | #ifdef DEBUG | 355 | #ifdef DEBUG |
@@ -355,7 +360,7 @@ xfs_alloc_fixup_trees( | |||
355 | bnoblock = XFS_BUF_TO_BLOCK(bno_cur->bc_bufs[0]); | 360 | bnoblock = XFS_BUF_TO_BLOCK(bno_cur->bc_bufs[0]); |
356 | cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_bufs[0]); | 361 | cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_bufs[0]); |
357 | 362 | ||
358 | XFS_WANT_CORRUPTED_RETURN( | 363 | XFS_WANT_CORRUPTED_RETURN(mp, |
359 | bnoblock->bb_numrecs == cntblock->bb_numrecs); | 364 | bnoblock->bb_numrecs == cntblock->bb_numrecs); |
360 | } | 365 | } |
361 | #endif | 366 | #endif |
@@ -386,25 +391,25 @@ xfs_alloc_fixup_trees( | |||
386 | */ | 391 | */ |
387 | if ((error = xfs_btree_delete(cnt_cur, &i))) | 392 | if ((error = xfs_btree_delete(cnt_cur, &i))) |
388 | return error; | 393 | return error; |
389 | XFS_WANT_CORRUPTED_RETURN(i == 1); | 394 | XFS_WANT_CORRUPTED_RETURN(mp, i == 1); |
390 | /* | 395 | /* |
391 | * Add new by-size btree entry(s). | 396 | * Add new by-size btree entry(s). |
392 | */ | 397 | */ |
393 | if (nfbno1 != NULLAGBLOCK) { | 398 | if (nfbno1 != NULLAGBLOCK) { |
394 | if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno1, nflen1, &i))) | 399 | if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno1, nflen1, &i))) |
395 | return error; | 400 | return error; |
396 | XFS_WANT_CORRUPTED_RETURN(i == 0); | 401 | XFS_WANT_CORRUPTED_RETURN(mp, i == 0); |
397 | if ((error = xfs_btree_insert(cnt_cur, &i))) | 402 | if ((error = xfs_btree_insert(cnt_cur, &i))) |
398 | return error; | 403 | return error; |
399 | XFS_WANT_CORRUPTED_RETURN(i == 1); | 404 | XFS_WANT_CORRUPTED_RETURN(mp, i == 1); |
400 | } | 405 | } |
401 | if (nfbno2 != NULLAGBLOCK) { | 406 | if (nfbno2 != NULLAGBLOCK) { |
402 | if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno2, nflen2, &i))) | 407 | if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno2, nflen2, &i))) |
403 | return error; | 408 | return error; |
404 | XFS_WANT_CORRUPTED_RETURN(i == 0); | 409 | XFS_WANT_CORRUPTED_RETURN(mp, i == 0); |
405 | if ((error = xfs_btree_insert(cnt_cur, &i))) | 410 | if ((error = xfs_btree_insert(cnt_cur, &i))) |
406 | return error; | 411 | return error; |
407 | XFS_WANT_CORRUPTED_RETURN(i == 1); | 412 | XFS_WANT_CORRUPTED_RETURN(mp, i == 1); |
408 | } | 413 | } |
409 | /* | 414 | /* |
410 | * Fix up the by-block btree entry(s). | 415 | * Fix up the by-block btree entry(s). |
@@ -415,7 +420,7 @@ xfs_alloc_fixup_trees( | |||
415 | */ | 420 | */ |
416 | if ((error = xfs_btree_delete(bno_cur, &i))) | 421 | if ((error = xfs_btree_delete(bno_cur, &i))) |
417 | return error; | 422 | return error; |
418 | XFS_WANT_CORRUPTED_RETURN(i == 1); | 423 | XFS_WANT_CORRUPTED_RETURN(mp, i == 1); |
419 | } else { | 424 | } else { |
420 | /* | 425 | /* |
421 | * Update the by-block entry to start later|be shorter. | 426 | * Update the by-block entry to start later|be shorter. |
@@ -429,10 +434,10 @@ xfs_alloc_fixup_trees( | |||
429 | */ | 434 | */ |
430 | if ((error = xfs_alloc_lookup_eq(bno_cur, nfbno2, nflen2, &i))) | 435 | if ((error = xfs_alloc_lookup_eq(bno_cur, nfbno2, nflen2, &i))) |
431 | return error; | 436 | return error; |
432 | XFS_WANT_CORRUPTED_RETURN(i == 0); | 437 | XFS_WANT_CORRUPTED_RETURN(mp, i == 0); |
433 | if ((error = xfs_btree_insert(bno_cur, &i))) | 438 | if ((error = xfs_btree_insert(bno_cur, &i))) |
434 | return error; | 439 | return error; |
435 | XFS_WANT_CORRUPTED_RETURN(i == 1); | 440 | XFS_WANT_CORRUPTED_RETURN(mp, i == 1); |
436 | } | 441 | } |
437 | return 0; | 442 | return 0; |
438 | } | 443 | } |
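[Annotation] Every corruption check in this file now passes the xfs_mount to XFS_WANT_CORRUPTED_RETURN()/XFS_WANT_CORRUPTED_GOTO(), so the report can name the filesystem that tripped it, and xfs_alloc_fixup_trees() grows a local mp for the purpose. A sketch of what the RETURN flavor plausibly expands to; the body is an assumption modeled on the pattern, not the fs/xfs definition:

#include <stdio.h>

struct xfs_mount { const char *m_fsname; };

/* assumed shape: report against the mount, bail with -EFSCORRUPTED */
#define XFS_WANT_CORRUPTED_RETURN(mp, expr)				\
	do {								\
		if (!(expr)) {						\
			fprintf(stderr, "%s: corruption detected\n",	\
				(mp)->m_fsname);			\
			return -117;	/* -EFSCORRUPTED, illustrative */\
		}							\
	} while (0)

static int check(struct xfs_mount *mp, int i)
{
	XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
	return 0;
}

int main(void)
{
	struct xfs_mount mp = { .m_fsname = "sda1" };

	printf("check -> %d\n", check(&mp, 0));
	return 0;
}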
@@ -682,7 +687,7 @@ xfs_alloc_ag_vextent_exact( | |||
682 | error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i); | 687 | error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i); |
683 | if (error) | 688 | if (error) |
684 | goto error0; | 689 | goto error0; |
685 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | 690 | XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); |
686 | ASSERT(fbno <= args->agbno); | 691 | ASSERT(fbno <= args->agbno); |
687 | 692 | ||
688 | /* | 693 | /* |
@@ -783,7 +788,7 @@ xfs_alloc_find_best_extent( | |||
783 | error = xfs_alloc_get_rec(*scur, sbno, slen, &i); | 788 | error = xfs_alloc_get_rec(*scur, sbno, slen, &i); |
784 | if (error) | 789 | if (error) |
785 | goto error0; | 790 | goto error0; |
786 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | 791 | XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); |
787 | xfs_alloc_compute_aligned(args, *sbno, *slen, sbnoa, slena); | 792 | xfs_alloc_compute_aligned(args, *sbno, *slen, sbnoa, slena); |
788 | 793 | ||
789 | /* | 794 | /* |
@@ -946,7 +951,7 @@ restart: | |||
946 | if ((error = xfs_alloc_get_rec(cnt_cur, <bno, | 951 | if ((error = xfs_alloc_get_rec(cnt_cur, <bno, |
947 | <len, &i))) | 952 | <len, &i))) |
948 | goto error0; | 953 | goto error0; |
949 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | 954 | XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); |
950 | if (ltlen >= args->minlen) | 955 | if (ltlen >= args->minlen) |
951 | break; | 956 | break; |
952 | if ((error = xfs_btree_increment(cnt_cur, 0, &i))) | 957 | if ((error = xfs_btree_increment(cnt_cur, 0, &i))) |
@@ -966,7 +971,7 @@ restart: | |||
966 | */ | 971 | */ |
967 | if ((error = xfs_alloc_get_rec(cnt_cur, <bno, <len, &i))) | 972 | if ((error = xfs_alloc_get_rec(cnt_cur, <bno, <len, &i))) |
968 | goto error0; | 973 | goto error0; |
969 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | 974 | XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); |
970 | xfs_alloc_compute_aligned(args, ltbno, ltlen, | 975 | xfs_alloc_compute_aligned(args, ltbno, ltlen, |
971 | <bnoa, <lena); | 976 | <bnoa, <lena); |
972 | if (ltlena < args->minlen) | 977 | if (ltlena < args->minlen) |
@@ -999,7 +1004,7 @@ restart: | |||
999 | cnt_cur->bc_ptrs[0] = besti; | 1004 | cnt_cur->bc_ptrs[0] = besti; |
1000 | if ((error = xfs_alloc_get_rec(cnt_cur, <bno, <len, &i))) | 1005 | if ((error = xfs_alloc_get_rec(cnt_cur, <bno, <len, &i))) |
1001 | goto error0; | 1006 | goto error0; |
1002 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | 1007 | XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); |
1003 | ASSERT(ltbno + ltlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); | 1008 | ASSERT(ltbno + ltlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); |
1004 | args->len = blen; | 1009 | args->len = blen; |
1005 | if (!xfs_alloc_fix_minleft(args)) { | 1010 | if (!xfs_alloc_fix_minleft(args)) { |
@@ -1088,7 +1093,7 @@ restart: | |||
1088 | if (bno_cur_lt) { | 1093 | if (bno_cur_lt) { |
1089 | if ((error = xfs_alloc_get_rec(bno_cur_lt, <bno, <len, &i))) | 1094 | if ((error = xfs_alloc_get_rec(bno_cur_lt, <bno, <len, &i))) |
1090 | goto error0; | 1095 | goto error0; |
1091 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | 1096 | XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); |
1092 | xfs_alloc_compute_aligned(args, ltbno, ltlen, | 1097 | xfs_alloc_compute_aligned(args, ltbno, ltlen, |
1093 | <bnoa, <lena); | 1098 | <bnoa, <lena); |
1094 | if (ltlena >= args->minlen) | 1099 | if (ltlena >= args->minlen) |
@@ -1104,7 +1109,7 @@ restart: | |||
1104 | if (bno_cur_gt) { | 1109 | if (bno_cur_gt) { |
1105 | if ((error = xfs_alloc_get_rec(bno_cur_gt, >bno, >len, &i))) | 1110 | if ((error = xfs_alloc_get_rec(bno_cur_gt, >bno, >len, &i))) |
1106 | goto error0; | 1111 | goto error0; |
1107 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | 1112 | XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); |
1108 | xfs_alloc_compute_aligned(args, gtbno, gtlen, | 1113 | xfs_alloc_compute_aligned(args, gtbno, gtlen, |
1109 | >bnoa, >lena); | 1114 | >bnoa, >lena); |
1110 | if (gtlena >= args->minlen) | 1115 | if (gtlena >= args->minlen) |
@@ -1303,7 +1308,7 @@ restart: | |||
1303 | error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i); | 1308 | error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i); |
1304 | if (error) | 1309 | if (error) |
1305 | goto error0; | 1310 | goto error0; |
1306 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | 1311 | XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); |
1307 | 1312 | ||
1308 | xfs_alloc_compute_aligned(args, fbno, flen, | 1313 | xfs_alloc_compute_aligned(args, fbno, flen, |
1309 | &rbno, &rlen); | 1314 | &rbno, &rlen); |
@@ -1342,7 +1347,7 @@ restart: | |||
1342 | * This can't happen in the second case above. | 1347 | * This can't happen in the second case above. |
1343 | */ | 1348 | */ |
1344 | rlen = XFS_EXTLEN_MIN(args->maxlen, rlen); | 1349 | rlen = XFS_EXTLEN_MIN(args->maxlen, rlen); |
1345 | XFS_WANT_CORRUPTED_GOTO(rlen == 0 || | 1350 | XFS_WANT_CORRUPTED_GOTO(args->mp, rlen == 0 || |
1346 | (rlen <= flen && rbno + rlen <= fbno + flen), error0); | 1351 | (rlen <= flen && rbno + rlen <= fbno + flen), error0); |
1347 | if (rlen < args->maxlen) { | 1352 | if (rlen < args->maxlen) { |
1348 | xfs_agblock_t bestfbno; | 1353 | xfs_agblock_t bestfbno; |
@@ -1362,13 +1367,13 @@ restart: | |||
1362 | if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, | 1367 | if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, |
1363 | &i))) | 1368 | &i))) |
1364 | goto error0; | 1369 | goto error0; |
1365 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | 1370 | XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); |
1366 | if (flen < bestrlen) | 1371 | if (flen < bestrlen) |
1367 | break; | 1372 | break; |
1368 | xfs_alloc_compute_aligned(args, fbno, flen, | 1373 | xfs_alloc_compute_aligned(args, fbno, flen, |
1369 | &rbno, &rlen); | 1374 | &rbno, &rlen); |
1370 | rlen = XFS_EXTLEN_MIN(args->maxlen, rlen); | 1375 | rlen = XFS_EXTLEN_MIN(args->maxlen, rlen); |
1371 | XFS_WANT_CORRUPTED_GOTO(rlen == 0 || | 1376 | XFS_WANT_CORRUPTED_GOTO(args->mp, rlen == 0 || |
1372 | (rlen <= flen && rbno + rlen <= fbno + flen), | 1377 | (rlen <= flen && rbno + rlen <= fbno + flen), |
1373 | error0); | 1378 | error0); |
1374 | if (rlen > bestrlen) { | 1379 | if (rlen > bestrlen) { |
@@ -1383,7 +1388,7 @@ restart: | |||
1383 | if ((error = xfs_alloc_lookup_eq(cnt_cur, bestfbno, bestflen, | 1388 | if ((error = xfs_alloc_lookup_eq(cnt_cur, bestfbno, bestflen, |
1384 | &i))) | 1389 | &i))) |
1385 | goto error0; | 1390 | goto error0; |
1386 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | 1391 | XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); |
1387 | rlen = bestrlen; | 1392 | rlen = bestrlen; |
1388 | rbno = bestrbno; | 1393 | rbno = bestrbno; |
1389 | flen = bestflen; | 1394 | flen = bestflen; |
@@ -1408,7 +1413,7 @@ restart: | |||
1408 | if (!xfs_alloc_fix_minleft(args)) | 1413 | if (!xfs_alloc_fix_minleft(args)) |
1409 | goto out_nominleft; | 1414 | goto out_nominleft; |
1410 | rlen = args->len; | 1415 | rlen = args->len; |
1411 | XFS_WANT_CORRUPTED_GOTO(rlen <= flen, error0); | 1416 | XFS_WANT_CORRUPTED_GOTO(args->mp, rlen <= flen, error0); |
1412 | /* | 1417 | /* |
1413 | * Allocate and initialize a cursor for the by-block tree. | 1418 | * Allocate and initialize a cursor for the by-block tree. |
1414 | */ | 1419 | */ |
@@ -1422,7 +1427,7 @@ restart: | |||
1422 | cnt_cur = bno_cur = NULL; | 1427 | cnt_cur = bno_cur = NULL; |
1423 | args->len = rlen; | 1428 | args->len = rlen; |
1424 | args->agbno = rbno; | 1429 | args->agbno = rbno; |
1425 | XFS_WANT_CORRUPTED_GOTO( | 1430 | XFS_WANT_CORRUPTED_GOTO(args->mp, |
1426 | args->agbno + args->len <= | 1431 | args->agbno + args->len <= |
1427 | be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length), | 1432 | be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length), |
1428 | error0); | 1433 | error0); |
@@ -1467,7 +1472,7 @@ xfs_alloc_ag_vextent_small( | |||
1467 | if (i) { | 1472 | if (i) { |
1468 | if ((error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i))) | 1473 | if ((error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i))) |
1469 | goto error0; | 1474 | goto error0; |
1470 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | 1475 | XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); |
1471 | } | 1476 | } |
1472 | /* | 1477 | /* |
1473 | * Nothing in the btree, try the freelist. Make sure | 1478 | * Nothing in the btree, try the freelist. Make sure |
@@ -1493,7 +1498,7 @@ xfs_alloc_ag_vextent_small( | |||
1493 | } | 1498 | } |
1494 | args->len = 1; | 1499 | args->len = 1; |
1495 | args->agbno = fbno; | 1500 | args->agbno = fbno; |
1496 | XFS_WANT_CORRUPTED_GOTO( | 1501 | XFS_WANT_CORRUPTED_GOTO(args->mp, |
1497 | args->agbno + args->len <= | 1502 | args->agbno + args->len <= |
1498 | be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length), | 1503 | be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length), |
1499 | error0); | 1504 | error0); |
@@ -1579,7 +1584,7 @@ xfs_free_ag_extent( | |||
1579 | */ | 1584 | */ |
1580 | if ((error = xfs_alloc_get_rec(bno_cur, &ltbno, &ltlen, &i))) | 1585 | if ((error = xfs_alloc_get_rec(bno_cur, &ltbno, &ltlen, &i))) |
1581 | goto error0; | 1586 | goto error0; |
1582 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | 1587 | XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); |
1583 | /* | 1588 | /* |
1584 | * It's not contiguous, though. | 1589 | * It's not contiguous, though. |
1585 | */ | 1590 | */ |
@@ -1591,7 +1596,8 @@ xfs_free_ag_extent( | |||
1591 | * space was invalid, it's (partly) already free. | 1596 | * space was invalid, it's (partly) already free. |
1592 | * Very bad. | 1597 | * Very bad. |
1593 | */ | 1598 | */ |
1594 | XFS_WANT_CORRUPTED_GOTO(ltbno + ltlen <= bno, error0); | 1599 | XFS_WANT_CORRUPTED_GOTO(mp, |
1600 | ltbno + ltlen <= bno, error0); | ||
1595 | } | 1601 | } |
1596 | } | 1602 | } |
1597 | /* | 1603 | /* |
@@ -1606,7 +1612,7 @@ xfs_free_ag_extent( | |||
1606 | */ | 1612 | */ |
1607 | if ((error = xfs_alloc_get_rec(bno_cur, &gtbno, &gtlen, &i))) | 1613 | if ((error = xfs_alloc_get_rec(bno_cur, &gtbno, &gtlen, &i))) |
1608 | goto error0; | 1614 | goto error0; |
1609 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | 1615 | XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); |
1610 | /* | 1616 | /* |
1611 | * It's not contiguous, though. | 1617 | * It's not contiguous, though. |
1612 | */ | 1618 | */ |
@@ -1618,7 +1624,7 @@ xfs_free_ag_extent( | |||
1618 | * space was invalid, it's (partly) already free. | 1624 | * space was invalid, it's (partly) already free. |
1619 | * Very bad. | 1625 | * Very bad. |
1620 | */ | 1626 | */ |
1621 | XFS_WANT_CORRUPTED_GOTO(gtbno >= bno + len, error0); | 1627 | XFS_WANT_CORRUPTED_GOTO(mp, gtbno >= bno + len, error0); |
1622 | } | 1628 | } |
1623 | } | 1629 | } |
1624 | /* | 1630 | /* |
@@ -1635,31 +1641,31 @@ xfs_free_ag_extent( | |||
1635 | */ | 1641 | */ |
1636 | if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i))) | 1642 | if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i))) |
1637 | goto error0; | 1643 | goto error0; |
1638 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | 1644 | XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); |
1639 | if ((error = xfs_btree_delete(cnt_cur, &i))) | 1645 | if ((error = xfs_btree_delete(cnt_cur, &i))) |
1640 | goto error0; | 1646 | goto error0; |
1641 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | 1647 | XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); |
1642 | /* | 1648 | /* |
1643 | * Delete the old by-size entry on the right. | 1649 | * Delete the old by-size entry on the right. |
1644 | */ | 1650 | */ |
1645 | if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i))) | 1651 | if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i))) |
1646 | goto error0; | 1652 | goto error0; |
1647 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | 1653 | XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); |
1648 | if ((error = xfs_btree_delete(cnt_cur, &i))) | 1654 | if ((error = xfs_btree_delete(cnt_cur, &i))) |
1649 | goto error0; | 1655 | goto error0; |
1650 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | 1656 | XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); |
1651 | /* | 1657 | /* |
1652 | * Delete the old by-block entry for the right block. | 1658 | * Delete the old by-block entry for the right block. |
1653 | */ | 1659 | */ |
1654 | if ((error = xfs_btree_delete(bno_cur, &i))) | 1660 | if ((error = xfs_btree_delete(bno_cur, &i))) |
1655 | goto error0; | 1661 | goto error0; |
1656 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | 1662 | XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); |
1657 | /* | 1663 | /* |
1658 | * Move the by-block cursor back to the left neighbor. | 1664 | * Move the by-block cursor back to the left neighbor. |
1659 | */ | 1665 | */ |
1660 | if ((error = xfs_btree_decrement(bno_cur, 0, &i))) | 1666 | if ((error = xfs_btree_decrement(bno_cur, 0, &i))) |
1661 | goto error0; | 1667 | goto error0; |
1662 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | 1668 | XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); |
1663 | #ifdef DEBUG | 1669 | #ifdef DEBUG |
1664 | /* | 1670 | /* |
1665 | * Check that this is the right record: delete didn't | 1671 | * Check that this is the right record: delete didn't |
@@ -1672,7 +1678,7 @@ xfs_free_ag_extent( | |||
1672 | if ((error = xfs_alloc_get_rec(bno_cur, &xxbno, &xxlen, | 1678 | if ((error = xfs_alloc_get_rec(bno_cur, &xxbno, &xxlen, |
1673 | &i))) | 1679 | &i))) |
1674 | goto error0; | 1680 | goto error0; |
1675 | XFS_WANT_CORRUPTED_GOTO( | 1681 | XFS_WANT_CORRUPTED_GOTO(mp, |
1676 | i == 1 && xxbno == ltbno && xxlen == ltlen, | 1682 | i == 1 && xxbno == ltbno && xxlen == ltlen, |
1677 | error0); | 1683 | error0); |
1678 | } | 1684 | } |
@@ -1695,17 +1701,17 @@ xfs_free_ag_extent( | |||
1695 | */ | 1701 | */ |
1696 | if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i))) | 1702 | if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i))) |
1697 | goto error0; | 1703 | goto error0; |
1698 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | 1704 | XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); |
1699 | if ((error = xfs_btree_delete(cnt_cur, &i))) | 1705 | if ((error = xfs_btree_delete(cnt_cur, &i))) |
1700 | goto error0; | 1706 | goto error0; |
1701 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | 1707 | XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); |
1702 | /* | 1708 | /* |
1703 | * Back up the by-block cursor to the left neighbor, and | 1709 | * Back up the by-block cursor to the left neighbor, and |
1704 | * update its length. | 1710 | * update its length. |
1705 | */ | 1711 | */ |
1706 | if ((error = xfs_btree_decrement(bno_cur, 0, &i))) | 1712 | if ((error = xfs_btree_decrement(bno_cur, 0, &i))) |
1707 | goto error0; | 1713 | goto error0; |
1708 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | 1714 | XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); |
1709 | nbno = ltbno; | 1715 | nbno = ltbno; |
1710 | nlen = len + ltlen; | 1716 | nlen = len + ltlen; |
1711 | if ((error = xfs_alloc_update(bno_cur, nbno, nlen))) | 1717 | if ((error = xfs_alloc_update(bno_cur, nbno, nlen))) |
@@ -1721,10 +1727,10 @@ xfs_free_ag_extent( | |||
1721 | */ | 1727 | */ |
1722 | if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i))) | 1728 | if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i))) |
1723 | goto error0; | 1729 | goto error0; |
1724 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | 1730 | XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); |
1725 | if ((error = xfs_btree_delete(cnt_cur, &i))) | 1731 | if ((error = xfs_btree_delete(cnt_cur, &i))) |
1726 | goto error0; | 1732 | goto error0; |
1727 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | 1733 | XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); |
1728 | /* | 1734 | /* |
1729 | * Update the starting block and length of the right | 1735 | * Update the starting block and length of the right |
1730 | * neighbor in the by-block tree. | 1736 | * neighbor in the by-block tree. |
@@ -1743,7 +1749,7 @@ xfs_free_ag_extent( | |||
1743 | nlen = len; | 1749 | nlen = len; |
1744 | if ((error = xfs_btree_insert(bno_cur, &i))) | 1750 | if ((error = xfs_btree_insert(bno_cur, &i))) |
1745 | goto error0; | 1751 | goto error0; |
1746 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | 1752 | XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); |
1747 | } | 1753 | } |
1748 | xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); | 1754 | xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); |
1749 | bno_cur = NULL; | 1755 | bno_cur = NULL; |
@@ -1752,10 +1758,10 @@ xfs_free_ag_extent( | |||
1752 | */ | 1758 | */ |
1753 | if ((error = xfs_alloc_lookup_eq(cnt_cur, nbno, nlen, &i))) | 1759 | if ((error = xfs_alloc_lookup_eq(cnt_cur, nbno, nlen, &i))) |
1754 | goto error0; | 1760 | goto error0; |
1755 | XFS_WANT_CORRUPTED_GOTO(i == 0, error0); | 1761 | XFS_WANT_CORRUPTED_GOTO(mp, i == 0, error0); |
1756 | if ((error = xfs_btree_insert(cnt_cur, &i))) | 1762 | if ((error = xfs_btree_insert(cnt_cur, &i))) |
1757 | goto error0; | 1763 | goto error0; |
1758 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | 1764 | XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); |
1759 | xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); | 1765 | xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); |
1760 | cnt_cur = NULL; | 1766 | cnt_cur = NULL; |
1761 | 1767 | ||
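
The common thread in the xfs_alloc.c hunks above is that XFS_WANT_CORRUPTED_GOTO now takes the struct xfs_mount as its first argument, so a failed consistency check can report which filesystem is corrupt before jumping to the error label. A minimal sketch of the macro's resulting shape, assuming the definition in fs/xfs/xfs_error.h (illustrative, not the authoritative text):

/*
 * Assert a btree-consistency condition; on failure, report corruption
 * against the given mount, set error and branch to the cleanup label.
 * The enclosing function must declare "int error;" and the label "l".
 */
#define XFS_WANT_CORRUPTED_GOTO(mp, x, l)				\
	{								\
		int fs_is_ok = (x);					\
		ASSERT(fs_is_ok);					\
		if (!fs_is_ok) {					\
			XFS_ERROR_REPORT("XFS_WANT_CORRUPTED_GOTO",	\
					 XFS_ERRLEVEL_LOW, mp);		\
			error = -EFSCORRUPTED;				\
			goto l;						\
		}							\
	}
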
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c index 15105dbc9e28..04e79d57bca6 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.c +++ b/fs/xfs/libxfs/xfs_attr_leaf.c | |||
@@ -86,8 +86,83 @@ STATIC void xfs_attr3_leaf_moveents(struct xfs_da_args *args, | |||
86 | int move_count); | 86 | int move_count); |
87 | STATIC int xfs_attr_leaf_entsize(xfs_attr_leafblock_t *leaf, int index); | 87 | STATIC int xfs_attr_leaf_entsize(xfs_attr_leafblock_t *leaf, int index); |
88 | 88 | ||
89 | /* | ||
90 | * attr3 block 'firstused' conversion helpers. | ||
91 | * | ||
92 | * firstused refers to the offset of the first used byte of the nameval region | ||
93 | * of an attr leaf block. The region starts at the tail of the block and expands | ||
94 | * backwards towards the middle. As such, firstused is initialized to the block | ||
95 | * size for an empty leaf block and is reduced from there. | ||
96 | * | ||
97 | * The attr3 block size is pegged to the fsb size and the maximum fsb is 64k. | ||
98 | * The in-core firstused field is 32-bit and thus supports the maximum fsb size. | ||
99 | * The on-disk field is only 16-bit, however, and overflows at 64k. Since this | ||
100 | * only occurs at exactly 64k, we use zero as a magic on-disk value to represent | ||
101 | * the attr block size. The following helpers manage the conversion between the | ||
102 | * in-core and on-disk formats. | ||
103 | */ | ||
104 | |||
105 | static void | ||
106 | xfs_attr3_leaf_firstused_from_disk( | ||
107 | struct xfs_da_geometry *geo, | ||
108 | struct xfs_attr3_icleaf_hdr *to, | ||
109 | struct xfs_attr_leafblock *from) | ||
110 | { | ||
111 | struct xfs_attr3_leaf_hdr *hdr3; | ||
112 | |||
113 | if (from->hdr.info.magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC)) { | ||
114 | hdr3 = (struct xfs_attr3_leaf_hdr *) from; | ||
115 | to->firstused = be16_to_cpu(hdr3->firstused); | ||
116 | } else { | ||
117 | to->firstused = be16_to_cpu(from->hdr.firstused); | ||
118 | } | ||
119 | |||
120 | /* | ||
121 | * Convert from the magic fsb size value to actual blocksize. This | ||
122 | * should only occur for empty blocks when the block size overflows | ||
123 | * 16-bits. | ||
124 | */ | ||
125 | if (to->firstused == XFS_ATTR3_LEAF_NULLOFF) { | ||
126 | ASSERT(!to->count && !to->usedbytes); | ||
127 | ASSERT(geo->blksize > USHRT_MAX); | ||
128 | to->firstused = geo->blksize; | ||
129 | } | ||
130 | } | ||
131 | |||
132 | static void | ||
133 | xfs_attr3_leaf_firstused_to_disk( | ||
134 | struct xfs_da_geometry *geo, | ||
135 | struct xfs_attr_leafblock *to, | ||
136 | struct xfs_attr3_icleaf_hdr *from) | ||
137 | { | ||
138 | struct xfs_attr3_leaf_hdr *hdr3; | ||
139 | uint32_t firstused; | ||
140 | |||
141 | /* magic value should only be seen on disk */ | ||
142 | ASSERT(from->firstused != XFS_ATTR3_LEAF_NULLOFF); | ||
143 | |||
144 | /* | ||
145 | * Scale down the 32-bit in-core firstused value to the 16-bit on-disk | ||
146 | * value. This only overflows at the max supported value of 64k. Use the | ||
147 | * magic on-disk value to represent block size in this case. | ||
148 | */ | ||
149 | firstused = from->firstused; | ||
150 | if (firstused > USHRT_MAX) { | ||
151 | ASSERT(from->firstused == geo->blksize); | ||
152 | firstused = XFS_ATTR3_LEAF_NULLOFF; | ||
153 | } | ||
154 | |||
155 | if (from->magic == XFS_ATTR3_LEAF_MAGIC) { | ||
156 | hdr3 = (struct xfs_attr3_leaf_hdr *) to; | ||
157 | hdr3->firstused = cpu_to_be16(firstused); | ||
158 | } else { | ||
159 | to->hdr.firstused = cpu_to_be16(firstused); | ||
160 | } | ||
161 | } | ||
162 | |||
89 | void | 163 | void |
90 | xfs_attr3_leaf_hdr_from_disk( | 164 | xfs_attr3_leaf_hdr_from_disk( |
165 | struct xfs_da_geometry *geo, | ||
91 | struct xfs_attr3_icleaf_hdr *to, | 166 | struct xfs_attr3_icleaf_hdr *to, |
92 | struct xfs_attr_leafblock *from) | 167 | struct xfs_attr_leafblock *from) |
93 | { | 168 | { |
@@ -104,7 +179,7 @@ xfs_attr3_leaf_hdr_from_disk( | |||
104 | to->magic = be16_to_cpu(hdr3->info.hdr.magic); | 179 | to->magic = be16_to_cpu(hdr3->info.hdr.magic); |
105 | to->count = be16_to_cpu(hdr3->count); | 180 | to->count = be16_to_cpu(hdr3->count); |
106 | to->usedbytes = be16_to_cpu(hdr3->usedbytes); | 181 | to->usedbytes = be16_to_cpu(hdr3->usedbytes); |
107 | to->firstused = be16_to_cpu(hdr3->firstused); | 182 | xfs_attr3_leaf_firstused_from_disk(geo, to, from); |
108 | to->holes = hdr3->holes; | 183 | to->holes = hdr3->holes; |
109 | 184 | ||
110 | for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) { | 185 | for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) { |
@@ -118,7 +193,7 @@ xfs_attr3_leaf_hdr_from_disk( | |||
118 | to->magic = be16_to_cpu(from->hdr.info.magic); | 193 | to->magic = be16_to_cpu(from->hdr.info.magic); |
119 | to->count = be16_to_cpu(from->hdr.count); | 194 | to->count = be16_to_cpu(from->hdr.count); |
120 | to->usedbytes = be16_to_cpu(from->hdr.usedbytes); | 195 | to->usedbytes = be16_to_cpu(from->hdr.usedbytes); |
121 | to->firstused = be16_to_cpu(from->hdr.firstused); | 196 | xfs_attr3_leaf_firstused_from_disk(geo, to, from); |
122 | to->holes = from->hdr.holes; | 197 | to->holes = from->hdr.holes; |
123 | 198 | ||
124 | for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) { | 199 | for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) { |
@@ -129,10 +204,11 @@ xfs_attr3_leaf_hdr_from_disk( | |||
129 | 204 | ||
130 | void | 205 | void |
131 | xfs_attr3_leaf_hdr_to_disk( | 206 | xfs_attr3_leaf_hdr_to_disk( |
207 | struct xfs_da_geometry *geo, | ||
132 | struct xfs_attr_leafblock *to, | 208 | struct xfs_attr_leafblock *to, |
133 | struct xfs_attr3_icleaf_hdr *from) | 209 | struct xfs_attr3_icleaf_hdr *from) |
134 | { | 210 | { |
135 | int i; | 211 | int i; |
136 | 212 | ||
137 | ASSERT(from->magic == XFS_ATTR_LEAF_MAGIC || | 213 | ASSERT(from->magic == XFS_ATTR_LEAF_MAGIC || |
138 | from->magic == XFS_ATTR3_LEAF_MAGIC); | 214 | from->magic == XFS_ATTR3_LEAF_MAGIC); |
@@ -145,7 +221,7 @@ xfs_attr3_leaf_hdr_to_disk( | |||
145 | hdr3->info.hdr.magic = cpu_to_be16(from->magic); | 221 | hdr3->info.hdr.magic = cpu_to_be16(from->magic); |
146 | hdr3->count = cpu_to_be16(from->count); | 222 | hdr3->count = cpu_to_be16(from->count); |
147 | hdr3->usedbytes = cpu_to_be16(from->usedbytes); | 223 | hdr3->usedbytes = cpu_to_be16(from->usedbytes); |
148 | hdr3->firstused = cpu_to_be16(from->firstused); | 224 | xfs_attr3_leaf_firstused_to_disk(geo, to, from); |
149 | hdr3->holes = from->holes; | 225 | hdr3->holes = from->holes; |
150 | hdr3->pad1 = 0; | 226 | hdr3->pad1 = 0; |
151 | 227 | ||
@@ -160,7 +236,7 @@ xfs_attr3_leaf_hdr_to_disk( | |||
160 | to->hdr.info.magic = cpu_to_be16(from->magic); | 236 | to->hdr.info.magic = cpu_to_be16(from->magic); |
161 | to->hdr.count = cpu_to_be16(from->count); | 237 | to->hdr.count = cpu_to_be16(from->count); |
162 | to->hdr.usedbytes = cpu_to_be16(from->usedbytes); | 238 | to->hdr.usedbytes = cpu_to_be16(from->usedbytes); |
163 | to->hdr.firstused = cpu_to_be16(from->firstused); | 239 | xfs_attr3_leaf_firstused_to_disk(geo, to, from); |
164 | to->hdr.holes = from->holes; | 240 | to->hdr.holes = from->holes; |
165 | to->hdr.pad1 = 0; | 241 | to->hdr.pad1 = 0; |
166 | 242 | ||
@@ -178,7 +254,7 @@ xfs_attr3_leaf_verify( | |||
178 | struct xfs_attr_leafblock *leaf = bp->b_addr; | 254 | struct xfs_attr_leafblock *leaf = bp->b_addr; |
179 | struct xfs_attr3_icleaf_hdr ichdr; | 255 | struct xfs_attr3_icleaf_hdr ichdr; |
180 | 256 | ||
181 | xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf); | 257 | xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr, leaf); |
182 | 258 | ||
183 | if (xfs_sb_version_hascrc(&mp->m_sb)) { | 259 | if (xfs_sb_version_hascrc(&mp->m_sb)) { |
184 | struct xfs_da3_node_hdr *hdr3 = bp->b_addr; | 260 | struct xfs_da3_node_hdr *hdr3 = bp->b_addr; |
@@ -757,9 +833,10 @@ xfs_attr_shortform_allfit( | |||
757 | struct xfs_attr3_icleaf_hdr leafhdr; | 833 | struct xfs_attr3_icleaf_hdr leafhdr; |
758 | int bytes; | 834 | int bytes; |
759 | int i; | 835 | int i; |
836 | struct xfs_mount *mp = bp->b_target->bt_mount; | ||
760 | 837 | ||
761 | leaf = bp->b_addr; | 838 | leaf = bp->b_addr; |
762 | xfs_attr3_leaf_hdr_from_disk(&leafhdr, leaf); | 839 | xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &leafhdr, leaf); |
763 | entry = xfs_attr3_leaf_entryp(leaf); | 840 | entry = xfs_attr3_leaf_entryp(leaf); |
764 | 841 | ||
765 | bytes = sizeof(struct xfs_attr_sf_hdr); | 842 | bytes = sizeof(struct xfs_attr_sf_hdr); |
@@ -812,7 +889,7 @@ xfs_attr3_leaf_to_shortform( | |||
812 | memcpy(tmpbuffer, bp->b_addr, args->geo->blksize); | 889 | memcpy(tmpbuffer, bp->b_addr, args->geo->blksize); |
813 | 890 | ||
814 | leaf = (xfs_attr_leafblock_t *)tmpbuffer; | 891 | leaf = (xfs_attr_leafblock_t *)tmpbuffer; |
815 | xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf); | 892 | xfs_attr3_leaf_hdr_from_disk(args->geo, &ichdr, leaf); |
816 | entry = xfs_attr3_leaf_entryp(leaf); | 893 | entry = xfs_attr3_leaf_entryp(leaf); |
817 | 894 | ||
818 | /* XXX (dgc): buffer is about to be marked stale - why zero it? */ | 895 | /* XXX (dgc): buffer is about to be marked stale - why zero it? */ |
@@ -923,7 +1000,7 @@ xfs_attr3_leaf_to_node( | |||
923 | btree = dp->d_ops->node_tree_p(node); | 1000 | btree = dp->d_ops->node_tree_p(node); |
924 | 1001 | ||
925 | leaf = bp2->b_addr; | 1002 | leaf = bp2->b_addr; |
926 | xfs_attr3_leaf_hdr_from_disk(&icleafhdr, leaf); | 1003 | xfs_attr3_leaf_hdr_from_disk(args->geo, &icleafhdr, leaf); |
927 | entries = xfs_attr3_leaf_entryp(leaf); | 1004 | entries = xfs_attr3_leaf_entryp(leaf); |
928 | 1005 | ||
929 | /* both on-disk, don't endian-flip twice */ | 1006 | /* both on-disk, don't endian-flip twice */ |
@@ -988,7 +1065,7 @@ xfs_attr3_leaf_create( | |||
988 | } | 1065 | } |
989 | ichdr.freemap[0].size = ichdr.firstused - ichdr.freemap[0].base; | 1066 | ichdr.freemap[0].size = ichdr.firstused - ichdr.freemap[0].base; |
990 | 1067 | ||
991 | xfs_attr3_leaf_hdr_to_disk(leaf, &ichdr); | 1068 | xfs_attr3_leaf_hdr_to_disk(args->geo, leaf, &ichdr); |
992 | xfs_trans_log_buf(args->trans, bp, 0, args->geo->blksize - 1); | 1069 | xfs_trans_log_buf(args->trans, bp, 0, args->geo->blksize - 1); |
993 | 1070 | ||
994 | *bpp = bp; | 1071 | *bpp = bp; |
@@ -1073,7 +1150,7 @@ xfs_attr3_leaf_add( | |||
1073 | trace_xfs_attr_leaf_add(args); | 1150 | trace_xfs_attr_leaf_add(args); |
1074 | 1151 | ||
1075 | leaf = bp->b_addr; | 1152 | leaf = bp->b_addr; |
1076 | xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf); | 1153 | xfs_attr3_leaf_hdr_from_disk(args->geo, &ichdr, leaf); |
1077 | ASSERT(args->index >= 0 && args->index <= ichdr.count); | 1154 | ASSERT(args->index >= 0 && args->index <= ichdr.count); |
1078 | entsize = xfs_attr_leaf_newentsize(args, NULL); | 1155 | entsize = xfs_attr_leaf_newentsize(args, NULL); |
1079 | 1156 | ||
@@ -1126,7 +1203,7 @@ xfs_attr3_leaf_add( | |||
1126 | tmp = xfs_attr3_leaf_add_work(bp, &ichdr, args, 0); | 1203 | tmp = xfs_attr3_leaf_add_work(bp, &ichdr, args, 0); |
1127 | 1204 | ||
1128 | out_log_hdr: | 1205 | out_log_hdr: |
1129 | xfs_attr3_leaf_hdr_to_disk(leaf, &ichdr); | 1206 | xfs_attr3_leaf_hdr_to_disk(args->geo, leaf, &ichdr); |
1130 | xfs_trans_log_buf(args->trans, bp, | 1207 | xfs_trans_log_buf(args->trans, bp, |
1131 | XFS_DA_LOGRANGE(leaf, &leaf->hdr, | 1208 | XFS_DA_LOGRANGE(leaf, &leaf->hdr, |
1132 | xfs_attr3_leaf_hdr_size(leaf))); | 1209 | xfs_attr3_leaf_hdr_size(leaf))); |
@@ -1294,7 +1371,7 @@ xfs_attr3_leaf_compact( | |||
1294 | ichdr_dst->freemap[0].base; | 1371 | ichdr_dst->freemap[0].base; |
1295 | 1372 | ||
1296 | /* write the header back to initialise the underlying buffer */ | 1373 | /* write the header back to initialise the underlying buffer */ |
1297 | xfs_attr3_leaf_hdr_to_disk(leaf_dst, ichdr_dst); | 1374 | xfs_attr3_leaf_hdr_to_disk(args->geo, leaf_dst, ichdr_dst); |
1298 | 1375 | ||
1299 | /* | 1376 | /* |
1300 | * Copy all entries in the same (sorted) order, | 1377 | * Copy all entries in the same (sorted) order, |
@@ -1344,9 +1421,10 @@ xfs_attr_leaf_order( | |||
1344 | { | 1421 | { |
1345 | struct xfs_attr3_icleaf_hdr ichdr1; | 1422 | struct xfs_attr3_icleaf_hdr ichdr1; |
1346 | struct xfs_attr3_icleaf_hdr ichdr2; | 1423 | struct xfs_attr3_icleaf_hdr ichdr2; |
1424 | struct xfs_mount *mp = leaf1_bp->b_target->bt_mount; | ||
1347 | 1425 | ||
1348 | xfs_attr3_leaf_hdr_from_disk(&ichdr1, leaf1_bp->b_addr); | 1426 | xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr1, leaf1_bp->b_addr); |
1349 | xfs_attr3_leaf_hdr_from_disk(&ichdr2, leaf2_bp->b_addr); | 1427 | xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr2, leaf2_bp->b_addr); |
1350 | return xfs_attr3_leaf_order(leaf1_bp, &ichdr1, leaf2_bp, &ichdr2); | 1428 | return xfs_attr3_leaf_order(leaf1_bp, &ichdr1, leaf2_bp, &ichdr2); |
1351 | } | 1429 | } |
1352 | 1430 | ||
@@ -1388,8 +1466,8 @@ xfs_attr3_leaf_rebalance( | |||
1388 | ASSERT(blk2->magic == XFS_ATTR_LEAF_MAGIC); | 1466 | ASSERT(blk2->magic == XFS_ATTR_LEAF_MAGIC); |
1389 | leaf1 = blk1->bp->b_addr; | 1467 | leaf1 = blk1->bp->b_addr; |
1390 | leaf2 = blk2->bp->b_addr; | 1468 | leaf2 = blk2->bp->b_addr; |
1391 | xfs_attr3_leaf_hdr_from_disk(&ichdr1, leaf1); | 1469 | xfs_attr3_leaf_hdr_from_disk(state->args->geo, &ichdr1, leaf1); |
1392 | xfs_attr3_leaf_hdr_from_disk(&ichdr2, leaf2); | 1470 | xfs_attr3_leaf_hdr_from_disk(state->args->geo, &ichdr2, leaf2); |
1393 | ASSERT(ichdr2.count == 0); | 1471 | ASSERT(ichdr2.count == 0); |
1394 | args = state->args; | 1472 | args = state->args; |
1395 | 1473 | ||
@@ -1490,8 +1568,8 @@ xfs_attr3_leaf_rebalance( | |||
1490 | ichdr1.count, count); | 1568 | ichdr1.count, count); |
1491 | } | 1569 | } |
1492 | 1570 | ||
1493 | xfs_attr3_leaf_hdr_to_disk(leaf1, &ichdr1); | 1571 | xfs_attr3_leaf_hdr_to_disk(state->args->geo, leaf1, &ichdr1); |
1494 | xfs_attr3_leaf_hdr_to_disk(leaf2, &ichdr2); | 1572 | xfs_attr3_leaf_hdr_to_disk(state->args->geo, leaf2, &ichdr2); |
1495 | xfs_trans_log_buf(args->trans, blk1->bp, 0, args->geo->blksize - 1); | 1573 | xfs_trans_log_buf(args->trans, blk1->bp, 0, args->geo->blksize - 1); |
1496 | xfs_trans_log_buf(args->trans, blk2->bp, 0, args->geo->blksize - 1); | 1574 | xfs_trans_log_buf(args->trans, blk2->bp, 0, args->geo->blksize - 1); |
1497 | 1575 | ||
@@ -1684,7 +1762,7 @@ xfs_attr3_leaf_toosmall( | |||
1684 | */ | 1762 | */ |
1685 | blk = &state->path.blk[ state->path.active-1 ]; | 1763 | blk = &state->path.blk[ state->path.active-1 ]; |
1686 | leaf = blk->bp->b_addr; | 1764 | leaf = blk->bp->b_addr; |
1687 | xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf); | 1765 | xfs_attr3_leaf_hdr_from_disk(state->args->geo, &ichdr, leaf); |
1688 | bytes = xfs_attr3_leaf_hdr_size(leaf) + | 1766 | bytes = xfs_attr3_leaf_hdr_size(leaf) + |
1689 | ichdr.count * sizeof(xfs_attr_leaf_entry_t) + | 1767 | ichdr.count * sizeof(xfs_attr_leaf_entry_t) + |
1690 | ichdr.usedbytes; | 1768 | ichdr.usedbytes; |
@@ -1740,7 +1818,7 @@ xfs_attr3_leaf_toosmall( | |||
1740 | if (error) | 1818 | if (error) |
1741 | return error; | 1819 | return error; |
1742 | 1820 | ||
1743 | xfs_attr3_leaf_hdr_from_disk(&ichdr2, bp->b_addr); | 1821 | xfs_attr3_leaf_hdr_from_disk(state->args->geo, &ichdr2, bp->b_addr); |
1744 | 1822 | ||
1745 | bytes = state->args->geo->blksize - | 1823 | bytes = state->args->geo->blksize - |
1746 | (state->args->geo->blksize >> 2) - | 1824 | (state->args->geo->blksize >> 2) - |
@@ -1805,7 +1883,7 @@ xfs_attr3_leaf_remove( | |||
1805 | trace_xfs_attr_leaf_remove(args); | 1883 | trace_xfs_attr_leaf_remove(args); |
1806 | 1884 | ||
1807 | leaf = bp->b_addr; | 1885 | leaf = bp->b_addr; |
1808 | xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf); | 1886 | xfs_attr3_leaf_hdr_from_disk(args->geo, &ichdr, leaf); |
1809 | 1887 | ||
1810 | ASSERT(ichdr.count > 0 && ichdr.count < args->geo->blksize / 8); | 1888 | ASSERT(ichdr.count > 0 && ichdr.count < args->geo->blksize / 8); |
1811 | ASSERT(args->index >= 0 && args->index < ichdr.count); | 1889 | ASSERT(args->index >= 0 && args->index < ichdr.count); |
@@ -1918,12 +1996,11 @@ xfs_attr3_leaf_remove( | |||
1918 | tmp = be16_to_cpu(entry->nameidx); | 1996 | tmp = be16_to_cpu(entry->nameidx); |
1919 | } | 1997 | } |
1920 | ichdr.firstused = tmp; | 1998 | ichdr.firstused = tmp; |
1921 | if (!ichdr.firstused) | 1999 | ASSERT(ichdr.firstused != 0); |
1922 | ichdr.firstused = tmp - XFS_ATTR_LEAF_NAME_ALIGN; | ||
1923 | } else { | 2000 | } else { |
1924 | ichdr.holes = 1; /* mark as needing compaction */ | 2001 | ichdr.holes = 1; /* mark as needing compaction */ |
1925 | } | 2002 | } |
1926 | xfs_attr3_leaf_hdr_to_disk(leaf, &ichdr); | 2003 | xfs_attr3_leaf_hdr_to_disk(args->geo, leaf, &ichdr); |
1927 | xfs_trans_log_buf(args->trans, bp, | 2004 | xfs_trans_log_buf(args->trans, bp, |
1928 | XFS_DA_LOGRANGE(leaf, &leaf->hdr, | 2005 | XFS_DA_LOGRANGE(leaf, &leaf->hdr, |
1929 | xfs_attr3_leaf_hdr_size(leaf))); | 2006 | xfs_attr3_leaf_hdr_size(leaf))); |
@@ -1957,8 +2034,8 @@ xfs_attr3_leaf_unbalance( | |||
1957 | 2034 | ||
1958 | drop_leaf = drop_blk->bp->b_addr; | 2035 | drop_leaf = drop_blk->bp->b_addr; |
1959 | save_leaf = save_blk->bp->b_addr; | 2036 | save_leaf = save_blk->bp->b_addr; |
1960 | xfs_attr3_leaf_hdr_from_disk(&drophdr, drop_leaf); | 2037 | xfs_attr3_leaf_hdr_from_disk(state->args->geo, &drophdr, drop_leaf); |
1961 | xfs_attr3_leaf_hdr_from_disk(&savehdr, save_leaf); | 2038 | xfs_attr3_leaf_hdr_from_disk(state->args->geo, &savehdr, save_leaf); |
1962 | entry = xfs_attr3_leaf_entryp(drop_leaf); | 2039 | entry = xfs_attr3_leaf_entryp(drop_leaf); |
1963 | 2040 | ||
1964 | /* | 2041 | /* |
@@ -2012,7 +2089,7 @@ xfs_attr3_leaf_unbalance( | |||
2012 | tmphdr.firstused = state->args->geo->blksize; | 2089 | tmphdr.firstused = state->args->geo->blksize; |
2013 | 2090 | ||
2014 | /* write the header to the temp buffer to initialise it */ | 2091 | /* write the header to the temp buffer to initialise it */ |
2015 | xfs_attr3_leaf_hdr_to_disk(tmp_leaf, &tmphdr); | 2092 | xfs_attr3_leaf_hdr_to_disk(state->args->geo, tmp_leaf, &tmphdr); |
2016 | 2093 | ||
2017 | if (xfs_attr3_leaf_order(save_blk->bp, &savehdr, | 2094 | if (xfs_attr3_leaf_order(save_blk->bp, &savehdr, |
2018 | drop_blk->bp, &drophdr)) { | 2095 | drop_blk->bp, &drophdr)) { |
@@ -2039,7 +2116,7 @@ xfs_attr3_leaf_unbalance( | |||
2039 | kmem_free(tmp_leaf); | 2116 | kmem_free(tmp_leaf); |
2040 | } | 2117 | } |
2041 | 2118 | ||
2042 | xfs_attr3_leaf_hdr_to_disk(save_leaf, &savehdr); | 2119 | xfs_attr3_leaf_hdr_to_disk(state->args->geo, save_leaf, &savehdr); |
2043 | xfs_trans_log_buf(state->args->trans, save_blk->bp, 0, | 2120 | xfs_trans_log_buf(state->args->trans, save_blk->bp, 0, |
2044 | state->args->geo->blksize - 1); | 2121 | state->args->geo->blksize - 1); |
2045 | 2122 | ||
@@ -2085,7 +2162,7 @@ xfs_attr3_leaf_lookup_int( | |||
2085 | trace_xfs_attr_leaf_lookup(args); | 2162 | trace_xfs_attr_leaf_lookup(args); |
2086 | 2163 | ||
2087 | leaf = bp->b_addr; | 2164 | leaf = bp->b_addr; |
2088 | xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf); | 2165 | xfs_attr3_leaf_hdr_from_disk(args->geo, &ichdr, leaf); |
2089 | entries = xfs_attr3_leaf_entryp(leaf); | 2166 | entries = xfs_attr3_leaf_entryp(leaf); |
2090 | ASSERT(ichdr.count < args->geo->blksize / 8); | 2167 | ASSERT(ichdr.count < args->geo->blksize / 8); |
2091 | 2168 | ||
@@ -2190,7 +2267,7 @@ xfs_attr3_leaf_getvalue( | |||
2190 | int valuelen; | 2267 | int valuelen; |
2191 | 2268 | ||
2192 | leaf = bp->b_addr; | 2269 | leaf = bp->b_addr; |
2193 | xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf); | 2270 | xfs_attr3_leaf_hdr_from_disk(args->geo, &ichdr, leaf); |
2194 | ASSERT(ichdr.count < args->geo->blksize / 8); | 2271 | ASSERT(ichdr.count < args->geo->blksize / 8); |
2195 | ASSERT(args->index < ichdr.count); | 2272 | ASSERT(args->index < ichdr.count); |
2196 | 2273 | ||
@@ -2391,8 +2468,9 @@ xfs_attr_leaf_lasthash( | |||
2391 | { | 2468 | { |
2392 | struct xfs_attr3_icleaf_hdr ichdr; | 2469 | struct xfs_attr3_icleaf_hdr ichdr; |
2393 | struct xfs_attr_leaf_entry *entries; | 2470 | struct xfs_attr_leaf_entry *entries; |
2471 | struct xfs_mount *mp = bp->b_target->bt_mount; | ||
2394 | 2472 | ||
2395 | xfs_attr3_leaf_hdr_from_disk(&ichdr, bp->b_addr); | 2473 | xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr, bp->b_addr); |
2396 | entries = xfs_attr3_leaf_entryp(bp->b_addr); | 2474 | entries = xfs_attr3_leaf_entryp(bp->b_addr); |
2397 | if (count) | 2475 | if (count) |
2398 | *count = ichdr.count; | 2476 | *count = ichdr.count; |
@@ -2486,7 +2564,7 @@ xfs_attr3_leaf_clearflag( | |||
2486 | ASSERT(entry->flags & XFS_ATTR_INCOMPLETE); | 2564 | ASSERT(entry->flags & XFS_ATTR_INCOMPLETE); |
2487 | 2565 | ||
2488 | #ifdef DEBUG | 2566 | #ifdef DEBUG |
2489 | xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf); | 2567 | xfs_attr3_leaf_hdr_from_disk(args->geo, &ichdr, leaf); |
2490 | ASSERT(args->index < ichdr.count); | 2568 | ASSERT(args->index < ichdr.count); |
2491 | ASSERT(args->index >= 0); | 2569 | ASSERT(args->index >= 0); |
2492 | 2570 | ||
@@ -2550,7 +2628,7 @@ xfs_attr3_leaf_setflag( | |||
2550 | 2628 | ||
2551 | leaf = bp->b_addr; | 2629 | leaf = bp->b_addr; |
2552 | #ifdef DEBUG | 2630 | #ifdef DEBUG |
2553 | xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf); | 2631 | xfs_attr3_leaf_hdr_from_disk(args->geo, &ichdr, leaf); |
2554 | ASSERT(args->index < ichdr.count); | 2632 | ASSERT(args->index < ichdr.count); |
2555 | ASSERT(args->index >= 0); | 2633 | ASSERT(args->index >= 0); |
2556 | #endif | 2634 | #endif |
@@ -2629,11 +2707,11 @@ xfs_attr3_leaf_flipflags( | |||
2629 | entry2 = &xfs_attr3_leaf_entryp(leaf2)[args->index2]; | 2707 | entry2 = &xfs_attr3_leaf_entryp(leaf2)[args->index2]; |
2630 | 2708 | ||
2631 | #ifdef DEBUG | 2709 | #ifdef DEBUG |
2632 | xfs_attr3_leaf_hdr_from_disk(&ichdr1, leaf1); | 2710 | xfs_attr3_leaf_hdr_from_disk(args->geo, &ichdr1, leaf1); |
2633 | ASSERT(args->index < ichdr1.count); | 2711 | ASSERT(args->index < ichdr1.count); |
2634 | ASSERT(args->index >= 0); | 2712 | ASSERT(args->index >= 0); |
2635 | 2713 | ||
2636 | xfs_attr3_leaf_hdr_from_disk(&ichdr2, leaf2); | 2714 | xfs_attr3_leaf_hdr_from_disk(args->geo, &ichdr2, leaf2); |
2637 | ASSERT(args->index2 < ichdr2.count); | 2715 | ASSERT(args->index2 < ichdr2.count); |
2638 | ASSERT(args->index2 >= 0); | 2716 | ASSERT(args->index2 >= 0); |
2639 | 2717 | ||
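
A concrete walk-through of the firstused encoding introduced above: with 64k attr blocks, an empty leaf has an in-core firstused of 65536, one past what the 16-bit on-disk field can hold, so the helpers write the magic value (zero) to disk and expand it back to geo->blksize on read. The sketch below replays that round trip with plain integer types; the names are stand-ins for the kernel structures, not the real API:

#include <assert.h>
#include <limits.h>
#include <stdint.h>

#define LEAF_NULLOFF	0	/* stand-in for XFS_ATTR3_LEAF_NULLOFF */

/* scale the 32-bit in-core offset down to the 16-bit on-disk field */
static uint16_t firstused_to_disk(uint32_t firstused, uint32_t blksize)
{
	if (firstused > USHRT_MAX) {
		assert(firstused == blksize);	/* only an empty block overflows */
		return LEAF_NULLOFF;
	}
	return (uint16_t)firstused;
}

/* expand the on-disk field back to the in-core value */
static uint32_t firstused_from_disk(uint16_t ondisk, uint32_t blksize)
{
	if (ondisk == LEAF_NULLOFF) {
		assert(blksize > USHRT_MAX);	/* magic is only valid at 64k */
		return blksize;
	}
	return ondisk;
}

int main(void)
{
	/* empty leaf, 64k block: 65536 -> 0 on disk -> 65536 back in core */
	assert(firstused_from_disk(firstused_to_disk(65536, 65536), 65536) == 65536);
	/* an ordinary offset passes through unchanged */
	assert(firstused_from_disk(firstused_to_disk(61440, 65536), 65536) == 61440);
	return 0;
}
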
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.h b/fs/xfs/libxfs/xfs_attr_leaf.h index e2929da7c3ba..025c4b820c03 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.h +++ b/fs/xfs/libxfs/xfs_attr_leaf.h | |||
@@ -100,9 +100,11 @@ int xfs_attr_leaf_newentsize(struct xfs_da_args *args, int *local); | |||
100 | int xfs_attr3_leaf_read(struct xfs_trans *tp, struct xfs_inode *dp, | 100 | int xfs_attr3_leaf_read(struct xfs_trans *tp, struct xfs_inode *dp, |
101 | xfs_dablk_t bno, xfs_daddr_t mappedbno, | 101 | xfs_dablk_t bno, xfs_daddr_t mappedbno, |
102 | struct xfs_buf **bpp); | 102 | struct xfs_buf **bpp); |
103 | void xfs_attr3_leaf_hdr_from_disk(struct xfs_attr3_icleaf_hdr *to, | 103 | void xfs_attr3_leaf_hdr_from_disk(struct xfs_da_geometry *geo, |
104 | struct xfs_attr3_icleaf_hdr *to, | ||
104 | struct xfs_attr_leafblock *from); | 105 | struct xfs_attr_leafblock *from); |
105 | void xfs_attr3_leaf_hdr_to_disk(struct xfs_attr_leafblock *to, | 106 | void xfs_attr3_leaf_hdr_to_disk(struct xfs_da_geometry *geo, |
107 | struct xfs_attr_leafblock *to, | ||
106 | struct xfs_attr3_icleaf_hdr *from); | 108 | struct xfs_attr3_icleaf_hdr *from); |
107 | 109 | ||
108 | #endif /* __XFS_ATTR_LEAF_H__ */ | 110 | #endif /* __XFS_ATTR_LEAF_H__ */ |
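
With geometry threaded through both prototypes, callers fall into the two patterns visible in the xfs_attr_leaf.c hunks above: functions that already carry a struct xfs_da_args pass args->geo (or state->args->geo), while buffer-only helpers such as xfs_attr_shortform_allfit and xfs_attr_leaf_lasthash recover it through the buffer's mount point. The fragments below simply restate those two call shapes from the diff:

	/* with a da_args in hand */
	xfs_attr3_leaf_hdr_from_disk(args->geo, &ichdr, leaf);

	/* from a bare buffer */
	struct xfs_mount	*mp = bp->b_target->bt_mount;

	xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr, bp->b_addr);
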
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 61ec015dca16..aeffeaaac0ec 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c | |||
@@ -244,30 +244,6 @@ xfs_bmap_forkoff_reset( | |||
244 | } | 244 | } |
245 | } | 245 | } |
246 | 246 | ||
247 | /* | ||
248 | * Debug/sanity checking code | ||
249 | */ | ||
250 | |||
251 | STATIC int | ||
252 | xfs_bmap_sanity_check( | ||
253 | struct xfs_mount *mp, | ||
254 | struct xfs_buf *bp, | ||
255 | int level) | ||
256 | { | ||
257 | struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); | ||
258 | |||
259 | if (block->bb_magic != cpu_to_be32(XFS_BMAP_CRC_MAGIC) && | ||
260 | block->bb_magic != cpu_to_be32(XFS_BMAP_MAGIC)) | ||
261 | return 0; | ||
262 | |||
263 | if (be16_to_cpu(block->bb_level) != level || | ||
264 | be16_to_cpu(block->bb_numrecs) == 0 || | ||
265 | be16_to_cpu(block->bb_numrecs) > mp->m_bmap_dmxr[level != 0]) | ||
266 | return 0; | ||
267 | |||
268 | return 1; | ||
269 | } | ||
270 | |||
271 | #ifdef DEBUG | 247 | #ifdef DEBUG |
272 | STATIC struct xfs_buf * | 248 | STATIC struct xfs_buf * |
273 | xfs_bmap_get_bp( | 249 | xfs_bmap_get_bp( |
@@ -410,9 +386,6 @@ xfs_bmap_check_leaf_extents( | |||
410 | goto error_norelse; | 386 | goto error_norelse; |
411 | } | 387 | } |
412 | block = XFS_BUF_TO_BLOCK(bp); | 388 | block = XFS_BUF_TO_BLOCK(bp); |
413 | XFS_WANT_CORRUPTED_GOTO( | ||
414 | xfs_bmap_sanity_check(mp, bp, level), | ||
415 | error0); | ||
416 | if (level == 0) | 389 | if (level == 0) |
417 | break; | 390 | break; |
418 | 391 | ||
@@ -424,7 +397,8 @@ xfs_bmap_check_leaf_extents( | |||
424 | xfs_check_block(block, mp, 0, 0); | 397 | xfs_check_block(block, mp, 0, 0); |
425 | pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]); | 398 | pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]); |
426 | bno = be64_to_cpu(*pp); | 399 | bno = be64_to_cpu(*pp); |
427 | XFS_WANT_CORRUPTED_GOTO(XFS_FSB_SANITY_CHECK(mp, bno), error0); | 400 | XFS_WANT_CORRUPTED_GOTO(mp, |
401 | XFS_FSB_SANITY_CHECK(mp, bno), error0); | ||
428 | if (bp_release) { | 402 | if (bp_release) { |
429 | bp_release = 0; | 403 | bp_release = 0; |
430 | xfs_trans_brelse(NULL, bp); | 404 | xfs_trans_brelse(NULL, bp); |
@@ -1029,7 +1003,7 @@ xfs_bmap_add_attrfork_btree( | |||
1029 | if ((error = xfs_bmbt_lookup_ge(cur, 0, 0, 0, &stat))) | 1003 | if ((error = xfs_bmbt_lookup_ge(cur, 0, 0, 0, &stat))) |
1030 | goto error0; | 1004 | goto error0; |
1031 | /* must be at least one entry */ | 1005 | /* must be at least one entry */ |
1032 | XFS_WANT_CORRUPTED_GOTO(stat == 1, error0); | 1006 | XFS_WANT_CORRUPTED_GOTO(mp, stat == 1, error0); |
1033 | if ((error = xfs_btree_new_iroot(cur, flags, &stat))) | 1007 | if ((error = xfs_btree_new_iroot(cur, flags, &stat))) |
1034 | goto error0; | 1008 | goto error0; |
1035 | if (stat == 0) { | 1009 | if (stat == 0) { |
@@ -1311,14 +1285,12 @@ xfs_bmap_read_extents( | |||
1311 | if (error) | 1285 | if (error) |
1312 | return error; | 1286 | return error; |
1313 | block = XFS_BUF_TO_BLOCK(bp); | 1287 | block = XFS_BUF_TO_BLOCK(bp); |
1314 | XFS_WANT_CORRUPTED_GOTO( | ||
1315 | xfs_bmap_sanity_check(mp, bp, level), | ||
1316 | error0); | ||
1317 | if (level == 0) | 1288 | if (level == 0) |
1318 | break; | 1289 | break; |
1319 | pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]); | 1290 | pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]); |
1320 | bno = be64_to_cpu(*pp); | 1291 | bno = be64_to_cpu(*pp); |
1321 | XFS_WANT_CORRUPTED_GOTO(XFS_FSB_SANITY_CHECK(mp, bno), error0); | 1292 | XFS_WANT_CORRUPTED_GOTO(mp, |
1293 | XFS_FSB_SANITY_CHECK(mp, bno), error0); | ||
1322 | xfs_trans_brelse(tp, bp); | 1294 | xfs_trans_brelse(tp, bp); |
1323 | } | 1295 | } |
1324 | /* | 1296 | /* |
@@ -1345,9 +1317,6 @@ xfs_bmap_read_extents( | |||
1345 | XFS_ERRLEVEL_LOW, ip->i_mount, block); | 1317 | XFS_ERRLEVEL_LOW, ip->i_mount, block); |
1346 | goto error0; | 1318 | goto error0; |
1347 | } | 1319 | } |
1348 | XFS_WANT_CORRUPTED_GOTO( | ||
1349 | xfs_bmap_sanity_check(mp, bp, 0), | ||
1350 | error0); | ||
1351 | /* | 1320 | /* |
1352 | * Read-ahead the next leaf block, if any. | 1321 | * Read-ahead the next leaf block, if any. |
1353 | */ | 1322 | */ |
@@ -1755,7 +1724,9 @@ xfs_bmap_add_extent_delay_real( | |||
1755 | xfs_filblks_t temp=0; /* value for da_new calculations */ | 1724 | xfs_filblks_t temp=0; /* value for da_new calculations */ |
1756 | xfs_filblks_t temp2=0;/* value for da_new calculations */ | 1725 | xfs_filblks_t temp2=0;/* value for da_new calculations */ |
1757 | int tmp_rval; /* partial logging flags */ | 1726 | int tmp_rval; /* partial logging flags */ |
1727 | struct xfs_mount *mp; | ||
1758 | 1728 | ||
1729 | mp = bma->tp ? bma->tp->t_mountp : NULL; | ||
1759 | ifp = XFS_IFORK_PTR(bma->ip, XFS_DATA_FORK); | 1730 | ifp = XFS_IFORK_PTR(bma->ip, XFS_DATA_FORK); |
1760 | 1731 | ||
1761 | ASSERT(bma->idx >= 0); | 1732 | ASSERT(bma->idx >= 0); |
@@ -1866,15 +1837,15 @@ xfs_bmap_add_extent_delay_real( | |||
1866 | RIGHT.br_blockcount, &i); | 1837 | RIGHT.br_blockcount, &i); |
1867 | if (error) | 1838 | if (error) |
1868 | goto done; | 1839 | goto done; |
1869 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | 1840 | XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); |
1870 | error = xfs_btree_delete(bma->cur, &i); | 1841 | error = xfs_btree_delete(bma->cur, &i); |
1871 | if (error) | 1842 | if (error) |
1872 | goto done; | 1843 | goto done; |
1873 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | 1844 | XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); |
1874 | error = xfs_btree_decrement(bma->cur, 0, &i); | 1845 | error = xfs_btree_decrement(bma->cur, 0, &i); |
1875 | if (error) | 1846 | if (error) |
1876 | goto done; | 1847 | goto done; |
1877 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | 1848 | XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); |
1878 | error = xfs_bmbt_update(bma->cur, LEFT.br_startoff, | 1849 | error = xfs_bmbt_update(bma->cur, LEFT.br_startoff, |
1879 | LEFT.br_startblock, | 1850 | LEFT.br_startblock, |
1880 | LEFT.br_blockcount + | 1851 | LEFT.br_blockcount + |
@@ -1907,7 +1878,7 @@ xfs_bmap_add_extent_delay_real( | |||
1907 | &i); | 1878 | &i); |
1908 | if (error) | 1879 | if (error) |
1909 | goto done; | 1880 | goto done; |
1910 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | 1881 | XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); |
1911 | error = xfs_bmbt_update(bma->cur, LEFT.br_startoff, | 1882 | error = xfs_bmbt_update(bma->cur, LEFT.br_startoff, |
1912 | LEFT.br_startblock, | 1883 | LEFT.br_startblock, |
1913 | LEFT.br_blockcount + | 1884 | LEFT.br_blockcount + |
@@ -1938,7 +1909,7 @@ xfs_bmap_add_extent_delay_real( | |||
1938 | RIGHT.br_blockcount, &i); | 1909 | RIGHT.br_blockcount, &i); |
1939 | if (error) | 1910 | if (error) |
1940 | goto done; | 1911 | goto done; |
1941 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | 1912 | XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); |
1942 | error = xfs_bmbt_update(bma->cur, PREV.br_startoff, | 1913 | error = xfs_bmbt_update(bma->cur, PREV.br_startoff, |
1943 | new->br_startblock, | 1914 | new->br_startblock, |
1944 | PREV.br_blockcount + | 1915 | PREV.br_blockcount + |
@@ -1968,12 +1939,12 @@ xfs_bmap_add_extent_delay_real( | |||
1968 | &i); | 1939 | &i); |
1969 | if (error) | 1940 | if (error) |
1970 | goto done; | 1941 | goto done; |
1971 | XFS_WANT_CORRUPTED_GOTO(i == 0, done); | 1942 | XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done); |
1972 | bma->cur->bc_rec.b.br_state = XFS_EXT_NORM; | 1943 | bma->cur->bc_rec.b.br_state = XFS_EXT_NORM; |
1973 | error = xfs_btree_insert(bma->cur, &i); | 1944 | error = xfs_btree_insert(bma->cur, &i); |
1974 | if (error) | 1945 | if (error) |
1975 | goto done; | 1946 | goto done; |
1976 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | 1947 | XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); |
1977 | } | 1948 | } |
1978 | break; | 1949 | break; |
1979 | 1950 | ||
@@ -2001,7 +1972,7 @@ xfs_bmap_add_extent_delay_real( | |||
2001 | &i); | 1972 | &i); |
2002 | if (error) | 1973 | if (error) |
2003 | goto done; | 1974 | goto done; |
2004 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | 1975 | XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); |
2005 | error = xfs_bmbt_update(bma->cur, LEFT.br_startoff, | 1976 | error = xfs_bmbt_update(bma->cur, LEFT.br_startoff, |
2006 | LEFT.br_startblock, | 1977 | LEFT.br_startblock, |
2007 | LEFT.br_blockcount + | 1978 | LEFT.br_blockcount + |
@@ -2038,12 +2009,12 @@ xfs_bmap_add_extent_delay_real( | |||
2038 | &i); | 2009 | &i); |
2039 | if (error) | 2010 | if (error) |
2040 | goto done; | 2011 | goto done; |
2041 | XFS_WANT_CORRUPTED_GOTO(i == 0, done); | 2012 | XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done); |
2042 | bma->cur->bc_rec.b.br_state = XFS_EXT_NORM; | 2013 | bma->cur->bc_rec.b.br_state = XFS_EXT_NORM; |
2043 | error = xfs_btree_insert(bma->cur, &i); | 2014 | error = xfs_btree_insert(bma->cur, &i); |
2044 | if (error) | 2015 | if (error) |
2045 | goto done; | 2016 | goto done; |
2046 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | 2017 | XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); |
2047 | } | 2018 | } |
2048 | 2019 | ||
2049 | if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) { | 2020 | if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) { |
@@ -2084,7 +2055,7 @@ xfs_bmap_add_extent_delay_real( | |||
2084 | RIGHT.br_blockcount, &i); | 2055 | RIGHT.br_blockcount, &i); |
2085 | if (error) | 2056 | if (error) |
2086 | goto done; | 2057 | goto done; |
2087 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | 2058 | XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); |
2088 | error = xfs_bmbt_update(bma->cur, new->br_startoff, | 2059 | error = xfs_bmbt_update(bma->cur, new->br_startoff, |
2089 | new->br_startblock, | 2060 | new->br_startblock, |
2090 | new->br_blockcount + | 2061 | new->br_blockcount + |
@@ -2122,12 +2093,12 @@ xfs_bmap_add_extent_delay_real( | |||
2122 | &i); | 2093 | &i); |
2123 | if (error) | 2094 | if (error) |
2124 | goto done; | 2095 | goto done; |
2125 | XFS_WANT_CORRUPTED_GOTO(i == 0, done); | 2096 | XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done); |
2126 | bma->cur->bc_rec.b.br_state = XFS_EXT_NORM; | 2097 | bma->cur->bc_rec.b.br_state = XFS_EXT_NORM; |
2127 | error = xfs_btree_insert(bma->cur, &i); | 2098 | error = xfs_btree_insert(bma->cur, &i); |
2128 | if (error) | 2099 | if (error) |
2129 | goto done; | 2100 | goto done; |
2130 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | 2101 | XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); |
2131 | } | 2102 | } |
2132 | 2103 | ||
2133 | if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) { | 2104 | if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) { |
@@ -2191,12 +2162,12 @@ xfs_bmap_add_extent_delay_real( | |||
2191 | &i); | 2162 | &i); |
2192 | if (error) | 2163 | if (error) |
2193 | goto done; | 2164 | goto done; |
2194 | XFS_WANT_CORRUPTED_GOTO(i == 0, done); | 2165 | XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done); |
2195 | bma->cur->bc_rec.b.br_state = XFS_EXT_NORM; | 2166 | bma->cur->bc_rec.b.br_state = XFS_EXT_NORM; |
2196 | error = xfs_btree_insert(bma->cur, &i); | 2167 | error = xfs_btree_insert(bma->cur, &i); |
2197 | if (error) | 2168 | if (error) |
2198 | goto done; | 2169 | goto done; |
2199 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | 2170 | XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); |
2200 | } | 2171 | } |
2201 | 2172 | ||
2202 | if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) { | 2173 | if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) { |
@@ -2212,9 +2183,8 @@ xfs_bmap_add_extent_delay_real( | |||
2212 | diff = (int)(temp + temp2 - startblockval(PREV.br_startblock) - | 2183 | diff = (int)(temp + temp2 - startblockval(PREV.br_startblock) - |
2213 | (bma->cur ? bma->cur->bc_private.b.allocated : 0)); | 2184 | (bma->cur ? bma->cur->bc_private.b.allocated : 0)); |
2214 | if (diff > 0) { | 2185 | if (diff > 0) { |
2215 | error = xfs_icsb_modify_counters(bma->ip->i_mount, | 2186 | error = xfs_mod_fdblocks(bma->ip->i_mount, |
2216 | XFS_SBS_FDBLOCKS, | 2187 | -((int64_t)diff), false); |
2217 | -((int64_t)diff), 0); | ||
2218 | ASSERT(!error); | 2188 | ASSERT(!error); |
2219 | if (error) | 2189 | if (error) |
2220 | goto done; | 2190 | goto done; |
@@ -2265,9 +2235,8 @@ xfs_bmap_add_extent_delay_real( | |||
2265 | temp += bma->cur->bc_private.b.allocated; | 2235 | temp += bma->cur->bc_private.b.allocated; |
2266 | ASSERT(temp <= da_old); | 2236 | ASSERT(temp <= da_old); |
2267 | if (temp < da_old) | 2237 | if (temp < da_old) |
2268 | xfs_icsb_modify_counters(bma->ip->i_mount, | 2238 | xfs_mod_fdblocks(bma->ip->i_mount, |
2269 | XFS_SBS_FDBLOCKS, | 2239 | (int64_t)(da_old - temp), false); |
2270 | (int64_t)(da_old - temp), 0); | ||
2271 | } | 2240 | } |
2272 | 2241 | ||
2273 | /* clear out the allocated field, done with it now in any case. */ | 2242 | /* clear out the allocated field, done with it now in any case. */ |
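
The counter updates in the two hunks above swap xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, delta, rsvd) for xfs_mod_fdblocks(mp, delta, rsvd), dropping the per-counter selector now that free data blocks get a dedicated helper. Assuming the prototype this series adds alongside in fs/xfs/xfs_mount.c (inferred from the call sites here, shown for orientation rather than as the authoritative declaration):

int xfs_mod_fdblocks(struct xfs_mount *mp, int64_t delta, bool rsvd);

	/* give back unused delalloc reservation; no reserved-pool dipping */
	xfs_mod_fdblocks(bma->ip->i_mount, (int64_t)(da_old - temp), false);
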
@@ -2309,6 +2278,7 @@ xfs_bmap_add_extent_unwritten_real( | |||
2309 | /* left is 0, right is 1, prev is 2 */ | 2278 | /* left is 0, right is 1, prev is 2 */ |
2310 | int rval=0; /* return value (logging flags) */ | 2279 | int rval=0; /* return value (logging flags) */ |
2311 | int state = 0;/* state bits, accessed thru macros */ | 2280 | int state = 0;/* state bits, accessed thru macros */ |
2281 | struct xfs_mount *mp = tp->t_mountp; | ||
2312 | 2282 | ||
2313 | *logflagsp = 0; | 2283 | *logflagsp = 0; |
2314 | 2284 | ||
@@ -2421,19 +2391,19 @@ xfs_bmap_add_extent_unwritten_real( | |||
2421 | RIGHT.br_startblock, | 2391 | RIGHT.br_startblock, |
2422 | RIGHT.br_blockcount, &i))) | 2392 | RIGHT.br_blockcount, &i))) |
2423 | goto done; | 2393 | goto done; |
2424 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | 2394 | XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); |
2425 | if ((error = xfs_btree_delete(cur, &i))) | 2395 | if ((error = xfs_btree_delete(cur, &i))) |
2426 | goto done; | 2396 | goto done; |
2427 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | 2397 | XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); |
2428 | if ((error = xfs_btree_decrement(cur, 0, &i))) | 2398 | if ((error = xfs_btree_decrement(cur, 0, &i))) |
2429 | goto done; | 2399 | goto done; |
2430 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | 2400 | XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); |
2431 | if ((error = xfs_btree_delete(cur, &i))) | 2401 | if ((error = xfs_btree_delete(cur, &i))) |
2432 | goto done; | 2402 | goto done; |
2433 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | 2403 | XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); |
2434 | if ((error = xfs_btree_decrement(cur, 0, &i))) | 2404 | if ((error = xfs_btree_decrement(cur, 0, &i))) |
2435 | goto done; | 2405 | goto done; |
2436 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | 2406 | XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); |
2437 | if ((error = xfs_bmbt_update(cur, LEFT.br_startoff, | 2407 | if ((error = xfs_bmbt_update(cur, LEFT.br_startoff, |
2438 | LEFT.br_startblock, | 2408 | LEFT.br_startblock, |
2439 | LEFT.br_blockcount + PREV.br_blockcount + | 2409 | LEFT.br_blockcount + PREV.br_blockcount + |
@@ -2464,13 +2434,13 @@ xfs_bmap_add_extent_unwritten_real( | |||
2464 | PREV.br_startblock, PREV.br_blockcount, | 2434 | PREV.br_startblock, PREV.br_blockcount, |
2465 | &i))) | 2435 | &i))) |
2466 | goto done; | 2436 | goto done; |
2467 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | 2437 | XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); |
2468 | if ((error = xfs_btree_delete(cur, &i))) | 2438 | if ((error = xfs_btree_delete(cur, &i))) |
2469 | goto done; | 2439 | goto done; |
2470 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | 2440 | XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); |
2471 | if ((error = xfs_btree_decrement(cur, 0, &i))) | 2441 | if ((error = xfs_btree_decrement(cur, 0, &i))) |
2472 | goto done; | 2442 | goto done; |
2473 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | 2443 | XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); |
2474 | if ((error = xfs_bmbt_update(cur, LEFT.br_startoff, | 2444 | if ((error = xfs_bmbt_update(cur, LEFT.br_startoff, |
2475 | LEFT.br_startblock, | 2445 | LEFT.br_startblock, |
2476 | LEFT.br_blockcount + PREV.br_blockcount, | 2446 | LEFT.br_blockcount + PREV.br_blockcount, |
@@ -2499,13 +2469,13 @@ xfs_bmap_add_extent_unwritten_real( | |||
2499 | RIGHT.br_startblock, | 2469 | RIGHT.br_startblock, |
2500 | RIGHT.br_blockcount, &i))) | 2470 | RIGHT.br_blockcount, &i))) |
2501 | goto done; | 2471 | goto done; |
2502 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | 2472 | XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); |
2503 | if ((error = xfs_btree_delete(cur, &i))) | 2473 | if ((error = xfs_btree_delete(cur, &i))) |
2504 | goto done; | 2474 | goto done; |
2505 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | 2475 | XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); |
2506 | if ((error = xfs_btree_decrement(cur, 0, &i))) | 2476 | if ((error = xfs_btree_decrement(cur, 0, &i))) |
2507 | goto done; | 2477 | goto done; |
2508 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | 2478 | XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); |
2509 | if ((error = xfs_bmbt_update(cur, new->br_startoff, | 2479 | if ((error = xfs_bmbt_update(cur, new->br_startoff, |
2510 | new->br_startblock, | 2480 | new->br_startblock, |
2511 | new->br_blockcount + RIGHT.br_blockcount, | 2481 | new->br_blockcount + RIGHT.br_blockcount, |
@@ -2532,7 +2502,7 @@ xfs_bmap_add_extent_unwritten_real( | |||
2532 | new->br_startblock, new->br_blockcount, | 2502 | new->br_startblock, new->br_blockcount, |
2533 | &i))) | 2503 | &i))) |
2534 | goto done; | 2504 | goto done; |
2535 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | 2505 | XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); |
2536 | if ((error = xfs_bmbt_update(cur, new->br_startoff, | 2506 | if ((error = xfs_bmbt_update(cur, new->br_startoff, |
2537 | new->br_startblock, new->br_blockcount, | 2507 | new->br_startblock, new->br_blockcount, |
2538 | newext))) | 2508 | newext))) |
@@ -2569,7 +2539,7 @@ xfs_bmap_add_extent_unwritten_real( | |||
2569 | PREV.br_startblock, PREV.br_blockcount, | 2539 | PREV.br_startblock, PREV.br_blockcount, |
2570 | &i))) | 2540 | &i))) |
2571 | goto done; | 2541 | goto done; |
2572 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | 2542 | XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); |
2573 | if ((error = xfs_bmbt_update(cur, | 2543 | if ((error = xfs_bmbt_update(cur, |
2574 | PREV.br_startoff + new->br_blockcount, | 2544 | PREV.br_startoff + new->br_blockcount, |
2575 | PREV.br_startblock + new->br_blockcount, | 2545 | PREV.br_startblock + new->br_blockcount, |
@@ -2611,7 +2581,7 @@ xfs_bmap_add_extent_unwritten_real( | |||
2611 | PREV.br_startblock, PREV.br_blockcount, | 2581 | PREV.br_startblock, PREV.br_blockcount, |
2612 | &i))) | 2582 | &i))) |
2613 | goto done; | 2583 | goto done; |
2614 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | 2584 | XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); |
2615 | if ((error = xfs_bmbt_update(cur, | 2585 | if ((error = xfs_bmbt_update(cur, |
2616 | PREV.br_startoff + new->br_blockcount, | 2586 | PREV.br_startoff + new->br_blockcount, |
2617 | PREV.br_startblock + new->br_blockcount, | 2587 | PREV.br_startblock + new->br_blockcount, |
@@ -2621,7 +2591,7 @@ xfs_bmap_add_extent_unwritten_real( | |||
2621 | cur->bc_rec.b = *new; | 2591 | cur->bc_rec.b = *new; |
2622 | if ((error = xfs_btree_insert(cur, &i))) | 2592 | if ((error = xfs_btree_insert(cur, &i))) |
2623 | goto done; | 2593 | goto done; |
2624 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | 2594 | XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); |
2625 | } | 2595 | } |
2626 | break; | 2596 | break; |
2627 | 2597 | ||
@@ -2651,7 +2621,7 @@ xfs_bmap_add_extent_unwritten_real( | |||
2651 | PREV.br_startblock, | 2621 | PREV.br_startblock, |
2652 | PREV.br_blockcount, &i))) | 2622 | PREV.br_blockcount, &i))) |
2653 | goto done; | 2623 | goto done; |
2654 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | 2624 | XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); |
2655 | if ((error = xfs_bmbt_update(cur, PREV.br_startoff, | 2625 | if ((error = xfs_bmbt_update(cur, PREV.br_startoff, |
2656 | PREV.br_startblock, | 2626 | PREV.br_startblock, |
2657 | PREV.br_blockcount - new->br_blockcount, | 2627 | PREV.br_blockcount - new->br_blockcount, |
@@ -2689,7 +2659,7 @@ xfs_bmap_add_extent_unwritten_real( | |||
2689 | PREV.br_startblock, PREV.br_blockcount, | 2659 | PREV.br_startblock, PREV.br_blockcount, |
2690 | &i))) | 2660 | &i))) |
2691 | goto done; | 2661 | goto done; |
2692 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | 2662 | XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); |
2693 | if ((error = xfs_bmbt_update(cur, PREV.br_startoff, | 2663 | if ((error = xfs_bmbt_update(cur, PREV.br_startoff, |
2694 | PREV.br_startblock, | 2664 | PREV.br_startblock, |
2695 | PREV.br_blockcount - new->br_blockcount, | 2665 | PREV.br_blockcount - new->br_blockcount, |
@@ -2699,11 +2669,11 @@ xfs_bmap_add_extent_unwritten_real( | |||
2699 | new->br_startblock, new->br_blockcount, | 2669 | new->br_startblock, new->br_blockcount, |
2700 | &i))) | 2670 | &i))) |
2701 | goto done; | 2671 | goto done; |
2702 | XFS_WANT_CORRUPTED_GOTO(i == 0, done); | 2672 | XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done); |
2703 | cur->bc_rec.b.br_state = XFS_EXT_NORM; | 2673 | cur->bc_rec.b.br_state = XFS_EXT_NORM; |
2704 | if ((error = xfs_btree_insert(cur, &i))) | 2674 | if ((error = xfs_btree_insert(cur, &i))) |
2705 | goto done; | 2675 | goto done; |
2706 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | 2676 | XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); |
2707 | } | 2677 | } |
2708 | break; | 2678 | break; |
2709 | 2679 | ||
@@ -2737,7 +2707,7 @@ xfs_bmap_add_extent_unwritten_real( | |||
2737 | PREV.br_startblock, PREV.br_blockcount, | 2707 | PREV.br_startblock, PREV.br_blockcount, |
2738 | &i))) | 2708 | &i))) |
2739 | goto done; | 2709 | goto done; |
2740 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | 2710 | XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); |
2741 | /* new right extent - oldext */ | 2711 | /* new right extent - oldext */ |
2742 | if ((error = xfs_bmbt_update(cur, r[1].br_startoff, | 2712 | if ((error = xfs_bmbt_update(cur, r[1].br_startoff, |
2743 | r[1].br_startblock, r[1].br_blockcount, | 2713 | r[1].br_startblock, r[1].br_blockcount, |
@@ -2749,7 +2719,7 @@ xfs_bmap_add_extent_unwritten_real( | |||
2749 | new->br_startoff - PREV.br_startoff; | 2719 | new->br_startoff - PREV.br_startoff; |
2750 | if ((error = xfs_btree_insert(cur, &i))) | 2720 | if ((error = xfs_btree_insert(cur, &i))) |
2751 | goto done; | 2721 | goto done; |
2752 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | 2722 | XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); |
2753 | /* | 2723 | /* |
2754 | * Reset the cursor to the position of the new extent | 2724 | * Reset the cursor to the position of the new extent |
2755 | * we are about to insert as we can't trust it after | 2725 | * we are about to insert as we can't trust it after |
@@ -2759,12 +2729,12 @@ xfs_bmap_add_extent_unwritten_real( | |||
2759 | new->br_startblock, new->br_blockcount, | 2729 | new->br_startblock, new->br_blockcount, |
2760 | &i))) | 2730 | &i))) |
2761 | goto done; | 2731 | goto done; |
2762 | XFS_WANT_CORRUPTED_GOTO(i == 0, done); | 2732 | XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done); |
2763 | /* new middle extent - newext */ | 2733 | /* new middle extent - newext */ |
2764 | cur->bc_rec.b.br_state = new->br_state; | 2734 | cur->bc_rec.b.br_state = new->br_state; |
2765 | if ((error = xfs_btree_insert(cur, &i))) | 2735 | if ((error = xfs_btree_insert(cur, &i))) |
2766 | goto done; | 2736 | goto done; |
2767 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | 2737 | XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); |
2768 | } | 2738 | } |
2769 | break; | 2739 | break; |
2770 | 2740 | ||
@@ -2944,8 +2914,8 @@ xfs_bmap_add_extent_hole_delay( | |||
2944 | } | 2914 | } |
2945 | if (oldlen != newlen) { | 2915 | if (oldlen != newlen) { |
2946 | ASSERT(oldlen > newlen); | 2916 | ASSERT(oldlen > newlen); |
2947 | xfs_icsb_modify_counters(ip->i_mount, XFS_SBS_FDBLOCKS, | 2917 | xfs_mod_fdblocks(ip->i_mount, (int64_t)(oldlen - newlen), |
2948 | (int64_t)(oldlen - newlen), 0); | 2918 | false); |
2949 | /* | 2919 | /* |
2950 | * Nothing to do for disk quota accounting here. | 2920 | * Nothing to do for disk quota accounting here. |
2951 | */ | 2921 | */ |
@@ -2968,7 +2938,9 @@ xfs_bmap_add_extent_hole_real( | |||
2968 | xfs_bmbt_irec_t right; /* right neighbor extent entry */ | 2938 | xfs_bmbt_irec_t right; /* right neighbor extent entry */ |
2969 | int rval=0; /* return value (logging flags) */ | 2939 | int rval=0; /* return value (logging flags) */ |
2970 | int state; /* state bits, accessed thru macros */ | 2940 | int state; /* state bits, accessed thru macros */ |
2941 | struct xfs_mount *mp; | ||
2971 | 2942 | ||
2943 | mp = bma->tp ? bma->tp->t_mountp : NULL; | ||
2972 | ifp = XFS_IFORK_PTR(bma->ip, whichfork); | 2944 | ifp = XFS_IFORK_PTR(bma->ip, whichfork); |
2973 | 2945 | ||
2974 | ASSERT(bma->idx >= 0); | 2946 | ASSERT(bma->idx >= 0); |
@@ -3056,15 +3028,15 @@ xfs_bmap_add_extent_hole_real( | |||
3056 | &i); | 3028 | &i); |
3057 | if (error) | 3029 | if (error) |
3058 | goto done; | 3030 | goto done; |
3059 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | 3031 | XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); |
3060 | error = xfs_btree_delete(bma->cur, &i); | 3032 | error = xfs_btree_delete(bma->cur, &i); |
3061 | if (error) | 3033 | if (error) |
3062 | goto done; | 3034 | goto done; |
3063 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | 3035 | XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); |
3064 | error = xfs_btree_decrement(bma->cur, 0, &i); | 3036 | error = xfs_btree_decrement(bma->cur, 0, &i); |
3065 | if (error) | 3037 | if (error) |
3066 | goto done; | 3038 | goto done; |
3067 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | 3039 | XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); |
3068 | error = xfs_bmbt_update(bma->cur, left.br_startoff, | 3040 | error = xfs_bmbt_update(bma->cur, left.br_startoff, |
3069 | left.br_startblock, | 3041 | left.br_startblock, |
3070 | left.br_blockcount + | 3042 | left.br_blockcount + |
@@ -3097,7 +3069,7 @@ xfs_bmap_add_extent_hole_real( | |||
3097 | &i); | 3069 | &i); |
3098 | if (error) | 3070 | if (error) |
3099 | goto done; | 3071 | goto done; |
3100 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | 3072 | XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); |
3101 | error = xfs_bmbt_update(bma->cur, left.br_startoff, | 3073 | error = xfs_bmbt_update(bma->cur, left.br_startoff, |
3102 | left.br_startblock, | 3074 | left.br_startblock, |
3103 | left.br_blockcount + | 3075 | left.br_blockcount + |
@@ -3131,7 +3103,7 @@ xfs_bmap_add_extent_hole_real( | |||
3131 | right.br_blockcount, &i); | 3103 | right.br_blockcount, &i); |
3132 | if (error) | 3104 | if (error) |
3133 | goto done; | 3105 | goto done; |
3134 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | 3106 | XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); |
3135 | error = xfs_bmbt_update(bma->cur, new->br_startoff, | 3107 | error = xfs_bmbt_update(bma->cur, new->br_startoff, |
3136 | new->br_startblock, | 3108 | new->br_startblock, |
3137 | new->br_blockcount + | 3109 | new->br_blockcount + |
@@ -3161,12 +3133,12 @@ xfs_bmap_add_extent_hole_real( | |||
3161 | new->br_blockcount, &i); | 3133 | new->br_blockcount, &i); |
3162 | if (error) | 3134 | if (error) |
3163 | goto done; | 3135 | goto done; |
3164 | XFS_WANT_CORRUPTED_GOTO(i == 0, done); | 3136 | XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done); |
3165 | bma->cur->bc_rec.b.br_state = new->br_state; | 3137 | bma->cur->bc_rec.b.br_state = new->br_state; |
3166 | error = xfs_btree_insert(bma->cur, &i); | 3138 | error = xfs_btree_insert(bma->cur, &i); |
3167 | if (error) | 3139 | if (error) |
3168 | goto done; | 3140 | goto done; |
3169 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | 3141 | XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); |
3170 | } | 3142 | } |
3171 | break; | 3143 | break; |
3172 | } | 3144 | } |
@@ -4160,18 +4132,15 @@ xfs_bmapi_reserve_delalloc( | |||
4160 | ASSERT(indlen > 0); | 4132 | ASSERT(indlen > 0); |
4161 | 4133 | ||
4162 | if (rt) { | 4134 | if (rt) { |
4163 | error = xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS, | 4135 | error = xfs_mod_frextents(mp, -((int64_t)extsz)); |
4164 | -((int64_t)extsz), 0); | ||
4165 | } else { | 4136 | } else { |
4166 | error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, | 4137 | error = xfs_mod_fdblocks(mp, -((int64_t)alen), false); |
4167 | -((int64_t)alen), 0); | ||
4168 | } | 4138 | } |
4169 | 4139 | ||
4170 | if (error) | 4140 | if (error) |
4171 | goto out_unreserve_quota; | 4141 | goto out_unreserve_quota; |
4172 | 4142 | ||
4173 | error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, | 4143 | error = xfs_mod_fdblocks(mp, -((int64_t)indlen), false); |
4174 | -((int64_t)indlen), 0); | ||
4175 | if (error) | 4144 | if (error) |
4176 | goto out_unreserve_blocks; | 4145 | goto out_unreserve_blocks; |
4177 | 4146 | ||
@@ -4198,9 +4167,9 @@ xfs_bmapi_reserve_delalloc( | |||
4198 | 4167 | ||
4199 | out_unreserve_blocks: | 4168 | out_unreserve_blocks: |
4200 | if (rt) | 4169 | if (rt) |
4201 | xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS, extsz, 0); | 4170 | xfs_mod_frextents(mp, extsz); |
4202 | else | 4171 | else |
4203 | xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, alen, 0); | 4172 | xfs_mod_fdblocks(mp, alen, false); |
4204 | out_unreserve_quota: | 4173 | out_unreserve_quota: |
4205 | if (XFS_IS_QUOTA_ON(mp)) | 4174 | if (XFS_IS_QUOTA_ON(mp)) |
4206 | xfs_trans_unreserve_quota_nblks(NULL, ip, (long)alen, 0, rt ? | 4175 | xfs_trans_unreserve_quota_nblks(NULL, ip, (long)alen, 0, rt ? |
@@ -4801,7 +4770,7 @@ xfs_bmap_del_extent( | |||
4801 | got.br_startblock, got.br_blockcount, | 4770 | got.br_startblock, got.br_blockcount, |
4802 | &i))) | 4771 | &i))) |
4803 | goto done; | 4772 | goto done; |
4804 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | 4773 | XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); |
4805 | } | 4774 | } |
4806 | da_old = da_new = 0; | 4775 | da_old = da_new = 0; |
4807 | } else { | 4776 | } else { |
@@ -4835,7 +4804,7 @@ xfs_bmap_del_extent( | |||
4835 | } | 4804 | } |
4836 | if ((error = xfs_btree_delete(cur, &i))) | 4805 | if ((error = xfs_btree_delete(cur, &i))) |
4837 | goto done; | 4806 | goto done; |
4838 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | 4807 | XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); |
4839 | break; | 4808 | break; |
4840 | 4809 | ||
4841 | case 2: | 4810 | case 2: |
@@ -4935,7 +4904,8 @@ xfs_bmap_del_extent( | |||
4935 | got.br_startblock, | 4904 | got.br_startblock, |
4936 | temp, &i))) | 4905 | temp, &i))) |
4937 | goto done; | 4906 | goto done; |
4938 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | 4907 | XFS_WANT_CORRUPTED_GOTO(mp, |
4908 | i == 1, done); | ||
4939 | /* | 4909 | /* |
4940 | * Update the btree record back | 4910 | * Update the btree record back |
4941 | * to the original value. | 4911 | * to the original value. |
@@ -4956,7 +4926,7 @@ xfs_bmap_del_extent( | |||
4956 | error = -ENOSPC; | 4926 | error = -ENOSPC; |
4957 | goto done; | 4927 | goto done; |
4958 | } | 4928 | } |
4959 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | 4929 | XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); |
4960 | } else | 4930 | } else |
4961 | flags |= xfs_ilog_fext(whichfork); | 4931 | flags |= xfs_ilog_fext(whichfork); |
4962 | XFS_IFORK_NEXT_SET(ip, whichfork, | 4932 | XFS_IFORK_NEXT_SET(ip, whichfork, |
@@ -5012,10 +4982,8 @@ xfs_bmap_del_extent( | |||
5012 | * Nothing to do for disk quota accounting here. | 4982 | * Nothing to do for disk quota accounting here. |
5013 | */ | 4983 | */ |
5014 | ASSERT(da_old >= da_new); | 4984 | ASSERT(da_old >= da_new); |
5015 | if (da_old > da_new) { | 4985 | if (da_old > da_new) |
5016 | xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, | 4986 | xfs_mod_fdblocks(mp, (int64_t)(da_old - da_new), false); |
5017 | (int64_t)(da_old - da_new), 0); | ||
5018 | } | ||
5019 | done: | 4987 | done: |
5020 | *logflagsp = flags; | 4988 | *logflagsp = flags; |
5021 | return error; | 4989 | return error; |
@@ -5284,14 +5252,13 @@ xfs_bunmapi( | |||
5284 | 5252 | ||
5285 | rtexts = XFS_FSB_TO_B(mp, del.br_blockcount); | 5253 | rtexts = XFS_FSB_TO_B(mp, del.br_blockcount); |
5286 | do_div(rtexts, mp->m_sb.sb_rextsize); | 5254 | do_div(rtexts, mp->m_sb.sb_rextsize); |
5287 | xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS, | 5255 | xfs_mod_frextents(mp, (int64_t)rtexts); |
5288 | (int64_t)rtexts, 0); | ||
5289 | (void)xfs_trans_reserve_quota_nblks(NULL, | 5256 | (void)xfs_trans_reserve_quota_nblks(NULL, |
5290 | ip, -((long)del.br_blockcount), 0, | 5257 | ip, -((long)del.br_blockcount), 0, |
5291 | XFS_QMOPT_RES_RTBLKS); | 5258 | XFS_QMOPT_RES_RTBLKS); |
5292 | } else { | 5259 | } else { |
5293 | xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, | 5260 | xfs_mod_fdblocks(mp, (int64_t)del.br_blockcount, |
5294 | (int64_t)del.br_blockcount, 0); | 5261 | false); |
5295 | (void)xfs_trans_reserve_quota_nblks(NULL, | 5262 | (void)xfs_trans_reserve_quota_nblks(NULL, |
5296 | ip, -((long)del.br_blockcount), 0, | 5263 | ip, -((long)del.br_blockcount), 0, |
5297 | XFS_QMOPT_RES_REGBLKS); | 5264 | XFS_QMOPT_RES_REGBLKS); |
@@ -5453,6 +5420,7 @@ xfs_bmse_merge( | |||
5453 | struct xfs_bmbt_irec left; | 5420 | struct xfs_bmbt_irec left; |
5454 | xfs_filblks_t blockcount; | 5421 | xfs_filblks_t blockcount; |
5455 | int error, i; | 5422 | int error, i; |
5423 | struct xfs_mount *mp = ip->i_mount; | ||
5456 | 5424 | ||
5457 | xfs_bmbt_get_all(gotp, &got); | 5425 | xfs_bmbt_get_all(gotp, &got); |
5458 | xfs_bmbt_get_all(leftp, &left); | 5426 | xfs_bmbt_get_all(leftp, &left); |
@@ -5487,19 +5455,19 @@ xfs_bmse_merge( | |||
5487 | got.br_blockcount, &i); | 5455 | got.br_blockcount, &i); |
5488 | if (error) | 5456 | if (error) |
5489 | return error; | 5457 | return error; |
5490 | XFS_WANT_CORRUPTED_RETURN(i == 1); | 5458 | XFS_WANT_CORRUPTED_RETURN(mp, i == 1); |
5491 | 5459 | ||
5492 | error = xfs_btree_delete(cur, &i); | 5460 | error = xfs_btree_delete(cur, &i); |
5493 | if (error) | 5461 | if (error) |
5494 | return error; | 5462 | return error; |
5495 | XFS_WANT_CORRUPTED_RETURN(i == 1); | 5463 | XFS_WANT_CORRUPTED_RETURN(mp, i == 1); |
5496 | 5464 | ||
5497 | /* lookup and update size of the previous extent */ | 5465 | /* lookup and update size of the previous extent */ |
5498 | error = xfs_bmbt_lookup_eq(cur, left.br_startoff, left.br_startblock, | 5466 | error = xfs_bmbt_lookup_eq(cur, left.br_startoff, left.br_startblock, |
5499 | left.br_blockcount, &i); | 5467 | left.br_blockcount, &i); |
5500 | if (error) | 5468 | if (error) |
5501 | return error; | 5469 | return error; |
5502 | XFS_WANT_CORRUPTED_RETURN(i == 1); | 5470 | XFS_WANT_CORRUPTED_RETURN(mp, i == 1); |
5503 | 5471 | ||
5504 | left.br_blockcount = blockcount; | 5472 | left.br_blockcount = blockcount; |
5505 | 5473 | ||
@@ -5518,50 +5486,92 @@ xfs_bmse_shift_one( | |||
5518 | int *current_ext, | 5486 | int *current_ext, |
5519 | struct xfs_bmbt_rec_host *gotp, | 5487 | struct xfs_bmbt_rec_host *gotp, |
5520 | struct xfs_btree_cur *cur, | 5488 | struct xfs_btree_cur *cur, |
5521 | int *logflags) | 5489 | int *logflags, |
5490 | enum shift_direction direction) | ||
5522 | { | 5491 | { |
5523 | struct xfs_ifork *ifp; | 5492 | struct xfs_ifork *ifp; |
5493 | struct xfs_mount *mp; | ||
5524 | xfs_fileoff_t startoff; | 5494 | xfs_fileoff_t startoff; |
5525 | struct xfs_bmbt_rec_host *leftp; | 5495 | struct xfs_bmbt_rec_host *adj_irecp; |
5526 | struct xfs_bmbt_irec got; | 5496 | struct xfs_bmbt_irec got; |
5527 | struct xfs_bmbt_irec left; | 5497 | struct xfs_bmbt_irec adj_irec; |
5528 | int error; | 5498 | int error; |
5529 | int i; | 5499 | int i; |
5500 | int total_extents; | ||
5530 | 5501 | ||
5502 | mp = ip->i_mount; | ||
5531 | ifp = XFS_IFORK_PTR(ip, whichfork); | 5503 | ifp = XFS_IFORK_PTR(ip, whichfork); |
5504 | total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t); | ||
5532 | 5505 | ||
5533 | xfs_bmbt_get_all(gotp, &got); | 5506 | xfs_bmbt_get_all(gotp, &got); |
5534 | startoff = got.br_startoff - offset_shift_fsb; | ||
5535 | 5507 | ||
5536 | /* delalloc extents should be prevented by caller */ | 5508 | /* delalloc extents should be prevented by caller */ |
5537 | XFS_WANT_CORRUPTED_RETURN(!isnullstartblock(got.br_startblock)); | 5509 | XFS_WANT_CORRUPTED_RETURN(mp, !isnullstartblock(got.br_startblock)); |
5538 | 5510 | ||
5539 | /* | 5511 | if (direction == SHIFT_LEFT) { |
5540 | * Check for merge if we've got an extent to the left, otherwise make | 5512 | startoff = got.br_startoff - offset_shift_fsb; |
5541 | * sure there's enough room at the start of the file for the shift. | 5513 | |
5542 | */ | 5514 | /* |
5543 | if (*current_ext) { | 5515 | * Check for merge if we've got an extent to the left, |
5544 | /* grab the left extent and check for a large enough hole */ | 5516 | * otherwise make sure there's enough room at the start |
5545 | leftp = xfs_iext_get_ext(ifp, *current_ext - 1); | 5517 | * of the file for the shift. |
5546 | xfs_bmbt_get_all(leftp, &left); | 5518 | */ |
5519 | if (!*current_ext) { | ||
5520 | if (got.br_startoff < offset_shift_fsb) | ||
5521 | return -EINVAL; | ||
5522 | goto update_current_ext; | ||
5523 | } | ||
5524 | /* | ||
5525 | * grab the left extent and check for a large | ||
5526 | * enough hole. | ||
5527 | */ | ||
5528 | adj_irecp = xfs_iext_get_ext(ifp, *current_ext - 1); | ||
5529 | xfs_bmbt_get_all(adj_irecp, &adj_irec); | ||
5547 | 5530 | ||
5548 | if (startoff < left.br_startoff + left.br_blockcount) | 5531 | if (startoff < |
5532 | adj_irec.br_startoff + adj_irec.br_blockcount) | ||
5549 | return -EINVAL; | 5533 | return -EINVAL; |
5550 | 5534 | ||
5551 | /* check whether to merge the extent or shift it down */ | 5535 | /* check whether to merge the extent or shift it down */ |
5552 | if (xfs_bmse_can_merge(&left, &got, offset_shift_fsb)) { | 5536 | if (xfs_bmse_can_merge(&adj_irec, &got, |
5537 | offset_shift_fsb)) { | ||
5553 | return xfs_bmse_merge(ip, whichfork, offset_shift_fsb, | 5538 | return xfs_bmse_merge(ip, whichfork, offset_shift_fsb, |
5554 | *current_ext, gotp, leftp, cur, | 5539 | *current_ext, gotp, adj_irecp, |
5555 | logflags); | 5540 | cur, logflags); |
5556 | } | 5541 | } |
5557 | } else if (got.br_startoff < offset_shift_fsb) | 5542 | } else { |
5558 | return -EINVAL; | 5543 | startoff = got.br_startoff + offset_shift_fsb; |
5559 | 5544 | /* nothing to move if this is the last extent */ | |
5545 | if (*current_ext >= (total_extents - 1)) | ||
5546 | goto update_current_ext; | ||
5547 | /* | ||
5548 | * If this is not the last extent in the file, make sure there | ||
5549 | * is enough room between the current extent and the next one | ||
5550 | * to accommodate the shift. | ||
5551 | */ | ||
5552 | adj_irecp = xfs_iext_get_ext(ifp, *current_ext + 1); | ||
5553 | xfs_bmbt_get_all(adj_irecp, &adj_irec); | ||
5554 | if (startoff + got.br_blockcount > adj_irec.br_startoff) | ||
5555 | return -EINVAL; | ||
5556 | /* | ||
5557 | * Unlike a left shift (which involves a hole punch), | ||
5558 | * a right shift does not modify extent neighbors | ||
5559 | * in any way. We should never find mergeable extents | ||
5560 | * in this scenario. Check anyway and warn if we | ||
5561 | * encounter two extents that could be one. | ||
5562 | */ | ||
5563 | if (xfs_bmse_can_merge(&got, &adj_irec, offset_shift_fsb)) | ||
5564 | WARN_ON_ONCE(1); | ||
5565 | } | ||
5560 | /* | 5566 | /* |
5561 | * Increment the extent index for the next iteration, update the start | 5567 | * Increment the extent index for the next iteration, update the start |
5562 | * offset of the in-core extent and update the btree if applicable. | 5568 | * offset of the in-core extent and update the btree if applicable. |
5563 | */ | 5569 | */ |
5564 | (*current_ext)++; | 5570 | update_current_ext: |
5571 | if (direction == SHIFT_LEFT) | ||
5572 | (*current_ext)++; | ||
5573 | else | ||
5574 | (*current_ext)--; | ||
5565 | xfs_bmbt_set_startoff(gotp, startoff); | 5575 | xfs_bmbt_set_startoff(gotp, startoff); |
5566 | *logflags |= XFS_ILOG_CORE; | 5576 | *logflags |= XFS_ILOG_CORE; |
5567 | if (!cur) { | 5577 | if (!cur) { |
@@ -5573,18 +5583,18 @@ xfs_bmse_shift_one( | |||
5573 | got.br_blockcount, &i); | 5583 | got.br_blockcount, &i); |
5574 | if (error) | 5584 | if (error) |
5575 | return error; | 5585 | return error; |
5576 | XFS_WANT_CORRUPTED_RETURN(i == 1); | 5586 | XFS_WANT_CORRUPTED_RETURN(mp, i == 1); |
5577 | 5587 | ||
5578 | got.br_startoff = startoff; | 5588 | got.br_startoff = startoff; |
5579 | return xfs_bmbt_update(cur, got.br_startoff, got.br_startblock, | 5589 | return xfs_bmbt_update(cur, got.br_startoff, got.br_startblock, |
5580 | got.br_blockcount, got.br_state); | 5590 | got.br_blockcount, got.br_state); |
5581 | } | 5591 | } |
5582 | 5592 | ||
5583 | /* | 5593 | /* |
5584 | * Shift extent records to the left to cover a hole. | 5594 | * Shift extent records to the left/right to cover/create a hole. |
5585 | * | 5595 | * |
5586 | * The maximum number of extents to be shifted in a single operation is | 5596 | * The maximum number of extents to be shifted in a single operation is |
5587 | * @num_exts. @start_fsb specifies the file offset to start the shift and the | 5597 | * @num_exts. @stop_fsb specifies the file offset at which to stop the shift and the |
5588 | * file offset where we've left off is returned in @next_fsb. @offset_shift_fsb | 5598 | * file offset where we've left off is returned in @next_fsb. @offset_shift_fsb |
5589 | * is the length by which each extent is shifted. If there is no hole to shift | 5599 | * is the length by which each extent is shifted. If there is no hole to shift |
5590 | * the extents into, this will be considered invalid operation and we abort | 5600 | * the extents into, this will be considered invalid operation and we abort |
@@ -5594,12 +5604,13 @@ int | |||
5594 | xfs_bmap_shift_extents( | 5604 | xfs_bmap_shift_extents( |
5595 | struct xfs_trans *tp, | 5605 | struct xfs_trans *tp, |
5596 | struct xfs_inode *ip, | 5606 | struct xfs_inode *ip, |
5597 | xfs_fileoff_t start_fsb, | 5607 | xfs_fileoff_t *next_fsb, |
5598 | xfs_fileoff_t offset_shift_fsb, | 5608 | xfs_fileoff_t offset_shift_fsb, |
5599 | int *done, | 5609 | int *done, |
5600 | xfs_fileoff_t *next_fsb, | 5610 | xfs_fileoff_t stop_fsb, |
5601 | xfs_fsblock_t *firstblock, | 5611 | xfs_fsblock_t *firstblock, |
5602 | struct xfs_bmap_free *flist, | 5612 | struct xfs_bmap_free *flist, |
5613 | enum shift_direction direction, | ||
5603 | int num_exts) | 5614 | int num_exts) |
5604 | { | 5615 | { |
5605 | struct xfs_btree_cur *cur = NULL; | 5616 | struct xfs_btree_cur *cur = NULL; |
@@ -5609,10 +5620,11 @@ xfs_bmap_shift_extents( | |||
5609 | struct xfs_ifork *ifp; | 5620 | struct xfs_ifork *ifp; |
5610 | xfs_extnum_t nexts = 0; | 5621 | xfs_extnum_t nexts = 0; |
5611 | xfs_extnum_t current_ext; | 5622 | xfs_extnum_t current_ext; |
5623 | xfs_extnum_t total_extents; | ||
5624 | xfs_extnum_t stop_extent; | ||
5612 | int error = 0; | 5625 | int error = 0; |
5613 | int whichfork = XFS_DATA_FORK; | 5626 | int whichfork = XFS_DATA_FORK; |
5614 | int logflags = 0; | 5627 | int logflags = 0; |
5615 | int total_extents; | ||
5616 | 5628 | ||
5617 | if (unlikely(XFS_TEST_ERROR( | 5629 | if (unlikely(XFS_TEST_ERROR( |
5618 | (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && | 5630 | (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && |
@@ -5628,6 +5640,8 @@ xfs_bmap_shift_extents( | |||
5628 | 5640 | ||
5629 | ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); | 5641 | ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); |
5630 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); | 5642 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); |
5643 | ASSERT(direction == SHIFT_LEFT || direction == SHIFT_RIGHT); | ||
5644 | ASSERT(*next_fsb != NULLFSBLOCK || direction == SHIFT_RIGHT); | ||
5631 | 5645 | ||
5632 | ifp = XFS_IFORK_PTR(ip, whichfork); | 5646 | ifp = XFS_IFORK_PTR(ip, whichfork); |
5633 | if (!(ifp->if_flags & XFS_IFEXTENTS)) { | 5647 | if (!(ifp->if_flags & XFS_IFEXTENTS)) { |
@@ -5645,43 +5659,83 @@ xfs_bmap_shift_extents( | |||
5645 | } | 5659 | } |
5646 | 5660 | ||
5647 | /* | 5661 | /* |
5662 | * There may be delalloc extents in the data fork before the range we | ||
5663 | * are collapsing out, so we cannot use the count of real extents here. | ||
5664 | * Instead we have to calculate it from the incore fork. | ||
5665 | */ | ||
5666 | total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t); | ||
5667 | if (total_extents == 0) { | ||
5668 | *done = 1; | ||
5669 | goto del_cursor; | ||
5670 | } | ||
5671 | |||
5672 | /* | ||
5673 | * For the first right shift, we need to initialize next_fsb | ||
5674 | */ | ||
5675 | if (*next_fsb == NULLFSBLOCK) { | ||
5676 | gotp = xfs_iext_get_ext(ifp, total_extents - 1); | ||
5677 | xfs_bmbt_get_all(gotp, &got); | ||
5678 | *next_fsb = got.br_startoff; | ||
5679 | if (stop_fsb > *next_fsb) { | ||
5680 | *done = 1; | ||
5681 | goto del_cursor; | ||
5682 | } | ||
5683 | } | ||
5684 | |||
5685 | /* Look up the extent index at which we have to stop */ | ||
5686 | if (direction == SHIFT_RIGHT) { | ||
5687 | gotp = xfs_iext_bno_to_ext(ifp, stop_fsb, &stop_extent); | ||
5688 | /* Make stop_extent exclusive of shift range */ | ||
5689 | stop_extent--; | ||
5690 | } else | ||
5691 | stop_extent = total_extents; | ||
5692 | |||
5693 | /* | ||
5648 | * Look up the extent index for the fsb where we start shifting. We can | 5694 | * Look up the extent index for the fsb where we start shifting. We can |
5649 | * henceforth iterate with current_ext as extent list changes are locked | 5695 | * henceforth iterate with current_ext as extent list changes are locked |
5650 | * out via ilock. | 5696 | * out via ilock. |
5651 | * | 5697 | * |
5652 | * gotp can be null in 2 cases: 1) if there are no extents or 2) | 5698 | * gotp can be null in 2 cases: 1) if there are no extents or 2) |
5653 | * start_fsb lies in a hole beyond which there are no extents. Either | 5699 | * *next_fsb lies in a hole beyond which there are no extents. Either |
5654 | * way, we are done. | 5700 | * way, we are done. |
5655 | */ | 5701 | */ |
5656 | gotp = xfs_iext_bno_to_ext(ifp, start_fsb, ¤t_ext); | 5702 | gotp = xfs_iext_bno_to_ext(ifp, *next_fsb, ¤t_ext); |
5657 | if (!gotp) { | 5703 | if (!gotp) { |
5658 | *done = 1; | 5704 | *done = 1; |
5659 | goto del_cursor; | 5705 | goto del_cursor; |
5660 | } | 5706 | } |
5661 | 5707 | ||
5662 | /* | 5708 | /* some sanity checking before we finally start shifting extents */ |
5663 | * There may be delalloc extents in the data fork before the range we | 5709 | if ((direction == SHIFT_LEFT && current_ext >= stop_extent) || |
5664 | * are collapsing out, so we cannot use the count of real extents here. | 5710 | (direction == SHIFT_RIGHT && current_ext <= stop_extent)) { |
5665 | * Instead we have to calculate it from the incore fork. | 5711 | error = -EIO; |
5666 | */ | 5712 | goto del_cursor; |
5667 | total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t); | 5713 | } |
5668 | while (nexts++ < num_exts && current_ext < total_extents) { | 5714 | |
5715 | while (nexts++ < num_exts) { | ||
5669 | error = xfs_bmse_shift_one(ip, whichfork, offset_shift_fsb, | 5716 | error = xfs_bmse_shift_one(ip, whichfork, offset_shift_fsb, |
5670 | ¤t_ext, gotp, cur, &logflags); | 5717 | ¤t_ext, gotp, cur, &logflags, |
5718 | direction); | ||
5671 | if (error) | 5719 | if (error) |
5672 | goto del_cursor; | 5720 | goto del_cursor; |
5721 | /* | ||
5722 | * If there was an extent merge during the shift, the extent | ||
5723 | * count can change. Update the total and grab the next record. | ||
5724 | */ | ||
5725 | if (direction == SHIFT_LEFT) { | ||
5726 | total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t); | ||
5727 | stop_extent = total_extents; | ||
5728 | } | ||
5673 | 5729 | ||
5674 | /* update total extent count and grab the next record */ | 5730 | if (current_ext == stop_extent) { |
5675 | total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t); | 5731 | *done = 1; |
5676 | if (current_ext >= total_extents) | 5732 | *next_fsb = NULLFSBLOCK; |
5677 | break; | 5733 | break; |
5734 | } | ||
5678 | gotp = xfs_iext_get_ext(ifp, current_ext); | 5735 | gotp = xfs_iext_get_ext(ifp, current_ext); |
5679 | } | 5736 | } |
5680 | 5737 | ||
5681 | /* Check if we are done */ | 5738 | if (!*done) { |
5682 | if (current_ext == total_extents) { | ||
5683 | *done = 1; | ||
5684 | } else if (next_fsb) { | ||
5685 | xfs_bmbt_get_all(gotp, &got); | 5739 | xfs_bmbt_get_all(gotp, &got); |
5686 | *next_fsb = got.br_startoff; | 5740 | *next_fsb = got.br_startoff; |
5687 | } | 5741 | } |
@@ -5696,3 +5750,189 @@ del_cursor: | |||
5696 | 5750 | ||
5697 | return error; | 5751 | return error; |
5698 | } | 5752 | } |
5753 | |||
5754 | /* | ||
5755 | * Splits an extent into two extents at split_fsb block such that it is | ||
5756 | * the first block of the current_ext. @current_ext is the target extent | ||
5757 | * to be split. @split_fsb is the block where the extent is split. | ||
5758 | * If split_fsb lies in a hole or at the first block of an extent, just return 0. | ||
5759 | */ | ||
5760 | STATIC int | ||
5761 | xfs_bmap_split_extent_at( | ||
5762 | struct xfs_trans *tp, | ||
5763 | struct xfs_inode *ip, | ||
5764 | xfs_fileoff_t split_fsb, | ||
5765 | xfs_fsblock_t *firstfsb, | ||
5766 | struct xfs_bmap_free *free_list) | ||
5767 | { | ||
5768 | int whichfork = XFS_DATA_FORK; | ||
5769 | struct xfs_btree_cur *cur = NULL; | ||
5770 | struct xfs_bmbt_rec_host *gotp; | ||
5771 | struct xfs_bmbt_irec got; | ||
5772 | struct xfs_bmbt_irec new; /* split extent */ | ||
5773 | struct xfs_mount *mp = ip->i_mount; | ||
5774 | struct xfs_ifork *ifp; | ||
5775 | xfs_fsblock_t gotblkcnt; /* new block count for got */ | ||
5776 | xfs_extnum_t current_ext; | ||
5777 | int error = 0; | ||
5778 | int logflags = 0; | ||
5779 | int i = 0; | ||
5780 | |||
5781 | if (unlikely(XFS_TEST_ERROR( | ||
5782 | (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && | ||
5783 | XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE), | ||
5784 | mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) { | ||
5785 | XFS_ERROR_REPORT("xfs_bmap_split_extent_at", | ||
5786 | XFS_ERRLEVEL_LOW, mp); | ||
5787 | return -EFSCORRUPTED; | ||
5788 | } | ||
5789 | |||
5790 | if (XFS_FORCED_SHUTDOWN(mp)) | ||
5791 | return -EIO; | ||
5792 | |||
5793 | ifp = XFS_IFORK_PTR(ip, whichfork); | ||
5794 | if (!(ifp->if_flags & XFS_IFEXTENTS)) { | ||
5795 | /* Read in all the extents */ | ||
5796 | error = xfs_iread_extents(tp, ip, whichfork); | ||
5797 | if (error) | ||
5798 | return error; | ||
5799 | } | ||
5800 | |||
5801 | /* | ||
5802 | * gotp can be null in 2 cases: 1) if there are no extents | ||
5803 | * or 2) split_fsb lies in a hole beyond which there are | ||
5804 | * no extents. Either way, we are done. | ||
5805 | */ | ||
5806 | gotp = xfs_iext_bno_to_ext(ifp, split_fsb, ¤t_ext); | ||
5807 | if (!gotp) | ||
5808 | return 0; | ||
5809 | |||
5810 | xfs_bmbt_get_all(gotp, &got); | ||
5811 | |||
5812 | /* | ||
5813 | * Check whether split_fsb lies in a hole or at the start boundary | ||
5814 | * offset of the extent. | ||
5815 | */ | ||
5816 | if (got.br_startoff >= split_fsb) | ||
5817 | return 0; | ||
5818 | |||
5819 | gotblkcnt = split_fsb - got.br_startoff; | ||
5820 | new.br_startoff = split_fsb; | ||
5821 | new.br_startblock = got.br_startblock + gotblkcnt; | ||
5822 | new.br_blockcount = got.br_blockcount - gotblkcnt; | ||
5823 | new.br_state = got.br_state; | ||
5824 | |||
5825 | if (ifp->if_flags & XFS_IFBROOT) { | ||
5826 | cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork); | ||
5827 | cur->bc_private.b.firstblock = *firstfsb; | ||
5828 | cur->bc_private.b.flist = free_list; | ||
5829 | cur->bc_private.b.flags = 0; | ||
5830 | error = xfs_bmbt_lookup_eq(cur, got.br_startoff, | ||
5831 | got.br_startblock, | ||
5832 | got.br_blockcount, | ||
5833 | &i); | ||
5834 | if (error) | ||
5835 | goto del_cursor; | ||
5836 | XFS_WANT_CORRUPTED_GOTO(mp, i == 1, del_cursor); | ||
5837 | } | ||
5838 | |||
5839 | xfs_bmbt_set_blockcount(gotp, gotblkcnt); | ||
5840 | got.br_blockcount = gotblkcnt; | ||
5841 | |||
5842 | logflags = XFS_ILOG_CORE; | ||
5843 | if (cur) { | ||
5844 | error = xfs_bmbt_update(cur, got.br_startoff, | ||
5845 | got.br_startblock, | ||
5846 | got.br_blockcount, | ||
5847 | got.br_state); | ||
5848 | if (error) | ||
5849 | goto del_cursor; | ||
5850 | } else | ||
5851 | logflags |= XFS_ILOG_DEXT; | ||
5852 | |||
5853 | /* Add new extent */ | ||
5854 | current_ext++; | ||
5855 | xfs_iext_insert(ip, current_ext, 1, &new, 0); | ||
5856 | XFS_IFORK_NEXT_SET(ip, whichfork, | ||
5857 | XFS_IFORK_NEXTENTS(ip, whichfork) + 1); | ||
5858 | |||
5859 | if (cur) { | ||
5860 | error = xfs_bmbt_lookup_eq(cur, new.br_startoff, | ||
5861 | new.br_startblock, new.br_blockcount, | ||
5862 | &i); | ||
5863 | if (error) | ||
5864 | goto del_cursor; | ||
5865 | XFS_WANT_CORRUPTED_GOTO(mp, i == 0, del_cursor); | ||
5866 | cur->bc_rec.b.br_state = new.br_state; | ||
5867 | |||
5868 | error = xfs_btree_insert(cur, &i); | ||
5869 | if (error) | ||
5870 | goto del_cursor; | ||
5871 | XFS_WANT_CORRUPTED_GOTO(mp, i == 1, del_cursor); | ||
5872 | } | ||
5873 | |||
5874 | /* | ||
5875 | * Convert to a btree if necessary. | ||
5876 | */ | ||
5877 | if (xfs_bmap_needs_btree(ip, whichfork)) { | ||
5878 | int tmp_logflags; /* partial log flag return val */ | ||
5879 | |||
5880 | ASSERT(cur == NULL); | ||
5881 | error = xfs_bmap_extents_to_btree(tp, ip, firstfsb, free_list, | ||
5882 | &cur, 0, &tmp_logflags, whichfork); | ||
5883 | logflags |= tmp_logflags; | ||
5884 | } | ||
5885 | |||
5886 | del_cursor: | ||
5887 | if (cur) { | ||
5888 | cur->bc_private.b.allocated = 0; | ||
5889 | xfs_btree_del_cursor(cur, | ||
5890 | error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); | ||
5891 | } | ||
5892 | |||
5893 | if (logflags) | ||
5894 | xfs_trans_log_inode(tp, ip, logflags); | ||
5895 | return error; | ||
5896 | } | ||
5897 | |||
5898 | int | ||
5899 | xfs_bmap_split_extent( | ||
5900 | struct xfs_inode *ip, | ||
5901 | xfs_fileoff_t split_fsb) | ||
5902 | { | ||
5903 | struct xfs_mount *mp = ip->i_mount; | ||
5904 | struct xfs_trans *tp; | ||
5905 | struct xfs_bmap_free free_list; | ||
5906 | xfs_fsblock_t firstfsb; | ||
5907 | int committed; | ||
5908 | int error; | ||
5909 | |||
5910 | tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); | ||
5911 | error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, | ||
5912 | XFS_DIOSTRAT_SPACE_RES(mp, 0), 0); | ||
5913 | if (error) { | ||
5914 | xfs_trans_cancel(tp, 0); | ||
5915 | return error; | ||
5916 | } | ||
5917 | |||
5918 | xfs_ilock(ip, XFS_ILOCK_EXCL); | ||
5919 | xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); | ||
5920 | |||
5921 | xfs_bmap_init(&free_list, &firstfsb); | ||
5922 | |||
5923 | error = xfs_bmap_split_extent_at(tp, ip, split_fsb, | ||
5924 | &firstfsb, &free_list); | ||
5925 | if (error) | ||
5926 | goto out; | ||
5927 | |||
5928 | error = xfs_bmap_finish(&tp, &free_list, &committed); | ||
5929 | if (error) | ||
5930 | goto out; | ||
5931 | |||
5932 | return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); | ||
5933 | |||
5934 | |||
5935 | out: | ||
5936 | xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); | ||
5937 | return error; | ||
5938 | } | ||
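The xfs_bmap.c changes above generalize the collapse-range machinery into a bidirectional shift: SHIFT_LEFT walks the extent list from low to high index and may merge the first shifted extent into its unshifted left neighbour, while SHIFT_RIGHT starts at the last extent and walks downward so that a shifted extent never overtakes its right neighbour. The user-space sketch below mirrors only that iteration order and its room checks; the types, the 'first' boundary index (standing in for the next_fsb/stop_fsb cursor logic), and the merge handling are simplified stand-ins, not the kernel implementation.

/* Illustrative sketch only; not the XFS implementation. */
#include <stdio.h>
#include <string.h>

struct ext { unsigned long off, len; };		/* startoff, blockcount */
enum shift_direction { SHIFT_LEFT, SHIFT_RIGHT };

/*
 * Shift the extents e[first..n-1] by 'shift' blocks. Returns the new
 * extent count, or -1 when there is no hole to shift into (the kernel
 * returns -EINVAL for that case).
 */
static int shift_extents(struct ext *e, int n, int first,
			 unsigned long shift, enum shift_direction dir)
{
	int i = (dir == SHIFT_LEFT) ? first : n - 1;

	while (i >= first && i < n) {
		if (dir == SHIFT_LEFT) {
			unsigned long limit =
				i ? e[i - 1].off + e[i - 1].len : 0;

			if (e[i].off < shift || e[i].off - shift < limit)
				return -1;	/* not enough room */
			e[i].off -= shift;
			/* merge into the left neighbour if now contiguous */
			if (i && e[i - 1].off + e[i - 1].len == e[i].off) {
				e[i - 1].len += e[i].len;
				memmove(&e[i], &e[i + 1],
					(n - i - 1) * sizeof(*e));
				n--;		/* count changed; stay put */
				continue;
			}
			i++;			/* left shift walks upward */
		} else {
			/* must not overtake the right neighbour */
			if (i < n - 1 &&
			    e[i].off + e[i].len + shift > e[i + 1].off)
				return -1;
			e[i].off += shift;
			i--;			/* right shift walks downward */
		}
	}
	return n;
}

int main(void)
{
	struct ext e[] = { { 0, 4 }, { 10, 6 }, { 20, 2 } };
	int i, n = shift_extents(e, 3, 1, 6, SHIFT_LEFT);

	for (i = 0; i < n; i++)
		printf("[%lu, +%lu]\n", e[i].off, e[i].len);
	return 0;
}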
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h index b9d8a499d2c4..6aaa0c1c7200 100644 --- a/fs/xfs/libxfs/xfs_bmap.h +++ b/fs/xfs/libxfs/xfs_bmap.h | |||
@@ -166,6 +166,11 @@ static inline void xfs_bmap_init(xfs_bmap_free_t *flp, xfs_fsblock_t *fbp) | |||
166 | */ | 166 | */ |
167 | #define XFS_BMAP_MAX_SHIFT_EXTENTS 1 | 167 | #define XFS_BMAP_MAX_SHIFT_EXTENTS 1 |
168 | 168 | ||
169 | enum shift_direction { | ||
170 | SHIFT_LEFT = 0, | ||
171 | SHIFT_RIGHT, | ||
172 | }; | ||
173 | |||
169 | #ifdef DEBUG | 174 | #ifdef DEBUG |
170 | void xfs_bmap_trace_exlist(struct xfs_inode *ip, xfs_extnum_t cnt, | 175 | void xfs_bmap_trace_exlist(struct xfs_inode *ip, xfs_extnum_t cnt, |
171 | int whichfork, unsigned long caller_ip); | 176 | int whichfork, unsigned long caller_ip); |
@@ -211,8 +216,10 @@ int xfs_check_nostate_extents(struct xfs_ifork *ifp, xfs_extnum_t idx, | |||
211 | xfs_extnum_t num); | 216 | xfs_extnum_t num); |
212 | uint xfs_default_attroffset(struct xfs_inode *ip); | 217 | uint xfs_default_attroffset(struct xfs_inode *ip); |
213 | int xfs_bmap_shift_extents(struct xfs_trans *tp, struct xfs_inode *ip, | 218 | int xfs_bmap_shift_extents(struct xfs_trans *tp, struct xfs_inode *ip, |
214 | xfs_fileoff_t start_fsb, xfs_fileoff_t offset_shift_fsb, | 219 | xfs_fileoff_t *next_fsb, xfs_fileoff_t offset_shift_fsb, |
215 | int *done, xfs_fileoff_t *next_fsb, xfs_fsblock_t *firstblock, | 220 | int *done, xfs_fileoff_t stop_fsb, xfs_fsblock_t *firstblock, |
216 | struct xfs_bmap_free *flist, int num_exts); | 221 | struct xfs_bmap_free *flist, enum shift_direction direction, |
222 | int num_exts); | ||
223 | int xfs_bmap_split_extent(struct xfs_inode *ip, xfs_fileoff_t split_offset); | ||
217 | 224 | ||
218 | #endif /* __XFS_BMAP_H__ */ | 225 | #endif /* __XFS_BMAP_H__ */ |
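With the revised prototype above, next_fsb is an in/out cursor rather than a one-shot start offset: the caller seeds it (or passes NULLFSBLOCK so the first right shift starts from the last extent) and keeps calling until *done is set, typically rolling a transaction per batch of num_exts extents. A hedged sketch of that driving loop follows; shift_one_batch() is a hypothetical stand-in for xfs_bmap_shift_extents(), and the transaction and locking work is elided.

#include <stdio.h>

#define NULLFSBLOCK	(~0UL)

/*
 * Hypothetical stand-in for xfs_bmap_shift_extents(): shifts one batch,
 * updates the cursor, and sets *done when nothing is left to move.
 */
static int shift_one_batch(unsigned long *next_fsb, int *done)
{
	static int batches_left = 3;

	if (*next_fsb == NULLFSBLOCK)
		*next_fsb = 100;	/* kernel: startoff of last extent */

	if (--batches_left == 0) {
		*done = 1;
		*next_fsb = NULLFSBLOCK;
	} else {
		*next_fsb -= 16;	/* pretend we moved to an earlier extent */
	}
	return 0;
}

int main(void)
{
	unsigned long next_fsb = NULLFSBLOCK;	/* first right shift */
	int done = 0, error;

	/* the kernel rolls one transaction per pass; elided here */
	do {
		error = shift_one_batch(&next_fsb, &done);
	} while (!error && !done);

	printf("done=%d error=%d\n", done, error);
	return 0;
}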
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index 81cad433df85..c72283dd8d44 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c | |||
@@ -168,7 +168,7 @@ xfs_btree_check_lptr( | |||
168 | xfs_fsblock_t bno, /* btree block disk address */ | 168 | xfs_fsblock_t bno, /* btree block disk address */ |
169 | int level) /* btree block level */ | 169 | int level) /* btree block level */ |
170 | { | 170 | { |
171 | XFS_WANT_CORRUPTED_RETURN( | 171 | XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, |
172 | level > 0 && | 172 | level > 0 && |
173 | bno != NULLFSBLOCK && | 173 | bno != NULLFSBLOCK && |
174 | XFS_FSB_SANITY_CHECK(cur->bc_mp, bno)); | 174 | XFS_FSB_SANITY_CHECK(cur->bc_mp, bno)); |
@@ -187,7 +187,7 @@ xfs_btree_check_sptr( | |||
187 | { | 187 | { |
188 | xfs_agblock_t agblocks = cur->bc_mp->m_sb.sb_agblocks; | 188 | xfs_agblock_t agblocks = cur->bc_mp->m_sb.sb_agblocks; |
189 | 189 | ||
190 | XFS_WANT_CORRUPTED_RETURN( | 190 | XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, |
191 | level > 0 && | 191 | level > 0 && |
192 | bno != NULLAGBLOCK && | 192 | bno != NULLAGBLOCK && |
193 | bno != 0 && | 193 | bno != 0 && |
@@ -1825,7 +1825,7 @@ xfs_btree_lookup( | |||
1825 | error = xfs_btree_increment(cur, 0, &i); | 1825 | error = xfs_btree_increment(cur, 0, &i); |
1826 | if (error) | 1826 | if (error) |
1827 | goto error0; | 1827 | goto error0; |
1828 | XFS_WANT_CORRUPTED_RETURN(i == 1); | 1828 | XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, i == 1); |
1829 | XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); | 1829 | XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); |
1830 | *stat = 1; | 1830 | *stat = 1; |
1831 | return 0; | 1831 | return 0; |
@@ -2285,7 +2285,7 @@ xfs_btree_rshift( | |||
2285 | if (error) | 2285 | if (error) |
2286 | goto error0; | 2286 | goto error0; |
2287 | i = xfs_btree_lastrec(tcur, level); | 2287 | i = xfs_btree_lastrec(tcur, level); |
2288 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | 2288 | XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0); |
2289 | 2289 | ||
2290 | error = xfs_btree_increment(tcur, level, &i); | 2290 | error = xfs_btree_increment(tcur, level, &i); |
2291 | if (error) | 2291 | if (error) |
@@ -3138,7 +3138,7 @@ xfs_btree_insert( | |||
3138 | goto error0; | 3138 | goto error0; |
3139 | } | 3139 | } |
3140 | 3140 | ||
3141 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | 3141 | XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0); |
3142 | level++; | 3142 | level++; |
3143 | 3143 | ||
3144 | /* | 3144 | /* |
@@ -3582,15 +3582,15 @@ xfs_btree_delrec( | |||
3582 | * Actually any entry but the first would suffice. | 3582 | * Actually any entry but the first would suffice. |
3583 | */ | 3583 | */ |
3584 | i = xfs_btree_lastrec(tcur, level); | 3584 | i = xfs_btree_lastrec(tcur, level); |
3585 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | 3585 | XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0); |
3586 | 3586 | ||
3587 | error = xfs_btree_increment(tcur, level, &i); | 3587 | error = xfs_btree_increment(tcur, level, &i); |
3588 | if (error) | 3588 | if (error) |
3589 | goto error0; | 3589 | goto error0; |
3590 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | 3590 | XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0); |
3591 | 3591 | ||
3592 | i = xfs_btree_lastrec(tcur, level); | 3592 | i = xfs_btree_lastrec(tcur, level); |
3593 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | 3593 | XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0); |
3594 | 3594 | ||
3595 | /* Grab a pointer to the block. */ | 3595 | /* Grab a pointer to the block. */ |
3596 | right = xfs_btree_get_block(tcur, level, &rbp); | 3596 | right = xfs_btree_get_block(tcur, level, &rbp); |
@@ -3634,12 +3634,12 @@ xfs_btree_delrec( | |||
3634 | rrecs = xfs_btree_get_numrecs(right); | 3634 | rrecs = xfs_btree_get_numrecs(right); |
3635 | if (!xfs_btree_ptr_is_null(cur, &lptr)) { | 3635 | if (!xfs_btree_ptr_is_null(cur, &lptr)) { |
3636 | i = xfs_btree_firstrec(tcur, level); | 3636 | i = xfs_btree_firstrec(tcur, level); |
3637 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | 3637 | XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0); |
3638 | 3638 | ||
3639 | error = xfs_btree_decrement(tcur, level, &i); | 3639 | error = xfs_btree_decrement(tcur, level, &i); |
3640 | if (error) | 3640 | if (error) |
3641 | goto error0; | 3641 | goto error0; |
3642 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | 3642 | XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0); |
3643 | } | 3643 | } |
3644 | } | 3644 | } |
3645 | 3645 | ||
@@ -3653,13 +3653,13 @@ xfs_btree_delrec( | |||
3653 | * previous block. | 3653 | * previous block. |
3654 | */ | 3654 | */ |
3655 | i = xfs_btree_firstrec(tcur, level); | 3655 | i = xfs_btree_firstrec(tcur, level); |
3656 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | 3656 | XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0); |
3657 | 3657 | ||
3658 | error = xfs_btree_decrement(tcur, level, &i); | 3658 | error = xfs_btree_decrement(tcur, level, &i); |
3659 | if (error) | 3659 | if (error) |
3660 | goto error0; | 3660 | goto error0; |
3661 | i = xfs_btree_firstrec(tcur, level); | 3661 | i = xfs_btree_firstrec(tcur, level); |
3662 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | 3662 | XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0); |
3663 | 3663 | ||
3664 | /* Grab a pointer to the block. */ | 3664 | /* Grab a pointer to the block. */ |
3665 | left = xfs_btree_get_block(tcur, level, &lbp); | 3665 | left = xfs_btree_get_block(tcur, level, &lbp); |
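The xfs_btree.c hunks are mechanical: every XFS_WANT_CORRUPTED_GOTO and XFS_WANT_CORRUPTED_RETURN site now passes the cursor's mount, which lets the macro attribute its corruption report to a specific filesystem instead of assuming a suitable mount variable is in scope. A self-contained sketch of that macro pattern; the struct, report helper, and macro body here are illustrative stand-ins, not the kernel definitions.

#include <stdio.h>

struct mount { const char *name; };	/* stand-in for struct xfs_mount */

static void report_corruption(const char *tag, struct mount *mp)
{
	fprintf(stderr, "%s: corruption detected on %s\n", tag, mp->name);
}

/*
 * Sketch of the pattern: on a failed invariant, report against the
 * mount, set 'error', and jump to the caller's unwind label.
 */
#define WANT_CORRUPTED_GOTO(mp, x, l)				\
	do {							\
		if (!(x)) {					\
			report_corruption(__func__, (mp));	\
			error = -117;	/* EFSCORRUPTED-ish */	\
			goto l;					\
		}						\
	} while (0)

static int check_record(struct mount *mp, int nrecs)
{
	int error = 0;

	WANT_CORRUPTED_GOTO(mp, nrecs >= 1, out);
	/* ... normal processing would continue here ... */
out:
	return error;
}

int main(void)
{
	struct mount m = { "xfs0" };

	printf("ok=%d bad=%d\n", check_record(&m, 2), check_record(&m, 0));
	return 0;
}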
diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c index 9cb0115c6bd1..2385f8cd08ab 100644 --- a/fs/xfs/libxfs/xfs_da_btree.c +++ b/fs/xfs/libxfs/xfs_da_btree.c | |||
@@ -538,12 +538,12 @@ xfs_da3_root_split( | |||
538 | oldroot = blk1->bp->b_addr; | 538 | oldroot = blk1->bp->b_addr; |
539 | if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC) || | 539 | if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC) || |
540 | oldroot->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC)) { | 540 | oldroot->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC)) { |
541 | struct xfs_da3_icnode_hdr nodehdr; | 541 | struct xfs_da3_icnode_hdr icnodehdr; |
542 | 542 | ||
543 | dp->d_ops->node_hdr_from_disk(&nodehdr, oldroot); | 543 | dp->d_ops->node_hdr_from_disk(&icnodehdr, oldroot); |
544 | btree = dp->d_ops->node_tree_p(oldroot); | 544 | btree = dp->d_ops->node_tree_p(oldroot); |
545 | size = (int)((char *)&btree[nodehdr.count] - (char *)oldroot); | 545 | size = (int)((char *)&btree[icnodehdr.count] - (char *)oldroot); |
546 | level = nodehdr.level; | 546 | level = icnodehdr.level; |
547 | 547 | ||
548 | /* | 548 | /* |
549 | * we are about to copy oldroot to bp, so set up the type | 549 | * we are about to copy oldroot to bp, so set up the type |
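The xfs_da3_root_split() hunk is a pure rename of the block-scoped header, presumably so it no longer shares the nodehdr name with other uses in the function. The toy below shows the hazard such shadowing invites: writes to the inner variable silently never reach the outer one.

#include <stdio.h>

struct hdr { int count; };

int main(void)
{
	struct hdr nodehdr = { 7 };		/* outer header */

	{
		struct hdr nodehdr = { 0 };	/* shadows the outer one */

		nodehdr.count = 3;		/* only the inner copy changes */
	}

	/* still 7: the inner write never reached the outer variable */
	printf("count = %d\n", nodehdr.count);
	return 0;
}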
diff --git a/fs/xfs/libxfs/xfs_da_format.h b/fs/xfs/libxfs/xfs_da_format.h index 0a49b0286372..74bcbabfa523 100644 --- a/fs/xfs/libxfs/xfs_da_format.h +++ b/fs/xfs/libxfs/xfs_da_format.h | |||
@@ -725,7 +725,13 @@ struct xfs_attr3_icleaf_hdr { | |||
725 | __uint16_t magic; | 725 | __uint16_t magic; |
726 | __uint16_t count; | 726 | __uint16_t count; |
727 | __uint16_t usedbytes; | 727 | __uint16_t usedbytes; |
728 | __uint16_t firstused; | 728 | /* |
729 | * firstused is 32-bit here instead of 16-bit like the on-disk variant | ||
730 | * to support a maximum fsb size of 64k without overflow issues throughout | ||
731 | * the attr code. Instead, the overflow condition is handled on | ||
732 | * conversion to/from disk. | ||
733 | */ | ||
734 | __uint32_t firstused; | ||
729 | __u8 holes; | 735 | __u8 holes; |
730 | struct { | 736 | struct { |
731 | __uint16_t base; | 737 | __uint16_t base; |
@@ -734,6 +740,12 @@ struct xfs_attr3_icleaf_hdr { | |||
734 | }; | 740 | }; |
735 | 741 | ||
736 | /* | 742 | /* |
743 | * Special value to represent fs block size in the leaf header firstused field. | ||
744 | * Only used when block size overflows the 2-bytes available on disk. | ||
745 | */ | ||
746 | #define XFS_ATTR3_LEAF_NULLOFF 0 | ||
747 | |||
748 | /* | ||
737 | * Flags used in the leaf_entry[i].flags field. | 749 | * Flags used in the leaf_entry[i].flags field. |
738 | * NOTE: the INCOMPLETE bit must not collide with the flags bits specified | 750 | * NOTE: the INCOMPLETE bit must not collide with the flags bits specified |
739 | * on the system call, they are "or"ed together for various operations. | 751 | * on the system call, they are "or"ed together for various operations. |
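Widening the in-core firstused to 32 bits lets it hold a full 64k block size, a value that cannot be represented in the 16-bit on-disk field, so XFS_ATTR3_LEAF_NULLOFF stands in for it on disk (offset 0 is never a legal firstused, since the header occupies the start of the block). A hedged sketch of the conversion pair; the helper names are illustrative, not the kernel's, and the assertions simplify the real checks.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define ATTR3_LEAF_NULLOFF	0	/* sentinel: firstused == blocksize */

/* in-core (32-bit) -> on-disk (16-bit) */
static uint16_t firstused_to_disk(uint32_t firstused, uint32_t blocksize)
{
	if (firstused == blocksize && blocksize > UINT16_MAX) {
		/* a 64k block size cannot be stored; use the sentinel */
		return ATTR3_LEAF_NULLOFF;
	}
	assert(firstused <= UINT16_MAX);
	return (uint16_t)firstused;
}

/* on-disk (16-bit) -> in-core (32-bit) */
static uint32_t firstused_from_disk(uint16_t disk, uint32_t blocksize)
{
	if (disk == ATTR3_LEAF_NULLOFF)
		return blocksize;	/* empty leaf: firstused == blocksize */
	return disk;
}

int main(void)
{
	uint32_t bsize = 65536;		/* 64k filesystem block */
	uint16_t d = firstused_to_disk(bsize, bsize);

	printf("disk=%u incore=%u\n", d, firstused_from_disk(d, bsize));
	return 0;
}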
diff --git a/fs/xfs/libxfs/xfs_dir2_data.c b/fs/xfs/libxfs/xfs_dir2_data.c index 5ff31be9b1cd..de1ea16f5748 100644 --- a/fs/xfs/libxfs/xfs_dir2_data.c +++ b/fs/xfs/libxfs/xfs_dir2_data.c | |||
@@ -89,7 +89,7 @@ __xfs_dir3_data_check( | |||
89 | * so just ensure that the count falls somewhere inside the | 89 | * so just ensure that the count falls somewhere inside the |
90 | * block right now. | 90 | * block right now. |
91 | */ | 91 | */ |
92 | XFS_WANT_CORRUPTED_RETURN(be32_to_cpu(btp->count) < | 92 | XFS_WANT_CORRUPTED_RETURN(mp, be32_to_cpu(btp->count) < |
93 | ((char *)btp - p) / sizeof(struct xfs_dir2_leaf_entry)); | 93 | ((char *)btp - p) / sizeof(struct xfs_dir2_leaf_entry)); |
94 | break; | 94 | break; |
95 | case cpu_to_be32(XFS_DIR3_DATA_MAGIC): | 95 | case cpu_to_be32(XFS_DIR3_DATA_MAGIC): |
@@ -107,21 +107,21 @@ __xfs_dir3_data_check( | |||
107 | bf = ops->data_bestfree_p(hdr); | 107 | bf = ops->data_bestfree_p(hdr); |
108 | count = lastfree = freeseen = 0; | 108 | count = lastfree = freeseen = 0; |
109 | if (!bf[0].length) { | 109 | if (!bf[0].length) { |
110 | XFS_WANT_CORRUPTED_RETURN(!bf[0].offset); | 110 | XFS_WANT_CORRUPTED_RETURN(mp, !bf[0].offset); |
111 | freeseen |= 1 << 0; | 111 | freeseen |= 1 << 0; |
112 | } | 112 | } |
113 | if (!bf[1].length) { | 113 | if (!bf[1].length) { |
114 | XFS_WANT_CORRUPTED_RETURN(!bf[1].offset); | 114 | XFS_WANT_CORRUPTED_RETURN(mp, !bf[1].offset); |
115 | freeseen |= 1 << 1; | 115 | freeseen |= 1 << 1; |
116 | } | 116 | } |
117 | if (!bf[2].length) { | 117 | if (!bf[2].length) { |
118 | XFS_WANT_CORRUPTED_RETURN(!bf[2].offset); | 118 | XFS_WANT_CORRUPTED_RETURN(mp, !bf[2].offset); |
119 | freeseen |= 1 << 2; | 119 | freeseen |= 1 << 2; |
120 | } | 120 | } |
121 | 121 | ||
122 | XFS_WANT_CORRUPTED_RETURN(be16_to_cpu(bf[0].length) >= | 122 | XFS_WANT_CORRUPTED_RETURN(mp, be16_to_cpu(bf[0].length) >= |
123 | be16_to_cpu(bf[1].length)); | 123 | be16_to_cpu(bf[1].length)); |
124 | XFS_WANT_CORRUPTED_RETURN(be16_to_cpu(bf[1].length) >= | 124 | XFS_WANT_CORRUPTED_RETURN(mp, be16_to_cpu(bf[1].length) >= |
125 | be16_to_cpu(bf[2].length)); | 125 | be16_to_cpu(bf[2].length)); |
126 | /* | 126 | /* |
127 | * Loop over the data/unused entries. | 127 | * Loop over the data/unused entries. |
@@ -134,18 +134,18 @@ __xfs_dir3_data_check( | |||
134 | * doesn't need to be there. | 134 | * doesn't need to be there. |
135 | */ | 135 | */ |
136 | if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { | 136 | if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { |
137 | XFS_WANT_CORRUPTED_RETURN(lastfree == 0); | 137 | XFS_WANT_CORRUPTED_RETURN(mp, lastfree == 0); |
138 | XFS_WANT_CORRUPTED_RETURN( | 138 | XFS_WANT_CORRUPTED_RETURN(mp, |
139 | be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)) == | 139 | be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)) == |
140 | (char *)dup - (char *)hdr); | 140 | (char *)dup - (char *)hdr); |
141 | dfp = xfs_dir2_data_freefind(hdr, bf, dup); | 141 | dfp = xfs_dir2_data_freefind(hdr, bf, dup); |
142 | if (dfp) { | 142 | if (dfp) { |
143 | i = (int)(dfp - bf); | 143 | i = (int)(dfp - bf); |
144 | XFS_WANT_CORRUPTED_RETURN( | 144 | XFS_WANT_CORRUPTED_RETURN(mp, |
145 | (freeseen & (1 << i)) == 0); | 145 | (freeseen & (1 << i)) == 0); |
146 | freeseen |= 1 << i; | 146 | freeseen |= 1 << i; |
147 | } else { | 147 | } else { |
148 | XFS_WANT_CORRUPTED_RETURN( | 148 | XFS_WANT_CORRUPTED_RETURN(mp, |
149 | be16_to_cpu(dup->length) <= | 149 | be16_to_cpu(dup->length) <= |
150 | be16_to_cpu(bf[2].length)); | 150 | be16_to_cpu(bf[2].length)); |
151 | } | 151 | } |
@@ -160,13 +160,13 @@ __xfs_dir3_data_check( | |||
160 | * The linear search is crude but this is DEBUG code. | 160 | * The linear search is crude but this is DEBUG code. |
161 | */ | 161 | */ |
162 | dep = (xfs_dir2_data_entry_t *)p; | 162 | dep = (xfs_dir2_data_entry_t *)p; |
163 | XFS_WANT_CORRUPTED_RETURN(dep->namelen != 0); | 163 | XFS_WANT_CORRUPTED_RETURN(mp, dep->namelen != 0); |
164 | XFS_WANT_CORRUPTED_RETURN( | 164 | XFS_WANT_CORRUPTED_RETURN(mp, |
165 | !xfs_dir_ino_validate(mp, be64_to_cpu(dep->inumber))); | 165 | !xfs_dir_ino_validate(mp, be64_to_cpu(dep->inumber))); |
166 | XFS_WANT_CORRUPTED_RETURN( | 166 | XFS_WANT_CORRUPTED_RETURN(mp, |
167 | be16_to_cpu(*ops->data_entry_tag_p(dep)) == | 167 | be16_to_cpu(*ops->data_entry_tag_p(dep)) == |
168 | (char *)dep - (char *)hdr); | 168 | (char *)dep - (char *)hdr); |
169 | XFS_WANT_CORRUPTED_RETURN( | 169 | XFS_WANT_CORRUPTED_RETURN(mp, |
170 | ops->data_get_ftype(dep) < XFS_DIR3_FT_MAX); | 170 | ops->data_get_ftype(dep) < XFS_DIR3_FT_MAX); |
171 | count++; | 171 | count++; |
172 | lastfree = 0; | 172 | lastfree = 0; |
@@ -183,14 +183,15 @@ __xfs_dir3_data_check( | |||
183 | be32_to_cpu(lep[i].hashval) == hash) | 183 | be32_to_cpu(lep[i].hashval) == hash) |
184 | break; | 184 | break; |
185 | } | 185 | } |
186 | XFS_WANT_CORRUPTED_RETURN(i < be32_to_cpu(btp->count)); | 186 | XFS_WANT_CORRUPTED_RETURN(mp, |
187 | i < be32_to_cpu(btp->count)); | ||
187 | } | 188 | } |
188 | p += ops->data_entsize(dep->namelen); | 189 | p += ops->data_entsize(dep->namelen); |
189 | } | 190 | } |
190 | /* | 191 | /* |
191 | * Need to have seen all the entries and all the bestfree slots. | 192 | * Need to have seen all the entries and all the bestfree slots. |
192 | */ | 193 | */ |
193 | XFS_WANT_CORRUPTED_RETURN(freeseen == 7); | 194 | XFS_WANT_CORRUPTED_RETURN(mp, freeseen == 7); |
194 | if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || | 195 | if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || |
195 | hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) { | 196 | hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) { |
196 | for (i = stale = 0; i < be32_to_cpu(btp->count); i++) { | 197 | for (i = stale = 0; i < be32_to_cpu(btp->count); i++) { |
@@ -198,13 +199,13 @@ __xfs_dir3_data_check( | |||
198 | cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) | 199 | cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) |
199 | stale++; | 200 | stale++; |
200 | if (i > 0) | 201 | if (i > 0) |
201 | XFS_WANT_CORRUPTED_RETURN( | 202 | XFS_WANT_CORRUPTED_RETURN(mp, |
202 | be32_to_cpu(lep[i].hashval) >= | 203 | be32_to_cpu(lep[i].hashval) >= |
203 | be32_to_cpu(lep[i - 1].hashval)); | 204 | be32_to_cpu(lep[i - 1].hashval)); |
204 | } | 205 | } |
205 | XFS_WANT_CORRUPTED_RETURN(count == | 206 | XFS_WANT_CORRUPTED_RETURN(mp, count == |
206 | be32_to_cpu(btp->count) - be32_to_cpu(btp->stale)); | 207 | be32_to_cpu(btp->count) - be32_to_cpu(btp->stale)); |
207 | XFS_WANT_CORRUPTED_RETURN(stale == be32_to_cpu(btp->stale)); | 208 | XFS_WANT_CORRUPTED_RETURN(mp, stale == be32_to_cpu(btp->stale)); |
208 | } | 209 | } |
209 | return 0; | 210 | return 0; |
210 | } | 211 | } |
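__xfs_dir3_data_check() accounts for the three bestfree slots with the freeseen bitmask visible above: empty slots have their bits preset, every matched free region may flip its slot's bit exactly once, and the mask must finish as 7. A compact toy of that bookkeeping pattern, with made-up lengths standing in for a real directory block.

#include <stdio.h>

int main(void)
{
	int lengths[3] = { 24, 16, 16 };	/* bestfree slot lengths */
	int freeseen = 0, i;

	/* empty slots get their bits preset, as the checker does */
	for (i = 0; i < 3; i++)
		if (!lengths[i])
			freeseen |= 1 << i;

	/* the slots must be sorted by descending length */
	if (lengths[0] < lengths[1] || lengths[1] < lengths[2]) {
		fprintf(stderr, "bestfree out of order\n");
		return 1;
	}

	/* each matched free region flips its slot's bit exactly once */
	for (i = 0; i < 3; i++) {
		if (!lengths[i])
			continue;		/* already preset above */
		if (freeseen & (1 << i)) {
			fprintf(stderr, "slot %d seen twice\n", i);
			return 1;
		}
		freeseen |= 1 << i;
	}

	/* all entries and all bestfree slots must have been seen */
	printf(freeseen == 7 ? "ok\n" : "corrupt\n");
	return 0;
}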
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h index 8eb718979383..4daaa662337b 100644 --- a/fs/xfs/libxfs/xfs_format.h +++ b/fs/xfs/libxfs/xfs_format.h | |||
@@ -264,68 +264,6 @@ typedef struct xfs_dsb { | |||
264 | /* must be padded to 64 bit alignment */ | 264 | /* must be padded to 64 bit alignment */ |
265 | } xfs_dsb_t; | 265 | } xfs_dsb_t; |
266 | 266 | ||
267 | /* | ||
268 | * Sequence number values for the fields. | ||
269 | */ | ||
270 | typedef enum { | ||
271 | XFS_SBS_MAGICNUM, XFS_SBS_BLOCKSIZE, XFS_SBS_DBLOCKS, XFS_SBS_RBLOCKS, | ||
272 | XFS_SBS_REXTENTS, XFS_SBS_UUID, XFS_SBS_LOGSTART, XFS_SBS_ROOTINO, | ||
273 | XFS_SBS_RBMINO, XFS_SBS_RSUMINO, XFS_SBS_REXTSIZE, XFS_SBS_AGBLOCKS, | ||
274 | XFS_SBS_AGCOUNT, XFS_SBS_RBMBLOCKS, XFS_SBS_LOGBLOCKS, | ||
275 | XFS_SBS_VERSIONNUM, XFS_SBS_SECTSIZE, XFS_SBS_INODESIZE, | ||
276 | XFS_SBS_INOPBLOCK, XFS_SBS_FNAME, XFS_SBS_BLOCKLOG, | ||
277 | XFS_SBS_SECTLOG, XFS_SBS_INODELOG, XFS_SBS_INOPBLOG, XFS_SBS_AGBLKLOG, | ||
278 | XFS_SBS_REXTSLOG, XFS_SBS_INPROGRESS, XFS_SBS_IMAX_PCT, XFS_SBS_ICOUNT, | ||
279 | XFS_SBS_IFREE, XFS_SBS_FDBLOCKS, XFS_SBS_FREXTENTS, XFS_SBS_UQUOTINO, | ||
280 | XFS_SBS_GQUOTINO, XFS_SBS_QFLAGS, XFS_SBS_FLAGS, XFS_SBS_SHARED_VN, | ||
281 | XFS_SBS_INOALIGNMT, XFS_SBS_UNIT, XFS_SBS_WIDTH, XFS_SBS_DIRBLKLOG, | ||
282 | XFS_SBS_LOGSECTLOG, XFS_SBS_LOGSECTSIZE, XFS_SBS_LOGSUNIT, | ||
283 | XFS_SBS_FEATURES2, XFS_SBS_BAD_FEATURES2, XFS_SBS_FEATURES_COMPAT, | ||
284 | XFS_SBS_FEATURES_RO_COMPAT, XFS_SBS_FEATURES_INCOMPAT, | ||
285 | XFS_SBS_FEATURES_LOG_INCOMPAT, XFS_SBS_CRC, XFS_SBS_PAD, | ||
286 | XFS_SBS_PQUOTINO, XFS_SBS_LSN, | ||
287 | XFS_SBS_FIELDCOUNT | ||
288 | } xfs_sb_field_t; | ||
289 | |||
290 | /* | ||
291 | * Mask values, defined based on the xfs_sb_field_t values. | ||
292 | * Only define the ones we're using. | ||
293 | */ | ||
294 | #define XFS_SB_MVAL(x) (1LL << XFS_SBS_ ## x) | ||
295 | #define XFS_SB_UUID XFS_SB_MVAL(UUID) | ||
296 | #define XFS_SB_FNAME XFS_SB_MVAL(FNAME) | ||
297 | #define XFS_SB_ROOTINO XFS_SB_MVAL(ROOTINO) | ||
298 | #define XFS_SB_RBMINO XFS_SB_MVAL(RBMINO) | ||
299 | #define XFS_SB_RSUMINO XFS_SB_MVAL(RSUMINO) | ||
300 | #define XFS_SB_VERSIONNUM XFS_SB_MVAL(VERSIONNUM) | ||
301 | #define XFS_SB_UQUOTINO XFS_SB_MVAL(UQUOTINO) | ||
302 | #define XFS_SB_GQUOTINO XFS_SB_MVAL(GQUOTINO) | ||
303 | #define XFS_SB_QFLAGS XFS_SB_MVAL(QFLAGS) | ||
304 | #define XFS_SB_SHARED_VN XFS_SB_MVAL(SHARED_VN) | ||
305 | #define XFS_SB_UNIT XFS_SB_MVAL(UNIT) | ||
306 | #define XFS_SB_WIDTH XFS_SB_MVAL(WIDTH) | ||
307 | #define XFS_SB_ICOUNT XFS_SB_MVAL(ICOUNT) | ||
308 | #define XFS_SB_IFREE XFS_SB_MVAL(IFREE) | ||
309 | #define XFS_SB_FDBLOCKS XFS_SB_MVAL(FDBLOCKS) | ||
310 | #define XFS_SB_FEATURES2 (XFS_SB_MVAL(FEATURES2) | \ | ||
311 | XFS_SB_MVAL(BAD_FEATURES2)) | ||
312 | #define XFS_SB_FEATURES_COMPAT XFS_SB_MVAL(FEATURES_COMPAT) | ||
313 | #define XFS_SB_FEATURES_RO_COMPAT XFS_SB_MVAL(FEATURES_RO_COMPAT) | ||
314 | #define XFS_SB_FEATURES_INCOMPAT XFS_SB_MVAL(FEATURES_INCOMPAT) | ||
315 | #define XFS_SB_FEATURES_LOG_INCOMPAT XFS_SB_MVAL(FEATURES_LOG_INCOMPAT) | ||
316 | #define XFS_SB_CRC XFS_SB_MVAL(CRC) | ||
317 | #define XFS_SB_PQUOTINO XFS_SB_MVAL(PQUOTINO) | ||
318 | #define XFS_SB_NUM_BITS ((int)XFS_SBS_FIELDCOUNT) | ||
319 | #define XFS_SB_ALL_BITS ((1LL << XFS_SB_NUM_BITS) - 1) | ||
320 | #define XFS_SB_MOD_BITS \ | ||
321 | (XFS_SB_UUID | XFS_SB_ROOTINO | XFS_SB_RBMINO | XFS_SB_RSUMINO | \ | ||
322 | XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO | XFS_SB_GQUOTINO | \ | ||
323 | XFS_SB_QFLAGS | XFS_SB_SHARED_VN | XFS_SB_UNIT | XFS_SB_WIDTH | \ | ||
324 | XFS_SB_ICOUNT | XFS_SB_IFREE | XFS_SB_FDBLOCKS | XFS_SB_FEATURES2 | \ | ||
325 | XFS_SB_FEATURES_COMPAT | XFS_SB_FEATURES_RO_COMPAT | \ | ||
326 | XFS_SB_FEATURES_INCOMPAT | XFS_SB_FEATURES_LOG_INCOMPAT | \ | ||
327 | XFS_SB_PQUOTINO) | ||
328 | |||
329 | 267 | ||
330 | /* | 268 | /* |
331 | * Misc. Flags - warning - these will be cleared by xfs_repair unless | 269 | * Misc. Flags - warning - these will be cleared by xfs_repair unless |
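Dropping the XFS_SBS_* field enum and its mask macros goes hand in hand with retiring the generic xfs_mod_incore_sb() dispatch seen earlier in this diff: each hot superblock counter now has a dedicated helper (xfs_mod_fdblocks(), xfs_mod_frextents()) that can carry counter-specific policy, such as dipping into a reserved pool. The contrast below is a toy with illustrative names and a heavily simplified reserve rule, not the kernel's accounting.

#include <stdint.h>
#include <stdio.h>

struct sb { int64_t fdblocks, frextents; };

/* old shape: one entry point dispatching on a field id */
enum sb_field { SBS_FDBLOCKS, SBS_FREXTENTS };

static int mod_incore_sb(struct sb *sb, enum sb_field f, int64_t delta)
{
	int64_t *ctr = (f == SBS_FDBLOCKS) ? &sb->fdblocks : &sb->frextents;

	if (*ctr + delta < 0)
		return -28;	/* ENOSPC-ish */
	*ctr += delta;
	return 0;
}

/* new shape: one helper per counter, carrying counter-specific policy */
static int mod_fdblocks(struct sb *sb, int64_t delta, int rsvd)
{
	int64_t lower = rsvd ? -8 : 0;	/* toy reserved-pool allowance */

	if (sb->fdblocks + delta < lower)
		return -28;
	sb->fdblocks += delta;
	return 0;
}

int main(void)
{
	struct sb sb = { 100, 10 };
	int a = mod_incore_sb(&sb, SBS_FDBLOCKS, -50);
	int b = mod_fdblocks(&sb, -60, 0);

	printf("%d %d\n", a, b);
	return 0;
}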
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index 116ef1ddb3e3..07349a183a11 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c | |||
@@ -376,7 +376,8 @@ xfs_ialloc_ag_alloc( | |||
376 | */ | 376 | */ |
377 | newlen = args.mp->m_ialloc_inos; | 377 | newlen = args.mp->m_ialloc_inos; |
378 | if (args.mp->m_maxicount && | 378 | if (args.mp->m_maxicount && |
379 | args.mp->m_sb.sb_icount + newlen > args.mp->m_maxicount) | 379 | percpu_counter_read(&args.mp->m_icount) + newlen > |
380 | args.mp->m_maxicount) | ||
380 | return -ENOSPC; | 381 | return -ENOSPC; |
381 | args.minlen = args.maxlen = args.mp->m_ialloc_blks; | 382 | args.minlen = args.maxlen = args.mp->m_ialloc_blks; |
382 | /* | 383 | /* |
@@ -700,7 +701,7 @@ xfs_ialloc_next_rec( | |||
700 | error = xfs_inobt_get_rec(cur, rec, &i); | 701 | error = xfs_inobt_get_rec(cur, rec, &i); |
701 | if (error) | 702 | if (error) |
702 | return error; | 703 | return error; |
703 | XFS_WANT_CORRUPTED_RETURN(i == 1); | 704 | XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, i == 1); |
704 | } | 705 | } |
705 | 706 | ||
706 | return 0; | 707 | return 0; |
@@ -724,7 +725,7 @@ xfs_ialloc_get_rec( | |||
724 | error = xfs_inobt_get_rec(cur, rec, &i); | 725 | error = xfs_inobt_get_rec(cur, rec, &i); |
725 | if (error) | 726 | if (error) |
726 | return error; | 727 | return error; |
727 | XFS_WANT_CORRUPTED_RETURN(i == 1); | 728 | XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, i == 1); |
728 | } | 729 | } |
729 | 730 | ||
730 | return 0; | 731 | return 0; |
@@ -783,12 +784,12 @@ xfs_dialloc_ag_inobt( | |||
783 | error = xfs_inobt_lookup(cur, pagino, XFS_LOOKUP_LE, &i); | 784 | error = xfs_inobt_lookup(cur, pagino, XFS_LOOKUP_LE, &i); |
784 | if (error) | 785 | if (error) |
785 | goto error0; | 786 | goto error0; |
786 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | 787 | XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); |
787 | 788 | ||
788 | error = xfs_inobt_get_rec(cur, &rec, &j); | 789 | error = xfs_inobt_get_rec(cur, &rec, &j); |
789 | if (error) | 790 | if (error) |
790 | goto error0; | 791 | goto error0; |
791 | XFS_WANT_CORRUPTED_GOTO(j == 1, error0); | 792 | XFS_WANT_CORRUPTED_GOTO(mp, j == 1, error0); |
792 | 793 | ||
793 | if (rec.ir_freecount > 0) { | 794 | if (rec.ir_freecount > 0) { |
794 | /* | 795 | /* |
@@ -944,19 +945,19 @@ newino: | |||
944 | error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i); | 945 | error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i); |
945 | if (error) | 946 | if (error) |
946 | goto error0; | 947 | goto error0; |
947 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | 948 | XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); |
948 | 949 | ||
949 | for (;;) { | 950 | for (;;) { |
950 | error = xfs_inobt_get_rec(cur, &rec, &i); | 951 | error = xfs_inobt_get_rec(cur, &rec, &i); |
951 | if (error) | 952 | if (error) |
952 | goto error0; | 953 | goto error0; |
953 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | 954 | XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); |
954 | if (rec.ir_freecount > 0) | 955 | if (rec.ir_freecount > 0) |
955 | break; | 956 | break; |
956 | error = xfs_btree_increment(cur, 0, &i); | 957 | error = xfs_btree_increment(cur, 0, &i); |
957 | if (error) | 958 | if (error) |
958 | goto error0; | 959 | goto error0; |
959 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | 960 | XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); |
960 | } | 961 | } |
961 | 962 | ||
962 | alloc_inode: | 963 | alloc_inode: |
@@ -1016,7 +1017,7 @@ xfs_dialloc_ag_finobt_near( | |||
1016 | error = xfs_inobt_get_rec(lcur, rec, &i); | 1017 | error = xfs_inobt_get_rec(lcur, rec, &i); |
1017 | if (error) | 1018 | if (error) |
1018 | return error; | 1019 | return error; |
1019 | XFS_WANT_CORRUPTED_RETURN(i == 1); | 1020 | XFS_WANT_CORRUPTED_RETURN(lcur->bc_mp, i == 1); |
1020 | 1021 | ||
1021 | /* | 1022 | /* |
1022 | * See if we've landed in the parent inode record. The finobt | 1023 | * See if we've landed in the parent inode record. The finobt |
@@ -1039,10 +1040,10 @@ xfs_dialloc_ag_finobt_near( | |||
1039 | error = xfs_inobt_get_rec(rcur, &rrec, &j); | 1040 | error = xfs_inobt_get_rec(rcur, &rrec, &j); |
1040 | if (error) | 1041 | if (error) |
1041 | goto error_rcur; | 1042 | goto error_rcur; |
1042 | XFS_WANT_CORRUPTED_GOTO(j == 1, error_rcur); | 1043 | XFS_WANT_CORRUPTED_GOTO(lcur->bc_mp, j == 1, error_rcur); |
1043 | } | 1044 | } |
1044 | 1045 | ||
1045 | XFS_WANT_CORRUPTED_GOTO(i == 1 || j == 1, error_rcur); | 1046 | XFS_WANT_CORRUPTED_GOTO(lcur->bc_mp, i == 1 || j == 1, error_rcur); |
1046 | if (i == 1 && j == 1) { | 1047 | if (i == 1 && j == 1) { |
1047 | /* | 1048 | /* |
1048 | * Both the left and right records are valid. Choose the closer | 1049 | * Both the left and right records are valid. Choose the closer |
@@ -1095,7 +1096,7 @@ xfs_dialloc_ag_finobt_newino( | |||
1095 | error = xfs_inobt_get_rec(cur, rec, &i); | 1096 | error = xfs_inobt_get_rec(cur, rec, &i); |
1096 | if (error) | 1097 | if (error) |
1097 | return error; | 1098 | return error; |
1098 | XFS_WANT_CORRUPTED_RETURN(i == 1); | 1099 | XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, i == 1); |
1099 | return 0; | 1100 | return 0; |
1100 | } | 1101 | } |
1101 | } | 1102 | } |
@@ -1106,12 +1107,12 @@ xfs_dialloc_ag_finobt_newino( | |||
1106 | error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i); | 1107 | error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i); |
1107 | if (error) | 1108 | if (error) |
1108 | return error; | 1109 | return error; |
1109 | XFS_WANT_CORRUPTED_RETURN(i == 1); | 1110 | XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, i == 1); |
1110 | 1111 | ||
1111 | error = xfs_inobt_get_rec(cur, rec, &i); | 1112 | error = xfs_inobt_get_rec(cur, rec, &i); |
1112 | if (error) | 1113 | if (error) |
1113 | return error; | 1114 | return error; |
1114 | XFS_WANT_CORRUPTED_RETURN(i == 1); | 1115 | XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, i == 1); |
1115 | 1116 | ||
1116 | return 0; | 1117 | return 0; |
1117 | } | 1118 | } |
@@ -1133,19 +1134,19 @@ xfs_dialloc_ag_update_inobt( | |||
1133 | error = xfs_inobt_lookup(cur, frec->ir_startino, XFS_LOOKUP_EQ, &i); | 1134 | error = xfs_inobt_lookup(cur, frec->ir_startino, XFS_LOOKUP_EQ, &i); |
1134 | if (error) | 1135 | if (error) |
1135 | return error; | 1136 | return error; |
1136 | XFS_WANT_CORRUPTED_RETURN(i == 1); | 1137 | XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, i == 1); |
1137 | 1138 | ||
1138 | error = xfs_inobt_get_rec(cur, &rec, &i); | 1139 | error = xfs_inobt_get_rec(cur, &rec, &i); |
1139 | if (error) | 1140 | if (error) |
1140 | return error; | 1141 | return error; |
1141 | XFS_WANT_CORRUPTED_RETURN(i == 1); | 1142 | XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, i == 1); |
1142 | ASSERT((XFS_AGINO_TO_OFFSET(cur->bc_mp, rec.ir_startino) % | 1143 | ASSERT((XFS_AGINO_TO_OFFSET(cur->bc_mp, rec.ir_startino) % |
1143 | XFS_INODES_PER_CHUNK) == 0); | 1144 | XFS_INODES_PER_CHUNK) == 0); |
1144 | 1145 | ||
1145 | rec.ir_free &= ~XFS_INOBT_MASK(offset); | 1146 | rec.ir_free &= ~XFS_INOBT_MASK(offset); |
1146 | rec.ir_freecount--; | 1147 | rec.ir_freecount--; |
1147 | 1148 | ||
1148 | XFS_WANT_CORRUPTED_RETURN((rec.ir_free == frec->ir_free) && | 1149 | XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, (rec.ir_free == frec->ir_free) && |
1149 | (rec.ir_freecount == frec->ir_freecount)); | 1150 | (rec.ir_freecount == frec->ir_freecount)); |
1150 | 1151 | ||
1151 | return xfs_inobt_update(cur, &rec); | 1152 | return xfs_inobt_update(cur, &rec); |
@@ -1340,7 +1341,8 @@ xfs_dialloc( | |||
1340 | * inode. | 1341 | * inode. |
1341 | */ | 1342 | */ |
1342 | if (mp->m_maxicount && | 1343 | if (mp->m_maxicount && |
1343 | mp->m_sb.sb_icount + mp->m_ialloc_inos > mp->m_maxicount) { | 1344 | percpu_counter_read(&mp->m_icount) + mp->m_ialloc_inos > |
1345 | mp->m_maxicount) { | ||
1344 | noroom = 1; | 1346 | noroom = 1; |
1345 | okalloc = 0; | 1347 | okalloc = 0; |
1346 | } | 1348 | } |
@@ -1475,14 +1477,14 @@ xfs_difree_inobt( | |||
1475 | __func__, error); | 1477 | __func__, error); |
1476 | goto error0; | 1478 | goto error0; |
1477 | } | 1479 | } |
1478 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | 1480 | XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); |
1479 | error = xfs_inobt_get_rec(cur, &rec, &i); | 1481 | error = xfs_inobt_get_rec(cur, &rec, &i); |
1480 | if (error) { | 1482 | if (error) { |
1481 | xfs_warn(mp, "%s: xfs_inobt_get_rec() returned error %d.", | 1483 | xfs_warn(mp, "%s: xfs_inobt_get_rec() returned error %d.", |
1482 | __func__, error); | 1484 | __func__, error); |
1483 | goto error0; | 1485 | goto error0; |
1484 | } | 1486 | } |
1485 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | 1487 | XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); |
1486 | /* | 1488 | /* |
1487 | * Get the offset in the inode chunk. | 1489 | * Get the offset in the inode chunk. |
1488 | */ | 1490 | */ |
@@ -1592,7 +1594,7 @@ xfs_difree_finobt( | |||
1592 | * freed an inode in a previously fully allocated chunk. If not, | 1594 | * freed an inode in a previously fully allocated chunk. If not, |
1593 | * something is out of sync. | 1595 | * something is out of sync. |
1594 | */ | 1596 | */ |
1595 | XFS_WANT_CORRUPTED_GOTO(ibtrec->ir_freecount == 1, error); | 1597 | XFS_WANT_CORRUPTED_GOTO(mp, ibtrec->ir_freecount == 1, error); |
1596 | 1598 | ||
1597 | error = xfs_inobt_insert_rec(cur, ibtrec->ir_freecount, | 1599 | error = xfs_inobt_insert_rec(cur, ibtrec->ir_freecount, |
1598 | ibtrec->ir_free, &i); | 1600 | ibtrec->ir_free, &i); |
@@ -1613,12 +1615,12 @@ xfs_difree_finobt( | |||
1613 | error = xfs_inobt_get_rec(cur, &rec, &i); | 1615 | error = xfs_inobt_get_rec(cur, &rec, &i); |
1614 | if (error) | 1616 | if (error) |
1615 | goto error; | 1617 | goto error; |
1616 | XFS_WANT_CORRUPTED_GOTO(i == 1, error); | 1618 | XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error); |
1617 | 1619 | ||
1618 | rec.ir_free |= XFS_INOBT_MASK(offset); | 1620 | rec.ir_free |= XFS_INOBT_MASK(offset); |
1619 | rec.ir_freecount++; | 1621 | rec.ir_freecount++; |
1620 | 1622 | ||
1621 | XFS_WANT_CORRUPTED_GOTO((rec.ir_free == ibtrec->ir_free) && | 1623 | XFS_WANT_CORRUPTED_GOTO(mp, (rec.ir_free == ibtrec->ir_free) && |
1622 | (rec.ir_freecount == ibtrec->ir_freecount), | 1624 | (rec.ir_freecount == ibtrec->ir_freecount), |
1623 | error); | 1625 | error); |
1624 | 1626 | ||
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index b0a5fe95a3e2..dc4bfc5d88fc 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c | |||
@@ -111,14 +111,6 @@ xfs_mount_validate_sb( | |||
111 | bool check_inprogress, | 111 | bool check_inprogress, |
112 | bool check_version) | 112 | bool check_version) |
113 | { | 113 | { |
114 | |||
115 | /* | ||
116 | * If the log device and data device have the | ||
117 | * same device number, the log is internal. | ||
118 | * Consequently, the sb_logstart should be non-zero. If | ||
119 | * we have a zero sb_logstart in this case, we may be trying to mount | ||
120 | * a volume filesystem in a non-volume manner. | ||
121 | */ | ||
122 | if (sbp->sb_magicnum != XFS_SB_MAGIC) { | 114 | if (sbp->sb_magicnum != XFS_SB_MAGIC) { |
123 | xfs_warn(mp, "bad magic number"); | 115 | xfs_warn(mp, "bad magic number"); |
124 | return -EWRONGFS; | 116 | return -EWRONGFS; |
@@ -743,17 +735,15 @@ xfs_initialize_perag_data( | |||
743 | btree += pag->pagf_btreeblks; | 735 | btree += pag->pagf_btreeblks; |
744 | xfs_perag_put(pag); | 736 | xfs_perag_put(pag); |
745 | } | 737 | } |
746 | /* | 738 | |
747 | * Overwrite incore superblock counters with just-read data | 739 | /* Overwrite incore superblock counters with just-read data */ |
748 | */ | ||
749 | spin_lock(&mp->m_sb_lock); | 740 | spin_lock(&mp->m_sb_lock); |
750 | sbp->sb_ifree = ifree; | 741 | sbp->sb_ifree = ifree; |
751 | sbp->sb_icount = ialloc; | 742 | sbp->sb_icount = ialloc; |
752 | sbp->sb_fdblocks = bfree + bfreelst + btree; | 743 | sbp->sb_fdblocks = bfree + bfreelst + btree; |
753 | spin_unlock(&mp->m_sb_lock); | 744 | spin_unlock(&mp->m_sb_lock); |
754 | 745 | ||
755 | /* Fixup the per-cpu counters as well. */ | 746 | xfs_reinit_percpu_counters(mp); |
756 | xfs_icsb_reinit_counters(mp); | ||
757 | 747 | ||
758 | return 0; | 748 | return 0; |
759 | } | 749 | } |
@@ -771,6 +761,10 @@ xfs_log_sb( | |||
771 | struct xfs_mount *mp = tp->t_mountp; | 761 | struct xfs_mount *mp = tp->t_mountp; |
772 | struct xfs_buf *bp = xfs_trans_getsb(tp, mp, 0); | 762 | struct xfs_buf *bp = xfs_trans_getsb(tp, mp, 0); |
773 | 763 | ||
764 | mp->m_sb.sb_icount = percpu_counter_sum(&mp->m_icount); | ||
765 | mp->m_sb.sb_ifree = percpu_counter_sum(&mp->m_ifree); | ||
766 | mp->m_sb.sb_fdblocks = percpu_counter_sum(&mp->m_fdblocks); | ||
767 | |||
774 | xfs_sb_to_disk(XFS_BUF_TO_SBP(bp), &mp->m_sb); | 768 | xfs_sb_to_disk(XFS_BUF_TO_SBP(bp), &mp->m_sb); |
775 | xfs_trans_buf_set_type(tp, bp, XFS_BLFT_SB_BUF); | 769 | xfs_trans_buf_set_type(tp, bp, XFS_BLFT_SB_BUF); |
776 | xfs_trans_log_buf(tp, bp, 0, sizeof(struct xfs_dsb)); | 770 | xfs_trans_log_buf(tp, bp, 0, sizeof(struct xfs_dsb)); |
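The two counter accessors used throughout this series differ in cost and accuracy, which explains the split above: percpu_counter_read() is a cheap, possibly stale snapshot, good enough for the heuristic m_maxicount checks in xfs_ialloc.c, while percpu_counter_sum() folds in every CPU's local delta for an exact value, which is why xfs_log_sb() sums the counters before they hit the on-disk superblock. A minimal standalone sketch (not part of the patch; the helper name is hypothetical):

    #include <linux/percpu_counter.h>

    /* hypothetical helper: how much drift the per-CPU deltas currently hold */
    static s64 counter_drift(struct percpu_counter *c)
    {
            s64 fast  = percpu_counter_read(c);     /* lockless, approximate */
            s64 exact = percpu_counter_sum(c);      /* walks all CPUs, precise */

            return exact - fast;
    }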
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 1d8eef9cf0f5..a56960dd1684 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c | |||
@@ -1232,6 +1232,117 @@ xfs_vm_releasepage( | |||
1232 | return try_to_free_buffers(page); | 1232 | return try_to_free_buffers(page); |
1233 | } | 1233 | } |
1234 | 1234 | ||
1235 | /* | ||
1236 | * When we map a DIO buffer, we may need to attach an ioend that describes the | ||
1237 | * type of write IO we are doing. This passes to the completion function the | ||
1238 | * operations it needs to perform. If the mapping is for an overwrite wholly | ||
1239 | * within the EOF then we don't need an ioend and so we don't allocate one. | ||
1240 | * This avoids the unnecessary overhead of allocating and freeing ioends for | ||
1241 | * workloads that don't require transactions on IO completion. | ||
1242 | * | ||
1243 | * If we get multiple mappings in a single IO, we might be mapping different | ||
1244 | * types. But because the direct IO can only have a single private pointer, we | ||
1245 | * need to ensure that: | ||
1246 | * | ||
1247 | * a) i) the ioend spans the entire region of unwritten mappings; or | ||
1248 | * ii) the ioend spans all the mappings that cross or are beyond EOF; and | ||
1249 | * b) if it contains unwritten extents, it is *permanently* marked as such | ||
1250 | * | ||
1251 | * We could do this by chaining ioends like buffered IO does, but we only | ||
1252 | * actually get one IO completion callback from the direct IO, and that spans | ||
1253 | * the entire IO regardless of how many mappings and IOs are needed to complete | ||
1254 | * the DIO. There is only going to be one reference to the ioend and its life | ||
1255 | * cycle is constrained by the DIO completion code. Hence we don't need | ||
1256 | * reference counting here. | ||
1257 | */ | ||
1258 | static void | ||
1259 | xfs_map_direct( | ||
1260 | struct inode *inode, | ||
1261 | struct buffer_head *bh_result, | ||
1262 | struct xfs_bmbt_irec *imap, | ||
1263 | xfs_off_t offset) | ||
1264 | { | ||
1265 | struct xfs_ioend *ioend; | ||
1266 | xfs_off_t size = bh_result->b_size; | ||
1267 | int type; | ||
1268 | |||
1269 | if (ISUNWRITTEN(imap)) | ||
1270 | type = XFS_IO_UNWRITTEN; | ||
1271 | else | ||
1272 | type = XFS_IO_OVERWRITE; | ||
1273 | |||
1274 | trace_xfs_gbmap_direct(XFS_I(inode), offset, size, type, imap); | ||
1275 | |||
1276 | if (bh_result->b_private) { | ||
1277 | ioend = bh_result->b_private; | ||
1278 | ASSERT(ioend->io_size > 0); | ||
1279 | ASSERT(offset >= ioend->io_offset); | ||
1280 | if (offset + size > ioend->io_offset + ioend->io_size) | ||
1281 | ioend->io_size = offset - ioend->io_offset + size; | ||
1282 | |||
1283 | if (type == XFS_IO_UNWRITTEN && type != ioend->io_type) | ||
1284 | ioend->io_type = XFS_IO_UNWRITTEN; | ||
1285 | |||
1286 | trace_xfs_gbmap_direct_update(XFS_I(inode), ioend->io_offset, | ||
1287 | ioend->io_size, ioend->io_type, | ||
1288 | imap); | ||
1289 | } else if (type == XFS_IO_UNWRITTEN || | ||
1290 | offset + size > i_size_read(inode)) { | ||
1291 | ioend = xfs_alloc_ioend(inode, type); | ||
1292 | ioend->io_offset = offset; | ||
1293 | ioend->io_size = size; | ||
1294 | |||
1295 | bh_result->b_private = ioend; | ||
1296 | set_buffer_defer_completion(bh_result); | ||
1297 | |||
1298 | trace_xfs_gbmap_direct_new(XFS_I(inode), offset, size, type, | ||
1299 | imap); | ||
1300 | } else { | ||
1301 | trace_xfs_gbmap_direct_none(XFS_I(inode), offset, size, type, | ||
1302 | imap); | ||
1303 | } | ||
1304 | } | ||
1305 | |||
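To make the rules above concrete, a walk-through with invented numbers (illustration only, not patch code):

    /*
     * A 1MB DIO write at offset 0 that get_blocks maps in two pieces,
     * 512k unwritten followed by 512k plain overwrite, all within EOF:
     *
     *  1st mapping: unwritten, so allocate an ioend {offset 0, size 512k,
     *     type XFS_IO_UNWRITTEN} and stash it in bh_result->b_private.
     *  2nd mapping: overwrite, but b_private is already set, so the
     *     existing ioend grows to {offset 0, size 1M} and keeps type
     *     XFS_IO_UNWRITTEN, per rule (b): the unwritten marking is sticky.
     */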
1306 | /* | ||
1307 | * If this is O_DIRECT or the mpage code calling, tell them how large the mapping | ||
1308 | * is, so that we can avoid repeated get_blocks calls. | ||
1309 | * | ||
1310 | * If the mapping spans EOF, then we have to break the mapping up as the mapping | ||
1311 | * for blocks beyond EOF must be marked new so that sub block regions can be | ||
1312 | * correctly zeroed. We can't do this for mappings within EOF unless the mapping | ||
1313 | * was just allocated or is unwritten, otherwise the callers would overwrite | ||
1314 | * existing data with zeros. Hence we have to split the mapping into a range up | ||
1315 | * to and including EOF, and a second mapping for beyond EOF. | ||
1316 | */ | ||
1317 | static void | ||
1318 | xfs_map_trim_size( | ||
1319 | struct inode *inode, | ||
1320 | sector_t iblock, | ||
1321 | struct buffer_head *bh_result, | ||
1322 | struct xfs_bmbt_irec *imap, | ||
1323 | xfs_off_t offset, | ||
1324 | ssize_t size) | ||
1325 | { | ||
1326 | xfs_off_t mapping_size; | ||
1327 | |||
1328 | mapping_size = imap->br_startoff + imap->br_blockcount - iblock; | ||
1329 | mapping_size <<= inode->i_blkbits; | ||
1330 | |||
1331 | ASSERT(mapping_size > 0); | ||
1332 | if (mapping_size > size) | ||
1333 | mapping_size = size; | ||
1334 | if (offset < i_size_read(inode) && | ||
1335 | offset + mapping_size >= i_size_read(inode)) { | ||
1336 | /* limit mapping to block that spans EOF */ | ||
1337 | mapping_size = roundup_64(i_size_read(inode) - offset, | ||
1338 | 1 << inode->i_blkbits); | ||
1339 | } | ||
1340 | if (mapping_size > LONG_MAX) | ||
1341 | mapping_size = LONG_MAX; | ||
1342 | |||
1343 | bh_result->b_size = mapping_size; | ||
1344 | } | ||
1345 | |||
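A worked example of the trim arithmetic, with hypothetical numbers:

    /*
     * 4k blocks (i_blkbits = 12), iblock = 2 (byte offset 8192), the
     * extent covers blocks 2..5, requested size = 16384, i_size = 10000:
     *
     *   mapping_size = (2 + 4 - 2) << 12 = 16384  (whole extent fits)
     *   8192 < 10000 and 8192 + 16384 >= 10000, so trim to
     *   roundup_64(10000 - 8192, 4096) = 4096     (the block spanning EOF)
     *
     * The caller then issues another get_blocks call for the post-EOF
     * remainder, which is mapped "new" so sub-block zeroing works.
     */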
1235 | STATIC int | 1346 | STATIC int |
1236 | __xfs_get_blocks( | 1347 | __xfs_get_blocks( |
1237 | struct inode *inode, | 1348 | struct inode *inode, |
@@ -1320,31 +1431,37 @@ __xfs_get_blocks( | |||
1320 | 1431 | ||
1321 | xfs_iunlock(ip, lockmode); | 1432 | xfs_iunlock(ip, lockmode); |
1322 | } | 1433 | } |
1323 | 1434 | trace_xfs_get_blocks_alloc(ip, offset, size, | |
1324 | trace_xfs_get_blocks_alloc(ip, offset, size, 0, &imap); | 1435 | ISUNWRITTEN(&imap) ? XFS_IO_UNWRITTEN |
1436 | : XFS_IO_DELALLOC, &imap); | ||
1325 | } else if (nimaps) { | 1437 | } else if (nimaps) { |
1326 | trace_xfs_get_blocks_found(ip, offset, size, 0, &imap); | 1438 | trace_xfs_get_blocks_found(ip, offset, size, |
1439 | ISUNWRITTEN(&imap) ? XFS_IO_UNWRITTEN | ||
1440 | : XFS_IO_OVERWRITE, &imap); | ||
1327 | xfs_iunlock(ip, lockmode); | 1441 | xfs_iunlock(ip, lockmode); |
1328 | } else { | 1442 | } else { |
1329 | trace_xfs_get_blocks_notfound(ip, offset, size); | 1443 | trace_xfs_get_blocks_notfound(ip, offset, size); |
1330 | goto out_unlock; | 1444 | goto out_unlock; |
1331 | } | 1445 | } |
1332 | 1446 | ||
1447 | /* trim mapping down to size requested */ | ||
1448 | if (direct || size > (1 << inode->i_blkbits)) | ||
1449 | xfs_map_trim_size(inode, iblock, bh_result, | ||
1450 | &imap, offset, size); | ||
1451 | |||
1452 | /* | ||
1453 | * For unwritten extents do not report a disk address in the buffered | ||
1454 | * read case (treat as if we're reading into a hole). | ||
1455 | */ | ||
1333 | if (imap.br_startblock != HOLESTARTBLOCK && | 1456 | if (imap.br_startblock != HOLESTARTBLOCK && |
1334 | imap.br_startblock != DELAYSTARTBLOCK) { | 1457 | imap.br_startblock != DELAYSTARTBLOCK && |
1335 | /* | 1458 | (create || !ISUNWRITTEN(&imap))) { |
1336 | * For unwritten extents do not report a disk address on | 1459 | xfs_map_buffer(inode, bh_result, &imap, offset); |
1337 | * the read case (treat as if we're reading into a hole). | 1460 | if (ISUNWRITTEN(&imap)) |
1338 | */ | ||
1339 | if (create || !ISUNWRITTEN(&imap)) | ||
1340 | xfs_map_buffer(inode, bh_result, &imap, offset); | ||
1341 | if (create && ISUNWRITTEN(&imap)) { | ||
1342 | if (direct) { | ||
1343 | bh_result->b_private = inode; | ||
1344 | set_buffer_defer_completion(bh_result); | ||
1345 | } | ||
1346 | set_buffer_unwritten(bh_result); | 1461 | set_buffer_unwritten(bh_result); |
1347 | } | 1462 | /* direct IO needs special help */ |
1463 | if (create && direct) | ||
1464 | xfs_map_direct(inode, bh_result, &imap, offset); | ||
1348 | } | 1465 | } |
1349 | 1466 | ||
1350 | /* | 1467 | /* |
@@ -1377,39 +1494,6 @@ __xfs_get_blocks( | |||
1377 | } | 1494 | } |
1378 | } | 1495 | } |
1379 | 1496 | ||
1380 | /* | ||
1381 | * If this is O_DIRECT or the mpage code calling tell them how large | ||
1382 | * the mapping is, so that we can avoid repeated get_blocks calls. | ||
1383 | * | ||
1384 | * If the mapping spans EOF, then we have to break the mapping up as the | ||
1385 | * mapping for blocks beyond EOF must be marked new so that sub block | ||
1386 | * regions can be correctly zeroed. We can't do this for mappings within | ||
1387 | * EOF unless the mapping was just allocated or is unwritten, otherwise | ||
1388 | * the callers would overwrite existing data with zeros. Hence we have | ||
1389 | * to split the mapping into a range up to and including EOF, and a | ||
1390 | * second mapping for beyond EOF. | ||
1391 | */ | ||
1392 | if (direct || size > (1 << inode->i_blkbits)) { | ||
1393 | xfs_off_t mapping_size; | ||
1394 | |||
1395 | mapping_size = imap.br_startoff + imap.br_blockcount - iblock; | ||
1396 | mapping_size <<= inode->i_blkbits; | ||
1397 | |||
1398 | ASSERT(mapping_size > 0); | ||
1399 | if (mapping_size > size) | ||
1400 | mapping_size = size; | ||
1401 | if (offset < i_size_read(inode) && | ||
1402 | offset + mapping_size >= i_size_read(inode)) { | ||
1403 | /* limit mapping to block that spans EOF */ | ||
1404 | mapping_size = roundup_64(i_size_read(inode) - offset, | ||
1405 | 1 << inode->i_blkbits); | ||
1406 | } | ||
1407 | if (mapping_size > LONG_MAX) | ||
1408 | mapping_size = LONG_MAX; | ||
1409 | |||
1410 | bh_result->b_size = mapping_size; | ||
1411 | } | ||
1412 | |||
1413 | return 0; | 1497 | return 0; |
1414 | 1498 | ||
1415 | out_unlock: | 1499 | out_unlock: |
@@ -1440,9 +1524,11 @@ xfs_get_blocks_direct( | |||
1440 | /* | 1524 | /* |
1441 | * Complete a direct I/O write request. | 1525 | * Complete a direct I/O write request. |
1442 | * | 1526 | * |
1443 | * If the private argument is non-NULL __xfs_get_blocks signals us that we | 1527 | * The ioend structure is passed from __xfs_get_blocks() to tell us what to do. |
1444 | * need to issue a transaction to convert the range from unwritten to written | 1528 | * If no ioend exists (i.e. @private == NULL) then the write IO is an overwrite |
1445 | * extents. | 1529 | * wholly within the EOF and so there is nothing for us to do. Note that in this |
1530 | * case the completion can be called in interrupt context, whereas if we have an | ||
1531 | * ioend we will always be called in task context (i.e. from a workqueue). | ||
1446 | */ | 1532 | */ |
1447 | STATIC void | 1533 | STATIC void |
1448 | xfs_end_io_direct_write( | 1534 | xfs_end_io_direct_write( |
@@ -1454,43 +1540,71 @@ xfs_end_io_direct_write( | |||
1454 | struct inode *inode = file_inode(iocb->ki_filp); | 1540 | struct inode *inode = file_inode(iocb->ki_filp); |
1455 | struct xfs_inode *ip = XFS_I(inode); | 1541 | struct xfs_inode *ip = XFS_I(inode); |
1456 | struct xfs_mount *mp = ip->i_mount; | 1542 | struct xfs_mount *mp = ip->i_mount; |
1543 | struct xfs_ioend *ioend = private; | ||
1457 | 1544 | ||
1458 | if (XFS_FORCED_SHUTDOWN(mp)) | 1545 | trace_xfs_gbmap_direct_endio(ip, offset, size, |
1546 | ioend ? ioend->io_type : 0, NULL); | ||
1547 | |||
1548 | if (!ioend) { | ||
1549 | ASSERT(offset + size <= i_size_read(inode)); | ||
1459 | return; | 1550 | return; |
1551 | } | ||
1552 | |||
1553 | if (XFS_FORCED_SHUTDOWN(mp)) | ||
1554 | goto out_end_io; | ||
1460 | 1555 | ||
1461 | /* | 1556 | /* |
1462 | * While the generic direct I/O code updates the inode size, it does | 1557 | * dio completion end_io functions are only called on writes if more |
1463 | * so only after the end_io handler is called, which means our | 1558 | * than 0 bytes were written. |
1464 | * end_io handler thinks the on-disk size is outside the in-core | ||
1465 | * size. To prevent this just update it a little bit earlier here. | ||
1466 | */ | 1559 | */ |
1560 | ASSERT(size > 0); | ||
1561 | |||
1562 | /* | ||
1563 | * The ioend only maps whole blocks, while the IO may be sector aligned. | ||
1564 | * Hence the ioend offset/size may not match the IO offset/size exactly. | ||
1565 | * Because we don't map overwrites within EOF into the ioend, the offset | ||
1566 | * may not match, but only if the endio spans EOF. Either way, write | ||
1567 | * the IO sizes into the ioend so that completion processing does the | ||
1568 | * right thing. | ||
1569 | */ | ||
1570 | ASSERT(offset + size <= ioend->io_offset + ioend->io_size); | ||
1571 | ioend->io_size = size; | ||
1572 | ioend->io_offset = offset; | ||
1573 | |||
1574 | /* | ||
1575 | * The ioend tells us whether we are doing unwritten extent conversion | ||
1576 | * or an append transaction that updates the on-disk file size. These | ||
1577 | * cases are the only ones where we might *potentially* need | ||
1578 | * to update the VFS inode size. | ||
1579 | * | ||
1580 | * We need to update the in-core inode size here so that we don't end up | ||
1581 | * with the on-disk inode size being outside the in-core inode size. We | ||
1582 | * have no other method of updating EOF for AIO, so always do it here | ||
1583 | * if necessary. | ||
1584 | * | ||
1585 | * We need to lock the test/set EOF update as we can be racing with | ||
1586 | * other IO completions here to update the EOF. Failing to serialise | ||
1587 | * here can result in EOF moving backwards and Bad Things Happen when | ||
1588 | * that occurs. | ||
1589 | */ | ||
1590 | spin_lock(&ip->i_flags_lock); | ||
1467 | if (offset + size > i_size_read(inode)) | 1591 | if (offset + size > i_size_read(inode)) |
1468 | i_size_write(inode, offset + size); | 1592 | i_size_write(inode, offset + size); |
1593 | spin_unlock(&ip->i_flags_lock); | ||
1469 | 1594 | ||
1470 | /* | 1595 | /* |
1471 | * For direct I/O we do not know if we need to allocate blocks or not, | 1596 | * If we are doing an append IO that needs to update the EOF on disk, |
1472 | * so we can't preallocate an append transaction, as that results in | 1597 | * do the transaction reserve now so we can use common end io |
1473 | * nested reservations and log space deadlocks. Hence allocate the | 1598 | * processing. Stashing the error (if there is one) in the ioend will |
1474 | * transaction here. While this is sub-optimal and can block IO | 1599 | * result in the ioend processing passing the error on where |
1475 | * completion for some time, we're stuck with doing it this way until | 1600 | * possible, as we can't return it from here. |
1476 | * we can pass the ioend to the direct IO allocation callbacks and | ||
1477 | * avoid nesting that way. | ||
1478 | */ | 1601 | */ |
1479 | if (private && size > 0) { | 1602 | if (ioend->io_type == XFS_IO_OVERWRITE) |
1480 | xfs_iomap_write_unwritten(ip, offset, size); | 1603 | ioend->io_error = xfs_setfilesize_trans_alloc(ioend); |
1481 | } else if (offset + size > ip->i_d.di_size) { | ||
1482 | struct xfs_trans *tp; | ||
1483 | int error; | ||
1484 | |||
1485 | tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS); | ||
1486 | error = xfs_trans_reserve(tp, &M_RES(mp)->tr_fsyncts, 0, 0); | ||
1487 | if (error) { | ||
1488 | xfs_trans_cancel(tp, 0); | ||
1489 | return; | ||
1490 | } | ||
1491 | 1604 | ||
1492 | xfs_setfilesize(ip, tp, offset, size); | 1605 | out_end_io: |
1493 | } | 1606 | xfs_end_io(&ioend->io_work); |
1607 | return; | ||
1494 | } | 1608 | } |
1495 | 1609 | ||
1496 | STATIC ssize_t | 1610 | STATIC ssize_t |
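To see why unserialised completions can move EOF backwards, consider a hypothetical interleaving of two AIO completions for [0, 4k) and [4k, 8k) without the lock:

    /*
     *   CPU0 (IO ends at 4096)          CPU1 (IO ends at 8192)
     *   i_size_read() returns 0
     *                                   i_size_read() returns 0
     *                                   i_size_write(inode, 8192)
     *   i_size_write(inode, 4096)       <- EOF just moved backwards
     *
     * Under i_flags_lock each completion re-reads i_size and only ever
     * extends it, so the final size is 8192 regardless of ordering.
     */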
diff --git a/fs/xfs/xfs_attr_inactive.c b/fs/xfs/xfs_attr_inactive.c index 83af4c149635..f9c1c64782d3 100644 --- a/fs/xfs/xfs_attr_inactive.c +++ b/fs/xfs/xfs_attr_inactive.c | |||
@@ -132,9 +132,10 @@ xfs_attr3_leaf_inactive( | |||
132 | int size; | 132 | int size; |
133 | int tmp; | 133 | int tmp; |
134 | int i; | 134 | int i; |
135 | struct xfs_mount *mp = bp->b_target->bt_mount; | ||
135 | 136 | ||
136 | leaf = bp->b_addr; | 137 | leaf = bp->b_addr; |
137 | xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf); | 138 | xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr, leaf); |
138 | 139 | ||
139 | /* | 140 | /* |
140 | * Count the number of "remote" value extents. | 141 | * Count the number of "remote" value extents. |
diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c index a43d370d2c58..65fb37a18e92 100644 --- a/fs/xfs/xfs_attr_list.c +++ b/fs/xfs/xfs_attr_list.c | |||
@@ -225,6 +225,7 @@ xfs_attr_node_list(xfs_attr_list_context_t *context) | |||
225 | int error, i; | 225 | int error, i; |
226 | struct xfs_buf *bp; | 226 | struct xfs_buf *bp; |
227 | struct xfs_inode *dp = context->dp; | 227 | struct xfs_inode *dp = context->dp; |
228 | struct xfs_mount *mp = dp->i_mount; | ||
228 | 229 | ||
229 | trace_xfs_attr_node_list(context); | 230 | trace_xfs_attr_node_list(context); |
230 | 231 | ||
@@ -256,7 +257,8 @@ xfs_attr_node_list(xfs_attr_list_context_t *context) | |||
256 | case XFS_ATTR_LEAF_MAGIC: | 257 | case XFS_ATTR_LEAF_MAGIC: |
257 | case XFS_ATTR3_LEAF_MAGIC: | 258 | case XFS_ATTR3_LEAF_MAGIC: |
258 | leaf = bp->b_addr; | 259 | leaf = bp->b_addr; |
259 | xfs_attr3_leaf_hdr_from_disk(&leafhdr, leaf); | 260 | xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, |
261 | &leafhdr, leaf); | ||
260 | entries = xfs_attr3_leaf_entryp(leaf); | 262 | entries = xfs_attr3_leaf_entryp(leaf); |
261 | if (cursor->hashval > be32_to_cpu( | 263 | if (cursor->hashval > be32_to_cpu( |
262 | entries[leafhdr.count - 1].hashval)) { | 264 | entries[leafhdr.count - 1].hashval)) { |
@@ -340,7 +342,7 @@ xfs_attr_node_list(xfs_attr_list_context_t *context) | |||
340 | xfs_trans_brelse(NULL, bp); | 342 | xfs_trans_brelse(NULL, bp); |
341 | return error; | 343 | return error; |
342 | } | 344 | } |
343 | xfs_attr3_leaf_hdr_from_disk(&leafhdr, leaf); | 345 | xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &leafhdr, leaf); |
344 | if (context->seen_enough || leafhdr.forw == 0) | 346 | if (context->seen_enough || leafhdr.forw == 0) |
345 | break; | 347 | break; |
346 | cursor->blkno = leafhdr.forw; | 348 | cursor->blkno = leafhdr.forw; |
@@ -368,11 +370,12 @@ xfs_attr3_leaf_list_int( | |||
368 | struct xfs_attr_leaf_entry *entry; | 370 | struct xfs_attr_leaf_entry *entry; |
369 | int retval; | 371 | int retval; |
370 | int i; | 372 | int i; |
373 | struct xfs_mount *mp = context->dp->i_mount; | ||
371 | 374 | ||
372 | trace_xfs_attr_list_leaf(context); | 375 | trace_xfs_attr_list_leaf(context); |
373 | 376 | ||
374 | leaf = bp->b_addr; | 377 | leaf = bp->b_addr; |
375 | xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf); | 378 | xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr, leaf); |
376 | entries = xfs_attr3_leaf_entryp(leaf); | 379 | entries = xfs_attr3_leaf_entryp(leaf); |
377 | 380 | ||
378 | cursor = context->cursor; | 381 | cursor = context->cursor; |
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 22a5dcb70b32..a52bbd3abc7d 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c | |||
@@ -1376,22 +1376,19 @@ out: | |||
1376 | } | 1376 | } |
1377 | 1377 | ||
1378 | /* | 1378 | /* |
1379 | * xfs_collapse_file_space() | 1379 | * @next_fsb will keep track of the extent currently undergoing shift. |
1380 | * This routine frees disk space and shift extent for the given file. | 1380 | * @stop_fsb will keep track of the extent at which we have to stop. |
1381 | * The first thing we do is to free data blocks in the specified range | 1381 | * If we are shifting left, we will start with block (offset + len) and |
1382 | * by calling xfs_free_file_space(). It would also sync dirty data | 1382 | * shift each extent until the last extent. |
1383 | * and invalidate page cache over the region on which collapse range | 1383 | * If we are shifting right, we will start with the last extent in the file |
1384 | * is working. And Shift extent records to the left to cover a hole. | 1384 | * and continue until we reach the block corresponding to offset. |
1385 | * RETURNS: | ||
1386 | * 0 on success | ||
1387 | * errno on error | ||
1388 | * | ||
1389 | */ | 1385 | */ |
1390 | int | 1386 | static int |
1391 | xfs_collapse_file_space( | 1387 | xfs_shift_file_space( |
1392 | struct xfs_inode *ip, | 1388 | struct xfs_inode *ip, |
1393 | xfs_off_t offset, | 1389 | xfs_off_t offset, |
1394 | xfs_off_t len) | 1390 | xfs_off_t len, |
1391 | enum shift_direction direction) | ||
1395 | { | 1392 | { |
1396 | int done = 0; | 1393 | int done = 0; |
1397 | struct xfs_mount *mp = ip->i_mount; | 1394 | struct xfs_mount *mp = ip->i_mount; |
@@ -1400,21 +1397,26 @@ xfs_collapse_file_space( | |||
1400 | struct xfs_bmap_free free_list; | 1397 | struct xfs_bmap_free free_list; |
1401 | xfs_fsblock_t first_block; | 1398 | xfs_fsblock_t first_block; |
1402 | int committed; | 1399 | int committed; |
1403 | xfs_fileoff_t start_fsb; | 1400 | xfs_fileoff_t stop_fsb; |
1404 | xfs_fileoff_t next_fsb; | 1401 | xfs_fileoff_t next_fsb; |
1405 | xfs_fileoff_t shift_fsb; | 1402 | xfs_fileoff_t shift_fsb; |
1406 | 1403 | ||
1407 | ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); | 1404 | ASSERT(direction == SHIFT_LEFT || direction == SHIFT_RIGHT); |
1408 | 1405 | ||
1409 | trace_xfs_collapse_file_space(ip); | 1406 | if (direction == SHIFT_LEFT) { |
1407 | next_fsb = XFS_B_TO_FSB(mp, offset + len); | ||
1408 | stop_fsb = XFS_B_TO_FSB(mp, VFS_I(ip)->i_size); | ||
1409 | } else { | ||
1410 | /* | ||
1411 | * For a right shift, delegate the initialisation of | ||
1412 | * next_fsb to xfs_bmap_shift_extents(), which runs with the ilock held. | ||
1413 | */ | ||
1414 | next_fsb = NULLFSBLOCK; | ||
1415 | stop_fsb = XFS_B_TO_FSB(mp, offset); | ||
1416 | } | ||
1410 | 1417 | ||
1411 | next_fsb = XFS_B_TO_FSB(mp, offset + len); | ||
1412 | shift_fsb = XFS_B_TO_FSB(mp, len); | 1418 | shift_fsb = XFS_B_TO_FSB(mp, len); |
1413 | 1419 | ||
1414 | error = xfs_free_file_space(ip, offset, len); | ||
1415 | if (error) | ||
1416 | return error; | ||
1417 | |||
1418 | /* | 1420 | /* |
1419 | * Trim eofblocks to avoid shifting uninitialized post-eof preallocation | 1421 | * Trim eofblocks to avoid shifting uninitialized post-eof preallocation |
1420 | * into the accessible region of the file. | 1422 | * into the accessible region of the file. |
@@ -1427,20 +1429,28 @@ xfs_collapse_file_space( | |||
1427 | 1429 | ||
1428 | /* | 1430 | /* |
1429 | * Writeback and invalidate cache for the remainder of the file as we're | 1431 | * Writeback and invalidate cache for the remainder of the file as we're |
1430 | * about to shift down every extent from the collapse range to EOF. The | 1432 | * about to shift down every extent from offset to EOF. |
1431 | * free of the collapse range above might have already done some of | ||
1432 | * this, but we shouldn't rely on it to do anything outside of the range | ||
1433 | * that was freed. | ||
1434 | */ | 1433 | */ |
1435 | error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, | 1434 | error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, |
1436 | offset + len, -1); | 1435 | offset, -1); |
1437 | if (error) | 1436 | if (error) |
1438 | return error; | 1437 | return error; |
1439 | error = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping, | 1438 | error = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping, |
1440 | (offset + len) >> PAGE_CACHE_SHIFT, -1); | 1439 | offset >> PAGE_CACHE_SHIFT, -1); |
1441 | if (error) | 1440 | if (error) |
1442 | return error; | 1441 | return error; |
1443 | 1442 | ||
1443 | /* | ||
1444 | * The extent shifting code works at extent granularity. So, if | ||
1445 | * stop_fsb is not the starting block of an extent, we need to split | ||
1446 | * the extent at stop_fsb. | ||
1447 | */ | ||
1448 | if (direction == SHIFT_RIGHT) { | ||
1449 | error = xfs_bmap_split_extent(ip, stop_fsb); | ||
1450 | if (error) | ||
1451 | return error; | ||
1452 | } | ||
1453 | |||
1444 | while (!error && !done) { | 1454 | while (!error && !done) { |
1445 | tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); | 1455 | tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); |
1446 | /* | 1456 | /* |
@@ -1464,7 +1474,7 @@ xfs_collapse_file_space( | |||
1464 | if (error) | 1474 | if (error) |
1465 | goto out; | 1475 | goto out; |
1466 | 1476 | ||
1467 | xfs_trans_ijoin(tp, ip, 0); | 1477 | xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); |
1468 | 1478 | ||
1469 | xfs_bmap_init(&free_list, &first_block); | 1479 | xfs_bmap_init(&free_list, &first_block); |
1470 | 1480 | ||
@@ -1472,10 +1482,9 @@ xfs_collapse_file_space( | |||
1472 | * We are using the write transaction in which max 2 bmbt | 1482 | * We are using the write transaction in which max 2 bmbt |
1473 | * updates are allowed | 1483 | * updates are allowed |
1474 | */ | 1484 | */ |
1475 | start_fsb = next_fsb; | 1485 | error = xfs_bmap_shift_extents(tp, ip, &next_fsb, shift_fsb, |
1476 | error = xfs_bmap_shift_extents(tp, ip, start_fsb, shift_fsb, | 1486 | &done, stop_fsb, &first_block, &free_list, |
1477 | &done, &next_fsb, &first_block, &free_list, | 1487 | direction, XFS_BMAP_MAX_SHIFT_EXTENTS); |
1478 | XFS_BMAP_MAX_SHIFT_EXTENTS); | ||
1479 | if (error) | 1488 | if (error) |
1480 | goto out; | 1489 | goto out; |
1481 | 1490 | ||
@@ -1484,18 +1493,70 @@ xfs_collapse_file_space( | |||
1484 | goto out; | 1493 | goto out; |
1485 | 1494 | ||
1486 | error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); | 1495 | error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); |
1487 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
1488 | } | 1496 | } |
1489 | 1497 | ||
1490 | return error; | 1498 | return error; |
1491 | 1499 | ||
1492 | out: | 1500 | out: |
1493 | xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); | 1501 | xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); |
1494 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
1495 | return error; | 1502 | return error; |
1496 | } | 1503 | } |
1497 | 1504 | ||
1498 | /* | 1505 | /* |
1506 | * xfs_collapse_file_space() | ||
1507 | * This routine frees disk space and shifts extents for the given file. | ||
1508 | * The first thing we do is free the data blocks in the specified range | ||
1509 | * by calling xfs_free_file_space(), which also syncs dirty data and | ||
1510 | * invalidates the page cache over the region the collapse range is | ||
1511 | * working on. Extent records are then shifted left to cover the hole. | ||
1512 | * RETURNS: | ||
1513 | * 0 on success | ||
1514 | * errno on error | ||
1515 | * | ||
1516 | */ | ||
1517 | int | ||
1518 | xfs_collapse_file_space( | ||
1519 | struct xfs_inode *ip, | ||
1520 | xfs_off_t offset, | ||
1521 | xfs_off_t len) | ||
1522 | { | ||
1523 | int error; | ||
1524 | |||
1525 | ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); | ||
1526 | trace_xfs_collapse_file_space(ip); | ||
1527 | |||
1528 | error = xfs_free_file_space(ip, offset, len); | ||
1529 | if (error) | ||
1530 | return error; | ||
1531 | |||
1532 | return xfs_shift_file_space(ip, offset, len, SHIFT_LEFT); | ||
1533 | } | ||
1534 | |||
1535 | /* | ||
1536 | * xfs_insert_file_space() | ||
1537 | * This routine creates hole space by shifting extents for the given file. | ||
1538 | * The first thing we do is sync dirty data and invalidate the page cache | ||
1539 | * over the region on which the insert range is working. The extent at the | ||
1540 | * given offset is then split in two by calling xfs_bmap_split_extent, and | ||
1541 | * all extent records lying between [offset, last allocated extent] are | ||
1542 | * shifted right to make room for the hole. | ||
1543 | * RETURNS: | ||
1544 | * 0 on success | ||
1545 | * errno on error | ||
1546 | */ | ||
1547 | int | ||
1548 | xfs_insert_file_space( | ||
1549 | struct xfs_inode *ip, | ||
1550 | loff_t offset, | ||
1551 | loff_t len) | ||
1552 | { | ||
1553 | ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); | ||
1554 | trace_xfs_insert_file_space(ip); | ||
1555 | |||
1556 | return xfs_shift_file_space(ip, offset, len, SHIFT_RIGHT); | ||
1557 | } | ||
1558 | |||
1559 | /* | ||
1499 | * We need to check that the format of the data fork in the temporary inode is | 1560 | * We need to check that the format of the data fork in the temporary inode is |
1500 | * valid for the target inode before doing the swap. This is not a problem with | 1561 | * valid for the target inode before doing the swap. This is not a problem with |
1501 | * attr1 because of the fixed fork offset, but attr2 has a dynamically sized | 1562 | * attr1 because of the fixed fork offset, but attr2 has a dynamically sized |
@@ -1599,13 +1660,6 @@ xfs_swap_extent_flush( | |||
1599 | /* Verify O_DIRECT for ftmp */ | 1660 | /* Verify O_DIRECT for ftmp */ |
1600 | if (VFS_I(ip)->i_mapping->nrpages) | 1661 | if (VFS_I(ip)->i_mapping->nrpages) |
1601 | return -EINVAL; | 1662 | return -EINVAL; |
1602 | |||
1603 | /* | ||
1604 | * Don't try to swap extents on mmap()d files because we can't lock | ||
1605 | * out races against page faults safely. | ||
1606 | */ | ||
1607 | if (mapping_mapped(VFS_I(ip)->i_mapping)) | ||
1608 | return -EBUSY; | ||
1609 | return 0; | 1663 | return 0; |
1610 | } | 1664 | } |
1611 | 1665 | ||
@@ -1633,13 +1687,14 @@ xfs_swap_extents( | |||
1633 | } | 1687 | } |
1634 | 1688 | ||
1635 | /* | 1689 | /* |
1636 | * Lock up the inodes against other IO and truncate to begin with. | 1690 | * Lock the inodes against other IO, page faults and truncate to |
1637 | * Then we can ensure the inodes are flushed and have no page cache | 1691 | * begin with. Then we can ensure the inodes are flushed and have no |
1638 | * safely. Once we have done this we can take the ilocks and do the rest | 1692 | * page cache safely. Once we have done this we can take the ilocks and |
1639 | * of the checks. | 1693 | * do the rest of the checks. |
1640 | */ | 1694 | */ |
1641 | lock_flags = XFS_IOLOCK_EXCL; | 1695 | lock_flags = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL; |
1642 | xfs_lock_two_inodes(ip, tip, XFS_IOLOCK_EXCL); | 1696 | xfs_lock_two_inodes(ip, tip, XFS_IOLOCK_EXCL); |
1697 | xfs_lock_two_inodes(ip, tip, XFS_MMAPLOCK_EXCL); | ||
1643 | 1698 | ||
1644 | /* Verify that both files have the same format */ | 1699 | /* Verify that both files have the same format */ |
1645 | if ((ip->i_d.di_mode & S_IFMT) != (tip->i_d.di_mode & S_IFMT)) { | 1700 | if ((ip->i_d.di_mode & S_IFMT) != (tip->i_d.di_mode & S_IFMT)) { |
@@ -1666,8 +1721,16 @@ xfs_swap_extents( | |||
1666 | xfs_trans_cancel(tp, 0); | 1721 | xfs_trans_cancel(tp, 0); |
1667 | goto out_unlock; | 1722 | goto out_unlock; |
1668 | } | 1723 | } |
1724 | |||
1725 | /* | ||
1726 | * Lock and join the inodes to the transaction so that transaction commit | ||
1727 | * or cancel will unlock the inodes from this point onwards. | ||
1728 | */ | ||
1669 | xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL); | 1729 | xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL); |
1670 | lock_flags |= XFS_ILOCK_EXCL; | 1730 | lock_flags |= XFS_ILOCK_EXCL; |
1731 | xfs_trans_ijoin(tp, ip, lock_flags); | ||
1732 | xfs_trans_ijoin(tp, tip, lock_flags); | ||
1733 | |||
1671 | 1734 | ||
1672 | /* Verify all data are being swapped */ | 1735 | /* Verify all data are being swapped */ |
1673 | if (sxp->sx_offset != 0 || | 1736 | if (sxp->sx_offset != 0 || |
@@ -1720,9 +1783,6 @@ xfs_swap_extents( | |||
1720 | goto out_trans_cancel; | 1783 | goto out_trans_cancel; |
1721 | } | 1784 | } |
1722 | 1785 | ||
1723 | xfs_trans_ijoin(tp, ip, lock_flags); | ||
1724 | xfs_trans_ijoin(tp, tip, lock_flags); | ||
1725 | |||
1726 | /* | 1786 | /* |
1727 | * Before we've swapped the forks, lets set the owners of the forks | 1787 | * Before we've swapped the forks, lets set the owners of the forks |
1728 | * appropriately. We have to do this as we are demand paging the btree | 1788 | * appropriately. We have to do this as we are demand paging the btree |
@@ -1856,5 +1916,5 @@ out_unlock: | |||
1856 | 1916 | ||
1857 | out_trans_cancel: | 1917 | out_trans_cancel: |
1858 | xfs_trans_cancel(tp, 0); | 1918 | xfs_trans_cancel(tp, 0); |
1859 | goto out_unlock; | 1919 | goto out; |
1860 | } | 1920 | } |
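A standalone toy model of the direction rule in xfs_shift_file_space() above (not XFS code; bare start offsets, ignoring splits and merges). The iteration order mirrors why the kernel walks right shifts from the last extent backwards: on disk extents may never overlap, so each one must move into space that has already been vacated:

    #include <stddef.h>

    static void shift_extents(unsigned long long *start, size_t n,
                              unsigned long long shift, int shift_left)
    {
            size_t i;

            if (shift_left) {
                    for (i = 0; i < n; i++)         /* collapse: low to high */
                            start[i] -= shift;
            } else {
                    for (i = n; i-- > 0; )          /* insert: high to low */
                            start[i] += shift;
            }
    }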
diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h index 736429a72a12..af97d9a1dfb4 100644 --- a/fs/xfs/xfs_bmap_util.h +++ b/fs/xfs/xfs_bmap_util.h | |||
@@ -63,6 +63,8 @@ int xfs_zero_file_space(struct xfs_inode *ip, xfs_off_t offset, | |||
63 | xfs_off_t len); | 63 | xfs_off_t len); |
64 | int xfs_collapse_file_space(struct xfs_inode *, xfs_off_t offset, | 64 | int xfs_collapse_file_space(struct xfs_inode *, xfs_off_t offset, |
65 | xfs_off_t len); | 65 | xfs_off_t len); |
66 | int xfs_insert_file_space(struct xfs_inode *, xfs_off_t offset, | ||
67 | xfs_off_t len); | ||
66 | 68 | ||
67 | /* EOF block manipulation functions */ | 69 | /* EOF block manipulation functions */ |
68 | bool xfs_can_free_eofblocks(struct xfs_inode *ip, bool force); | 70 | bool xfs_can_free_eofblocks(struct xfs_inode *ip, bool force); |
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 507d96a57ac7..092d652bc03d 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c | |||
@@ -537,9 +537,9 @@ xfs_buf_item_push( | |||
537 | 537 | ||
538 | /* has a previous flush failed due to IO errors? */ | 538 | /* has a previous flush failed due to IO errors? */ |
539 | if ((bp->b_flags & XBF_WRITE_FAIL) && | 539 | if ((bp->b_flags & XBF_WRITE_FAIL) && |
540 | ___ratelimit(&xfs_buf_write_fail_rl_state, "XFS:")) { | 540 | ___ratelimit(&xfs_buf_write_fail_rl_state, "XFS: Failing async write")) { |
541 | xfs_warn(bp->b_target->bt_mount, | 541 | xfs_warn(bp->b_target->bt_mount, |
542 | "Detected failing async write on buffer block 0x%llx. Retrying async write.", | 542 | "Failing async write on buffer block 0x%llx. Retrying async write.", |
543 | (long long)bp->b_bn); | 543 | (long long)bp->b_bn); |
544 | } | 544 | } |
545 | 545 | ||
diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c index 799e5a2d334d..e85a9519a5ae 100644 --- a/fs/xfs/xfs_discard.c +++ b/fs/xfs/xfs_discard.c | |||
@@ -84,7 +84,7 @@ xfs_trim_extents( | |||
84 | error = xfs_alloc_get_rec(cur, &fbno, &flen, &i); | 84 | error = xfs_alloc_get_rec(cur, &fbno, &flen, &i); |
85 | if (error) | 85 | if (error) |
86 | goto out_del_cursor; | 86 | goto out_del_cursor; |
87 | XFS_WANT_CORRUPTED_GOTO(i == 1, out_del_cursor); | 87 | XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_del_cursor); |
88 | ASSERT(flen <= be32_to_cpu(XFS_BUF_TO_AGF(agbp)->agf_longest)); | 88 | ASSERT(flen <= be32_to_cpu(XFS_BUF_TO_AGF(agbp)->agf_longest)); |
89 | 89 | ||
90 | /* | 90 | /* |
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c index 3ee186ac1093..338e50bbfd1e 100644 --- a/fs/xfs/xfs_error.c +++ b/fs/xfs/xfs_error.c | |||
@@ -131,7 +131,7 @@ xfs_error_report( | |||
131 | { | 131 | { |
132 | if (level <= xfs_error_level) { | 132 | if (level <= xfs_error_level) { |
133 | xfs_alert_tag(mp, XFS_PTAG_ERROR_REPORT, | 133 | xfs_alert_tag(mp, XFS_PTAG_ERROR_REPORT, |
134 | "Internal error %s at line %d of file %s. Caller %pF", | 134 | "Internal error %s at line %d of file %s. Caller %pS", |
135 | tag, linenum, filename, ra); | 135 | tag, linenum, filename, ra); |
136 | 136 | ||
137 | xfs_stack_trace(); | 137 | xfs_stack_trace(); |
diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h index 279a76e52791..c0394ed126fc 100644 --- a/fs/xfs/xfs_error.h +++ b/fs/xfs/xfs_error.h | |||
@@ -40,25 +40,25 @@ extern void xfs_verifier_error(struct xfs_buf *bp); | |||
40 | /* | 40 | /* |
41 | * Macros to set EFSCORRUPTED & return/branch. | 41 | * Macros to set EFSCORRUPTED & return/branch. |
42 | */ | 42 | */ |
43 | #define XFS_WANT_CORRUPTED_GOTO(x,l) \ | 43 | #define XFS_WANT_CORRUPTED_GOTO(mp, x, l) \ |
44 | { \ | 44 | { \ |
45 | int fs_is_ok = (x); \ | 45 | int fs_is_ok = (x); \ |
46 | ASSERT(fs_is_ok); \ | 46 | ASSERT(fs_is_ok); \ |
47 | if (unlikely(!fs_is_ok)) { \ | 47 | if (unlikely(!fs_is_ok)) { \ |
48 | XFS_ERROR_REPORT("XFS_WANT_CORRUPTED_GOTO", \ | 48 | XFS_ERROR_REPORT("XFS_WANT_CORRUPTED_GOTO", \ |
49 | XFS_ERRLEVEL_LOW, NULL); \ | 49 | XFS_ERRLEVEL_LOW, mp); \ |
50 | error = -EFSCORRUPTED; \ | 50 | error = -EFSCORRUPTED; \ |
51 | goto l; \ | 51 | goto l; \ |
52 | } \ | 52 | } \ |
53 | } | 53 | } |
54 | 54 | ||
55 | #define XFS_WANT_CORRUPTED_RETURN(x) \ | 55 | #define XFS_WANT_CORRUPTED_RETURN(mp, x) \ |
56 | { \ | 56 | { \ |
57 | int fs_is_ok = (x); \ | 57 | int fs_is_ok = (x); \ |
58 | ASSERT(fs_is_ok); \ | 58 | ASSERT(fs_is_ok); \ |
59 | if (unlikely(!fs_is_ok)) { \ | 59 | if (unlikely(!fs_is_ok)) { \ |
60 | XFS_ERROR_REPORT("XFS_WANT_CORRUPTED_RETURN", \ | 60 | XFS_ERROR_REPORT("XFS_WANT_CORRUPTED_RETURN", \ |
61 | XFS_ERRLEVEL_LOW, NULL); \ | 61 | XFS_ERRLEVEL_LOW, mp); \ |
62 | return -EFSCORRUPTED; \ | 62 | return -EFSCORRUPTED; \ |
63 | } \ | 63 | } \ |
64 | } | 64 | } |
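A minimal caller sketch for the reworked macros (hypothetical function, modelled on the xfs_ialloc.c call sites earlier in this series). The new mount argument lets XFS_ERROR_REPORT identify the filesystem in the log instead of passing NULL; the GOTO form additionally expects an 'error' variable and a target label in scope:

    STATIC int
    example_first_rec(struct xfs_btree_cur *cur)
    {
            int error;
            int i;

            error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i);
            if (error)
                    return error;
            /* a zero stat value here means on-disk corruption */
            XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, i == 1);
            return 0;
    }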
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 1f12ad0a8585..8121e75352ee 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c | |||
@@ -559,7 +559,7 @@ restart: | |||
559 | if (error <= 0) | 559 | if (error <= 0) |
560 | return error; | 560 | return error; |
561 | 561 | ||
562 | error = xfs_break_layouts(inode, iolock); | 562 | error = xfs_break_layouts(inode, iolock, true); |
563 | if (error) | 563 | if (error) |
564 | return error; | 564 | return error; |
565 | 565 | ||
@@ -569,21 +569,42 @@ restart: | |||
569 | * write. If zeroing is needed and we are currently holding the | 569 | * write. If zeroing is needed and we are currently holding the |
570 | * iolock shared, we need to update it to exclusive which implies | 570 | * iolock shared, we need to update it to exclusive which implies |
571 | * having to redo all checks before. | 571 | * having to redo all checks before. |
572 | * | ||
573 | * We need to serialise against EOF updates that occur in IO | ||
574 | * completions here. We want to make sure that nobody is changing the | ||
575 | * size while we do this check until we have placed an IO barrier (i.e. | ||
576 | * hold the XFS_IOLOCK_EXCL) that prevents new IO from being dispatched. | ||
577 | * The spinlock effectively forms a memory barrier once we have the | ||
578 | * XFS_IOLOCK_EXCL so we are guaranteed to see the latest EOF value | ||
579 | * and hence be able to correctly determine if we need to run zeroing. | ||
572 | */ | 580 | */ |
581 | spin_lock(&ip->i_flags_lock); | ||
573 | if (iocb->ki_pos > i_size_read(inode)) { | 582 | if (iocb->ki_pos > i_size_read(inode)) { |
574 | bool zero = false; | 583 | bool zero = false; |
575 | 584 | ||
585 | spin_unlock(&ip->i_flags_lock); | ||
576 | if (*iolock == XFS_IOLOCK_SHARED) { | 586 | if (*iolock == XFS_IOLOCK_SHARED) { |
577 | xfs_rw_iunlock(ip, *iolock); | 587 | xfs_rw_iunlock(ip, *iolock); |
578 | *iolock = XFS_IOLOCK_EXCL; | 588 | *iolock = XFS_IOLOCK_EXCL; |
579 | xfs_rw_ilock(ip, *iolock); | 589 | xfs_rw_ilock(ip, *iolock); |
580 | iov_iter_reexpand(from, count); | 590 | iov_iter_reexpand(from, count); |
591 | |||
592 | /* | ||
593 | * We now have an IO submission barrier in place, but | ||
594 | * AIO can do EOF updates during IO completion and hence | ||
595 | * we now need to wait for all of them to drain. Non-AIO | ||
596 | * DIO will have drained before we are given the | ||
597 | * XFS_IOLOCK_EXCL, and so for most cases this wait is a | ||
598 | * no-op. | ||
599 | */ | ||
600 | inode_dio_wait(inode); | ||
581 | goto restart; | 601 | goto restart; |
582 | } | 602 | } |
583 | error = xfs_zero_eof(ip, iocb->ki_pos, i_size_read(inode), &zero); | 603 | error = xfs_zero_eof(ip, iocb->ki_pos, i_size_read(inode), &zero); |
584 | if (error) | 604 | if (error) |
585 | return error; | 605 | return error; |
586 | } | 606 | } else |
607 | spin_unlock(&ip->i_flags_lock); | ||
587 | 608 | ||
588 | /* | 609 | /* |
589 | * Updating the timestamps will grab the ilock again from | 610 | * Updating the timestamps will grab the ilock again from |
@@ -645,6 +666,8 @@ xfs_file_dio_aio_write( | |||
645 | int iolock; | 666 | int iolock; |
646 | size_t count = iov_iter_count(from); | 667 | size_t count = iov_iter_count(from); |
647 | loff_t pos = iocb->ki_pos; | 668 | loff_t pos = iocb->ki_pos; |
669 | loff_t end; | ||
670 | struct iov_iter data; | ||
648 | struct xfs_buftarg *target = XFS_IS_REALTIME_INODE(ip) ? | 671 | struct xfs_buftarg *target = XFS_IS_REALTIME_INODE(ip) ? |
649 | mp->m_rtdev_targp : mp->m_ddev_targp; | 672 | mp->m_rtdev_targp : mp->m_ddev_targp; |
650 | 673 | ||
@@ -685,10 +708,11 @@ xfs_file_dio_aio_write( | |||
685 | goto out; | 708 | goto out; |
686 | count = iov_iter_count(from); | 709 | count = iov_iter_count(from); |
687 | pos = iocb->ki_pos; | 710 | pos = iocb->ki_pos; |
711 | end = pos + count - 1; | ||
688 | 712 | ||
689 | if (mapping->nrpages) { | 713 | if (mapping->nrpages) { |
690 | ret = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, | 714 | ret = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, |
691 | pos, pos + count - 1); | 715 | pos, end); |
692 | if (ret) | 716 | if (ret) |
693 | goto out; | 717 | goto out; |
694 | /* | 718 | /* |
@@ -698,7 +722,7 @@ xfs_file_dio_aio_write( | |||
698 | */ | 722 | */ |
699 | ret = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping, | 723 | ret = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping, |
700 | pos >> PAGE_CACHE_SHIFT, | 724 | pos >> PAGE_CACHE_SHIFT, |
701 | (pos + count - 1) >> PAGE_CACHE_SHIFT); | 725 | end >> PAGE_CACHE_SHIFT); |
702 | WARN_ON_ONCE(ret); | 726 | WARN_ON_ONCE(ret); |
703 | ret = 0; | 727 | ret = 0; |
704 | } | 728 | } |
@@ -715,8 +739,22 @@ xfs_file_dio_aio_write( | |||
715 | } | 739 | } |
716 | 740 | ||
717 | trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0); | 741 | trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0); |
718 | ret = generic_file_direct_write(iocb, from, pos); | ||
719 | 742 | ||
743 | data = *from; | ||
744 | ret = mapping->a_ops->direct_IO(iocb, &data, pos); | ||
745 | |||
746 | /* see generic_file_direct_write() for why this is necessary */ | ||
747 | if (mapping->nrpages) { | ||
748 | invalidate_inode_pages2_range(mapping, | ||
749 | pos >> PAGE_CACHE_SHIFT, | ||
750 | end >> PAGE_CACHE_SHIFT); | ||
751 | } | ||
752 | |||
753 | if (ret > 0) { | ||
754 | pos += ret; | ||
755 | iov_iter_advance(from, ret); | ||
756 | iocb->ki_pos = pos; | ||
757 | } | ||
720 | out: | 758 | out: |
721 | xfs_rw_iunlock(ip, iolock); | 759 | xfs_rw_iunlock(ip, iolock); |
722 | 760 | ||
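Why the write goes through a stack copy of the iterator (a brief hedged sketch of the reasoning, not patch code):

    /*
     * struct iov_iter is a value type: a cursor over the caller's iovec
     * array. Hence:
     *
     *   data = *from;                  snapshots the current position
     *   ->direct_IO(iocb, &data, pos)  may consume any amount of 'data'
     *
     * On a short or failed write, 'from' still sits at the original
     * offset; iov_iter_advance(from, ret) then consumes exactly the
     * bytes that reached disk, and iocb->ki_pos follows suit.
     */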
@@ -822,6 +860,11 @@ xfs_file_write_iter( | |||
822 | return ret; | 860 | return ret; |
823 | } | 861 | } |
824 | 862 | ||
863 | #define XFS_FALLOC_FL_SUPPORTED \ | ||
864 | (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | \ | ||
865 | FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE | \ | ||
866 | FALLOC_FL_INSERT_RANGE) | ||
867 | |||
825 | STATIC long | 868 | STATIC long |
826 | xfs_file_fallocate( | 869 | xfs_file_fallocate( |
827 | struct file *file, | 870 | struct file *file, |
@@ -835,18 +878,21 @@ xfs_file_fallocate( | |||
835 | enum xfs_prealloc_flags flags = 0; | 878 | enum xfs_prealloc_flags flags = 0; |
836 | uint iolock = XFS_IOLOCK_EXCL; | 879 | uint iolock = XFS_IOLOCK_EXCL; |
837 | loff_t new_size = 0; | 880 | loff_t new_size = 0; |
881 | bool do_file_insert = 0; | ||
838 | 882 | ||
839 | if (!S_ISREG(inode->i_mode)) | 883 | if (!S_ISREG(inode->i_mode)) |
840 | return -EINVAL; | 884 | return -EINVAL; |
841 | if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | | 885 | if (mode & ~XFS_FALLOC_FL_SUPPORTED) |
842 | FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE)) | ||
843 | return -EOPNOTSUPP; | 886 | return -EOPNOTSUPP; |
844 | 887 | ||
845 | xfs_ilock(ip, iolock); | 888 | xfs_ilock(ip, iolock); |
846 | error = xfs_break_layouts(inode, &iolock); | 889 | error = xfs_break_layouts(inode, &iolock, false); |
847 | if (error) | 890 | if (error) |
848 | goto out_unlock; | 891 | goto out_unlock; |
849 | 892 | ||
893 | xfs_ilock(ip, XFS_MMAPLOCK_EXCL); | ||
894 | iolock |= XFS_MMAPLOCK_EXCL; | ||
895 | |||
850 | if (mode & FALLOC_FL_PUNCH_HOLE) { | 896 | if (mode & FALLOC_FL_PUNCH_HOLE) { |
851 | error = xfs_free_file_space(ip, offset, len); | 897 | error = xfs_free_file_space(ip, offset, len); |
852 | if (error) | 898 | if (error) |
@@ -873,6 +919,27 @@ xfs_file_fallocate( | |||
873 | error = xfs_collapse_file_space(ip, offset, len); | 919 | error = xfs_collapse_file_space(ip, offset, len); |
874 | if (error) | 920 | if (error) |
875 | goto out_unlock; | 921 | goto out_unlock; |
922 | } else if (mode & FALLOC_FL_INSERT_RANGE) { | ||
923 | unsigned blksize_mask = (1 << inode->i_blkbits) - 1; | ||
924 | |||
925 | new_size = i_size_read(inode) + len; | ||
926 | if (offset & blksize_mask || len & blksize_mask) { | ||
927 | error = -EINVAL; | ||
928 | goto out_unlock; | ||
929 | } | ||
930 | |||
931 | /* check the new inode size does not exceed the maximum file size */ | ||
932 | if (new_size > inode->i_sb->s_maxbytes) { | ||
933 | error = -EFBIG; | ||
934 | goto out_unlock; | ||
935 | } | ||
936 | |||
937 | /* Offset should be less than i_size */ | ||
938 | if (offset >= i_size_read(inode)) { | ||
939 | error = -EINVAL; | ||
940 | goto out_unlock; | ||
941 | } | ||
942 | do_file_insert = 1; | ||
876 | } else { | 943 | } else { |
877 | flags |= XFS_PREALLOC_SET; | 944 | flags |= XFS_PREALLOC_SET; |
878 | 945 | ||
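From userspace the new mode is reached through fallocate(2). A hedged sketch (hypothetical helper; per the checks above, offset and len must be multiples of the filesystem block size and offset must lie below EOF):

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <linux/falloc.h>

    /* shift everything from 'offset' rightwards by 'len', growing the
     * file and leaving a hole at [offset, offset + len) */
    static int insert_hole(int fd, off_t offset, off_t len)
    {
            return fallocate(fd, FALLOC_FL_INSERT_RANGE, offset, len);
    }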
@@ -907,8 +974,19 @@ xfs_file_fallocate( | |||
907 | iattr.ia_valid = ATTR_SIZE; | 974 | iattr.ia_valid = ATTR_SIZE; |
908 | iattr.ia_size = new_size; | 975 | iattr.ia_size = new_size; |
909 | error = xfs_setattr_size(ip, &iattr); | 976 | error = xfs_setattr_size(ip, &iattr); |
977 | if (error) | ||
978 | goto out_unlock; | ||
910 | } | 979 | } |
911 | 980 | ||
981 | /* | ||
982 | * Perform hole insertion now that the file size has been | ||
983 | * updated so that if we crash during the operation we don't | ||
984 | * leave shifted extents past EOF and hence lose access to | ||
985 | * the data contained within them. | ||
986 | */ | ||
987 | if (do_file_insert) | ||
988 | error = xfs_insert_file_space(ip, offset, len); | ||
989 | |||
912 | out_unlock: | 990 | out_unlock: |
913 | xfs_iunlock(ip, iolock); | 991 | xfs_iunlock(ip, iolock); |
914 | return error; | 992 | return error; |
@@ -997,20 +1075,6 @@ xfs_file_mmap( | |||
997 | } | 1075 | } |
998 | 1076 | ||
999 | /* | 1077 | /* |
1000 | * mmap()d file has taken write protection fault and is being made | ||
1001 | * writable. We can set the page state up correctly for a writable | ||
1002 | * page, which means we can do correct delalloc accounting (ENOSPC | ||
1003 | * checking!) and unwritten extent mapping. | ||
1004 | */ | ||
1005 | STATIC int | ||
1006 | xfs_vm_page_mkwrite( | ||
1007 | struct vm_area_struct *vma, | ||
1008 | struct vm_fault *vmf) | ||
1009 | { | ||
1010 | return block_page_mkwrite(vma, vmf, xfs_get_blocks); | ||
1011 | } | ||
1012 | |||
1013 | /* | ||
1014 | * This type is designed to indicate the type of offset we would like | 1078 | * This type is designed to indicate the type of offset we would like |
1015 | * to search from page cache for xfs_seek_hole_data(). | 1079 | * to search from page cache for xfs_seek_hole_data(). |
1016 | */ | 1080 | */ |
@@ -1385,6 +1449,55 @@ xfs_file_llseek( | |||
1385 | } | 1449 | } |
1386 | } | 1450 | } |
1387 | 1451 | ||
1452 | /* | ||
1453 | * Locking for serialisation of IO during page faults. This results in a lock | ||
1454 | * ordering of: | ||
1455 | * | ||
1456 | * mmap_sem (MM) | ||
1457 | * i_mmap_lock (XFS - truncate serialisation) | ||
1458 | * page_lock (MM) | ||
1459 | * i_lock (XFS - extent map serialisation) | ||
1460 | */ | ||
1461 | STATIC int | ||
1462 | xfs_filemap_fault( | ||
1463 | struct vm_area_struct *vma, | ||
1464 | struct vm_fault *vmf) | ||
1465 | { | ||
1466 | struct xfs_inode *ip = XFS_I(vma->vm_file->f_mapping->host); | ||
1467 | int error; | ||
1468 | |||
1469 | trace_xfs_filemap_fault(ip); | ||
1470 | |||
1471 | xfs_ilock(ip, XFS_MMAPLOCK_SHARED); | ||
1472 | error = filemap_fault(vma, vmf); | ||
1473 | xfs_iunlock(ip, XFS_MMAPLOCK_SHARED); | ||
1474 | |||
1475 | return error; | ||
1476 | } | ||
1477 | |||
1478 | /* | ||
1479 | * mmap()d file has taken write protection fault and is being made writable. We | ||
1480 | * can set the page state up correctly for a writable page, which means we can | ||
1481 | * do correct delalloc accounting (ENOSPC checking!) and unwritten extent | ||
1482 | * mapping. | ||
1483 | */ | ||
1484 | STATIC int | ||
1485 | xfs_filemap_page_mkwrite( | ||
1486 | struct vm_area_struct *vma, | ||
1487 | struct vm_fault *vmf) | ||
1488 | { | ||
1489 | struct xfs_inode *ip = XFS_I(vma->vm_file->f_mapping->host); | ||
1490 | int error; | ||
1491 | |||
1492 | trace_xfs_filemap_page_mkwrite(ip); | ||
1493 | |||
1494 | xfs_ilock(ip, XFS_MMAPLOCK_SHARED); | ||
1495 | error = block_page_mkwrite(vma, vmf, xfs_get_blocks); | ||
1496 | xfs_iunlock(ip, XFS_MMAPLOCK_SHARED); | ||
1497 | |||
1498 | return error; | ||
1499 | } | ||
1500 | |||
1388 | const struct file_operations xfs_file_operations = { | 1501 | const struct file_operations xfs_file_operations = { |
1389 | .llseek = xfs_file_llseek, | 1502 | .llseek = xfs_file_llseek, |
1390 | .read_iter = xfs_file_read_iter, | 1503 | .read_iter = xfs_file_read_iter, |
@@ -1415,7 +1528,7 @@ const struct file_operations xfs_dir_file_operations = { | |||
1415 | }; | 1528 | }; |
1416 | 1529 | ||
1417 | static const struct vm_operations_struct xfs_file_vm_ops = { | 1530 | static const struct vm_operations_struct xfs_file_vm_ops = { |
1418 | .fault = filemap_fault, | 1531 | .fault = xfs_filemap_fault, |
1419 | .map_pages = filemap_map_pages, | 1532 | .map_pages = filemap_map_pages, |
1420 | .page_mkwrite = xfs_vm_page_mkwrite, | 1533 | .page_mkwrite = xfs_filemap_page_mkwrite, |
1421 | }; | 1534 | }; |
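The fault-side locking added above is a classic reader/writer split: page faults take the new mmap lock shared so they can run concurrently, while truncate-like extent manipulations take it exclusive. A minimal user-space analog of that pattern with POSIX rwlocks (illustrative only, not kernel code):

#include <pthread.h>

static pthread_rwlock_t mmaplock = PTHREAD_RWLOCK_INITIALIZER;

/* page fault path: XFS_MMAPLOCK_SHARED - many faults may proceed at once */
static void fault_path(void)
{
	pthread_rwlock_rdlock(&mmaplock);
	/* ... filemap_fault()/block_page_mkwrite() work happens here ... */
	pthread_rwlock_unlock(&mmaplock);
}

/* truncate/hole punch path: XFS_MMAPLOCK_EXCL - excludes all faults */
static void truncate_path(void)
{
	pthread_rwlock_wrlock(&mmaplock);
	/* ... invalidate the page cache and shift extents here ... */
	pthread_rwlock_unlock(&mmaplock);
}

int main(void)
{
	fault_path();
	truncate_path();
	return 0;
}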
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c index a2e86e8a0fea..8f9f854376c6 100644 --- a/fs/xfs/xfs_filestream.c +++ b/fs/xfs/xfs_filestream.c | |||
@@ -322,7 +322,7 @@ xfs_filestream_lookup_ag( | |||
322 | 322 | ||
323 | pip = xfs_filestream_get_parent(ip); | 323 | pip = xfs_filestream_get_parent(ip); |
324 | if (!pip) | 324 | if (!pip) |
325 | goto out; | 325 | return NULLAGNUMBER; |
326 | 326 | ||
327 | mru = xfs_mru_cache_lookup(mp->m_filestream, pip->i_ino); | 327 | mru = xfs_mru_cache_lookup(mp->m_filestream, pip->i_ino); |
328 | if (mru) { | 328 | if (mru) { |
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 74efe5b760dc..cb7e8a29dfb6 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c | |||
@@ -637,12 +637,13 @@ xfs_fs_counts( | |||
637 | xfs_mount_t *mp, | 637 | xfs_mount_t *mp, |
638 | xfs_fsop_counts_t *cnt) | 638 | xfs_fsop_counts_t *cnt) |
639 | { | 639 | { |
640 | xfs_icsb_sync_counters(mp, XFS_ICSB_LAZY_COUNT); | 640 | cnt->allocino = percpu_counter_read_positive(&mp->m_icount); |
641 | cnt->freeino = percpu_counter_read_positive(&mp->m_ifree); | ||
642 | cnt->freedata = percpu_counter_read_positive(&mp->m_fdblocks) - | ||
643 | XFS_ALLOC_SET_ASIDE(mp); | ||
644 | |||
641 | spin_lock(&mp->m_sb_lock); | 645 | spin_lock(&mp->m_sb_lock); |
642 | cnt->freedata = mp->m_sb.sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp); | ||
643 | cnt->freertx = mp->m_sb.sb_frextents; | 646 | cnt->freertx = mp->m_sb.sb_frextents; |
644 | cnt->freeino = mp->m_sb.sb_ifree; | ||
645 | cnt->allocino = mp->m_sb.sb_icount; | ||
646 | spin_unlock(&mp->m_sb_lock); | 647 | spin_unlock(&mp->m_sb_lock); |
647 | return 0; | 648 | return 0; |
648 | } | 649 | } |
@@ -692,14 +693,9 @@ xfs_reserve_blocks( | |||
692 | * what to do. This means that the amount of free space can | 693 | * what to do. This means that the amount of free space can |
693 | * change while we do this, so we need to retry if we end up | 694 | * change while we do this, so we need to retry if we end up |
694 | * trying to reserve more space than is available. | 695 | * trying to reserve more space than is available. |
695 | * | ||
696 | * We also use the xfs_mod_incore_sb() interface so that we | ||
697 | * don't have to care about whether per cpu counter are | ||
698 | * enabled, disabled or even compiled in.... | ||
699 | */ | 696 | */ |
700 | retry: | 697 | retry: |
701 | spin_lock(&mp->m_sb_lock); | 698 | spin_lock(&mp->m_sb_lock); |
702 | xfs_icsb_sync_counters_locked(mp, 0); | ||
703 | 699 | ||
704 | /* | 700 | /* |
705 | * If our previous reservation was larger than the current value, | 701 | * If our previous reservation was larger than the current value, |
@@ -716,7 +712,8 @@ retry: | |||
716 | } else { | 712 | } else { |
717 | __int64_t free; | 713 | __int64_t free; |
718 | 714 | ||
719 | free = mp->m_sb.sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp); | 715 | free = percpu_counter_sum(&mp->m_fdblocks) - |
716 | XFS_ALLOC_SET_ASIDE(mp); | ||
720 | if (!free) | 717 | if (!free) |
721 | goto out; /* ENOSPC and fdblks_delta = 0 */ | 718 | goto out; /* ENOSPC and fdblks_delta = 0 */ |
722 | 719 | ||
@@ -755,8 +752,7 @@ out: | |||
755 | * the extra reserve blocks from the reserve..... | 752 | * the extra reserve blocks from the reserve..... |
756 | */ | 753 | */ |
757 | int error; | 754 | int error; |
758 | error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, | 755 | error = xfs_mod_fdblocks(mp, fdblks_delta, 0); |
759 | fdblks_delta, 0); | ||
760 | if (error == -ENOSPC) | 756 | if (error == -ENOSPC) |
761 | goto retry; | 757 | goto retry; |
762 | } | 758 | } |
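The counter conversions above trade precision for scalability: percpu_counter_read_positive() is a cheap, approximately-correct read used on hot paths like xfs_fs_counts(), while percpu_counter_sum() walks every CPU's delta for the precise value xfs_reserve_blocks() needs. A hedged user-space sketch of that split; the names and layout are invented for illustration, not the kernel's percpu_counter implementation:

#include <stdatomic.h>

#define NR_CPUS 8

struct pcp_counter {
	_Atomic long global;		/* batched, folded-in total */
	_Atomic long delta[NR_CPUS];	/* per-cpu not-yet-folded deltas */
};

/* fast but approximate, like percpu_counter_read_positive() */
static long pcp_read_positive(struct pcp_counter *c)
{
	long v = atomic_load_explicit(&c->global, memory_order_relaxed);

	return v > 0 ? v : 0;
}

/* precise but O(NR_CPUS), like percpu_counter_sum() */
static long pcp_sum(struct pcp_counter *c)
{
	long v = atomic_load_explicit(&c->global, memory_order_relaxed);
	int i;

	for (i = 0; i < NR_CPUS; i++)
		v += atomic_load_explicit(&c->delta[i], memory_order_relaxed);
	return v;
}

int main(void)
{
	struct pcp_counter c = { 0 };

	atomic_store(&c.delta[0], 5);
	/* the fast read misses the unfolded delta; the sum sees it */
	return pcp_sum(&c) - pcp_read_positive(&c) - 5;	/* exits 0 */
}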
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 9771b7ef62ed..76a9f2783282 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c | |||
@@ -439,11 +439,11 @@ again: | |||
439 | *ipp = ip; | 439 | *ipp = ip; |
440 | 440 | ||
441 | /* | 441 | /* |
442 | * If we have a real type for an on-disk inode, we can set ops(&unlock) | 442 | * If we have a real type for an on-disk inode, we can set up the inode |
443 | * now. If it's a new inode being created, xfs_ialloc will handle it. | 443 | * now. If it's a new inode being created, xfs_ialloc will handle it. |
444 | */ | 444 | */ |
445 | if (xfs_iflags_test(ip, XFS_INEW) && ip->i_d.di_mode != 0) | 445 | if (xfs_iflags_test(ip, XFS_INEW) && ip->i_d.di_mode != 0) |
446 | xfs_setup_inode(ip); | 446 | xfs_setup_existing_inode(ip); |
447 | return 0; | 447 | return 0; |
448 | 448 | ||
449 | out_error_or_again: | 449 | out_error_or_again: |
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 6163767aa856..d6ebc85192b7 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c | |||
@@ -117,24 +117,34 @@ xfs_ilock_attr_map_shared( | |||
117 | } | 117 | } |
118 | 118 | ||
119 | /* | 119 | /* |
120 | * The xfs inode contains 2 locks: a multi-reader lock called the | 120 | * The xfs inode contains 3 multi-reader locks: the i_iolock, the i_mmap_lock and |
121 | * i_iolock and a multi-reader lock called the i_lock. This routine | 121 | * the i_lock. This routine allows various combinations of the locks to be |
122 | * allows either or both of the locks to be obtained. | 122 | * obtained. |
123 | * | 123 | * |
124 | * The 2 locks should always be ordered so that the IO lock is | 124 | * The 3 locks should always be ordered so that the IO lock is obtained first, |
125 | * obtained first in order to prevent deadlock. | 125 | * the mmap lock second and the ilock last in order to prevent deadlock. |
126 | * | 126 | * |
127 | * ip -- the inode being locked | 127 | * Basic locking order: |
128 | * lock_flags -- this parameter indicates the inode's locks | 128 | * |
129 | * to be locked. It can be: | 129 | * i_iolock -> i_mmap_lock -> page_lock -> i_lock |
130 | * XFS_IOLOCK_SHARED, | 130 | * |
131 | * XFS_IOLOCK_EXCL, | 131 | * mmap_sem locking order: |
132 | * XFS_ILOCK_SHARED, | 132 | * |
133 | * XFS_ILOCK_EXCL, | 133 | * i_iolock -> page_lock -> mmap_sem |
134 | * XFS_IOLOCK_SHARED | XFS_ILOCK_SHARED, | 134 | * mmap_sem -> i_mmap_lock -> page_lock |
135 | * XFS_IOLOCK_SHARED | XFS_ILOCK_EXCL, | 135 | * |
136 | * XFS_IOLOCK_SHARED | XFS_ILOCK_EXCL, | 136 | * The difference in mmap_sem locking order means that we cannot hold the |
137 | * XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL | 137 | * i_mmap_lock over syscall based read(2)/write(2) based IO. These IO paths can |
138 | * fault in pages during copy in/out (for buffered IO) or require the mmap_sem | ||
139 | * in get_user_pages() to map the user pages into the kernel address space for | ||
140 | * direct IO. Similarly the i_iolock cannot be taken inside a page fault because | ||
141 | * page faults already hold the mmap_sem. | ||
142 | * | ||
143 | * Hence to serialise fully against both syscall and mmap based IO, we need to | ||
144 | * take both the i_iolock and the i_mmap_lock. These locks should *only* be both | ||
145 | * taken in places where we need to invalidate the page cache in a race | ||
146 | * free manner (e.g. truncate, hole punch and other extent manipulation | ||
147 | * functions). | ||
138 | */ | 148 | */ |
139 | void | 149 | void |
140 | xfs_ilock( | 150 | xfs_ilock( |
@@ -150,6 +160,8 @@ xfs_ilock( | |||
150 | */ | 160 | */ |
151 | ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) != | 161 | ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) != |
152 | (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)); | 162 | (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)); |
163 | ASSERT((lock_flags & (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)) != | ||
164 | (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)); | ||
153 | ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) != | 165 | ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) != |
154 | (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); | 166 | (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); |
155 | ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0); | 167 | ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0); |
@@ -159,6 +171,11 @@ xfs_ilock( | |||
159 | else if (lock_flags & XFS_IOLOCK_SHARED) | 171 | else if (lock_flags & XFS_IOLOCK_SHARED) |
160 | mraccess_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags)); | 172 | mraccess_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags)); |
161 | 173 | ||
174 | if (lock_flags & XFS_MMAPLOCK_EXCL) | ||
175 | mrupdate_nested(&ip->i_mmaplock, XFS_MMAPLOCK_DEP(lock_flags)); | ||
176 | else if (lock_flags & XFS_MMAPLOCK_SHARED) | ||
177 | mraccess_nested(&ip->i_mmaplock, XFS_MMAPLOCK_DEP(lock_flags)); | ||
178 | |||
162 | if (lock_flags & XFS_ILOCK_EXCL) | 179 | if (lock_flags & XFS_ILOCK_EXCL) |
163 | mrupdate_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags)); | 180 | mrupdate_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags)); |
164 | else if (lock_flags & XFS_ILOCK_SHARED) | 181 | else if (lock_flags & XFS_ILOCK_SHARED) |
@@ -191,6 +208,8 @@ xfs_ilock_nowait( | |||
191 | */ | 208 | */ |
192 | ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) != | 209 | ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) != |
193 | (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)); | 210 | (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)); |
211 | ASSERT((lock_flags & (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)) != | ||
212 | (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)); | ||
194 | ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) != | 213 | ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) != |
195 | (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); | 214 | (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); |
196 | ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0); | 215 | ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0); |
@@ -202,21 +221,35 @@ xfs_ilock_nowait( | |||
202 | if (!mrtryaccess(&ip->i_iolock)) | 221 | if (!mrtryaccess(&ip->i_iolock)) |
203 | goto out; | 222 | goto out; |
204 | } | 223 | } |
224 | |||
225 | if (lock_flags & XFS_MMAPLOCK_EXCL) { | ||
226 | if (!mrtryupdate(&ip->i_mmaplock)) | ||
227 | goto out_undo_iolock; | ||
228 | } else if (lock_flags & XFS_MMAPLOCK_SHARED) { | ||
229 | if (!mrtryaccess(&ip->i_mmaplock)) | ||
230 | goto out_undo_iolock; | ||
231 | } | ||
232 | |||
205 | if (lock_flags & XFS_ILOCK_EXCL) { | 233 | if (lock_flags & XFS_ILOCK_EXCL) { |
206 | if (!mrtryupdate(&ip->i_lock)) | 234 | if (!mrtryupdate(&ip->i_lock)) |
207 | goto out_undo_iolock; | 235 | goto out_undo_mmaplock; |
208 | } else if (lock_flags & XFS_ILOCK_SHARED) { | 236 | } else if (lock_flags & XFS_ILOCK_SHARED) { |
209 | if (!mrtryaccess(&ip->i_lock)) | 237 | if (!mrtryaccess(&ip->i_lock)) |
210 | goto out_undo_iolock; | 238 | goto out_undo_mmaplock; |
211 | } | 239 | } |
212 | return 1; | 240 | return 1; |
213 | 241 | ||
214 | out_undo_iolock: | 242 | out_undo_mmaplock: |
243 | if (lock_flags & XFS_MMAPLOCK_EXCL) | ||
244 | mrunlock_excl(&ip->i_mmaplock); | ||
245 | else if (lock_flags & XFS_MMAPLOCK_SHARED) | ||
246 | mrunlock_shared(&ip->i_mmaplock); | ||
247 | out_undo_iolock: | ||
215 | if (lock_flags & XFS_IOLOCK_EXCL) | 248 | if (lock_flags & XFS_IOLOCK_EXCL) |
216 | mrunlock_excl(&ip->i_iolock); | 249 | mrunlock_excl(&ip->i_iolock); |
217 | else if (lock_flags & XFS_IOLOCK_SHARED) | 250 | else if (lock_flags & XFS_IOLOCK_SHARED) |
218 | mrunlock_shared(&ip->i_iolock); | 251 | mrunlock_shared(&ip->i_iolock); |
219 | out: | 252 | out: |
220 | return 0; | 253 | return 0; |
221 | } | 254 | } |
222 | 255 | ||
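The out_undo_* chain above is the standard staged-unwind shape for multi-lock trylocks: each later failure releases exactly the locks already held, in reverse order. A compact sketch of the same shape with POSIX rwlocks; the lock names are hypothetical stand-ins for the three XFS locks:

#include <pthread.h>
#include <stdbool.h>

static bool trylock_three(pthread_rwlock_t *io, pthread_rwlock_t *mmap,
			  pthread_rwlock_t *ilock)
{
	if (pthread_rwlock_trywrlock(io))
		goto out;
	if (pthread_rwlock_trywrlock(mmap))
		goto out_undo_iolock;
	if (pthread_rwlock_trywrlock(ilock))
		goto out_undo_mmaplock;
	return true;			/* all three held */

out_undo_mmaplock:
	pthread_rwlock_unlock(mmap);
out_undo_iolock:
	pthread_rwlock_unlock(io);
out:
	return false;			/* nothing held, caller may retry */
}

int main(void)
{
	pthread_rwlock_t a = PTHREAD_RWLOCK_INITIALIZER;
	pthread_rwlock_t b = PTHREAD_RWLOCK_INITIALIZER;
	pthread_rwlock_t c = PTHREAD_RWLOCK_INITIALIZER;

	return trylock_three(&a, &b, &c) ? 0 : 1;
}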
@@ -244,6 +277,8 @@ xfs_iunlock( | |||
244 | */ | 277 | */ |
245 | ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) != | 278 | ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) != |
246 | (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)); | 279 | (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)); |
280 | ASSERT((lock_flags & (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)) != | ||
281 | (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)); | ||
247 | ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) != | 282 | ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) != |
248 | (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); | 283 | (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); |
249 | ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0); | 284 | ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0); |
@@ -254,6 +289,11 @@ xfs_iunlock( | |||
254 | else if (lock_flags & XFS_IOLOCK_SHARED) | 289 | else if (lock_flags & XFS_IOLOCK_SHARED) |
255 | mrunlock_shared(&ip->i_iolock); | 290 | mrunlock_shared(&ip->i_iolock); |
256 | 291 | ||
292 | if (lock_flags & XFS_MMAPLOCK_EXCL) | ||
293 | mrunlock_excl(&ip->i_mmaplock); | ||
294 | else if (lock_flags & XFS_MMAPLOCK_SHARED) | ||
295 | mrunlock_shared(&ip->i_mmaplock); | ||
296 | |||
257 | if (lock_flags & XFS_ILOCK_EXCL) | 297 | if (lock_flags & XFS_ILOCK_EXCL) |
258 | mrunlock_excl(&ip->i_lock); | 298 | mrunlock_excl(&ip->i_lock); |
259 | else if (lock_flags & XFS_ILOCK_SHARED) | 299 | else if (lock_flags & XFS_ILOCK_SHARED) |
@@ -271,11 +311,14 @@ xfs_ilock_demote( | |||
271 | xfs_inode_t *ip, | 311 | xfs_inode_t *ip, |
272 | uint lock_flags) | 312 | uint lock_flags) |
273 | { | 313 | { |
274 | ASSERT(lock_flags & (XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL)); | 314 | ASSERT(lock_flags & (XFS_IOLOCK_EXCL|XFS_MMAPLOCK_EXCL|XFS_ILOCK_EXCL)); |
275 | ASSERT((lock_flags & ~(XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL)) == 0); | 315 | ASSERT((lock_flags & |
316 | ~(XFS_IOLOCK_EXCL|XFS_MMAPLOCK_EXCL|XFS_ILOCK_EXCL)) == 0); | ||
276 | 317 | ||
277 | if (lock_flags & XFS_ILOCK_EXCL) | 318 | if (lock_flags & XFS_ILOCK_EXCL) |
278 | mrdemote(&ip->i_lock); | 319 | mrdemote(&ip->i_lock); |
320 | if (lock_flags & XFS_MMAPLOCK_EXCL) | ||
321 | mrdemote(&ip->i_mmaplock); | ||
279 | if (lock_flags & XFS_IOLOCK_EXCL) | 322 | if (lock_flags & XFS_IOLOCK_EXCL) |
280 | mrdemote(&ip->i_iolock); | 323 | mrdemote(&ip->i_iolock); |
281 | 324 | ||
@@ -294,6 +337,12 @@ xfs_isilocked( | |||
294 | return rwsem_is_locked(&ip->i_lock.mr_lock); | 337 | return rwsem_is_locked(&ip->i_lock.mr_lock); |
295 | } | 338 | } |
296 | 339 | ||
340 | if (lock_flags & (XFS_MMAPLOCK_EXCL|XFS_MMAPLOCK_SHARED)) { | ||
341 | if (!(lock_flags & XFS_MMAPLOCK_SHARED)) | ||
342 | return !!ip->i_mmaplock.mr_writer; | ||
343 | return rwsem_is_locked(&ip->i_mmaplock.mr_lock); | ||
344 | } | ||
345 | |||
297 | if (lock_flags & (XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)) { | 346 | if (lock_flags & (XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)) { |
298 | if (!(lock_flags & XFS_IOLOCK_SHARED)) | 347 | if (!(lock_flags & XFS_IOLOCK_SHARED)) |
299 | return !!ip->i_iolock.mr_writer; | 348 | return !!ip->i_iolock.mr_writer; |
@@ -314,14 +363,27 @@ int xfs_lock_delays; | |||
314 | #endif | 363 | #endif |
315 | 364 | ||
316 | /* | 365 | /* |
317 | * Bump the subclass so xfs_lock_inodes() acquires each lock with | 366 | * Bump the subclass so xfs_lock_inodes() acquires each lock with a different |
318 | * a different value | 367 | * value. This shouldn't be called for page fault locking, but we also need to |
368 | * ensure we don't overrun the number of lockdep subclasses for the iolock or | ||
369 | * mmaplock as that is limited to 12 by the mmap lock lockdep annotations. | ||
319 | */ | 370 | */ |
320 | static inline int | 371 | static inline int |
321 | xfs_lock_inumorder(int lock_mode, int subclass) | 372 | xfs_lock_inumorder(int lock_mode, int subclass) |
322 | { | 373 | { |
323 | if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) | 374 | if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) { |
375 | ASSERT(subclass + XFS_LOCK_INUMORDER < | ||
376 | (1 << (XFS_MMAPLOCK_SHIFT - XFS_IOLOCK_SHIFT))); | ||
324 | lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_IOLOCK_SHIFT; | 377 | lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_IOLOCK_SHIFT; |
378 | } | ||
379 | |||
380 | if (lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) { | ||
381 | ASSERT(subclass + XFS_LOCK_INUMORDER < | ||
382 | (1 << (XFS_ILOCK_SHIFT - XFS_MMAPLOCK_SHIFT))); | ||
383 | lock_mode |= (subclass + XFS_LOCK_INUMORDER) << | ||
384 | XFS_MMAPLOCK_SHIFT; | ||
385 | } | ||
386 | |||
325 | if (lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)) | 387 | if (lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)) |
326 | lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_ILOCK_SHIFT; | 388 | lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_ILOCK_SHIFT; |
327 | 389 | ||
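The new ASSERTs guard the bit-field packing: each lock class owns a distinct run of subclass bits in lock_mode (iolock at bits 16-19, mmaplock at 20-23, ilock at 24-31 per the xfs_inode.h hunk below), so a subclass must never spill into the neighbouring class's range. A small stand-alone check of the encoding, ignoring the XFS_LOCK_INUMORDER base value for brevity:

#include <assert.h>
#include <stdio.h>

#define XFS_IOLOCK_SHIFT	16
#define XFS_MMAPLOCK_SHIFT	20
#define XFS_IOLOCK_DEP_MASK	0x000f0000
#define XFS_MMAPLOCK_DEP_MASK	0x00f00000

int main(void)
{
	unsigned int lock_mode = 0;
	unsigned int subclass = 3;

	/* pack - mirrors xfs_lock_inumorder() */
	lock_mode |= subclass << XFS_IOLOCK_SHIFT;
	lock_mode |= subclass << XFS_MMAPLOCK_SHIFT;

	/* unpack - mirrors XFS_IOLOCK_DEP()/XFS_MMAPLOCK_DEP() */
	assert(((lock_mode & XFS_IOLOCK_DEP_MASK) >> XFS_IOLOCK_SHIFT) == 3);
	assert(((lock_mode & XFS_MMAPLOCK_DEP_MASK) >> XFS_MMAPLOCK_SHIFT) == 3);

	/* the iolock field is only 4 bits wide, so a subclass above 15
	 * would bleed into the mmaplock field - hence the ASSERTs above */
	printf("lock_mode = 0x%08x\n", lock_mode);
	return 0;
}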
@@ -329,15 +391,14 @@ xfs_lock_inumorder(int lock_mode, int subclass) | |||
329 | } | 391 | } |
330 | 392 | ||
331 | /* | 393 | /* |
332 | * The following routine will lock n inodes in exclusive mode. | 394 | * The following routine will lock n inodes in exclusive mode. We assume the |
333 | * We assume the caller calls us with the inodes in i_ino order. | 395 | * caller calls us with the inodes in i_ino order. |
334 | * | 396 | * |
335 | * We need to detect deadlock where an inode that we lock | 397 | * We need to detect deadlock where an inode that we lock is in the AIL and we |
336 | * is in the AIL and we start waiting for another inode that is locked | 398 | * start waiting for another inode that is locked by a thread in a long running |
337 | * by a thread in a long running transaction (such as truncate). This can | 399 | * transaction (such as truncate). This can result in deadlock since the long |
338 | * result in deadlock since the long running trans might need to wait | 400 | * running trans might need to wait for the inode we just locked in order to |
339 | * for the inode we just locked in order to push the tail and free space | 401 | * push the tail and free space in the log. |
340 | * in the log. | ||
341 | */ | 402 | */ |
342 | void | 403 | void |
343 | xfs_lock_inodes( | 404 | xfs_lock_inodes( |
@@ -348,30 +409,27 @@ xfs_lock_inodes( | |||
348 | int attempts = 0, i, j, try_lock; | 409 | int attempts = 0, i, j, try_lock; |
349 | xfs_log_item_t *lp; | 410 | xfs_log_item_t *lp; |
350 | 411 | ||
351 | ASSERT(ips && (inodes >= 2)); /* we need at least two */ | 412 | /* currently supports between 2 and 5 inodes */ |
413 | ASSERT(ips && inodes >= 2 && inodes <= 5); | ||
352 | 414 | ||
353 | try_lock = 0; | 415 | try_lock = 0; |
354 | i = 0; | 416 | i = 0; |
355 | |||
356 | again: | 417 | again: |
357 | for (; i < inodes; i++) { | 418 | for (; i < inodes; i++) { |
358 | ASSERT(ips[i]); | 419 | ASSERT(ips[i]); |
359 | 420 | ||
360 | if (i && (ips[i] == ips[i-1])) /* Already locked */ | 421 | if (i && (ips[i] == ips[i - 1])) /* Already locked */ |
361 | continue; | 422 | continue; |
362 | 423 | ||
363 | /* | 424 | /* |
364 | * If try_lock is not set yet, make sure all locked inodes | 425 | * If try_lock is not set yet, make sure all locked inodes are |
365 | * are not in the AIL. | 426 | * not in the AIL. If any are, set try_lock to be used later. |
366 | * If any are, set try_lock to be used later. | ||
367 | */ | 427 | */ |
368 | |||
369 | if (!try_lock) { | 428 | if (!try_lock) { |
370 | for (j = (i - 1); j >= 0 && !try_lock; j--) { | 429 | for (j = (i - 1); j >= 0 && !try_lock; j--) { |
371 | lp = (xfs_log_item_t *)ips[j]->i_itemp; | 430 | lp = (xfs_log_item_t *)ips[j]->i_itemp; |
372 | if (lp && (lp->li_flags & XFS_LI_IN_AIL)) { | 431 | if (lp && (lp->li_flags & XFS_LI_IN_AIL)) |
373 | try_lock++; | 432 | try_lock++; |
374 | } | ||
375 | } | 433 | } |
376 | } | 434 | } |
377 | 435 | ||
@@ -381,51 +439,42 @@ again: | |||
381 | * we can't get any, we must release all we have | 439 | * we can't get any, we must release all we have |
382 | * and try again. | 440 | * and try again. |
383 | */ | 441 | */ |
442 | if (!try_lock) { | ||
443 | xfs_ilock(ips[i], xfs_lock_inumorder(lock_mode, i)); | ||
444 | continue; | ||
445 | } | ||
446 | |||
447 | /* try_lock means we have an inode locked that is in the AIL. */ | ||
448 | ASSERT(i != 0); | ||
449 | if (xfs_ilock_nowait(ips[i], xfs_lock_inumorder(lock_mode, i))) | ||
450 | continue; | ||
384 | 451 | ||
385 | if (try_lock) { | 452 | /* |
386 | /* try_lock must be 0 if i is 0. */ | 453 | * Unlock all previous guys and try again. xfs_iunlock will try |
454 | * to push the tail if the inode is in the AIL. | ||
455 | */ | ||
456 | attempts++; | ||
457 | for (j = i - 1; j >= 0; j--) { | ||
387 | /* | 458 | /* |
388 | * try_lock means we have an inode locked | 459 | * Check to see if we've already unlocked this one. Not |
389 | * that is in the AIL. | 460 | * the first one going back, and the inode ptr is the |
461 | * same. | ||
390 | */ | 462 | */ |
391 | ASSERT(i != 0); | 463 | if (j != (i - 1) && ips[j] == ips[j + 1]) |
392 | if (!xfs_ilock_nowait(ips[i], xfs_lock_inumorder(lock_mode, i))) { | 464 | continue; |
393 | attempts++; | 465 | |
394 | 466 | xfs_iunlock(ips[j], lock_mode); | |
395 | /* | 467 | } |
396 | * Unlock all previous guys and try again. | ||
397 | * xfs_iunlock will try to push the tail | ||
398 | * if the inode is in the AIL. | ||
399 | */ | ||
400 | |||
401 | for(j = i - 1; j >= 0; j--) { | ||
402 | |||
403 | /* | ||
404 | * Check to see if we've already | ||
405 | * unlocked this one. | ||
406 | * Not the first one going back, | ||
407 | * and the inode ptr is the same. | ||
408 | */ | ||
409 | if ((j != (i - 1)) && ips[j] == | ||
410 | ips[j+1]) | ||
411 | continue; | ||
412 | |||
413 | xfs_iunlock(ips[j], lock_mode); | ||
414 | } | ||
415 | 468 | ||
416 | if ((attempts % 5) == 0) { | 469 | if ((attempts % 5) == 0) { |
417 | delay(1); /* Don't just spin the CPU */ | 470 | delay(1); /* Don't just spin the CPU */ |
418 | #ifdef DEBUG | 471 | #ifdef DEBUG |
419 | xfs_lock_delays++; | 472 | xfs_lock_delays++; |
420 | #endif | 473 | #endif |
421 | } | ||
422 | i = 0; | ||
423 | try_lock = 0; | ||
424 | goto again; | ||
425 | } | ||
426 | } else { | ||
427 | xfs_ilock(ips[i], xfs_lock_inumorder(lock_mode, i)); | ||
428 | } | 474 | } |
475 | i = 0; | ||
476 | try_lock = 0; | ||
477 | goto again; | ||
429 | } | 478 | } |
430 | 479 | ||
431 | #ifdef DEBUG | 480 | #ifdef DEBUG |
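The rewritten loop implements ordered acquisition with a trylock fallback: once any inode already held is in the AIL, every further lock attempt is non-blocking, and a failure unwinds everything and restarts from the top. A simplified user-space analog of that retry scheme - unlike the real routine it try-locks unconditionally and assumes the caller has pre-sorted the array with no duplicate entries:

#include <pthread.h>
#include <sched.h>

/* Lock n mutexes the caller has already sorted into a stable order.
 * On any trylock failure, drop everything taken so far and start over,
 * so we never sleep while holding a later lock in the ordering. */
static void lock_all(pthread_mutex_t **m, int n)
{
	int i, j;

again:
	for (i = 0; i < n; i++) {
		if (pthread_mutex_trylock(m[i]) == 0)
			continue;
		for (j = i - 1; j >= 0; j--)
			pthread_mutex_unlock(m[j]);
		sched_yield();		/* don't just spin the CPU */
		goto again;
	}
}

int main(void)
{
	pthread_mutex_t a = PTHREAD_MUTEX_INITIALIZER;
	pthread_mutex_t b = PTHREAD_MUTEX_INITIALIZER;
	pthread_mutex_t *locks[] = { &a, &b };

	lock_all(locks, 2);
	pthread_mutex_unlock(&b);
	pthread_mutex_unlock(&a);
	return 0;
}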
@@ -440,10 +489,10 @@ again: | |||
440 | } | 489 | } |
441 | 490 | ||
442 | /* | 491 | /* |
443 | * xfs_lock_two_inodes() can only be used to lock one type of lock | 492 | * xfs_lock_two_inodes() can only be used to lock one type of lock at a time - |
444 | * at a time - the iolock or the ilock, but not both at once. If | 493 | * the iolock, the mmaplock or the ilock - never more than one at once. If we |
445 | * we lock both at once, lockdep will report false positives saying | 494 | * lock more than one at once, lockdep will report false positives saying we |
446 | * we have violated locking orders. | 495 | * have violated locking orders. |
447 | */ | 496 | */ |
448 | void | 497 | void |
449 | xfs_lock_two_inodes( | 498 | xfs_lock_two_inodes( |
@@ -455,8 +504,12 @@ xfs_lock_two_inodes( | |||
455 | int attempts = 0; | 504 | int attempts = 0; |
456 | xfs_log_item_t *lp; | 505 | xfs_log_item_t *lp; |
457 | 506 | ||
458 | if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) | 507 | if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) { |
459 | ASSERT((lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)) == 0); | 508 | ASSERT(!(lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL))); |
509 | ASSERT(!(lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))); | ||
510 | } else if (lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) | ||
511 | ASSERT(!(lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))); | ||
512 | |||
460 | ASSERT(ip0->i_ino != ip1->i_ino); | 513 | ASSERT(ip0->i_ino != ip1->i_ino); |
461 | 514 | ||
462 | if (ip0->i_ino > ip1->i_ino) { | 515 | if (ip0->i_ino > ip1->i_ino) { |
@@ -818,7 +871,7 @@ xfs_ialloc( | |||
818 | xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); | 871 | xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); |
819 | xfs_trans_log_inode(tp, ip, flags); | 872 | xfs_trans_log_inode(tp, ip, flags); |
820 | 873 | ||
821 | /* now that we have an i_mode we can setup inode ops and unlock */ | 874 | /* now that we have an i_mode we can set up the inode structure */ |
822 | xfs_setup_inode(ip); | 875 | xfs_setup_inode(ip); |
823 | 876 | ||
824 | *ipp = ip; | 877 | *ipp = ip; |
@@ -1235,12 +1288,14 @@ xfs_create( | |||
1235 | xfs_trans_cancel(tp, cancel_flags); | 1288 | xfs_trans_cancel(tp, cancel_flags); |
1236 | out_release_inode: | 1289 | out_release_inode: |
1237 | /* | 1290 | /* |
1238 | * Wait until after the current transaction is aborted to | 1291 | * Wait until after the current transaction is aborted to finish the |
1239 | * release the inode. This prevents recursive transactions | 1292 | * setup of the inode and release the inode. This prevents recursive |
1240 | * and deadlocks from xfs_inactive. | 1293 | * transactions and deadlocks from xfs_inactive. |
1241 | */ | 1294 | */ |
1242 | if (ip) | 1295 | if (ip) { |
1296 | xfs_finish_inode_setup(ip); | ||
1243 | IRELE(ip); | 1297 | IRELE(ip); |
1298 | } | ||
1244 | 1299 | ||
1245 | xfs_qm_dqrele(udqp); | 1300 | xfs_qm_dqrele(udqp); |
1246 | xfs_qm_dqrele(gdqp); | 1301 | xfs_qm_dqrele(gdqp); |
@@ -1345,12 +1400,14 @@ xfs_create_tmpfile( | |||
1345 | xfs_trans_cancel(tp, cancel_flags); | 1400 | xfs_trans_cancel(tp, cancel_flags); |
1346 | out_release_inode: | 1401 | out_release_inode: |
1347 | /* | 1402 | /* |
1348 | * Wait until after the current transaction is aborted to | 1403 | * Wait until after the current transaction is aborted to finish the |
1349 | * release the inode. This prevents recursive transactions | 1404 | * setup of the inode and release the inode. This prevents recursive |
1350 | * and deadlocks from xfs_inactive. | 1405 | * transactions and deadlocks from xfs_inactive. |
1351 | */ | 1406 | */ |
1352 | if (ip) | 1407 | if (ip) { |
1408 | xfs_finish_inode_setup(ip); | ||
1353 | IRELE(ip); | 1409 | IRELE(ip); |
1410 | } | ||
1354 | 1411 | ||
1355 | xfs_qm_dqrele(udqp); | 1412 | xfs_qm_dqrele(udqp); |
1356 | xfs_qm_dqrele(gdqp); | 1413 | xfs_qm_dqrele(gdqp); |
@@ -2611,19 +2668,22 @@ xfs_remove( | |||
2611 | /* | 2668 | /* |
2612 | * Enter all inodes for a rename transaction into a sorted array. | 2669 | * Enter all inodes for a rename transaction into a sorted array. |
2613 | */ | 2670 | */ |
2671 | #define __XFS_SORT_INODES 5 | ||
2614 | STATIC void | 2672 | STATIC void |
2615 | xfs_sort_for_rename( | 2673 | xfs_sort_for_rename( |
2616 | xfs_inode_t *dp1, /* in: old (source) directory inode */ | 2674 | struct xfs_inode *dp1, /* in: old (source) directory inode */ |
2617 | xfs_inode_t *dp2, /* in: new (target) directory inode */ | 2675 | struct xfs_inode *dp2, /* in: new (target) directory inode */ |
2618 | xfs_inode_t *ip1, /* in: inode of old entry */ | 2676 | struct xfs_inode *ip1, /* in: inode of old entry */ |
2619 | xfs_inode_t *ip2, /* in: inode of new entry, if it | 2677 | struct xfs_inode *ip2, /* in: inode of new entry */ |
2620 | already exists, NULL otherwise. */ | 2678 | struct xfs_inode *wip, /* in: whiteout inode */ |
2621 | xfs_inode_t **i_tab,/* out: array of inode returned, sorted */ | 2679 | struct xfs_inode **i_tab,/* out: sorted array of inodes */ |
2622 | int *num_inodes) /* out: number of inodes in array */ | 2680 | int *num_inodes) /* in/out: inodes in array */ |
2623 | { | 2681 | { |
2624 | xfs_inode_t *temp; | ||
2625 | int i, j; | 2682 | int i, j; |
2626 | 2683 | ||
2684 | ASSERT(*num_inodes == __XFS_SORT_INODES); | ||
2685 | memset(i_tab, 0, *num_inodes * sizeof(struct xfs_inode *)); | ||
2686 | |||
2627 | /* | 2687 | /* |
2628 | * i_tab contains a list of pointers to inodes. We initialize | 2688 | * i_tab contains a list of pointers to inodes. We initialize |
2629 | * the table here & we'll sort it. We will then use it to | 2689 | * the table here & we'll sort it. We will then use it to |
@@ -2631,25 +2691,24 @@ xfs_sort_for_rename( | |||
2631 | * | 2691 | * |
2632 | * Note that the table may contain duplicates. e.g., dp1 == dp2. | 2692 | * Note that the table may contain duplicates. e.g., dp1 == dp2. |
2633 | */ | 2693 | */ |
2634 | i_tab[0] = dp1; | 2694 | i = 0; |
2635 | i_tab[1] = dp2; | 2695 | i_tab[i++] = dp1; |
2636 | i_tab[2] = ip1; | 2696 | i_tab[i++] = dp2; |
2637 | if (ip2) { | 2697 | i_tab[i++] = ip1; |
2638 | *num_inodes = 4; | 2698 | if (ip2) |
2639 | i_tab[3] = ip2; | 2699 | i_tab[i++] = ip2; |
2640 | } else { | 2700 | if (wip) |
2641 | *num_inodes = 3; | 2701 | i_tab[i++] = wip; |
2642 | i_tab[3] = NULL; | 2702 | *num_inodes = i; |
2643 | } | ||
2644 | 2703 | ||
2645 | /* | 2704 | /* |
2646 | * Sort the elements via bubble sort. (Remember, there are at | 2705 | * Sort the elements via bubble sort. (Remember, there are at |
2647 | * most 4 elements to sort, so this is adequate.) | 2706 | * most 5 elements to sort, so this is adequate.) |
2648 | */ | 2707 | */ |
2649 | for (i = 0; i < *num_inodes; i++) { | 2708 | for (i = 0; i < *num_inodes; i++) { |
2650 | for (j = 1; j < *num_inodes; j++) { | 2709 | for (j = 1; j < *num_inodes; j++) { |
2651 | if (i_tab[j]->i_ino < i_tab[j-1]->i_ino) { | 2710 | if (i_tab[j]->i_ino < i_tab[j-1]->i_ino) { |
2652 | temp = i_tab[j]; | 2711 | struct xfs_inode *temp = i_tab[j]; |
2653 | i_tab[j] = i_tab[j-1]; | 2712 | i_tab[j] = i_tab[j-1]; |
2654 | i_tab[j-1] = temp; | 2713 | i_tab[j-1] = temp; |
2655 | } | 2714 | } |
@@ -2657,6 +2716,31 @@ xfs_sort_for_rename( | |||
2657 | } | 2716 | } |
2658 | } | 2717 | } |
2659 | 2718 | ||
2719 | static int | ||
2720 | xfs_finish_rename( | ||
2721 | struct xfs_trans *tp, | ||
2722 | struct xfs_bmap_free *free_list) | ||
2723 | { | ||
2724 | int committed = 0; | ||
2725 | int error; | ||
2726 | |||
2727 | /* | ||
2728 | * If this is a synchronous mount, make sure that the rename transaction | ||
2729 | * goes to disk before returning to the user. | ||
2730 | */ | ||
2731 | if (tp->t_mountp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) | ||
2732 | xfs_trans_set_sync(tp); | ||
2733 | |||
2734 | error = xfs_bmap_finish(&tp, free_list, &committed); | ||
2735 | if (error) { | ||
2736 | xfs_bmap_cancel(free_list); | ||
2737 | xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT); | ||
2738 | return error; | ||
2739 | } | ||
2740 | |||
2741 | return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); | ||
2742 | } | ||
2743 | |||
2660 | /* | 2744 | /* |
2661 | * xfs_cross_rename() | 2745 | * xfs_cross_rename() |
2662 | * | 2746 | * |
@@ -2685,14 +2769,14 @@ xfs_cross_rename( | |||
2685 | ip2->i_ino, | 2769 | ip2->i_ino, |
2686 | first_block, free_list, spaceres); | 2770 | first_block, free_list, spaceres); |
2687 | if (error) | 2771 | if (error) |
2688 | goto out; | 2772 | goto out_trans_abort; |
2689 | 2773 | ||
2690 | /* Swap inode number for dirent in second parent */ | 2774 | /* Swap inode number for dirent in second parent */ |
2691 | error = xfs_dir_replace(tp, dp2, name2, | 2775 | error = xfs_dir_replace(tp, dp2, name2, |
2692 | ip1->i_ino, | 2776 | ip1->i_ino, |
2693 | first_block, free_list, spaceres); | 2777 | first_block, free_list, spaceres); |
2694 | if (error) | 2778 | if (error) |
2695 | goto out; | 2779 | goto out_trans_abort; |
2696 | 2780 | ||
2697 | /* | 2781 | /* |
2698 | * If we're renaming one or more directories across different parents, | 2782 | * If we're renaming one or more directories across different parents, |
@@ -2707,16 +2791,16 @@ xfs_cross_rename( | |||
2707 | dp1->i_ino, first_block, | 2791 | dp1->i_ino, first_block, |
2708 | free_list, spaceres); | 2792 | free_list, spaceres); |
2709 | if (error) | 2793 | if (error) |
2710 | goto out; | 2794 | goto out_trans_abort; |
2711 | 2795 | ||
2712 | /* transfer ip2 ".." reference to dp1 */ | 2796 | /* transfer ip2 ".." reference to dp1 */ |
2713 | if (!S_ISDIR(ip1->i_d.di_mode)) { | 2797 | if (!S_ISDIR(ip1->i_d.di_mode)) { |
2714 | error = xfs_droplink(tp, dp2); | 2798 | error = xfs_droplink(tp, dp2); |
2715 | if (error) | 2799 | if (error) |
2716 | goto out; | 2800 | goto out_trans_abort; |
2717 | error = xfs_bumplink(tp, dp1); | 2801 | error = xfs_bumplink(tp, dp1); |
2718 | if (error) | 2802 | if (error) |
2719 | goto out; | 2803 | goto out_trans_abort; |
2720 | } | 2804 | } |
2721 | 2805 | ||
2722 | /* | 2806 | /* |
@@ -2734,16 +2818,16 @@ xfs_cross_rename( | |||
2734 | dp2->i_ino, first_block, | 2818 | dp2->i_ino, first_block, |
2735 | free_list, spaceres); | 2819 | free_list, spaceres); |
2736 | if (error) | 2820 | if (error) |
2737 | goto out; | 2821 | goto out_trans_abort; |
2738 | 2822 | ||
2739 | /* transfer ip1 ".." reference to dp2 */ | 2823 | /* transfer ip1 ".." reference to dp2 */ |
2740 | if (!S_ISDIR(ip2->i_d.di_mode)) { | 2824 | if (!S_ISDIR(ip2->i_d.di_mode)) { |
2741 | error = xfs_droplink(tp, dp1); | 2825 | error = xfs_droplink(tp, dp1); |
2742 | if (error) | 2826 | if (error) |
2743 | goto out; | 2827 | goto out_trans_abort; |
2744 | error = xfs_bumplink(tp, dp2); | 2828 | error = xfs_bumplink(tp, dp2); |
2745 | if (error) | 2829 | if (error) |
2746 | goto out; | 2830 | goto out_trans_abort; |
2747 | } | 2831 | } |
2748 | 2832 | ||
2749 | /* | 2833 | /* |
@@ -2771,66 +2855,108 @@ xfs_cross_rename( | |||
2771 | } | 2855 | } |
2772 | xfs_trans_ichgtime(tp, dp1, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); | 2856 | xfs_trans_ichgtime(tp, dp1, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); |
2773 | xfs_trans_log_inode(tp, dp1, XFS_ILOG_CORE); | 2857 | xfs_trans_log_inode(tp, dp1, XFS_ILOG_CORE); |
2774 | out: | 2858 | return xfs_finish_rename(tp, free_list); |
2859 | |||
2860 | out_trans_abort: | ||
2861 | xfs_bmap_cancel(free_list); | ||
2862 | xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT); | ||
2775 | return error; | 2863 | return error; |
2776 | } | 2864 | } |
2777 | 2865 | ||
2778 | /* | 2866 | /* |
2867 | * xfs_rename_alloc_whiteout() | ||
2868 | * | ||
2869 | * Return a referenced, unlinked, unlocked inode that can be used as a | ||
2870 | * whiteout in a rename transaction. We use a tmpfile inode here so that if we | ||
2871 | * crash between allocating the inode and linking it into the rename transaction, | ||
2872 | * recovery will free the inode and we won't leak it. | ||
2873 | */ | ||
2874 | static int | ||
2875 | xfs_rename_alloc_whiteout( | ||
2876 | struct xfs_inode *dp, | ||
2877 | struct xfs_inode **wip) | ||
2878 | { | ||
2879 | struct xfs_inode *tmpfile; | ||
2880 | int error; | ||
2881 | |||
2882 | error = xfs_create_tmpfile(dp, NULL, S_IFCHR | WHITEOUT_MODE, &tmpfile); | ||
2883 | if (error) | ||
2884 | return error; | ||
2885 | |||
2886 | /* Satisfy xfs_bumplink that this is a real tmpfile */ | ||
2887 | xfs_finish_inode_setup(tmpfile); | ||
2888 | VFS_I(tmpfile)->i_state |= I_LINKABLE; | ||
2889 | |||
2890 | *wip = tmpfile; | ||
2891 | return 0; | ||
2892 | } | ||
2893 | |||
2894 | /* | ||
2779 | * xfs_rename | 2895 | * xfs_rename |
2780 | */ | 2896 | */ |
2781 | int | 2897 | int |
2782 | xfs_rename( | 2898 | xfs_rename( |
2783 | xfs_inode_t *src_dp, | 2899 | struct xfs_inode *src_dp, |
2784 | struct xfs_name *src_name, | 2900 | struct xfs_name *src_name, |
2785 | xfs_inode_t *src_ip, | 2901 | struct xfs_inode *src_ip, |
2786 | xfs_inode_t *target_dp, | 2902 | struct xfs_inode *target_dp, |
2787 | struct xfs_name *target_name, | 2903 | struct xfs_name *target_name, |
2788 | xfs_inode_t *target_ip, | 2904 | struct xfs_inode *target_ip, |
2789 | unsigned int flags) | 2905 | unsigned int flags) |
2790 | { | 2906 | { |
2791 | xfs_trans_t *tp = NULL; | 2907 | struct xfs_mount *mp = src_dp->i_mount; |
2792 | xfs_mount_t *mp = src_dp->i_mount; | 2908 | struct xfs_trans *tp; |
2793 | int new_parent; /* moving to a new dir */ | 2909 | struct xfs_bmap_free free_list; |
2794 | int src_is_directory; /* src_name is a directory */ | 2910 | xfs_fsblock_t first_block; |
2795 | int error; | 2911 | struct xfs_inode *wip = NULL; /* whiteout inode */ |
2796 | xfs_bmap_free_t free_list; | 2912 | struct xfs_inode *inodes[__XFS_SORT_INODES]; |
2797 | xfs_fsblock_t first_block; | 2913 | int num_inodes = __XFS_SORT_INODES; |
2798 | int cancel_flags; | 2914 | bool new_parent = (src_dp != target_dp); |
2799 | int committed; | 2915 | bool src_is_directory = S_ISDIR(src_ip->i_d.di_mode); |
2800 | xfs_inode_t *inodes[4]; | 2916 | int cancel_flags = 0; |
2801 | int spaceres; | 2917 | int spaceres; |
2802 | int num_inodes; | 2918 | int error; |
2803 | 2919 | ||
2804 | trace_xfs_rename(src_dp, target_dp, src_name, target_name); | 2920 | trace_xfs_rename(src_dp, target_dp, src_name, target_name); |
2805 | 2921 | ||
2806 | new_parent = (src_dp != target_dp); | 2922 | if ((flags & RENAME_EXCHANGE) && !target_ip) |
2807 | src_is_directory = S_ISDIR(src_ip->i_d.di_mode); | 2923 | return -EINVAL; |
2808 | 2924 | ||
2809 | xfs_sort_for_rename(src_dp, target_dp, src_ip, target_ip, | 2925 | /* |
2926 | * If we are doing a whiteout operation, allocate the whiteout inode | ||
2927 | * we will be placing at the target and ensure the type is set | ||
2928 | * appropriately. | ||
2929 | */ | ||
2930 | if (flags & RENAME_WHITEOUT) { | ||
2931 | ASSERT(!(flags & (RENAME_NOREPLACE | RENAME_EXCHANGE))); | ||
2932 | error = xfs_rename_alloc_whiteout(target_dp, &wip); | ||
2933 | if (error) | ||
2934 | return error; | ||
2935 | |||
2936 | /* setup target dirent info as whiteout */ | ||
2937 | src_name->type = XFS_DIR3_FT_CHRDEV; | ||
2938 | } | ||
2939 | |||
2940 | xfs_sort_for_rename(src_dp, target_dp, src_ip, target_ip, wip, | ||
2810 | inodes, &num_inodes); | 2941 | inodes, &num_inodes); |
2811 | 2942 | ||
2812 | xfs_bmap_init(&free_list, &first_block); | ||
2813 | tp = xfs_trans_alloc(mp, XFS_TRANS_RENAME); | 2943 | tp = xfs_trans_alloc(mp, XFS_TRANS_RENAME); |
2814 | cancel_flags = XFS_TRANS_RELEASE_LOG_RES; | ||
2815 | spaceres = XFS_RENAME_SPACE_RES(mp, target_name->len); | 2944 | spaceres = XFS_RENAME_SPACE_RES(mp, target_name->len); |
2816 | error = xfs_trans_reserve(tp, &M_RES(mp)->tr_rename, spaceres, 0); | 2945 | error = xfs_trans_reserve(tp, &M_RES(mp)->tr_rename, spaceres, 0); |
2817 | if (error == -ENOSPC) { | 2946 | if (error == -ENOSPC) { |
2818 | spaceres = 0; | 2947 | spaceres = 0; |
2819 | error = xfs_trans_reserve(tp, &M_RES(mp)->tr_rename, 0, 0); | 2948 | error = xfs_trans_reserve(tp, &M_RES(mp)->tr_rename, 0, 0); |
2820 | } | 2949 | } |
2821 | if (error) { | 2950 | if (error) |
2822 | xfs_trans_cancel(tp, 0); | 2951 | goto out_trans_cancel; |
2823 | goto std_return; | 2952 | cancel_flags = XFS_TRANS_RELEASE_LOG_RES; |
2824 | } | ||
2825 | 2953 | ||
2826 | /* | 2954 | /* |
2827 | * Attach the dquots to the inodes | 2955 | * Attach the dquots to the inodes |
2828 | */ | 2956 | */ |
2829 | error = xfs_qm_vop_rename_dqattach(inodes); | 2957 | error = xfs_qm_vop_rename_dqattach(inodes); |
2830 | if (error) { | 2958 | if (error) |
2831 | xfs_trans_cancel(tp, cancel_flags); | 2959 | goto out_trans_cancel; |
2832 | goto std_return; | ||
2833 | } | ||
2834 | 2960 | ||
2835 | /* | 2961 | /* |
2836 | * Lock all the participating inodes. Depending upon whether | 2962 | * Lock all the participating inodes. Depending upon whether |
@@ -2851,6 +2977,8 @@ xfs_rename( | |||
2851 | xfs_trans_ijoin(tp, src_ip, XFS_ILOCK_EXCL); | 2977 | xfs_trans_ijoin(tp, src_ip, XFS_ILOCK_EXCL); |
2852 | if (target_ip) | 2978 | if (target_ip) |
2853 | xfs_trans_ijoin(tp, target_ip, XFS_ILOCK_EXCL); | 2979 | xfs_trans_ijoin(tp, target_ip, XFS_ILOCK_EXCL); |
2980 | if (wip) | ||
2981 | xfs_trans_ijoin(tp, wip, XFS_ILOCK_EXCL); | ||
2854 | 2982 | ||
2855 | /* | 2983 | /* |
2856 | * If we are using project inheritance, we only allow renames | 2984 | * If we are using project inheritance, we only allow renames |
@@ -2860,24 +2988,16 @@ xfs_rename( | |||
2860 | if (unlikely((target_dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) && | 2988 | if (unlikely((target_dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) && |
2861 | (xfs_get_projid(target_dp) != xfs_get_projid(src_ip)))) { | 2989 | (xfs_get_projid(target_dp) != xfs_get_projid(src_ip)))) { |
2862 | error = -EXDEV; | 2990 | error = -EXDEV; |
2863 | goto error_return; | 2991 | goto out_trans_cancel; |
2864 | } | 2992 | } |
2865 | 2993 | ||
2866 | /* | 2994 | xfs_bmap_init(&free_list, &first_block); |
2867 | * Handle RENAME_EXCHANGE flags | 2995 | |
2868 | */ | 2996 | /* RENAME_EXCHANGE is unique from here on. */ |
2869 | if (flags & RENAME_EXCHANGE) { | 2997 | if (flags & RENAME_EXCHANGE) |
2870 | if (target_ip == NULL) { | 2998 | return xfs_cross_rename(tp, src_dp, src_name, src_ip, |
2871 | error = -EINVAL; | 2999 | target_dp, target_name, target_ip, |
2872 | goto error_return; | 3000 | &free_list, &first_block, spaceres); |
2873 | } | ||
2874 | error = xfs_cross_rename(tp, src_dp, src_name, src_ip, | ||
2875 | target_dp, target_name, target_ip, | ||
2876 | &free_list, &first_block, spaceres); | ||
2877 | if (error) | ||
2878 | goto abort_return; | ||
2879 | goto finish_rename; | ||
2880 | } | ||
2881 | 3001 | ||
2882 | /* | 3002 | /* |
2883 | * Set up the target. | 3003 | * Set up the target. |
@@ -2890,7 +3010,7 @@ xfs_rename( | |||
2890 | if (!spaceres) { | 3010 | if (!spaceres) { |
2891 | error = xfs_dir_canenter(tp, target_dp, target_name); | 3011 | error = xfs_dir_canenter(tp, target_dp, target_name); |
2892 | if (error) | 3012 | if (error) |
2893 | goto error_return; | 3013 | goto out_trans_cancel; |
2894 | } | 3014 | } |
2895 | /* | 3015 | /* |
2896 | * If target does not exist and the rename crosses | 3016 | * If target does not exist and the rename crosses |
@@ -2901,9 +3021,9 @@ xfs_rename( | |||
2901 | src_ip->i_ino, &first_block, | 3021 | src_ip->i_ino, &first_block, |
2902 | &free_list, spaceres); | 3022 | &free_list, spaceres); |
2903 | if (error == -ENOSPC) | 3023 | if (error == -ENOSPC) |
2904 | goto error_return; | 3024 | goto out_bmap_cancel; |
2905 | if (error) | 3025 | if (error) |
2906 | goto abort_return; | 3026 | goto out_trans_abort; |
2907 | 3027 | ||
2908 | xfs_trans_ichgtime(tp, target_dp, | 3028 | xfs_trans_ichgtime(tp, target_dp, |
2909 | XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); | 3029 | XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); |
@@ -2911,7 +3031,7 @@ xfs_rename( | |||
2911 | if (new_parent && src_is_directory) { | 3031 | if (new_parent && src_is_directory) { |
2912 | error = xfs_bumplink(tp, target_dp); | 3032 | error = xfs_bumplink(tp, target_dp); |
2913 | if (error) | 3033 | if (error) |
2914 | goto abort_return; | 3034 | goto out_trans_abort; |
2915 | } | 3035 | } |
2916 | } else { /* target_ip != NULL */ | 3036 | } else { /* target_ip != NULL */ |
2917 | /* | 3037 | /* |
@@ -2926,7 +3046,7 @@ xfs_rename( | |||
2926 | if (!(xfs_dir_isempty(target_ip)) || | 3046 | if (!(xfs_dir_isempty(target_ip)) || |
2927 | (target_ip->i_d.di_nlink > 2)) { | 3047 | (target_ip->i_d.di_nlink > 2)) { |
2928 | error = -EEXIST; | 3048 | error = -EEXIST; |
2929 | goto error_return; | 3049 | goto out_trans_cancel; |
2930 | } | 3050 | } |
2931 | } | 3051 | } |
2932 | 3052 | ||
@@ -2943,7 +3063,7 @@ xfs_rename( | |||
2943 | src_ip->i_ino, | 3063 | src_ip->i_ino, |
2944 | &first_block, &free_list, spaceres); | 3064 | &first_block, &free_list, spaceres); |
2945 | if (error) | 3065 | if (error) |
2946 | goto abort_return; | 3066 | goto out_trans_abort; |
2947 | 3067 | ||
2948 | xfs_trans_ichgtime(tp, target_dp, | 3068 | xfs_trans_ichgtime(tp, target_dp, |
2949 | XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); | 3069 | XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); |
@@ -2954,7 +3074,7 @@ xfs_rename( | |||
2954 | */ | 3074 | */ |
2955 | error = xfs_droplink(tp, target_ip); | 3075 | error = xfs_droplink(tp, target_ip); |
2956 | if (error) | 3076 | if (error) |
2957 | goto abort_return; | 3077 | goto out_trans_abort; |
2958 | 3078 | ||
2959 | if (src_is_directory) { | 3079 | if (src_is_directory) { |
2960 | /* | 3080 | /* |
@@ -2962,7 +3082,7 @@ xfs_rename( | |||
2962 | */ | 3082 | */ |
2963 | error = xfs_droplink(tp, target_ip); | 3083 | error = xfs_droplink(tp, target_ip); |
2964 | if (error) | 3084 | if (error) |
2965 | goto abort_return; | 3085 | goto out_trans_abort; |
2966 | } | 3086 | } |
2967 | } /* target_ip != NULL */ | 3087 | } /* target_ip != NULL */ |
2968 | 3088 | ||
@@ -2979,7 +3099,7 @@ xfs_rename( | |||
2979 | &first_block, &free_list, spaceres); | 3099 | &first_block, &free_list, spaceres); |
2980 | ASSERT(error != -EEXIST); | 3100 | ASSERT(error != -EEXIST); |
2981 | if (error) | 3101 | if (error) |
2982 | goto abort_return; | 3102 | goto out_trans_abort; |
2983 | } | 3103 | } |
2984 | 3104 | ||
2985 | /* | 3105 | /* |
@@ -3005,49 +3125,67 @@ xfs_rename( | |||
3005 | */ | 3125 | */ |
3006 | error = xfs_droplink(tp, src_dp); | 3126 | error = xfs_droplink(tp, src_dp); |
3007 | if (error) | 3127 | if (error) |
3008 | goto abort_return; | 3128 | goto out_trans_abort; |
3009 | } | 3129 | } |
3010 | 3130 | ||
3011 | error = xfs_dir_removename(tp, src_dp, src_name, src_ip->i_ino, | 3131 | /* |
3132 | * For whiteouts, we only need to update the source dirent with the | ||
3133 | * inode number of the whiteout inode rather than removing it | ||
3134 | * altogether. | ||
3135 | */ | ||
3136 | if (wip) { | ||
3137 | error = xfs_dir_replace(tp, src_dp, src_name, wip->i_ino, | ||
3012 | &first_block, &free_list, spaceres); | 3138 | &first_block, &free_list, spaceres); |
3139 | } else | ||
3140 | error = xfs_dir_removename(tp, src_dp, src_name, src_ip->i_ino, | ||
3141 | &first_block, &free_list, spaceres); | ||
3013 | if (error) | 3142 | if (error) |
3014 | goto abort_return; | 3143 | goto out_trans_abort; |
3015 | |||
3016 | xfs_trans_ichgtime(tp, src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); | ||
3017 | xfs_trans_log_inode(tp, src_dp, XFS_ILOG_CORE); | ||
3018 | if (new_parent) | ||
3019 | xfs_trans_log_inode(tp, target_dp, XFS_ILOG_CORE); | ||
3020 | 3144 | ||
3021 | finish_rename: | ||
3022 | /* | 3145 | /* |
3023 | * If this is a synchronous mount, make sure that the | 3146 | * For whiteouts, we need to bump the link count on the whiteout inode. |
3024 | * rename transaction goes to disk before returning to | 3147 | * This means that failures all the way up to this point leave the inode |
3025 | * the user. | 3148 | * on the unlinked list and so cleanup is a simple matter of dropping |
3149 | * the remaining reference to it. If we fail here after bumping the link | ||
3150 | * count, we're shutting down the filesystem so we'll never see the | ||
3151 | * intermediate state on disk. | ||
3026 | */ | 3152 | */ |
3027 | if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) { | 3153 | if (wip) { |
3028 | xfs_trans_set_sync(tp); | 3154 | ASSERT(wip->i_d.di_nlink == 0); |
3029 | } | 3155 | error = xfs_bumplink(tp, wip); |
3156 | if (error) | ||
3157 | goto out_trans_abort; | ||
3158 | error = xfs_iunlink_remove(tp, wip); | ||
3159 | if (error) | ||
3160 | goto out_trans_abort; | ||
3161 | xfs_trans_log_inode(tp, wip, XFS_ILOG_CORE); | ||
3030 | 3162 | ||
3031 | error = xfs_bmap_finish(&tp, &free_list, &committed); | 3163 | /* |
3032 | if (error) { | 3164 | * Now we have a real link, clear the "I'm a tmpfile" state |
3033 | xfs_bmap_cancel(&free_list); | 3165 | * flag from the inode so it doesn't accidentally get misused in |
3034 | xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES | | 3166 | * future. |
3035 | XFS_TRANS_ABORT)); | 3167 | */ |
3036 | goto std_return; | 3168 | VFS_I(wip)->i_state &= ~I_LINKABLE; |
3037 | } | 3169 | } |
3038 | 3170 | ||
3039 | /* | 3171 | xfs_trans_ichgtime(tp, src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); |
3040 | * trans_commit will unlock src_ip, target_ip & decrement | 3172 | xfs_trans_log_inode(tp, src_dp, XFS_ILOG_CORE); |
3041 | * the vnode references. | 3173 | if (new_parent) |
3042 | */ | 3174 | xfs_trans_log_inode(tp, target_dp, XFS_ILOG_CORE); |
3043 | return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); | ||
3044 | 3175 | ||
3045 | abort_return: | 3176 | error = xfs_finish_rename(tp, &free_list); |
3177 | if (wip) | ||
3178 | IRELE(wip); | ||
3179 | return error; | ||
3180 | |||
3181 | out_trans_abort: | ||
3046 | cancel_flags |= XFS_TRANS_ABORT; | 3182 | cancel_flags |= XFS_TRANS_ABORT; |
3047 | error_return: | 3183 | out_bmap_cancel: |
3048 | xfs_bmap_cancel(&free_list); | 3184 | xfs_bmap_cancel(&free_list); |
3185 | out_trans_cancel: | ||
3049 | xfs_trans_cancel(tp, cancel_flags); | 3186 | xfs_trans_cancel(tp, cancel_flags); |
3050 | std_return: | 3187 | if (wip) |
3188 | IRELE(wip); | ||
3051 | return error; | 3189 | return error; |
3052 | } | 3190 | } |
3053 | 3191 | ||
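The whiteout support added above is what renameat2(2) with RENAME_WHITEOUT exercises, typically on behalf of overlayfs. A minimal user-space sketch, going through syscall(2) since the glibc renameat2() wrapper arrived later than the syscall:

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

#ifndef RENAME_WHITEOUT
#define RENAME_WHITEOUT (1 << 2)	/* from linux/fs.h, Linux 3.18+ */
#endif

int main(int argc, char **argv)
{
	if (argc != 3) {
		fprintf(stderr, "usage: %s <src> <dst>\n", argv[0]);
		return 1;
	}
	/* move src to dst and leave a char-device whiteout at src */
	if (syscall(SYS_renameat2, AT_FDCWD, argv[1], AT_FDCWD, argv[2],
		    RENAME_WHITEOUT) < 0) {
		perror("renameat2(RENAME_WHITEOUT)");
		return 1;
	}
	return 0;
}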
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index a1cd55f3f351..8f22d20368d8 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h | |||
@@ -56,6 +56,7 @@ typedef struct xfs_inode { | |||
56 | struct xfs_inode_log_item *i_itemp; /* logging information */ | 56 | struct xfs_inode_log_item *i_itemp; /* logging information */ |
57 | mrlock_t i_lock; /* inode lock */ | 57 | mrlock_t i_lock; /* inode lock */ |
58 | mrlock_t i_iolock; /* inode IO lock */ | 58 | mrlock_t i_iolock; /* inode IO lock */ |
59 | mrlock_t i_mmaplock; /* inode mmap IO lock */ | ||
59 | atomic_t i_pincount; /* inode pin count */ | 60 | atomic_t i_pincount; /* inode pin count */ |
60 | spinlock_t i_flags_lock; /* inode i_flags lock */ | 61 | spinlock_t i_flags_lock; /* inode i_flags lock */ |
61 | /* Miscellaneous state. */ | 62 | /* Miscellaneous state. */ |
@@ -263,15 +264,20 @@ static inline int xfs_isiflocked(struct xfs_inode *ip) | |||
263 | #define XFS_IOLOCK_SHARED (1<<1) | 264 | #define XFS_IOLOCK_SHARED (1<<1) |
264 | #define XFS_ILOCK_EXCL (1<<2) | 265 | #define XFS_ILOCK_EXCL (1<<2) |
265 | #define XFS_ILOCK_SHARED (1<<3) | 266 | #define XFS_ILOCK_SHARED (1<<3) |
267 | #define XFS_MMAPLOCK_EXCL (1<<4) | ||
268 | #define XFS_MMAPLOCK_SHARED (1<<5) | ||
266 | 269 | ||
267 | #define XFS_LOCK_MASK (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED \ | 270 | #define XFS_LOCK_MASK (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED \ |
268 | | XFS_ILOCK_EXCL | XFS_ILOCK_SHARED) | 271 | | XFS_ILOCK_EXCL | XFS_ILOCK_SHARED \ |
272 | | XFS_MMAPLOCK_EXCL | XFS_MMAPLOCK_SHARED) | ||
269 | 273 | ||
270 | #define XFS_LOCK_FLAGS \ | 274 | #define XFS_LOCK_FLAGS \ |
271 | { XFS_IOLOCK_EXCL, "IOLOCK_EXCL" }, \ | 275 | { XFS_IOLOCK_EXCL, "IOLOCK_EXCL" }, \ |
272 | { XFS_IOLOCK_SHARED, "IOLOCK_SHARED" }, \ | 276 | { XFS_IOLOCK_SHARED, "IOLOCK_SHARED" }, \ |
273 | { XFS_ILOCK_EXCL, "ILOCK_EXCL" }, \ | 277 | { XFS_ILOCK_EXCL, "ILOCK_EXCL" }, \ |
274 | { XFS_ILOCK_SHARED, "ILOCK_SHARED" } | 278 | { XFS_ILOCK_SHARED, "ILOCK_SHARED" }, \ |
279 | { XFS_MMAPLOCK_EXCL, "MMAPLOCK_EXCL" }, \ | ||
280 | { XFS_MMAPLOCK_SHARED, "MMAPLOCK_SHARED" } | ||
275 | 281 | ||
276 | 282 | ||
277 | /* | 283 | /* |
@@ -302,17 +308,26 @@ static inline int xfs_isiflocked(struct xfs_inode *ip) | |||
302 | #define XFS_IOLOCK_SHIFT 16 | 308 | #define XFS_IOLOCK_SHIFT 16 |
303 | #define XFS_IOLOCK_PARENT (XFS_LOCK_PARENT << XFS_IOLOCK_SHIFT) | 309 | #define XFS_IOLOCK_PARENT (XFS_LOCK_PARENT << XFS_IOLOCK_SHIFT) |
304 | 310 | ||
311 | #define XFS_MMAPLOCK_SHIFT 20 | ||
312 | |||
305 | #define XFS_ILOCK_SHIFT 24 | 313 | #define XFS_ILOCK_SHIFT 24 |
306 | #define XFS_ILOCK_PARENT (XFS_LOCK_PARENT << XFS_ILOCK_SHIFT) | 314 | #define XFS_ILOCK_PARENT (XFS_LOCK_PARENT << XFS_ILOCK_SHIFT) |
307 | #define XFS_ILOCK_RTBITMAP (XFS_LOCK_RTBITMAP << XFS_ILOCK_SHIFT) | 315 | #define XFS_ILOCK_RTBITMAP (XFS_LOCK_RTBITMAP << XFS_ILOCK_SHIFT) |
308 | #define XFS_ILOCK_RTSUM (XFS_LOCK_RTSUM << XFS_ILOCK_SHIFT) | 316 | #define XFS_ILOCK_RTSUM (XFS_LOCK_RTSUM << XFS_ILOCK_SHIFT) |
309 | 317 | ||
310 | #define XFS_IOLOCK_DEP_MASK 0x00ff0000 | 318 | #define XFS_IOLOCK_DEP_MASK 0x000f0000 |
319 | #define XFS_MMAPLOCK_DEP_MASK 0x00f00000 | ||
311 | #define XFS_ILOCK_DEP_MASK 0xff000000 | 320 | #define XFS_ILOCK_DEP_MASK 0xff000000 |
312 | #define XFS_LOCK_DEP_MASK (XFS_IOLOCK_DEP_MASK | XFS_ILOCK_DEP_MASK) | 321 | #define XFS_LOCK_DEP_MASK (XFS_IOLOCK_DEP_MASK | \ |
322 | XFS_MMAPLOCK_DEP_MASK | \ | ||
323 | XFS_ILOCK_DEP_MASK) | ||
313 | 324 | ||
314 | #define XFS_IOLOCK_DEP(flags) (((flags) & XFS_IOLOCK_DEP_MASK) >> XFS_IOLOCK_SHIFT) | 325 | #define XFS_IOLOCK_DEP(flags) (((flags) & XFS_IOLOCK_DEP_MASK) \ |
315 | #define XFS_ILOCK_DEP(flags) (((flags) & XFS_ILOCK_DEP_MASK) >> XFS_ILOCK_SHIFT) | 326 | >> XFS_IOLOCK_SHIFT) |
327 | #define XFS_MMAPLOCK_DEP(flags) (((flags) & XFS_MMAPLOCK_DEP_MASK) \ | ||
328 | >> XFS_MMAPLOCK_SHIFT) | ||
329 | #define XFS_ILOCK_DEP(flags) (((flags) & XFS_ILOCK_DEP_MASK) \ | ||
330 | >> XFS_ILOCK_SHIFT) | ||
316 | 331 | ||
317 | /* | 332 | /* |
318 | * For multiple groups support: if S_ISGID bit is set in the parent | 333 | * For multiple groups support: if S_ISGID bit is set in the parent |
@@ -391,6 +406,28 @@ int xfs_zero_eof(struct xfs_inode *ip, xfs_off_t offset, | |||
391 | int xfs_iozero(struct xfs_inode *ip, loff_t pos, size_t count); | 406 | int xfs_iozero(struct xfs_inode *ip, loff_t pos, size_t count); |
392 | 407 | ||
393 | 408 | ||
409 | /* from xfs_iops.c */ | ||
410 | /* | ||
411 | * When setting up a newly allocated inode, we need to call | ||
412 | * xfs_finish_inode_setup() once the inode is fully instantiated at | ||
413 | * the VFS level to prevent the rest of the world seeing the inode | ||
414 | * before we've completed instantiation. Otherwise we can do it | ||
415 | * the moment the inode lookup is complete. | ||
416 | */ | ||
417 | extern void xfs_setup_inode(struct xfs_inode *ip); | ||
418 | static inline void xfs_finish_inode_setup(struct xfs_inode *ip) | ||
419 | { | ||
420 | xfs_iflags_clear(ip, XFS_INEW); | ||
421 | barrier(); | ||
422 | unlock_new_inode(VFS_I(ip)); | ||
423 | } | ||
424 | |||
425 | static inline void xfs_setup_existing_inode(struct xfs_inode *ip) | ||
426 | { | ||
427 | xfs_setup_inode(ip); | ||
428 | xfs_finish_inode_setup(ip); | ||
429 | } | ||
430 | |||
394 | #define IHOLD(ip) \ | 431 | #define IHOLD(ip) \ |
395 | do { \ | 432 | do { \ |
396 | ASSERT(atomic_read(&VFS_I(ip)->i_count) > 0) ; \ | 433 | ASSERT(atomic_read(&VFS_I(ip)->i_count) > 0) ; \ |
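xfs_finish_inode_setup() encodes a publish-after-initialise ordering: all setup stores must be visible before the XFS_INEW flag clears and unlock_new_inode() lets other threads find the inode. The same shape in portable C11 atomics - an analogy under invented names, not the kernel's implementation:

#include <stdatomic.h>
#include <stdbool.h>

struct obj {
	int payload;			/* stands in for inode state */
	atomic_bool in_setup;		/* stands in for XFS_INEW */
};

static void finish_setup(struct obj *o, int value)
{
	o->payload = value;
	/* release ordering plays the role of barrier() before the
	 * unlock_new_inode() publication step */
	atomic_store_explicit(&o->in_setup, false, memory_order_release);
}

static int use_if_ready(struct obj *o)
{
	if (!atomic_load_explicit(&o->in_setup, memory_order_acquire))
		return o->payload;	/* setup stores are visible */
	return -1;			/* still being instantiated */
}

int main(void)
{
	struct obj o = { .payload = 0, .in_setup = true };

	finish_setup(&o, 42);
	return use_if_ready(&o) == 42 ? 0 : 1;
}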
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index ac4feae45eb3..5f4a396f5186 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c | |||
@@ -631,7 +631,7 @@ xfs_ioc_space( | |||
631 | 631 | ||
632 | if (filp->f_flags & O_DSYNC) | 632 | if (filp->f_flags & O_DSYNC) |
633 | flags |= XFS_PREALLOC_SYNC; | 633 | flags |= XFS_PREALLOC_SYNC; |
634 | if (ioflags & XFS_IO_INVIS) | 634 | if (ioflags & XFS_IO_INVIS) |
635 | flags |= XFS_PREALLOC_INVISIBLE; | 635 | flags |= XFS_PREALLOC_INVISIBLE; |
636 | 636 | ||
637 | error = mnt_want_write_file(filp); | 637 | error = mnt_want_write_file(filp); |
@@ -639,10 +639,13 @@ xfs_ioc_space( | |||
639 | return error; | 639 | return error; |
640 | 640 | ||
641 | xfs_ilock(ip, iolock); | 641 | xfs_ilock(ip, iolock); |
642 | error = xfs_break_layouts(inode, &iolock); | 642 | error = xfs_break_layouts(inode, &iolock, false); |
643 | if (error) | 643 | if (error) |
644 | goto out_unlock; | 644 | goto out_unlock; |
645 | 645 | ||
646 | xfs_ilock(ip, XFS_MMAPLOCK_EXCL); | ||
647 | iolock |= XFS_MMAPLOCK_EXCL; | ||
648 | |||
646 | switch (bf->l_whence) { | 649 | switch (bf->l_whence) { |
647 | case 0: /*SEEK_SET*/ | 650 | case 0: /*SEEK_SET*/ |
648 | break; | 651 | break; |
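This hunk establishes the lock order for the preallocation ioctl: take the IOLOCK, break pNFS layouts (which may cycle the IOLOCK), and only then take XFS_MMAPLOCK_EXCL, OR-ing it into the same flag word so the single xfs_iunlock() on the exit path drops both locks. A minimal user-space model of that pattern, with the lock routines reduced to stubs and the layout-break details (including the new with_imutex argument) omitted:

#include <stdio.h>

#define XFS_IOLOCK_EXCL		(1u << 0)
#define XFS_MMAPLOCK_EXCL	(1u << 1)

struct xfs_inode { unsigned held; };

static void xfs_ilock(struct xfs_inode *ip, unsigned f)   { ip->held |= f; }
static void xfs_iunlock(struct xfs_inode *ip, unsigned f) { ip->held &= ~f; }

/* Stub: pretend no pNFS layouts are outstanding. */
static int xfs_break_layouts(struct xfs_inode *ip, unsigned *iolock)
{
	return 0;
}

static int space_op(struct xfs_inode *ip)
{
	unsigned iolock = XFS_IOLOCK_EXCL;
	int error;

	xfs_ilock(ip, iolock);
	error = xfs_break_layouts(ip, &iolock);	/* may cycle the IOLOCK */
	if (error)
		goto out_unlock;

	/* Only now lock out page faults, tracked in the same flag word. */
	xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
	iolock |= XFS_MMAPLOCK_EXCL;

	/* extent manipulation would go here, safe from page faults */

out_unlock:
	xfs_iunlock(ip, iolock);	/* one call drops whatever we hold */
	return error;
}

int main(void)
{
	struct xfs_inode ip = { 0 };

	printf("space_op -> %d, locks held after = %u\n",
	       space_op(&ip), ip.held);
	return 0;
}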
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index ccb1dd0d509e..38e633bad8c2 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c | |||
@@ -460,8 +460,7 @@ xfs_iomap_prealloc_size( | |||
460 | alloc_blocks = XFS_FILEOFF_MIN(roundup_pow_of_two(MAXEXTLEN), | 460 | alloc_blocks = XFS_FILEOFF_MIN(roundup_pow_of_two(MAXEXTLEN), |
461 | alloc_blocks); | 461 | alloc_blocks); |
462 | 462 | ||
463 | xfs_icsb_sync_counters(mp, XFS_ICSB_LAZY_COUNT); | 463 | freesp = percpu_counter_read_positive(&mp->m_fdblocks); |
464 | freesp = mp->m_sb.sb_fdblocks; | ||
465 | if (freesp < mp->m_low_space[XFS_LOWSP_5_PCNT]) { | 464 | if (freesp < mp->m_low_space[XFS_LOWSP_5_PCNT]) { |
466 | shift = 2; | 465 | shift = 2; |
467 | if (freesp < mp->m_low_space[XFS_LOWSP_4_PCNT]) | 466 | if (freesp < mp->m_low_space[XFS_LOWSP_4_PCNT]) |
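With the icsb machinery gone, prealloc sizing reads free space through percpu_counter_read_positive(), which is cheap but approximate; precision does not matter here because the value only selects a throttling shift. A standalone model of that throttle with made-up threshold numbers — the hunk shows only the first two thresholds, so the remaining shift increments are an assumption based on the visible pattern:

#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-in for mp->m_low_space[]: thresholds at 1%..5%
 * of the filesystem, in blocks (invented values for illustration). */
static const uint64_t low_space[5] = { 1000, 2000, 3000, 4000, 5000 };

/* Model of the throttling in xfs_iomap_prealloc_size(): the scarcer
 * free space gets, the harder speculative preallocation is shrunk. */
static uint64_t throttle_prealloc(uint64_t alloc_blocks, uint64_t freesp)
{
	int shift = 0;

	if (freesp < low_space[4]) {		/* XFS_LOWSP_5_PCNT */
		shift = 2;
		if (freesp < low_space[3])	/* XFS_LOWSP_4_PCNT */
			shift++;
		if (freesp < low_space[2])	/* XFS_LOWSP_3_PCNT, assumed */
			shift++;
		if (freesp < low_space[1])	/* XFS_LOWSP_2_PCNT, assumed */
			shift++;
		if (freesp < low_space[0])	/* XFS_LOWSP_1_PCNT, assumed */
			shift++;
	}
	return alloc_blocks >> shift;
}

int main(void)
{
	printf("%llu\n", (unsigned long long)throttle_prealloc(8192, 6000)); /* 8192 */
	printf("%llu\n", (unsigned long long)throttle_prealloc(8192, 4500)); /* 2048 */
	printf("%llu\n", (unsigned long long)throttle_prealloc(8192, 500));  /* 128  */
	return 0;
}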
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index e53a90331422..2f1839e4dd1b 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c | |||
@@ -187,6 +187,8 @@ xfs_generic_create( | |||
187 | else | 187 | else |
188 | d_instantiate(dentry, inode); | 188 | d_instantiate(dentry, inode); |
189 | 189 | ||
190 | xfs_finish_inode_setup(ip); | ||
191 | |||
190 | out_free_acl: | 192 | out_free_acl: |
191 | if (default_acl) | 193 | if (default_acl) |
192 | posix_acl_release(default_acl); | 194 | posix_acl_release(default_acl); |
@@ -195,6 +197,7 @@ xfs_generic_create( | |||
195 | return error; | 197 | return error; |
196 | 198 | ||
197 | out_cleanup_inode: | 199 | out_cleanup_inode: |
200 | xfs_finish_inode_setup(ip); | ||
198 | if (!tmpfile) | 201 | if (!tmpfile) |
199 | xfs_cleanup_inode(dir, inode, dentry); | 202 | xfs_cleanup_inode(dir, inode, dentry); |
200 | iput(inode); | 203 | iput(inode); |
@@ -367,9 +370,11 @@ xfs_vn_symlink( | |||
367 | goto out_cleanup_inode; | 370 | goto out_cleanup_inode; |
368 | 371 | ||
369 | d_instantiate(dentry, inode); | 372 | d_instantiate(dentry, inode); |
373 | xfs_finish_inode_setup(cip); | ||
370 | return 0; | 374 | return 0; |
371 | 375 | ||
372 | out_cleanup_inode: | 376 | out_cleanup_inode: |
377 | xfs_finish_inode_setup(cip); | ||
373 | xfs_cleanup_inode(dir, inode, dentry); | 378 | xfs_cleanup_inode(dir, inode, dentry); |
374 | iput(inode); | 379 | iput(inode); |
375 | out: | 380 | out: |
@@ -389,7 +394,7 @@ xfs_vn_rename( | |||
389 | struct xfs_name oname; | 394 | struct xfs_name oname; |
390 | struct xfs_name nname; | 395 | struct xfs_name nname; |
391 | 396 | ||
392 | if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE)) | 397 | if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT)) |
393 | return -EINVAL; | 398 | return -EINVAL; |
394 | 399 | ||
395 | /* if we are exchanging files, we need to set i_mode of both files */ | 400 | /* if we are exchanging files, we need to set i_mode of both files */ |
@@ -766,6 +771,7 @@ xfs_setattr_size( | |||
766 | return error; | 771 | return error; |
767 | 772 | ||
768 | ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); | 773 | ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); |
774 | ASSERT(xfs_isilocked(ip, XFS_MMAPLOCK_EXCL)); | ||
769 | ASSERT(S_ISREG(ip->i_d.di_mode)); | 775 | ASSERT(S_ISREG(ip->i_d.di_mode)); |
770 | ASSERT((iattr->ia_valid & (ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET| | 776 | ASSERT((iattr->ia_valid & (ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET| |
771 | ATTR_MTIME_SET|ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0); | 777 | ATTR_MTIME_SET|ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0); |
@@ -829,55 +835,27 @@ xfs_setattr_size( | |||
829 | inode_dio_wait(inode); | 835 | inode_dio_wait(inode); |
830 | 836 | ||
831 | /* | 837 | /* |
832 | * Do all the page cache truncate work outside the transaction context | 838 | * We've already locked out new page faults, so now we can safely remove |
833 | * as the "lock" order is page lock->log space reservation. i.e. | 839 | * pages from the page cache knowing they won't get refaulted until we |
834 | * locking pages inside the transaction can ABBA deadlock with | 840 | * drop the XFS_MMAPLOCK_EXCL lock after the extent manipulations are |
835 | * writeback. We have to do the VFS inode size update before we truncate | 841 | * complete. The truncate_setsize() call also cleans partial EOF page |
836 | * the pagecache, however, to avoid racing with page faults beyond the | 842 | * PTEs on extending truncates and hence ensures sub-page block size |
837 | * new EOF they are not serialised against truncate operations except by | 843 | * filesystems are correctly handled, too. |
838 | * page locks and size updates. | ||
839 | * | 844 | * |
840 | * Hence we are in a situation where a truncate can fail with ENOMEM | 845 | * We have to do all the page cache truncate work outside the |
841 | * from xfs_trans_reserve(), but having already truncated the in-memory | 846 | * transaction context as the "lock" order is page lock->log space |
842 | * version of the file (i.e. made user visible changes). There's not | 847 | * reservation as defined by extent allocation in the writeback path. |
843 | * much we can do about this, except to hope that the caller sees ENOMEM | 848 | * Hence a truncate can fail with ENOMEM from xfs_trans_reserve(), but |
844 | * and retries the truncate operation. | 849 | * having already truncated the in-memory version of the file (i.e. made |
850 | * user visible changes). There's not much we can do about this, except | ||
851 | * to hope that the caller sees ENOMEM and retries the truncate | ||
852 | * operation. | ||
845 | */ | 853 | */ |
846 | error = block_truncate_page(inode->i_mapping, newsize, xfs_get_blocks); | 854 | error = block_truncate_page(inode->i_mapping, newsize, xfs_get_blocks); |
847 | if (error) | 855 | if (error) |
848 | return error; | 856 | return error; |
849 | truncate_setsize(inode, newsize); | 857 | truncate_setsize(inode, newsize); |
850 | 858 | ||
851 | /* | ||
852 | * The "we can't serialise against page faults" pain gets worse. | ||
853 | * | ||
854 | * If the file is mapped then we have to clean the page at the old EOF | ||
855 | * when extending the file. Extending the file can expose changes the | ||
856 | * underlying page mapping (e.g. from beyond EOF to a hole or | ||
857 | * unwritten), and so on the next attempt to write to that page we need | ||
858 | * to remap it for write. i.e. we need .page_mkwrite() to be called. | ||
859 | * Hence we need to clean the page to clean the pte and so a new write | ||
860 | * fault will be triggered appropriately. | ||
861 | * | ||
862 | * If we do it before we change the inode size, then we can race with a | ||
863 | * page fault that maps the page with exactly the same problem. If we do | ||
864 | * it after we change the file size, then a new page fault can come in | ||
865 | * and allocate space before we've run the rest of the truncate | ||
866 | * transaction. That's kinda grotesque, but it's better than have data | ||
867 | * over a hole, and so that's the lesser evil that has been chosen here. | ||
868 | * | ||
869 | * The real solution, however, is to have some mechanism for locking out | ||
870 | * page faults while a truncate is in progress. | ||
871 | */ | ||
872 | if (newsize > oldsize && mapping_mapped(VFS_I(ip)->i_mapping)) { | ||
873 | error = filemap_write_and_wait_range( | ||
874 | VFS_I(ip)->i_mapping, | ||
875 | round_down(oldsize, PAGE_CACHE_SIZE), | ||
876 | round_up(oldsize, PAGE_CACHE_SIZE) - 1); | ||
877 | if (error) | ||
878 | return error; | ||
879 | } | ||
880 | |||
881 | tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE); | 859 | tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE); |
882 | error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0); | 860 | error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0); |
883 | if (error) | 861 | if (error) |
@@ -975,9 +953,13 @@ xfs_vn_setattr( | |||
975 | uint iolock = XFS_IOLOCK_EXCL; | 953 | uint iolock = XFS_IOLOCK_EXCL; |
976 | 954 | ||
977 | xfs_ilock(ip, iolock); | 955 | xfs_ilock(ip, iolock); |
978 | error = xfs_break_layouts(dentry->d_inode, &iolock); | 956 | error = xfs_break_layouts(dentry->d_inode, &iolock, true); |
979 | if (!error) | 957 | if (!error) { |
958 | xfs_ilock(ip, XFS_MMAPLOCK_EXCL); | ||
959 | iolock |= XFS_MMAPLOCK_EXCL; | ||
960 | |||
980 | error = xfs_setattr_size(ip, iattr); | 961 | error = xfs_setattr_size(ip, iattr); |
962 | } | ||
981 | xfs_iunlock(ip, iolock); | 963 | xfs_iunlock(ip, iolock); |
982 | } else { | 964 | } else { |
983 | error = xfs_setattr_nonsize(ip, iattr, 0); | 965 | error = xfs_setattr_nonsize(ip, iattr, 0); |
@@ -1228,16 +1210,12 @@ xfs_diflags_to_iflags( | |||
1228 | } | 1210 | } |
1229 | 1211 | ||
1230 | /* | 1212 | /* |
1231 | * Initialize the Linux inode, set up the operation vectors and | 1213 | * Initialize the Linux inode and set up the operation vectors. |
1232 | * unlock the inode. | ||
1233 | * | 1214 | * |
1234 | * When reading existing inodes from disk this is called directly | 1215 | * When reading existing inodes from disk this is called directly from xfs_iget, |
1235 | * from xfs_iget, when creating a new inode it is called from | 1216 | * when creating a new inode it is called from xfs_ialloc after setting up the |
1236 | * xfs_ialloc after setting up the inode. | 1217 | * inode. These callers have different criteria for clearing XFS_INEW, so leave |
1237 | * | 1218 | * it up to the caller to deal with unlocking the inode appropriately. |
1238 | * We are always called with an uninitialised linux inode here. | ||
1239 | * We need to initialise the necessary fields and take a reference | ||
1240 | * on it. | ||
1241 | */ | 1219 | */ |
1242 | void | 1220 | void |
1243 | xfs_setup_inode( | 1221 | xfs_setup_inode( |
@@ -1324,9 +1302,4 @@ xfs_setup_inode( | |||
1324 | inode_has_no_xattr(inode); | 1302 | inode_has_no_xattr(inode); |
1325 | cache_no_acl(inode); | 1303 | cache_no_acl(inode); |
1326 | } | 1304 | } |
1327 | |||
1328 | xfs_iflags_clear(ip, XFS_INEW); | ||
1329 | barrier(); | ||
1330 | |||
1331 | unlock_new_inode(inode); | ||
1332 | } | 1305 | } |
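The creation paths above now follow a strict publication order: wire up the dentry while the inode is still flagged XFS_INEW and locked at the VFS level, then call xfs_finish_inode_setup() on success and error paths alike, so no one can observe a half-built inode. A small user-space model of that lifecycle, with the kernel flag and lock routines reduced to stubs:

#include <stdio.h>

#define XFS_INEW	(1u << 0)

struct xfs_inode { unsigned flags; int vfs_new_locked; };

static void xfs_iflags_clear(struct xfs_inode *ip, unsigned f) { ip->flags &= ~f; }
static void unlock_new_inode(struct xfs_inode *ip) { ip->vfs_new_locked = 0; }
static void d_instantiate(struct xfs_inode *ip) { (void)ip; /* wire up the dentry */ }

/* Mirrors the new inline helper: clear INEW, then let the world in.
 * The kernel version puts a barrier() between the two steps. */
static void xfs_finish_inode_setup(struct xfs_inode *ip)
{
	xfs_iflags_clear(ip, XFS_INEW);
	unlock_new_inode(ip);
}

int main(void)
{
	struct xfs_inode ip = { .flags = XFS_INEW, .vfs_new_locked = 1 };

	d_instantiate(&ip);		/* inode still invisible here */
	xfs_finish_inode_setup(&ip);	/* now published */

	printf("flags=%u new_locked=%d\n", ip.flags, ip.vfs_new_locked);
	return 0;
}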
diff --git a/fs/xfs/xfs_iops.h b/fs/xfs/xfs_iops.h index ea7a98e9cb70..a0f84abb0d09 100644 --- a/fs/xfs/xfs_iops.h +++ b/fs/xfs/xfs_iops.h | |||
@@ -25,8 +25,6 @@ extern const struct file_operations xfs_dir_file_operations; | |||
25 | 25 | ||
26 | extern ssize_t xfs_vn_listxattr(struct dentry *, char *data, size_t size); | 26 | extern ssize_t xfs_vn_listxattr(struct dentry *, char *data, size_t size); |
27 | 27 | ||
28 | extern void xfs_setup_inode(struct xfs_inode *); | ||
29 | |||
30 | /* | 28 | /* |
31 | * Internal setattr interfaces. | 29 | * Internal setattr interfaces. |
32 | */ | 30 | */ |
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index 82e314258f73..80429891dc9b 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c | |||
@@ -229,7 +229,7 @@ xfs_bulkstat_grab_ichunk( | |||
229 | error = xfs_inobt_get_rec(cur, irec, &stat); | 229 | error = xfs_inobt_get_rec(cur, irec, &stat); |
230 | if (error) | 230 | if (error) |
231 | return error; | 231 | return error; |
232 | XFS_WANT_CORRUPTED_RETURN(stat == 1); | 232 | XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, stat == 1); |
233 | 233 | ||
234 | /* Check if the record contains the inode in request */ | 234 | /* Check if the record contains the inode in request */ |
235 | if (irec->ir_startino + XFS_INODES_PER_CHUNK <= agino) { | 235 | if (irec->ir_startino + XFS_INODES_PER_CHUNK <= agino) { |
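XFS_WANT_CORRUPTED_RETURN now takes the mount as its first argument so the corruption report can identify the filesystem. The macro's definition is not part of this excerpt; a plausible shape, offered purely as an assumption, would be:

/* Assumed shape of the updated macro -- not taken from this diff. */
#define XFS_WANT_CORRUPTED_RETURN(mp, expr)				\
	do {								\
		if (unlikely(!(expr))) {				\
			XFS_ERROR_REPORT("XFS_WANT_CORRUPTED_RETURN",	\
					 XFS_ERRLEVEL_LOW, (mp));	\
			return -EFSCORRUPTED;				\
		}							\
	} while (0)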
diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h index c31d2c2eadc4..7c7842c85a08 100644 --- a/fs/xfs/xfs_linux.h +++ b/fs/xfs/xfs_linux.h | |||
@@ -116,15 +116,6 @@ typedef __uint64_t __psunsigned_t; | |||
116 | #undef XFS_NATIVE_HOST | 116 | #undef XFS_NATIVE_HOST |
117 | #endif | 117 | #endif |
118 | 118 | ||
119 | /* | ||
120 | * Feature macros (disable/enable) | ||
121 | */ | ||
122 | #ifdef CONFIG_SMP | ||
123 | #define HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */ | ||
124 | #else | ||
125 | #undef HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */ | ||
126 | #endif | ||
127 | |||
128 | #define irix_sgid_inherit xfs_params.sgid_inherit.val | 119 | #define irix_sgid_inherit xfs_params.sgid_inherit.val |
129 | #define irix_symlink_mode xfs_params.symlink_mode.val | 120 | #define irix_symlink_mode xfs_params.symlink_mode.val |
130 | #define xfs_panic_mask xfs_params.panic_mask.val | 121 | #define xfs_panic_mask xfs_params.panic_mask.val |
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index a5a945fc3bdc..4f5784f85a5b 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c | |||
@@ -4463,10 +4463,10 @@ xlog_do_recover( | |||
4463 | xfs_sb_from_disk(sbp, XFS_BUF_TO_SBP(bp)); | 4463 | xfs_sb_from_disk(sbp, XFS_BUF_TO_SBP(bp)); |
4464 | ASSERT(sbp->sb_magicnum == XFS_SB_MAGIC); | 4464 | ASSERT(sbp->sb_magicnum == XFS_SB_MAGIC); |
4465 | ASSERT(xfs_sb_good_version(sbp)); | 4465 | ASSERT(xfs_sb_good_version(sbp)); |
4466 | xfs_reinit_percpu_counters(log->l_mp); | ||
4467 | |||
4466 | xfs_buf_relse(bp); | 4468 | xfs_buf_relse(bp); |
4467 | 4469 | ||
4468 | /* We've re-read the superblock so re-initialize per-cpu counters */ | ||
4469 | xfs_icsb_reinit_counters(log->l_mp); | ||
4470 | 4470 | ||
4471 | xlog_recover_check_summary(log); | 4471 | xlog_recover_check_summary(log); |
4472 | 4472 | ||
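After recovery re-reads the on-disk superblock, the in-memory counters must be resynchronised from it, so xfs_icsb_reinit_counters() gives way to xfs_reinit_percpu_counters(). Its body is outside this excerpt; a sketch of what it plausibly does is to push the freshly read m_sb values into the three percpu counters added to struct xfs_mount later in this series:

#include <linux/percpu_counter.h>

/* Assumed implementation -- the real body is not in this diff. */
void xfs_reinit_percpu_counters(struct xfs_mount *mp)
{
	percpu_counter_set(&mp->m_icount, mp->m_sb.sb_icount);
	percpu_counter_set(&mp->m_ifree, mp->m_sb.sb_ifree);
	percpu_counter_set(&mp->m_fdblocks, mp->m_sb.sb_fdblocks);
}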
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 4fa80e63eea2..2ce7ee3b4ec1 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c | |||
@@ -43,18 +43,6 @@ | |||
43 | #include "xfs_sysfs.h" | 43 | #include "xfs_sysfs.h" |
44 | 44 | ||
45 | 45 | ||
46 | #ifdef HAVE_PERCPU_SB | ||
47 | STATIC void xfs_icsb_balance_counter(xfs_mount_t *, xfs_sb_field_t, | ||
48 | int); | ||
49 | STATIC void xfs_icsb_balance_counter_locked(xfs_mount_t *, xfs_sb_field_t, | ||
50 | int); | ||
51 | STATIC void xfs_icsb_disable_counter(xfs_mount_t *, xfs_sb_field_t); | ||
52 | #else | ||
53 | |||
54 | #define xfs_icsb_balance_counter(mp, a, b) do { } while (0) | ||
55 | #define xfs_icsb_balance_counter_locked(mp, a, b) do { } while (0) | ||
56 | #endif | ||
57 | |||
58 | static DEFINE_MUTEX(xfs_uuid_table_mutex); | 46 | static DEFINE_MUTEX(xfs_uuid_table_mutex); |
59 | static int xfs_uuid_table_size; | 47 | static int xfs_uuid_table_size; |
60 | static uuid_t *xfs_uuid_table; | 48 | static uuid_t *xfs_uuid_table; |
@@ -347,8 +335,7 @@ reread: | |||
347 | goto reread; | 335 | goto reread; |
348 | } | 336 | } |
349 | 337 | ||
350 | /* Initialize per-cpu counters */ | 338 | xfs_reinit_percpu_counters(mp); |
351 | xfs_icsb_reinit_counters(mp); | ||
352 | 339 | ||
353 | /* no need to be quiet anymore, so reset the buf ops */ | 340 | /* no need to be quiet anymore, so reset the buf ops */ |
354 | bp->b_ops = &xfs_sb_buf_ops; | 341 | bp->b_ops = &xfs_sb_buf_ops; |
@@ -1087,8 +1074,6 @@ xfs_log_sbcount(xfs_mount_t *mp) | |||
1087 | if (!xfs_fs_writable(mp, SB_FREEZE_COMPLETE)) | 1074 | if (!xfs_fs_writable(mp, SB_FREEZE_COMPLETE)) |
1088 | return 0; | 1075 | return 0; |
1089 | 1076 | ||
1090 | xfs_icsb_sync_counters(mp, 0); | ||
1091 | |||
1092 | /* | 1077 | /* |
1093 | * we don't need to do this if we are updating the superblock | 1078 | * we don't need to do this if we are updating the superblock |
1094 | * counters on every modification. | 1079 | * counters on every modification. |
@@ -1099,253 +1084,136 @@ xfs_log_sbcount(xfs_mount_t *mp) | |||
1099 | return xfs_sync_sb(mp, true); | 1084 | return xfs_sync_sb(mp, true); |
1100 | } | 1085 | } |
1101 | 1086 | ||
1102 | /* | 1087 | int |
1103 | * xfs_mod_incore_sb_unlocked() is a utility routine commonly used to apply | 1088 | xfs_mod_icount( |
1104 | * a delta to a specified field in the in-core superblock. Simply | 1089 | struct xfs_mount *mp, |
1105 | * switch on the field indicated and apply the delta to that field. | 1090 | int64_t delta) |
1106 | * Fields are not allowed to dip below zero, so if the delta would | ||
1107 | * do this do not apply it and return EINVAL. | ||
1108 | * | ||
1109 | * The m_sb_lock must be held when this routine is called. | ||
1110 | */ | ||
1111 | STATIC int | ||
1112 | xfs_mod_incore_sb_unlocked( | ||
1113 | xfs_mount_t *mp, | ||
1114 | xfs_sb_field_t field, | ||
1115 | int64_t delta, | ||
1116 | int rsvd) | ||
1117 | { | 1091 | { |
1118 | int scounter; /* short counter for 32 bit fields */ | 1092 | /* deltas are +/-64, hence the large batch size of 128. */ |
1119 | long long lcounter; /* long counter for 64 bit fields */ | 1093 | __percpu_counter_add(&mp->m_icount, delta, 128); |
1120 | long long res_used, rem; | 1094 | if (percpu_counter_compare(&mp->m_icount, 0) < 0) { |
1121 | |||
1122 | /* | ||
1123 | * With the in-core superblock spin lock held, switch | ||
1124 | * on the indicated field. Apply the delta to the | ||
1125 | * proper field. If the fields value would dip below | ||
1126 | * 0, then do not apply the delta and return EINVAL. | ||
1127 | */ | ||
1128 | switch (field) { | ||
1129 | case XFS_SBS_ICOUNT: | ||
1130 | lcounter = (long long)mp->m_sb.sb_icount; | ||
1131 | lcounter += delta; | ||
1132 | if (lcounter < 0) { | ||
1133 | ASSERT(0); | ||
1134 | return -EINVAL; | ||
1135 | } | ||
1136 | mp->m_sb.sb_icount = lcounter; | ||
1137 | return 0; | ||
1138 | case XFS_SBS_IFREE: | ||
1139 | lcounter = (long long)mp->m_sb.sb_ifree; | ||
1140 | lcounter += delta; | ||
1141 | if (lcounter < 0) { | ||
1142 | ASSERT(0); | ||
1143 | return -EINVAL; | ||
1144 | } | ||
1145 | mp->m_sb.sb_ifree = lcounter; | ||
1146 | return 0; | ||
1147 | case XFS_SBS_FDBLOCKS: | ||
1148 | lcounter = (long long) | ||
1149 | mp->m_sb.sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp); | ||
1150 | res_used = (long long)(mp->m_resblks - mp->m_resblks_avail); | ||
1151 | |||
1152 | if (delta > 0) { /* Putting blocks back */ | ||
1153 | if (res_used > delta) { | ||
1154 | mp->m_resblks_avail += delta; | ||
1155 | } else { | ||
1156 | rem = delta - res_used; | ||
1157 | mp->m_resblks_avail = mp->m_resblks; | ||
1158 | lcounter += rem; | ||
1159 | } | ||
1160 | } else { /* Taking blocks away */ | ||
1161 | lcounter += delta; | ||
1162 | if (lcounter >= 0) { | ||
1163 | mp->m_sb.sb_fdblocks = lcounter + | ||
1164 | XFS_ALLOC_SET_ASIDE(mp); | ||
1165 | return 0; | ||
1166 | } | ||
1167 | |||
1168 | /* | ||
1169 | * We are out of blocks, use any available reserved | ||
1170 | * blocks if were allowed to. | ||
1171 | */ | ||
1172 | if (!rsvd) | ||
1173 | return -ENOSPC; | ||
1174 | |||
1175 | lcounter = (long long)mp->m_resblks_avail + delta; | ||
1176 | if (lcounter >= 0) { | ||
1177 | mp->m_resblks_avail = lcounter; | ||
1178 | return 0; | ||
1179 | } | ||
1180 | printk_once(KERN_WARNING | ||
1181 | "Filesystem \"%s\": reserve blocks depleted! " | ||
1182 | "Consider increasing reserve pool size.", | ||
1183 | mp->m_fsname); | ||
1184 | return -ENOSPC; | ||
1185 | } | ||
1186 | |||
1187 | mp->m_sb.sb_fdblocks = lcounter + XFS_ALLOC_SET_ASIDE(mp); | ||
1188 | return 0; | ||
1189 | case XFS_SBS_FREXTENTS: | ||
1190 | lcounter = (long long)mp->m_sb.sb_frextents; | ||
1191 | lcounter += delta; | ||
1192 | if (lcounter < 0) { | ||
1193 | return -ENOSPC; | ||
1194 | } | ||
1195 | mp->m_sb.sb_frextents = lcounter; | ||
1196 | return 0; | ||
1197 | case XFS_SBS_DBLOCKS: | ||
1198 | lcounter = (long long)mp->m_sb.sb_dblocks; | ||
1199 | lcounter += delta; | ||
1200 | if (lcounter < 0) { | ||
1201 | ASSERT(0); | ||
1202 | return -EINVAL; | ||
1203 | } | ||
1204 | mp->m_sb.sb_dblocks = lcounter; | ||
1205 | return 0; | ||
1206 | case XFS_SBS_AGCOUNT: | ||
1207 | scounter = mp->m_sb.sb_agcount; | ||
1208 | scounter += delta; | ||
1209 | if (scounter < 0) { | ||
1210 | ASSERT(0); | ||
1211 | return -EINVAL; | ||
1212 | } | ||
1213 | mp->m_sb.sb_agcount = scounter; | ||
1214 | return 0; | ||
1215 | case XFS_SBS_IMAX_PCT: | ||
1216 | scounter = mp->m_sb.sb_imax_pct; | ||
1217 | scounter += delta; | ||
1218 | if (scounter < 0) { | ||
1219 | ASSERT(0); | ||
1220 | return -EINVAL; | ||
1221 | } | ||
1222 | mp->m_sb.sb_imax_pct = scounter; | ||
1223 | return 0; | ||
1224 | case XFS_SBS_REXTSIZE: | ||
1225 | scounter = mp->m_sb.sb_rextsize; | ||
1226 | scounter += delta; | ||
1227 | if (scounter < 0) { | ||
1228 | ASSERT(0); | ||
1229 | return -EINVAL; | ||
1230 | } | ||
1231 | mp->m_sb.sb_rextsize = scounter; | ||
1232 | return 0; | ||
1233 | case XFS_SBS_RBMBLOCKS: | ||
1234 | scounter = mp->m_sb.sb_rbmblocks; | ||
1235 | scounter += delta; | ||
1236 | if (scounter < 0) { | ||
1237 | ASSERT(0); | ||
1238 | return -EINVAL; | ||
1239 | } | ||
1240 | mp->m_sb.sb_rbmblocks = scounter; | ||
1241 | return 0; | ||
1242 | case XFS_SBS_RBLOCKS: | ||
1243 | lcounter = (long long)mp->m_sb.sb_rblocks; | ||
1244 | lcounter += delta; | ||
1245 | if (lcounter < 0) { | ||
1246 | ASSERT(0); | ||
1247 | return -EINVAL; | ||
1248 | } | ||
1249 | mp->m_sb.sb_rblocks = lcounter; | ||
1250 | return 0; | ||
1251 | case XFS_SBS_REXTENTS: | ||
1252 | lcounter = (long long)mp->m_sb.sb_rextents; | ||
1253 | lcounter += delta; | ||
1254 | if (lcounter < 0) { | ||
1255 | ASSERT(0); | ||
1256 | return -EINVAL; | ||
1257 | } | ||
1258 | mp->m_sb.sb_rextents = lcounter; | ||
1259 | return 0; | ||
1260 | case XFS_SBS_REXTSLOG: | ||
1261 | scounter = mp->m_sb.sb_rextslog; | ||
1262 | scounter += delta; | ||
1263 | if (scounter < 0) { | ||
1264 | ASSERT(0); | ||
1265 | return -EINVAL; | ||
1266 | } | ||
1267 | mp->m_sb.sb_rextslog = scounter; | ||
1268 | return 0; | ||
1269 | default: | ||
1270 | ASSERT(0); | 1095 | ASSERT(0); |
1096 | percpu_counter_add(&mp->m_icount, -delta); | ||
1271 | return -EINVAL; | 1097 | return -EINVAL; |
1272 | } | 1098 | } |
1099 | return 0; | ||
1273 | } | 1100 | } |
1274 | 1101 | ||
1275 | /* | ||
1276 | * xfs_mod_incore_sb() is used to change a field in the in-core | ||
1277 | * superblock structure by the specified delta. This modification | ||
1278 | * is protected by the m_sb_lock. Just use the xfs_mod_incore_sb_unlocked() | ||
1279 | * routine to do the work. | ||
1280 | */ | ||
1281 | int | 1102 | int |
1282 | xfs_mod_incore_sb( | 1103 | xfs_mod_ifree( |
1283 | struct xfs_mount *mp, | 1104 | struct xfs_mount *mp, |
1284 | xfs_sb_field_t field, | 1105 | int64_t delta) |
1285 | int64_t delta, | ||
1286 | int rsvd) | ||
1287 | { | 1106 | { |
1288 | int status; | 1107 | percpu_counter_add(&mp->m_ifree, delta); |
1289 | 1108 | if (percpu_counter_compare(&mp->m_ifree, 0) < 0) { | |
1290 | #ifdef HAVE_PERCPU_SB | 1109 | ASSERT(0); |
1291 | ASSERT(field < XFS_SBS_ICOUNT || field > XFS_SBS_FDBLOCKS); | 1110 | percpu_counter_add(&mp->m_ifree, -delta); |
1292 | #endif | 1111 | return -EINVAL; |
1293 | spin_lock(&mp->m_sb_lock); | 1112 | } |
1294 | status = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd); | 1113 | return 0; |
1295 | spin_unlock(&mp->m_sb_lock); | ||
1296 | |||
1297 | return status; | ||
1298 | } | 1114 | } |
1299 | 1115 | ||
1300 | /* | ||
1301 | * Change more than one field in the in-core superblock structure at a time. | ||
1302 | * | ||
1303 | * The fields and changes to those fields are specified in the array of | ||
1304 | * xfs_mod_sb structures passed in. Either all of the specified deltas | ||
1305 | * will be applied or none of them will. If any modified field dips below 0, | ||
1306 | * then all modifications will be backed out and EINVAL will be returned. | ||
1307 | * | ||
1308 | * Note that this function may not be used for the superblock values that | ||
1309 | * are tracked with the in-memory per-cpu counters - a direct call to | ||
1310 | * xfs_icsb_modify_counters is required for these. | ||
1311 | */ | ||
1312 | int | 1116 | int |
1313 | xfs_mod_incore_sb_batch( | 1117 | xfs_mod_fdblocks( |
1314 | struct xfs_mount *mp, | 1118 | struct xfs_mount *mp, |
1315 | xfs_mod_sb_t *msb, | 1119 | int64_t delta, |
1316 | uint nmsb, | 1120 | bool rsvd) |
1317 | int rsvd) | ||
1318 | { | 1121 | { |
1319 | xfs_mod_sb_t *msbp; | 1122 | int64_t lcounter; |
1320 | int error = 0; | 1123 | long long res_used; |
1124 | s32 batch; | ||
1125 | |||
1126 | if (delta > 0) { | ||
1127 | /* | ||
1128 | * If the reserve pool is depleted, put blocks back into it | ||
1129 | * first. Most of the time the pool is full. | ||
1130 | */ | ||
1131 | if (likely(mp->m_resblks == mp->m_resblks_avail)) { | ||
1132 | percpu_counter_add(&mp->m_fdblocks, delta); | ||
1133 | return 0; | ||
1134 | } | ||
1135 | |||
1136 | spin_lock(&mp->m_sb_lock); | ||
1137 | res_used = (long long)(mp->m_resblks - mp->m_resblks_avail); | ||
1138 | |||
1139 | if (res_used > delta) { | ||
1140 | mp->m_resblks_avail += delta; | ||
1141 | } else { | ||
1142 | delta -= res_used; | ||
1143 | mp->m_resblks_avail = mp->m_resblks; | ||
1144 | percpu_counter_add(&mp->m_fdblocks, delta); | ||
1145 | } | ||
1146 | spin_unlock(&mp->m_sb_lock); | ||
1147 | return 0; | ||
1148 | } | ||
1321 | 1149 | ||
1322 | /* | 1150 | /* |
1323 | * Loop through the array of mod structures and apply each individually. | 1151 | * When taking blocks away, we need to be more accurate the closer we |
1324 | * If any fail, then back out all those which have already been applied. | 1152 | * are to zero. |
1325 | * Do all of this within the scope of the m_sb_lock so that all of the | 1153 | * |
1326 | * changes will be atomic. | 1154 | * batch size is set to a maximum of 1024 blocks - if we are |
1155 | * allocating or freeing extents larger than this, then we aren't | ||
1156 | * going to be hammering the counter lock so a lock per update | ||
1157 | * is not a problem. | ||
1158 | * | ||
1159 | * If the counter has a value of less than 2 * max batch size, | ||
1160 | * then make everything serialise as we are real close to | ||
1161 | * ENOSPC. | ||
1162 | */ | ||
1163 | #define __BATCH 1024 | ||
1164 | if (percpu_counter_compare(&mp->m_fdblocks, 2 * __BATCH) < 0) | ||
1165 | batch = 1; | ||
1166 | else | ||
1167 | batch = __BATCH; | ||
1168 | #undef __BATCH | ||
1169 | |||
1170 | __percpu_counter_add(&mp->m_fdblocks, delta, batch); | ||
1171 | if (percpu_counter_compare(&mp->m_fdblocks, | ||
1172 | XFS_ALLOC_SET_ASIDE(mp)) >= 0) { | ||
1173 | /* we had space! */ | ||
1174 | return 0; | ||
1175 | } | ||
1176 | |||
1177 | /* | ||
1178 | * lock up the sb for dipping into reserves before releasing the space | ||
1179 | * that took us to ENOSPC. | ||
1327 | */ | 1180 | */ |
1328 | spin_lock(&mp->m_sb_lock); | 1181 | spin_lock(&mp->m_sb_lock); |
1329 | for (msbp = msb; msbp < (msb + nmsb); msbp++) { | 1182 | percpu_counter_add(&mp->m_fdblocks, -delta); |
1330 | ASSERT(msbp->msb_field < XFS_SBS_ICOUNT || | 1183 | if (!rsvd) |
1331 | msbp->msb_field > XFS_SBS_FDBLOCKS); | 1184 | goto fdblocks_enospc; |
1332 | 1185 | ||
1333 | error = xfs_mod_incore_sb_unlocked(mp, msbp->msb_field, | 1186 | lcounter = (long long)mp->m_resblks_avail + delta; |
1334 | msbp->msb_delta, rsvd); | 1187 | if (lcounter >= 0) { |
1335 | if (error) | 1188 | mp->m_resblks_avail = lcounter; |
1336 | goto unwind; | 1189 | spin_unlock(&mp->m_sb_lock); |
1190 | return 0; | ||
1337 | } | 1191 | } |
1192 | printk_once(KERN_WARNING | ||
1193 | "Filesystem \"%s\": reserve blocks depleted! " | ||
1194 | "Consider increasing reserve pool size.", | ||
1195 | mp->m_fsname); | ||
1196 | fdblocks_enospc: | ||
1338 | spin_unlock(&mp->m_sb_lock); | 1197 | spin_unlock(&mp->m_sb_lock); |
1339 | return 0; | 1198 | return -ENOSPC; |
1199 | } | ||
1340 | 1200 | ||
1341 | unwind: | 1201 | int |
1342 | while (--msbp >= msb) { | 1202 | xfs_mod_frextents( |
1343 | error = xfs_mod_incore_sb_unlocked(mp, msbp->msb_field, | 1203 | struct xfs_mount *mp, |
1344 | -msbp->msb_delta, rsvd); | 1204 | int64_t delta) |
1345 | ASSERT(error == 0); | 1205 | { |
1346 | } | 1206 | int64_t lcounter; |
1207 | int ret = 0; | ||
1208 | |||
1209 | spin_lock(&mp->m_sb_lock); | ||
1210 | lcounter = mp->m_sb.sb_frextents + delta; | ||
1211 | if (lcounter < 0) | ||
1212 | ret = -ENOSPC; | ||
1213 | else | ||
1214 | mp->m_sb.sb_frextents = lcounter; | ||
1347 | spin_unlock(&mp->m_sb_lock); | 1215 | spin_unlock(&mp->m_sb_lock); |
1348 | return error; | 1216 | return ret; |
1349 | } | 1217 | } |
1350 | 1218 | ||
1351 | /* | 1219 | /* |
@@ -1407,573 +1275,3 @@ xfs_dev_is_read_only( | |||
1407 | } | 1275 | } |
1408 | return 0; | 1276 | return 0; |
1409 | } | 1277 | } |
1410 | |||
1411 | #ifdef HAVE_PERCPU_SB | ||
1412 | /* | ||
1413 | * Per-cpu incore superblock counters | ||
1414 | * | ||
1415 | * Simple concept, difficult implementation | ||
1416 | * | ||
1417 | * Basically, replace the incore superblock counters with a distributed per cpu | ||
1418 | * counter for contended fields (e.g. free block count). | ||
1419 | * | ||
1420 | * Difficulties arise in that the incore sb is used for ENOSPC checking, and | ||
1421 | * hence needs to be accurately read when we are running low on space. Hence | ||
1422 | * there is a method to enable and disable the per-cpu counters based on how | ||
1423 | * much "stuff" is available in them. | ||
1424 | * | ||
1425 | * Basically, a counter is enabled if there is enough free resource to justify | ||
1426 | * running a per-cpu fast-path. If the per-cpu counter runs out (i.e. a local | ||
1427 | * ENOSPC), then we disable the counters to synchronise all callers and | ||
1428 | * re-distribute the available resources. | ||
1429 | * | ||
1430 | * If, once we redistributed the available resources, we still get a failure, | ||
1431 | * we disable the per-cpu counter and go through the slow path. | ||
1432 | * | ||
1433 | * The slow path is the current xfs_mod_incore_sb() function. This means that | ||
1434 | * when we disable a per-cpu counter, we need to drain its resources back to | ||
1435 | * the global superblock. We do this after disabling the counter to prevent | ||
1436 | * more threads from queueing up on the counter. | ||
1437 | * | ||
1438 | * Essentially, this means that we still need a lock in the fast path to enable | ||
1439 | * synchronisation between the global counters and the per-cpu counters. This | ||
1440 | * is not a problem because the lock will be local to a CPU almost all the time | ||
1441 | * and have little contention except when we get to ENOSPC conditions. | ||
1442 | * | ||
1443 | * Basically, this lock becomes a barrier that enables us to lock out the fast | ||
1444 | * path while we do things like enabling and disabling counters and | ||
1445 | * synchronising the counters. | ||
1446 | * | ||
1447 | * Locking rules: | ||
1448 | * | ||
1449 | * 1. m_sb_lock before picking up per-cpu locks | ||
1450 | * 2. per-cpu locks always picked up via for_each_online_cpu() order | ||
1451 | * 3. accurate counter sync requires m_sb_lock + per cpu locks | ||
1452 | * 4. modifying per-cpu counters requires holding per-cpu lock | ||
1453 | * 5. modifying global counters requires holding m_sb_lock | ||
1454 | * 6. enabling or disabling a counter requires holding the m_sb_lock | ||
1455 | * and _none_ of the per-cpu locks. | ||
1456 | * | ||
1457 | * Disabled counters are only ever re-enabled by a balance operation | ||
1458 | * that results in more free resources per CPU than a given threshold. | ||
1459 | * To ensure counters don't remain disabled, they are rebalanced when | ||
1460 | * the global resource goes above a higher threshold (i.e. some hysteresis | ||
1461 | * is present to prevent thrashing). | ||
1462 | */ | ||
1463 | |||
1464 | #ifdef CONFIG_HOTPLUG_CPU | ||
1465 | /* | ||
1466 | * hot-plug CPU notifier support. | ||
1467 | * | ||
1468 | * We need a notifier per filesystem as we need to be able to identify | ||
1469 | * the filesystem to balance the counters out. This is achieved by | ||
1470 | * having a notifier block embedded in the xfs_mount_t and doing pointer | ||
1471 | * magic to get the mount pointer from the notifier block address. | ||
1472 | */ | ||
1473 | STATIC int | ||
1474 | xfs_icsb_cpu_notify( | ||
1475 | struct notifier_block *nfb, | ||
1476 | unsigned long action, | ||
1477 | void *hcpu) | ||
1478 | { | ||
1479 | xfs_icsb_cnts_t *cntp; | ||
1480 | xfs_mount_t *mp; | ||
1481 | |||
1482 | mp = (xfs_mount_t *)container_of(nfb, xfs_mount_t, m_icsb_notifier); | ||
1483 | cntp = (xfs_icsb_cnts_t *) | ||
1484 | per_cpu_ptr(mp->m_sb_cnts, (unsigned long)hcpu); | ||
1485 | switch (action) { | ||
1486 | case CPU_UP_PREPARE: | ||
1487 | case CPU_UP_PREPARE_FROZEN: | ||
1488 | /* Easy Case - initialize the area and locks, and | ||
1489 | * then rebalance when online does everything else for us. */ | ||
1490 | memset(cntp, 0, sizeof(xfs_icsb_cnts_t)); | ||
1491 | break; | ||
1492 | case CPU_ONLINE: | ||
1493 | case CPU_ONLINE_FROZEN: | ||
1494 | xfs_icsb_lock(mp); | ||
1495 | xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0); | ||
1496 | xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0); | ||
1497 | xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0); | ||
1498 | xfs_icsb_unlock(mp); | ||
1499 | break; | ||
1500 | case CPU_DEAD: | ||
1501 | case CPU_DEAD_FROZEN: | ||
1502 | /* Disable all the counters, then fold the dead cpu's | ||
1503 | * count into the total on the global superblock and | ||
1504 | * re-enable the counters. */ | ||
1505 | xfs_icsb_lock(mp); | ||
1506 | spin_lock(&mp->m_sb_lock); | ||
1507 | xfs_icsb_disable_counter(mp, XFS_SBS_ICOUNT); | ||
1508 | xfs_icsb_disable_counter(mp, XFS_SBS_IFREE); | ||
1509 | xfs_icsb_disable_counter(mp, XFS_SBS_FDBLOCKS); | ||
1510 | |||
1511 | mp->m_sb.sb_icount += cntp->icsb_icount; | ||
1512 | mp->m_sb.sb_ifree += cntp->icsb_ifree; | ||
1513 | mp->m_sb.sb_fdblocks += cntp->icsb_fdblocks; | ||
1514 | |||
1515 | memset(cntp, 0, sizeof(xfs_icsb_cnts_t)); | ||
1516 | |||
1517 | xfs_icsb_balance_counter_locked(mp, XFS_SBS_ICOUNT, 0); | ||
1518 | xfs_icsb_balance_counter_locked(mp, XFS_SBS_IFREE, 0); | ||
1519 | xfs_icsb_balance_counter_locked(mp, XFS_SBS_FDBLOCKS, 0); | ||
1520 | spin_unlock(&mp->m_sb_lock); | ||
1521 | xfs_icsb_unlock(mp); | ||
1522 | break; | ||
1523 | } | ||
1524 | |||
1525 | return NOTIFY_OK; | ||
1526 | } | ||
1527 | #endif /* CONFIG_HOTPLUG_CPU */ | ||
1528 | |||
1529 | int | ||
1530 | xfs_icsb_init_counters( | ||
1531 | xfs_mount_t *mp) | ||
1532 | { | ||
1533 | xfs_icsb_cnts_t *cntp; | ||
1534 | int i; | ||
1535 | |||
1536 | mp->m_sb_cnts = alloc_percpu(xfs_icsb_cnts_t); | ||
1537 | if (mp->m_sb_cnts == NULL) | ||
1538 | return -ENOMEM; | ||
1539 | |||
1540 | for_each_online_cpu(i) { | ||
1541 | cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i); | ||
1542 | memset(cntp, 0, sizeof(xfs_icsb_cnts_t)); | ||
1543 | } | ||
1544 | |||
1545 | mutex_init(&mp->m_icsb_mutex); | ||
1546 | |||
1547 | /* | ||
1548 | * start with all counters disabled so that the | ||
1549 | * initial balance kicks us off correctly | ||
1550 | */ | ||
1551 | mp->m_icsb_counters = -1; | ||
1552 | |||
1553 | #ifdef CONFIG_HOTPLUG_CPU | ||
1554 | mp->m_icsb_notifier.notifier_call = xfs_icsb_cpu_notify; | ||
1555 | mp->m_icsb_notifier.priority = 0; | ||
1556 | register_hotcpu_notifier(&mp->m_icsb_notifier); | ||
1557 | #endif /* CONFIG_HOTPLUG_CPU */ | ||
1558 | |||
1559 | return 0; | ||
1560 | } | ||
1561 | |||
1562 | void | ||
1563 | xfs_icsb_reinit_counters( | ||
1564 | xfs_mount_t *mp) | ||
1565 | { | ||
1566 | xfs_icsb_lock(mp); | ||
1567 | /* | ||
1568 | * start with all counters disabled so that the | ||
1569 | * initial balance kicks us off correctly | ||
1570 | */ | ||
1571 | mp->m_icsb_counters = -1; | ||
1572 | xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0); | ||
1573 | xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0); | ||
1574 | xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0); | ||
1575 | xfs_icsb_unlock(mp); | ||
1576 | } | ||
1577 | |||
1578 | void | ||
1579 | xfs_icsb_destroy_counters( | ||
1580 | xfs_mount_t *mp) | ||
1581 | { | ||
1582 | if (mp->m_sb_cnts) { | ||
1583 | unregister_hotcpu_notifier(&mp->m_icsb_notifier); | ||
1584 | free_percpu(mp->m_sb_cnts); | ||
1585 | } | ||
1586 | mutex_destroy(&mp->m_icsb_mutex); | ||
1587 | } | ||
1588 | |||
1589 | STATIC void | ||
1590 | xfs_icsb_lock_cntr( | ||
1591 | xfs_icsb_cnts_t *icsbp) | ||
1592 | { | ||
1593 | while (test_and_set_bit(XFS_ICSB_FLAG_LOCK, &icsbp->icsb_flags)) { | ||
1594 | ndelay(1000); | ||
1595 | } | ||
1596 | } | ||
1597 | |||
1598 | STATIC void | ||
1599 | xfs_icsb_unlock_cntr( | ||
1600 | xfs_icsb_cnts_t *icsbp) | ||
1601 | { | ||
1602 | clear_bit(XFS_ICSB_FLAG_LOCK, &icsbp->icsb_flags); | ||
1603 | } | ||
1604 | |||
1605 | |||
1606 | STATIC void | ||
1607 | xfs_icsb_lock_all_counters( | ||
1608 | xfs_mount_t *mp) | ||
1609 | { | ||
1610 | xfs_icsb_cnts_t *cntp; | ||
1611 | int i; | ||
1612 | |||
1613 | for_each_online_cpu(i) { | ||
1614 | cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i); | ||
1615 | xfs_icsb_lock_cntr(cntp); | ||
1616 | } | ||
1617 | } | ||
1618 | |||
1619 | STATIC void | ||
1620 | xfs_icsb_unlock_all_counters( | ||
1621 | xfs_mount_t *mp) | ||
1622 | { | ||
1623 | xfs_icsb_cnts_t *cntp; | ||
1624 | int i; | ||
1625 | |||
1626 | for_each_online_cpu(i) { | ||
1627 | cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i); | ||
1628 | xfs_icsb_unlock_cntr(cntp); | ||
1629 | } | ||
1630 | } | ||
1631 | |||
1632 | STATIC void | ||
1633 | xfs_icsb_count( | ||
1634 | xfs_mount_t *mp, | ||
1635 | xfs_icsb_cnts_t *cnt, | ||
1636 | int flags) | ||
1637 | { | ||
1638 | xfs_icsb_cnts_t *cntp; | ||
1639 | int i; | ||
1640 | |||
1641 | memset(cnt, 0, sizeof(xfs_icsb_cnts_t)); | ||
1642 | |||
1643 | if (!(flags & XFS_ICSB_LAZY_COUNT)) | ||
1644 | xfs_icsb_lock_all_counters(mp); | ||
1645 | |||
1646 | for_each_online_cpu(i) { | ||
1647 | cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i); | ||
1648 | cnt->icsb_icount += cntp->icsb_icount; | ||
1649 | cnt->icsb_ifree += cntp->icsb_ifree; | ||
1650 | cnt->icsb_fdblocks += cntp->icsb_fdblocks; | ||
1651 | } | ||
1652 | |||
1653 | if (!(flags & XFS_ICSB_LAZY_COUNT)) | ||
1654 | xfs_icsb_unlock_all_counters(mp); | ||
1655 | } | ||
1656 | |||
1657 | STATIC int | ||
1658 | xfs_icsb_counter_disabled( | ||
1659 | xfs_mount_t *mp, | ||
1660 | xfs_sb_field_t field) | ||
1661 | { | ||
1662 | ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS)); | ||
1663 | return test_bit(field, &mp->m_icsb_counters); | ||
1664 | } | ||
1665 | |||
1666 | STATIC void | ||
1667 | xfs_icsb_disable_counter( | ||
1668 | xfs_mount_t *mp, | ||
1669 | xfs_sb_field_t field) | ||
1670 | { | ||
1671 | xfs_icsb_cnts_t cnt; | ||
1672 | |||
1673 | ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS)); | ||
1674 | |||
1675 | /* | ||
1676 | * If we are already disabled, then there is nothing to do | ||
1677 | * here. We check before locking all the counters to avoid | ||
1678 | * the expensive lock operation when being called in the | ||
1679 | * slow path and the counter is already disabled. This is | ||
1680 | * safe because the only time we set or clear this state is under | ||
1681 | * the m_icsb_mutex. | ||
1682 | */ | ||
1683 | if (xfs_icsb_counter_disabled(mp, field)) | ||
1684 | return; | ||
1685 | |||
1686 | xfs_icsb_lock_all_counters(mp); | ||
1687 | if (!test_and_set_bit(field, &mp->m_icsb_counters)) { | ||
1688 | /* drain back to superblock */ | ||
1689 | |||
1690 | xfs_icsb_count(mp, &cnt, XFS_ICSB_LAZY_COUNT); | ||
1691 | switch(field) { | ||
1692 | case XFS_SBS_ICOUNT: | ||
1693 | mp->m_sb.sb_icount = cnt.icsb_icount; | ||
1694 | break; | ||
1695 | case XFS_SBS_IFREE: | ||
1696 | mp->m_sb.sb_ifree = cnt.icsb_ifree; | ||
1697 | break; | ||
1698 | case XFS_SBS_FDBLOCKS: | ||
1699 | mp->m_sb.sb_fdblocks = cnt.icsb_fdblocks; | ||
1700 | break; | ||
1701 | default: | ||
1702 | BUG(); | ||
1703 | } | ||
1704 | } | ||
1705 | |||
1706 | xfs_icsb_unlock_all_counters(mp); | ||
1707 | } | ||
1708 | |||
1709 | STATIC void | ||
1710 | xfs_icsb_enable_counter( | ||
1711 | xfs_mount_t *mp, | ||
1712 | xfs_sb_field_t field, | ||
1713 | uint64_t count, | ||
1714 | uint64_t resid) | ||
1715 | { | ||
1716 | xfs_icsb_cnts_t *cntp; | ||
1717 | int i; | ||
1718 | |||
1719 | ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS)); | ||
1720 | |||
1721 | xfs_icsb_lock_all_counters(mp); | ||
1722 | for_each_online_cpu(i) { | ||
1723 | cntp = per_cpu_ptr(mp->m_sb_cnts, i); | ||
1724 | switch (field) { | ||
1725 | case XFS_SBS_ICOUNT: | ||
1726 | cntp->icsb_icount = count + resid; | ||
1727 | break; | ||
1728 | case XFS_SBS_IFREE: | ||
1729 | cntp->icsb_ifree = count + resid; | ||
1730 | break; | ||
1731 | case XFS_SBS_FDBLOCKS: | ||
1732 | cntp->icsb_fdblocks = count + resid; | ||
1733 | break; | ||
1734 | default: | ||
1735 | BUG(); | ||
1736 | break; | ||
1737 | } | ||
1738 | resid = 0; | ||
1739 | } | ||
1740 | clear_bit(field, &mp->m_icsb_counters); | ||
1741 | xfs_icsb_unlock_all_counters(mp); | ||
1742 | } | ||
1743 | |||
1744 | void | ||
1745 | xfs_icsb_sync_counters_locked( | ||
1746 | xfs_mount_t *mp, | ||
1747 | int flags) | ||
1748 | { | ||
1749 | xfs_icsb_cnts_t cnt; | ||
1750 | |||
1751 | xfs_icsb_count(mp, &cnt, flags); | ||
1752 | |||
1753 | if (!xfs_icsb_counter_disabled(mp, XFS_SBS_ICOUNT)) | ||
1754 | mp->m_sb.sb_icount = cnt.icsb_icount; | ||
1755 | if (!xfs_icsb_counter_disabled(mp, XFS_SBS_IFREE)) | ||
1756 | mp->m_sb.sb_ifree = cnt.icsb_ifree; | ||
1757 | if (!xfs_icsb_counter_disabled(mp, XFS_SBS_FDBLOCKS)) | ||
1758 | mp->m_sb.sb_fdblocks = cnt.icsb_fdblocks; | ||
1759 | } | ||
1760 | |||
1761 | /* | ||
1762 | * Accurate update of per-cpu counters to incore superblock | ||
1763 | */ | ||
1764 | void | ||
1765 | xfs_icsb_sync_counters( | ||
1766 | xfs_mount_t *mp, | ||
1767 | int flags) | ||
1768 | { | ||
1769 | spin_lock(&mp->m_sb_lock); | ||
1770 | xfs_icsb_sync_counters_locked(mp, flags); | ||
1771 | spin_unlock(&mp->m_sb_lock); | ||
1772 | } | ||
1773 | |||
1774 | /* | ||
1775 | * Balance and enable/disable counters as necessary. | ||
1776 | * | ||
1777 | * Thresholds for re-enabling counters are somewhat magic. inode counts are | ||
1778 | * chosen to be the same number as single on disk allocation chunk per CPU, and | ||
1779 | * free blocks is something far enough zero that we aren't going thrash when we | ||
1780 | * get near ENOSPC. We also need to supply a minimum we require per cpu to | ||
1781 | * prevent looping endlessly when xfs_alloc_space asks for more than will | ||
1782 | * be distributed to a single CPU but each CPU has enough blocks to be | ||
1783 | * reenabled. | ||
1784 | * | ||
1785 | * Note that we can be called when counters are already disabled. | ||
1786 | * xfs_icsb_disable_counter() optimises the counter locking in this case to | ||
1787 | * prevent locking every per-cpu counter needlessly. | ||
1788 | */ | ||
1789 | |||
1790 | #define XFS_ICSB_INO_CNTR_REENABLE (uint64_t)64 | ||
1791 | #define XFS_ICSB_FDBLK_CNTR_REENABLE(mp) \ | ||
1792 | (uint64_t)(512 + XFS_ALLOC_SET_ASIDE(mp)) | ||
1793 | STATIC void | ||
1794 | xfs_icsb_balance_counter_locked( | ||
1795 | xfs_mount_t *mp, | ||
1796 | xfs_sb_field_t field, | ||
1797 | int min_per_cpu) | ||
1798 | { | ||
1799 | uint64_t count, resid; | ||
1800 | int weight = num_online_cpus(); | ||
1801 | uint64_t min = (uint64_t)min_per_cpu; | ||
1802 | |||
1803 | /* disable counter and sync counter */ | ||
1804 | xfs_icsb_disable_counter(mp, field); | ||
1805 | |||
1806 | /* update counters - first CPU gets residual*/ | ||
1807 | switch (field) { | ||
1808 | case XFS_SBS_ICOUNT: | ||
1809 | count = mp->m_sb.sb_icount; | ||
1810 | resid = do_div(count, weight); | ||
1811 | if (count < max(min, XFS_ICSB_INO_CNTR_REENABLE)) | ||
1812 | return; | ||
1813 | break; | ||
1814 | case XFS_SBS_IFREE: | ||
1815 | count = mp->m_sb.sb_ifree; | ||
1816 | resid = do_div(count, weight); | ||
1817 | if (count < max(min, XFS_ICSB_INO_CNTR_REENABLE)) | ||
1818 | return; | ||
1819 | break; | ||
1820 | case XFS_SBS_FDBLOCKS: | ||
1821 | count = mp->m_sb.sb_fdblocks; | ||
1822 | resid = do_div(count, weight); | ||
1823 | if (count < max(min, XFS_ICSB_FDBLK_CNTR_REENABLE(mp))) | ||
1824 | return; | ||
1825 | break; | ||
1826 | default: | ||
1827 | BUG(); | ||
1828 | count = resid = 0; /* quiet, gcc */ | ||
1829 | break; | ||
1830 | } | ||
1831 | |||
1832 | xfs_icsb_enable_counter(mp, field, count, resid); | ||
1833 | } | ||
1834 | |||
1835 | STATIC void | ||
1836 | xfs_icsb_balance_counter( | ||
1837 | xfs_mount_t *mp, | ||
1838 | xfs_sb_field_t fields, | ||
1839 | int min_per_cpu) | ||
1840 | { | ||
1841 | spin_lock(&mp->m_sb_lock); | ||
1842 | xfs_icsb_balance_counter_locked(mp, fields, min_per_cpu); | ||
1843 | spin_unlock(&mp->m_sb_lock); | ||
1844 | } | ||
1845 | |||
1846 | int | ||
1847 | xfs_icsb_modify_counters( | ||
1848 | xfs_mount_t *mp, | ||
1849 | xfs_sb_field_t field, | ||
1850 | int64_t delta, | ||
1851 | int rsvd) | ||
1852 | { | ||
1853 | xfs_icsb_cnts_t *icsbp; | ||
1854 | long long lcounter; /* long counter for 64 bit fields */ | ||
1855 | int ret = 0; | ||
1856 | |||
1857 | might_sleep(); | ||
1858 | again: | ||
1859 | preempt_disable(); | ||
1860 | icsbp = this_cpu_ptr(mp->m_sb_cnts); | ||
1861 | |||
1862 | /* | ||
1863 | * if the counter is disabled, go to slow path | ||
1864 | */ | ||
1865 | if (unlikely(xfs_icsb_counter_disabled(mp, field))) | ||
1866 | goto slow_path; | ||
1867 | xfs_icsb_lock_cntr(icsbp); | ||
1868 | if (unlikely(xfs_icsb_counter_disabled(mp, field))) { | ||
1869 | xfs_icsb_unlock_cntr(icsbp); | ||
1870 | goto slow_path; | ||
1871 | } | ||
1872 | |||
1873 | switch (field) { | ||
1874 | case XFS_SBS_ICOUNT: | ||
1875 | lcounter = icsbp->icsb_icount; | ||
1876 | lcounter += delta; | ||
1877 | if (unlikely(lcounter < 0)) | ||
1878 | goto balance_counter; | ||
1879 | icsbp->icsb_icount = lcounter; | ||
1880 | break; | ||
1881 | |||
1882 | case XFS_SBS_IFREE: | ||
1883 | lcounter = icsbp->icsb_ifree; | ||
1884 | lcounter += delta; | ||
1885 | if (unlikely(lcounter < 0)) | ||
1886 | goto balance_counter; | ||
1887 | icsbp->icsb_ifree = lcounter; | ||
1888 | break; | ||
1889 | |||
1890 | case XFS_SBS_FDBLOCKS: | ||
1891 | BUG_ON((mp->m_resblks - mp->m_resblks_avail) != 0); | ||
1892 | |||
1893 | lcounter = icsbp->icsb_fdblocks - XFS_ALLOC_SET_ASIDE(mp); | ||
1894 | lcounter += delta; | ||
1895 | if (unlikely(lcounter < 0)) | ||
1896 | goto balance_counter; | ||
1897 | icsbp->icsb_fdblocks = lcounter + XFS_ALLOC_SET_ASIDE(mp); | ||
1898 | break; | ||
1899 | default: | ||
1900 | BUG(); | ||
1901 | break; | ||
1902 | } | ||
1903 | xfs_icsb_unlock_cntr(icsbp); | ||
1904 | preempt_enable(); | ||
1905 | return 0; | ||
1906 | |||
1907 | slow_path: | ||
1908 | preempt_enable(); | ||
1909 | |||
1910 | /* | ||
1911 | * serialise with a mutex so we don't burn lots of cpu on | ||
1912 | * the superblock lock. We still need to hold the superblock | ||
1913 | * lock, however, when we modify the global structures. | ||
1914 | */ | ||
1915 | xfs_icsb_lock(mp); | ||
1916 | |||
1917 | /* | ||
1918 | * Now running atomically. | ||
1919 | * | ||
1920 | * If the counter is enabled, someone has beaten us to rebalancing. | ||
1921 | * Drop the lock and try again in the fast path.... | ||
1922 | */ | ||
1923 | if (!(xfs_icsb_counter_disabled(mp, field))) { | ||
1924 | xfs_icsb_unlock(mp); | ||
1925 | goto again; | ||
1926 | } | ||
1927 | |||
1928 | /* | ||
1929 | * The counter is currently disabled. Because we are | ||
1930 | * running atomically here, we know a rebalance cannot | ||
1931 | * be in progress. Hence we can go straight to operating | ||
1932 | * on the global superblock. We do not call xfs_mod_incore_sb() | ||
1933 | * here even though we need to get the m_sb_lock. Doing so | ||
1934 | * will cause us to re-enter this function and deadlock. | ||
1935 | * Hence we get the m_sb_lock ourselves and then call | ||
1936 | * xfs_mod_incore_sb_unlocked() as the unlocked path operates | ||
1937 | * directly on the global counters. | ||
1938 | */ | ||
1939 | spin_lock(&mp->m_sb_lock); | ||
1940 | ret = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd); | ||
1941 | spin_unlock(&mp->m_sb_lock); | ||
1942 | |||
1943 | /* | ||
1944 | * Now that we've modified the global superblock, we | ||
1945 | * may be able to re-enable the distributed counters | ||
1946 | * (e.g. lots of space just got freed). After that | ||
1947 | * we are done. | ||
1948 | */ | ||
1949 | if (ret != -ENOSPC) | ||
1950 | xfs_icsb_balance_counter(mp, field, 0); | ||
1951 | xfs_icsb_unlock(mp); | ||
1952 | return ret; | ||
1953 | |||
1954 | balance_counter: | ||
1955 | xfs_icsb_unlock_cntr(icsbp); | ||
1956 | preempt_enable(); | ||
1957 | |||
1958 | /* | ||
1959 | * We may have multiple threads here if multiple per-cpu | ||
1960 | * counters run dry at the same time. This will mean we can | ||
1961 | * do more balances than strictly necessary but it is not | ||
1962 | * the common slowpath case. | ||
1963 | */ | ||
1964 | xfs_icsb_lock(mp); | ||
1965 | |||
1966 | /* | ||
1967 | * running atomically. | ||
1968 | * | ||
1969 | * This will leave the counter in the correct state for future | ||
1970 | * accesses. After the rebalance, we simply try again and our retry | ||
1971 | * will either succeed through the fast path or slow path without | ||
1972 | * another balance operation being required. | ||
1973 | */ | ||
1974 | xfs_icsb_balance_counter(mp, field, delta); | ||
1975 | xfs_icsb_unlock(mp); | ||
1976 | goto again; | ||
1977 | } | ||
1978 | |||
1979 | #endif | ||
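The core trick in the new xfs_mod_fdblocks() is the variable batch: a percpu counter's cheap read can be stale by up to batch * nr_cpus, so while space is plentiful a batch of 1024 keeps updates CPU-local, and near ENOSPC a batch of 1 forces every delta through the shared count so percpu_counter_compare() against XFS_ALLOC_SET_ASIDE() stays trustworthy. A standalone model of just the batch selection (the kernel uses percpu_counter_compare(), which falls back to a precise sum; a plain read stands in for it here):

#include <stdint.h>
#include <stdio.h>

static int64_t fdblocks;	/* stand-in for reading &mp->m_fdblocks */

/* Model of the batch selection in xfs_mod_fdblocks(). */
static int32_t fdblocks_batch(void)
{
	const int32_t max_batch = 1024;		/* __BATCH in the hunk above */

	return (fdblocks < 2 * max_batch) ? 1 : max_batch;
}

int main(void)
{
	fdblocks = 1 << 20;
	printf("plenty of space: batch = %d\n", fdblocks_batch());	/* 1024 */

	fdblocks = 1500;
	printf("near ENOSPC:     batch = %d\n", fdblocks_batch());	/* 1 */
	return 0;
}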
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 0d8abd6364d9..8c995a2ccb6f 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h | |||
@@ -18,8 +18,6 @@ | |||
18 | #ifndef __XFS_MOUNT_H__ | 18 | #ifndef __XFS_MOUNT_H__ |
19 | #define __XFS_MOUNT_H__ | 19 | #define __XFS_MOUNT_H__ |
20 | 20 | ||
21 | #ifdef __KERNEL__ | ||
22 | |||
23 | struct xlog; | 21 | struct xlog; |
24 | struct xfs_inode; | 22 | struct xfs_inode; |
25 | struct xfs_mru_cache; | 23 | struct xfs_mru_cache; |
@@ -29,44 +27,6 @@ struct xfs_quotainfo; | |||
29 | struct xfs_dir_ops; | 27 | struct xfs_dir_ops; |
30 | struct xfs_da_geometry; | 28 | struct xfs_da_geometry; |
31 | 29 | ||
32 | #ifdef HAVE_PERCPU_SB | ||
33 | |||
34 | /* | ||
35 | * Valid per-cpu incore superblock counters. Note that if you add new counters, | ||
36 | * you may need to define new counter disabled bit field descriptors as there | ||
37 | * are more possible fields in the superblock that can fit in a bitfield on a | ||
38 | * 32 bit platform. The XFS_SBS_* values for the current current counters just | ||
39 | * fit. | ||
40 | */ | ||
41 | typedef struct xfs_icsb_cnts { | ||
42 | uint64_t icsb_fdblocks; | ||
43 | uint64_t icsb_ifree; | ||
44 | uint64_t icsb_icount; | ||
45 | unsigned long icsb_flags; | ||
46 | } xfs_icsb_cnts_t; | ||
47 | |||
48 | #define XFS_ICSB_FLAG_LOCK (1 << 0) /* counter lock bit */ | ||
49 | |||
50 | #define XFS_ICSB_LAZY_COUNT (1 << 1) /* accuracy not needed */ | ||
51 | |||
52 | extern int xfs_icsb_init_counters(struct xfs_mount *); | ||
53 | extern void xfs_icsb_reinit_counters(struct xfs_mount *); | ||
54 | extern void xfs_icsb_destroy_counters(struct xfs_mount *); | ||
55 | extern void xfs_icsb_sync_counters(struct xfs_mount *, int); | ||
56 | extern void xfs_icsb_sync_counters_locked(struct xfs_mount *, int); | ||
57 | extern int xfs_icsb_modify_counters(struct xfs_mount *, xfs_sb_field_t, | ||
58 | int64_t, int); | ||
59 | |||
60 | #else | ||
61 | #define xfs_icsb_init_counters(mp) (0) | ||
62 | #define xfs_icsb_destroy_counters(mp) do { } while (0) | ||
63 | #define xfs_icsb_reinit_counters(mp) do { } while (0) | ||
64 | #define xfs_icsb_sync_counters(mp, flags) do { } while (0) | ||
65 | #define xfs_icsb_sync_counters_locked(mp, flags) do { } while (0) | ||
66 | #define xfs_icsb_modify_counters(mp, field, delta, rsvd) \ | ||
67 | xfs_mod_incore_sb(mp, field, delta, rsvd) | ||
68 | #endif | ||
69 | |||
70 | /* dynamic preallocation free space thresholds, 5% down to 1% */ | 30 | /* dynamic preallocation free space thresholds, 5% down to 1% */ |
71 | enum { | 31 | enum { |
72 | XFS_LOWSP_1_PCNT = 0, | 32 | XFS_LOWSP_1_PCNT = 0, |
@@ -81,8 +41,13 @@ typedef struct xfs_mount { | |||
81 | struct super_block *m_super; | 41 | struct super_block *m_super; |
82 | xfs_tid_t m_tid; /* next unused tid for fs */ | 42 | xfs_tid_t m_tid; /* next unused tid for fs */ |
83 | struct xfs_ail *m_ail; /* fs active log item list */ | 43 | struct xfs_ail *m_ail; /* fs active log item list */ |
84 | xfs_sb_t m_sb; /* copy of fs superblock */ | 44 | |
45 | struct xfs_sb m_sb; /* copy of fs superblock */ | ||
85 | spinlock_t m_sb_lock; /* sb counter lock */ | 46 | spinlock_t m_sb_lock; /* sb counter lock */ |
47 | struct percpu_counter m_icount; /* allocated inodes counter */ | ||
48 | struct percpu_counter m_ifree; /* free inodes counter */ | ||
49 | struct percpu_counter m_fdblocks; /* free block counter */ | ||
50 | |||
86 | struct xfs_buf *m_sb_bp; /* buffer for superblock */ | 51 | struct xfs_buf *m_sb_bp; /* buffer for superblock */ |
87 | char *m_fsname; /* filesystem name */ | 52 | char *m_fsname; /* filesystem name */ |
88 | int m_fsname_len; /* strlen of fs name */ | 53 | int m_fsname_len; /* strlen of fs name */ |
@@ -152,12 +117,6 @@ typedef struct xfs_mount { | |||
152 | const struct xfs_dir_ops *m_nondir_inode_ops; /* !dir inode ops */ | 117 | const struct xfs_dir_ops *m_nondir_inode_ops; /* !dir inode ops */ |
153 | uint m_chsize; /* size of next field */ | 118 | uint m_chsize; /* size of next field */ |
154 | atomic_t m_active_trans; /* number trans frozen */ | 119 | atomic_t m_active_trans; /* number trans frozen */ |
155 | #ifdef HAVE_PERCPU_SB | ||
156 | xfs_icsb_cnts_t __percpu *m_sb_cnts; /* per-cpu superblock counters */ | ||
157 | unsigned long m_icsb_counters; /* disabled per-cpu counters */ | ||
158 | struct notifier_block m_icsb_notifier; /* hotplug cpu notifier */ | ||
159 | struct mutex m_icsb_mutex; /* balancer sync lock */ | ||
160 | #endif | ||
161 | struct xfs_mru_cache *m_filestream; /* per-mount filestream data */ | 120 | struct xfs_mru_cache *m_filestream; /* per-mount filestream data */ |
162 | struct delayed_work m_reclaim_work; /* background inode reclaim */ | 121 | struct delayed_work m_reclaim_work; /* background inode reclaim */ |
163 | struct delayed_work m_eofblocks_work; /* background eof blocks | 122 | struct delayed_work m_eofblocks_work; /* background eof blocks |
@@ -301,35 +260,6 @@ xfs_daddr_to_agbno(struct xfs_mount *mp, xfs_daddr_t d) | |||
301 | } | 260 | } |
302 | 261 | ||
303 | /* | 262 | /* |
304 | * Per-cpu superblock locking functions | ||
305 | */ | ||
306 | #ifdef HAVE_PERCPU_SB | ||
307 | static inline void | ||
308 | xfs_icsb_lock(xfs_mount_t *mp) | ||
309 | { | ||
310 | mutex_lock(&mp->m_icsb_mutex); | ||
311 | } | ||
312 | |||
313 | static inline void | ||
314 | xfs_icsb_unlock(xfs_mount_t *mp) | ||
315 | { | ||
316 | mutex_unlock(&mp->m_icsb_mutex); | ||
317 | } | ||
318 | #else | ||
319 | #define xfs_icsb_lock(mp) | ||
320 | #define xfs_icsb_unlock(mp) | ||
321 | #endif | ||
322 | |||
323 | /* | ||
324 | * This structure is for use by the xfs_mod_incore_sb_batch() routine. | ||
325 | * xfs_growfs can specify a few fields which are more than int limit | ||
326 | */ | ||
327 | typedef struct xfs_mod_sb { | ||
328 | xfs_sb_field_t msb_field; /* Field to modify, see below */ | ||
329 | int64_t msb_delta; /* Change to make to specified field */ | ||
330 | } xfs_mod_sb_t; | ||
331 | |||
332 | /* | ||
333 | * Per-ag incore structure, copies of information in agf and agi, to improve the | 263 | * Per-ag incore structure, copies of information in agf and agi, to improve the |
334 | * performance of allocation group selection. | 264 | * performance of allocation group selection. |
335 | */ | 265 | */ |
@@ -383,11 +313,14 @@ extern __uint64_t xfs_default_resblks(xfs_mount_t *mp); | |||
383 | extern int xfs_mountfs(xfs_mount_t *mp); | 313 | extern int xfs_mountfs(xfs_mount_t *mp); |
384 | extern int xfs_initialize_perag(xfs_mount_t *mp, xfs_agnumber_t agcount, | 314 | extern int xfs_initialize_perag(xfs_mount_t *mp, xfs_agnumber_t agcount, |
385 | xfs_agnumber_t *maxagi); | 315 | xfs_agnumber_t *maxagi); |
386 | |||
387 | extern void xfs_unmountfs(xfs_mount_t *); | 316 | extern void xfs_unmountfs(xfs_mount_t *); |
388 | extern int xfs_mod_incore_sb(xfs_mount_t *, xfs_sb_field_t, int64_t, int); | 317 | |
389 | extern int xfs_mod_incore_sb_batch(xfs_mount_t *, xfs_mod_sb_t *, | 318 | extern int xfs_mod_icount(struct xfs_mount *mp, int64_t delta); |
390 | uint, int); | 319 | extern int xfs_mod_ifree(struct xfs_mount *mp, int64_t delta); |
320 | extern int xfs_mod_fdblocks(struct xfs_mount *mp, int64_t delta, | ||
321 | bool reserved); | ||
322 | extern int xfs_mod_frextents(struct xfs_mount *mp, int64_t delta); | ||
323 | |||
391 | extern int xfs_mount_log_sb(xfs_mount_t *); | 324 | extern int xfs_mount_log_sb(xfs_mount_t *); |
392 | extern struct xfs_buf *xfs_getsb(xfs_mount_t *, int); | 325 | extern struct xfs_buf *xfs_getsb(xfs_mount_t *, int); |
393 | extern int xfs_readsb(xfs_mount_t *, int); | 326 | extern int xfs_readsb(xfs_mount_t *, int); |
@@ -399,6 +332,4 @@ extern int xfs_dev_is_read_only(struct xfs_mount *, char *); | |||
399 | 332 | ||
400 | extern void xfs_set_low_space_thresholds(struct xfs_mount *); | 333 | extern void xfs_set_low_space_thresholds(struct xfs_mount *); |
401 | 334 | ||
402 | #endif /* __KERNEL__ */ | ||
403 | |||
404 | #endif /* __XFS_MOUNT_H__ */ | 335 | #endif /* __XFS_MOUNT_H__ */ |
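The xfs_mount.h hunks above drop the hand-rolled HAVE_PERCPU_SB machinery (per-cpu counter array, hotplug notifier, balancer mutex, and the xfs_mod_sb batch type) in favour of the kernel's generic struct percpu_counter for the inode and free-block counts. A minimal sketch of that generic API, with illustrative names that are not from the patch:

	#include <linux/kernel.h>
	#include <linux/percpu_counter.h>

	static struct percpu_counter ctr;

	static int percpu_counter_example(void)
	{
		/* second argument is the initial value */
		int error = percpu_counter_init(&ctr, 0, GFP_KERNEL);
		if (error)
			return error;

		percpu_counter_add(&ctr, 42);	/* cheap per-cpu fastpath */

		/* fast-but-fuzzy read vs. exact sum across all CPUs */
		pr_info("approx=%lld exact=%lld\n",
			(long long)percpu_counter_read(&ctr),
			(long long)percpu_counter_sum(&ctr));

		percpu_counter_destroy(&ctr);
		return 0;
	}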
diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c index 30ecca3037e3..f8a674d7f092 100644 --- a/fs/xfs/xfs_mru_cache.c +++ b/fs/xfs/xfs_mru_cache.c | |||
@@ -437,7 +437,7 @@ xfs_mru_cache_insert( | |||
437 | if (!mru || !mru->lists) | 437 | if (!mru || !mru->lists) |
438 | return -EINVAL; | 438 | return -EINVAL; |
439 | 439 | ||
440 | if (radix_tree_preload(GFP_KERNEL)) | 440 | if (radix_tree_preload(GFP_NOFS)) |
441 | return -ENOMEM; | 441 | return -ENOMEM; |
442 | 442 | ||
443 | INIT_LIST_HEAD(&elem->list_node); | 443 | INIT_LIST_HEAD(&elem->list_node); |
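The GFP_KERNEL to GFP_NOFS switch above is a correctness fix, not a cleanup: xfs_mru_cache_insert() runs in filesystem context, and a GFP_KERNEL allocation there may enter direct reclaim and recurse back into XFS, which can deadlock. The surrounding code follows the standard radix-tree preload idiom, roughly as below (field names as in xfs_mru_cache.c):

	if (radix_tree_preload(GFP_NOFS))	/* preallocate nodes; may sleep */
		return -ENOMEM;

	spin_lock(&mru->lock);
	error = radix_tree_insert(&mru->store, key, elem); /* no -ENOMEM possible now */
	spin_unlock(&mru->lock);

	radix_tree_preload_end();	/* drops the preemption-disable taken by preload */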
diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c index 365dd57ea760..981a657eca39 100644 --- a/fs/xfs/xfs_pnfs.c +++ b/fs/xfs/xfs_pnfs.c | |||
@@ -31,7 +31,8 @@ | |||
31 | int | 31 | int |
32 | xfs_break_layouts( | 32 | xfs_break_layouts( |
33 | struct inode *inode, | 33 | struct inode *inode, |
34 | uint *iolock) | 34 | uint *iolock, |
35 | bool with_imutex) | ||
35 | { | 36 | { |
36 | struct xfs_inode *ip = XFS_I(inode); | 37 | struct xfs_inode *ip = XFS_I(inode); |
37 | int error; | 38 | int error; |
@@ -40,8 +41,12 @@ xfs_break_layouts( | |||
40 | 41 | ||
41 | while ((error = break_layout(inode, false)) == -EWOULDBLOCK) { | 42 |
42 | xfs_iunlock(ip, *iolock); | 43 | xfs_iunlock(ip, *iolock); |
44 | if (with_imutex && (*iolock & XFS_IOLOCK_EXCL)) | ||
45 | mutex_unlock(&inode->i_mutex); | ||
43 | error = break_layout(inode, true); | 46 | error = break_layout(inode, true); |
44 | *iolock = XFS_IOLOCK_EXCL; | 47 | *iolock = XFS_IOLOCK_EXCL; |
48 | if (with_imutex) | ||
49 | mutex_lock(&inode->i_mutex); | ||
45 | xfs_ilock(ip, *iolock); | 50 | xfs_ilock(ip, *iolock); |
46 | } | 51 | } |
47 | 52 | ||
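The new with_imutex flag lets callers that already hold i_mutex across xfs_break_layouts() (the fallocate path, in this series) drop it together with the iolock before blocking on the layout recall, then retake both in the correct order, i_mutex before iolock. The loop itself is the usual unlock/wait/relock/retry pattern; in sketch form, with hypothetical helper names:

	while ((error = try_break_nonblocking()) == -EWOULDBLOCK) {
		unlock_all();			/* iolock, plus i_mutex if held */
		error = break_blocking();	/* wait for the layout recall */
		relock_all();			/* mutex first, then iolock */
	}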
diff --git a/fs/xfs/xfs_pnfs.h b/fs/xfs/xfs_pnfs.h index b7fbfce660f6..8147ac108820 100644 --- a/fs/xfs/xfs_pnfs.h +++ b/fs/xfs/xfs_pnfs.h | |||
@@ -8,9 +8,10 @@ int xfs_fs_map_blocks(struct inode *inode, loff_t offset, u64 length, | |||
8 | int xfs_fs_commit_blocks(struct inode *inode, struct iomap *maps, int nr_maps, | 8 | int xfs_fs_commit_blocks(struct inode *inode, struct iomap *maps, int nr_maps, |
9 | struct iattr *iattr); | 9 | struct iattr *iattr); |
10 | 10 | ||
11 | int xfs_break_layouts(struct inode *inode, uint *iolock); | 11 | int xfs_break_layouts(struct inode *inode, uint *iolock, bool with_imutex); |
12 | #else | 12 | #else |
13 | static inline int xfs_break_layouts(struct inode *inode, uint *iolock) | 13 | static inline int |
14 | xfs_break_layouts(struct inode *inode, uint *iolock, bool with_imutex) | ||
14 | { | 15 | { |
15 | return 0; | 16 | return 0; |
16 | } | 17 | } |
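The header keeps the usual pattern for optional kernel features: a real prototype when pNFS export support is configured in, and a static inline no-op otherwise, so call sites need no #ifdefs. The same shape, shown with a hypothetical config symbol:

	#ifdef CONFIG_MY_FEATURE
	int my_feature_op(struct inode *inode);
	#else
	static inline int my_feature_op(struct inode *inode)
	{
		return 0;	/* feature compiled out: succeed, do nothing */
	}
	#endif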
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index fbbb9e62e274..5538468c7f63 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c | |||
@@ -719,6 +719,7 @@ xfs_qm_qino_alloc( | |||
719 | xfs_trans_t *tp; | 719 | xfs_trans_t *tp; |
720 | int error; | 720 | int error; |
721 | int committed; | 721 | int committed; |
722 | bool need_alloc = true; | ||
722 | 723 | ||
723 | *ip = NULL; | 724 | *ip = NULL; |
724 | /* | 725 | /* |
@@ -747,6 +748,7 @@ xfs_qm_qino_alloc( | |||
747 | return error; | 748 | return error; |
748 | mp->m_sb.sb_gquotino = NULLFSINO; | 749 | mp->m_sb.sb_gquotino = NULLFSINO; |
749 | mp->m_sb.sb_pquotino = NULLFSINO; | 750 | mp->m_sb.sb_pquotino = NULLFSINO; |
751 | need_alloc = false; | ||
750 | } | 752 | } |
751 | } | 753 | } |
752 | 754 | ||
@@ -758,7 +760,7 @@ xfs_qm_qino_alloc( | |||
758 | return error; | 760 | return error; |
759 | } | 761 | } |
760 | 762 | ||
761 | if (!*ip) { | 763 | if (need_alloc) { |
762 | error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, 1, ip, | 764 | error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, 1, ip, |
763 | &committed); | 765 | &committed); |
764 | if (error) { | 766 | if (error) { |
@@ -794,11 +796,14 @@ xfs_qm_qino_alloc( | |||
794 | spin_unlock(&mp->m_sb_lock); | 796 | spin_unlock(&mp->m_sb_lock); |
795 | xfs_log_sb(tp); | 797 | xfs_log_sb(tp); |
796 | 798 | ||
797 | if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES))) { | 799 | error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); |
800 | if (error) { | ||
801 | ASSERT(XFS_FORCED_SHUTDOWN(mp)); | ||
798 | xfs_alert(mp, "%s failed (error %d)!", __func__, error); | 802 | xfs_alert(mp, "%s failed (error %d)!", __func__, error); |
799 | return error; | ||
800 | } | 803 | } |
801 | return 0; | 804 | if (need_alloc) |
805 | xfs_finish_inode_setup(*ip); | ||
806 | return error; | ||
802 | } | 807 | } |
803 | 808 | ||
804 | 809 | ||
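Two related changes in xfs_qm_qino_alloc(): a need_alloc flag now records explicitly whether a new quota inode must be created (rather than inferring it from *ip still being NULL), and xfs_finish_inode_setup() defers completing the new inode until after the transaction commits. The commit call also moves from assignment-inside-if to the kernel's preferred two-step form, as in the hunk above:

	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
	if (error) {
		/* commit can only fail once the fs is already shut down */
		ASSERT(XFS_FORCED_SHUTDOWN(mp));
		xfs_alert(mp, "%s failed (error %d)!", __func__, error);
	}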
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 8fcc4ccc5c79..5f357ca97e76 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c | |||
@@ -109,8 +109,6 @@ static struct xfs_kobj xfs_dbg_kobj; /* global debug sysfs attrs */ | |||
109 | #define MNTOPT_GQUOTANOENF "gqnoenforce"/* group quota limit enforcement */ | 109 | #define MNTOPT_GQUOTANOENF "gqnoenforce"/* group quota limit enforcement */ |
110 | #define MNTOPT_PQUOTANOENF "pqnoenforce"/* project quota limit enforcement */ | 110 | #define MNTOPT_PQUOTANOENF "pqnoenforce"/* project quota limit enforcement */ |
111 | #define MNTOPT_QUOTANOENF "qnoenforce" /* same as uqnoenforce */ | 111 | #define MNTOPT_QUOTANOENF "qnoenforce" /* same as uqnoenforce */ |
112 | #define MNTOPT_DELAYLOG "delaylog" /* Delayed logging enabled */ | ||
113 | #define MNTOPT_NODELAYLOG "nodelaylog" /* Delayed logging disabled */ | ||
114 | #define MNTOPT_DISCARD "discard" /* Discard unused blocks */ | 112 | #define MNTOPT_DISCARD "discard" /* Discard unused blocks */ |
115 | #define MNTOPT_NODISCARD "nodiscard" /* Do not discard unused blocks */ | 113 | #define MNTOPT_NODISCARD "nodiscard" /* Do not discard unused blocks */ |
116 | 114 | ||
@@ -361,28 +359,10 @@ xfs_parseargs( | |||
361 | } else if (!strcmp(this_char, MNTOPT_GQUOTANOENF)) { | 359 | } else if (!strcmp(this_char, MNTOPT_GQUOTANOENF)) { |
362 | mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE); | 360 | mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE); |
363 | mp->m_qflags &= ~XFS_GQUOTA_ENFD; | 361 | mp->m_qflags &= ~XFS_GQUOTA_ENFD; |
364 | } else if (!strcmp(this_char, MNTOPT_DELAYLOG)) { | ||
365 | xfs_warn(mp, | ||
366 | "delaylog is the default now, option is deprecated."); | ||
367 | } else if (!strcmp(this_char, MNTOPT_NODELAYLOG)) { | ||
368 | xfs_warn(mp, | ||
369 | "nodelaylog support has been removed, option is deprecated."); | ||
370 | } else if (!strcmp(this_char, MNTOPT_DISCARD)) { | 362 | } else if (!strcmp(this_char, MNTOPT_DISCARD)) { |
371 | mp->m_flags |= XFS_MOUNT_DISCARD; | 363 | mp->m_flags |= XFS_MOUNT_DISCARD; |
372 | } else if (!strcmp(this_char, MNTOPT_NODISCARD)) { | 364 | } else if (!strcmp(this_char, MNTOPT_NODISCARD)) { |
373 | mp->m_flags &= ~XFS_MOUNT_DISCARD; | 365 | mp->m_flags &= ~XFS_MOUNT_DISCARD; |
374 | } else if (!strcmp(this_char, "ihashsize")) { | ||
375 | xfs_warn(mp, | ||
376 | "ihashsize no longer used, option is deprecated."); | ||
377 | } else if (!strcmp(this_char, "osyncisdsync")) { | ||
378 | xfs_warn(mp, | ||
379 | "osyncisdsync has no effect, option is deprecated."); | ||
380 | } else if (!strcmp(this_char, "osyncisosync")) { | ||
381 | xfs_warn(mp, | ||
382 | "osyncisosync has no effect, option is deprecated."); | ||
383 | } else if (!strcmp(this_char, "irixsgid")) { | ||
384 | xfs_warn(mp, | ||
385 | "irixsgid is now a sysctl(2) variable, option is deprecated."); | ||
386 | } else { | 366 | } else { |
387 | xfs_warn(mp, "unknown mount option [%s].", this_char); | 367 | xfs_warn(mp, "unknown mount option [%s].", this_char); |
388 | return -EINVAL; | 368 | return -EINVAL; |
@@ -986,6 +966,8 @@ xfs_fs_inode_init_once( | |||
986 | atomic_set(&ip->i_pincount, 0); | 966 | atomic_set(&ip->i_pincount, 0); |
987 | spin_lock_init(&ip->i_flags_lock); | 967 | spin_lock_init(&ip->i_flags_lock); |
988 | 968 | ||
969 | mrlock_init(&ip->i_mmaplock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER, | ||
970 | "xfsino", ip->i_ino); | ||
989 | mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER, | 971 | mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER, |
990 | "xfsino", ip->i_ino); | 972 | "xfsino", ip->i_ino); |
991 | } | 973 | } |
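xfs_fs_inode_init_once() now initializes a second mrlock, i_mmaplock, alongside i_lock. Elsewhere in this series it serializes page faults against truncate and hole punching; the fault side takes it shared around the generic handler, roughly as follows (a sketch of the xfs_file.c side, which is not part of this hunk):

	xfs_ilock(ip, XFS_MMAPLOCK_SHARED);	/* exclude a racing truncate */
	ret = filemap_fault(vma, vmf);		/* generic fault path */
	xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);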
@@ -1033,23 +1015,6 @@ xfs_free_fsname( | |||
1033 | kfree(mp->m_logname); | 1015 | kfree(mp->m_logname); |
1034 | } | 1016 | } |
1035 | 1017 | ||
1036 | STATIC void | ||
1037 | xfs_fs_put_super( | ||
1038 | struct super_block *sb) | ||
1039 | { | ||
1040 | struct xfs_mount *mp = XFS_M(sb); | ||
1041 | |||
1042 | xfs_filestream_unmount(mp); | ||
1043 | xfs_unmountfs(mp); | ||
1044 | |||
1045 | xfs_freesb(mp); | ||
1046 | xfs_icsb_destroy_counters(mp); | ||
1047 | xfs_destroy_mount_workqueues(mp); | ||
1048 | xfs_close_devices(mp); | ||
1049 | xfs_free_fsname(mp); | ||
1050 | kfree(mp); | ||
1051 | } | ||
1052 | |||
1053 | STATIC int | 1018 | STATIC int |
1054 | xfs_fs_sync_fs( | 1019 | xfs_fs_sync_fs( |
1055 | struct super_block *sb, | 1020 | struct super_block *sb, |
@@ -1085,6 +1050,9 @@ xfs_fs_statfs( | |||
1085 | xfs_sb_t *sbp = &mp->m_sb; | 1050 | xfs_sb_t *sbp = &mp->m_sb; |
1086 | struct xfs_inode *ip = XFS_I(dentry->d_inode); | 1051 | struct xfs_inode *ip = XFS_I(dentry->d_inode); |
1087 | __uint64_t fakeinos, id; | 1052 | __uint64_t fakeinos, id; |
1053 | __uint64_t icount; | ||
1054 | __uint64_t ifree; | ||
1055 | __uint64_t fdblocks; | ||
1088 | xfs_extlen_t lsize; | 1056 | xfs_extlen_t lsize; |
1089 | __int64_t ffree; | 1057 | __int64_t ffree; |
1090 | 1058 | ||
@@ -1095,17 +1063,21 @@ xfs_fs_statfs( | |||
1095 | statp->f_fsid.val[0] = (u32)id; | 1063 | statp->f_fsid.val[0] = (u32)id; |
1096 | statp->f_fsid.val[1] = (u32)(id >> 32); | 1064 | statp->f_fsid.val[1] = (u32)(id >> 32); |
1097 | 1065 | ||
1098 | xfs_icsb_sync_counters(mp, XFS_ICSB_LAZY_COUNT); | 1066 | icount = percpu_counter_sum(&mp->m_icount); |
1067 | ifree = percpu_counter_sum(&mp->m_ifree); | ||
1068 | fdblocks = percpu_counter_sum(&mp->m_fdblocks); | ||
1099 | 1069 | ||
1100 | spin_lock(&mp->m_sb_lock); | 1070 | spin_lock(&mp->m_sb_lock); |
1101 | statp->f_bsize = sbp->sb_blocksize; | 1071 | statp->f_bsize = sbp->sb_blocksize; |
1102 | lsize = sbp->sb_logstart ? sbp->sb_logblocks : 0; | 1072 | lsize = sbp->sb_logstart ? sbp->sb_logblocks : 0; |
1103 | statp->f_blocks = sbp->sb_dblocks - lsize; | 1073 | statp->f_blocks = sbp->sb_dblocks - lsize; |
1104 | statp->f_bfree = statp->f_bavail = | 1074 | spin_unlock(&mp->m_sb_lock); |
1105 | sbp->sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp); | 1075 | |
1076 | statp->f_bfree = fdblocks - XFS_ALLOC_SET_ASIDE(mp); | ||
1077 | statp->f_bavail = statp->f_bfree; | ||
1078 | |||
1106 | fakeinos = statp->f_bfree << sbp->sb_inopblog; | 1079 | fakeinos = statp->f_bfree << sbp->sb_inopblog; |
1107 | statp->f_files = | 1080 | statp->f_files = MIN(icount + fakeinos, (__uint64_t)XFS_MAXINUMBER); |
1108 | MIN(sbp->sb_icount + fakeinos, (__uint64_t)XFS_MAXINUMBER); | ||
1109 | if (mp->m_maxicount) | 1081 | if (mp->m_maxicount) |
1110 | statp->f_files = min_t(typeof(statp->f_files), | 1082 | statp->f_files = min_t(typeof(statp->f_files), |
1111 | statp->f_files, | 1083 | statp->f_files, |
@@ -1117,10 +1089,9 @@ xfs_fs_statfs( | |||
1117 | sbp->sb_icount); | 1089 | sbp->sb_icount); |
1118 | 1090 | ||
1119 | /* make sure statp->f_ffree does not underflow */ | 1091 | /* make sure statp->f_ffree does not underflow */ |
1120 | ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree); | 1092 | ffree = statp->f_files - (icount - ifree); |
1121 | statp->f_ffree = max_t(__int64_t, ffree, 0); | 1093 | statp->f_ffree = max_t(__int64_t, ffree, 0); |
1122 | 1094 | ||
1123 | spin_unlock(&mp->m_sb_lock); | ||
1124 | 1095 | ||
1125 | if ((ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) && | 1096 | if ((ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) && |
1126 | ((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD))) == | 1097 | ((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD))) == |
@@ -1256,6 +1227,12 @@ xfs_fs_remount( | |||
1256 | 1227 | ||
1257 | /* ro -> rw */ | 1228 | /* ro -> rw */ |
1258 | if ((mp->m_flags & XFS_MOUNT_RDONLY) && !(*flags & MS_RDONLY)) { | 1229 | if ((mp->m_flags & XFS_MOUNT_RDONLY) && !(*flags & MS_RDONLY)) { |
1230 | if (mp->m_flags & XFS_MOUNT_NORECOVERY) { | ||
1231 | xfs_warn(mp, | ||
1232 | "ro->rw transition prohibited on norecovery mount"); | ||
1233 | return -EINVAL; | ||
1234 | } | ||
1235 | |||
1259 | mp->m_flags &= ~XFS_MOUNT_RDONLY; | 1236 | mp->m_flags &= ~XFS_MOUNT_RDONLY; |
1260 | 1237 | ||
1261 | /* | 1238 | /* |
@@ -1401,6 +1378,51 @@ xfs_finish_flags( | |||
1401 | return 0; | 1378 | return 0; |
1402 | } | 1379 | } |
1403 | 1380 | ||
1381 | static int | ||
1382 | xfs_init_percpu_counters( | ||
1383 | struct xfs_mount *mp) | ||
1384 | { | ||
1385 | int error; | ||
1386 | |||
1387 | error = percpu_counter_init(&mp->m_icount, 0, GFP_KERNEL); | ||
1388 | if (error) | ||
1389 | return -ENOMEM; | ||
1390 | |||
1391 | error = percpu_counter_init(&mp->m_ifree, 0, GFP_KERNEL); | ||
1392 | if (error) | ||
1393 | goto free_icount; | ||
1394 | |||
1395 | error = percpu_counter_init(&mp->m_fdblocks, 0, GFP_KERNEL); | ||
1396 | if (error) | ||
1397 | goto free_ifree; | ||
1398 | |||
1399 | return 0; | ||
1400 | |||
1401 | free_ifree: | ||
1402 | percpu_counter_destroy(&mp->m_ifree); | ||
1403 | free_icount: | ||
1404 | percpu_counter_destroy(&mp->m_icount); | ||
1405 | return -ENOMEM; | ||
1406 | } | ||
1407 | |||
1408 | void | ||
1409 | xfs_reinit_percpu_counters( | ||
1410 | struct xfs_mount *mp) | ||
1411 | { | ||
1412 | percpu_counter_set(&mp->m_icount, mp->m_sb.sb_icount); | ||
1413 | percpu_counter_set(&mp->m_ifree, mp->m_sb.sb_ifree); | ||
1414 | percpu_counter_set(&mp->m_fdblocks, mp->m_sb.sb_fdblocks); | ||
1415 | } | ||
1416 | |||
1417 | static void | ||
1418 | xfs_destroy_percpu_counters( | ||
1419 | struct xfs_mount *mp) | ||
1420 | { | ||
1421 | percpu_counter_destroy(&mp->m_icount); | ||
1422 | percpu_counter_destroy(&mp->m_ifree); | ||
1423 | percpu_counter_destroy(&mp->m_fdblocks); | ||
1424 | } | ||
1425 | |||
1404 | STATIC int | 1426 | STATIC int |
1405 | xfs_fs_fill_super( | 1427 | xfs_fs_fill_super( |
1406 | struct super_block *sb, | 1428 | struct super_block *sb, |
@@ -1449,7 +1471,7 @@ xfs_fs_fill_super( | |||
1449 | if (error) | 1471 | if (error) |
1450 | goto out_close_devices; | 1472 | goto out_close_devices; |
1451 | 1473 | ||
1452 | error = xfs_icsb_init_counters(mp); | 1474 | error = xfs_init_percpu_counters(mp); |
1453 | if (error) | 1475 | if (error) |
1454 | goto out_destroy_workqueues; | 1476 | goto out_destroy_workqueues; |
1455 | 1477 | ||
@@ -1507,7 +1529,7 @@ xfs_fs_fill_super( | |||
1507 | out_free_sb: | 1529 | out_free_sb: |
1508 | xfs_freesb(mp); | 1530 | xfs_freesb(mp); |
1509 | out_destroy_counters: | 1531 | out_destroy_counters: |
1510 | xfs_icsb_destroy_counters(mp); | 1532 | xfs_destroy_percpu_counters(mp); |
1511 | out_destroy_workqueues: | 1533 | out_destroy_workqueues: |
1512 | xfs_destroy_mount_workqueues(mp); | 1534 | xfs_destroy_mount_workqueues(mp); |
1513 | out_close_devices: | 1535 | out_close_devices: |
@@ -1524,6 +1546,24 @@ out_destroy_workqueues: | |||
1524 | goto out_free_sb; | 1546 | goto out_free_sb; |
1525 | } | 1547 | } |
1526 | 1548 | ||
1549 | STATIC void | ||
1550 | xfs_fs_put_super( | ||
1551 | struct super_block *sb) | ||
1552 | { | ||
1553 | struct xfs_mount *mp = XFS_M(sb); | ||
1554 | |||
1555 | xfs_notice(mp, "Unmounting Filesystem"); | ||
1556 | xfs_filestream_unmount(mp); | ||
1557 | xfs_unmountfs(mp); | ||
1558 | |||
1559 | xfs_freesb(mp); | ||
1560 | xfs_destroy_percpu_counters(mp); | ||
1561 | xfs_destroy_mount_workqueues(mp); | ||
1562 | xfs_close_devices(mp); | ||
1563 | xfs_free_fsname(mp); | ||
1564 | kfree(mp); | ||
1565 | } | ||
1566 | |||
1527 | STATIC struct dentry * | 1567 | STATIC struct dentry * |
1528 | xfs_fs_mount( | 1568 | xfs_fs_mount( |
1529 | struct file_system_type *fs_type, | 1569 | struct file_system_type *fs_type, |
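Note how xfs_fs_statfs() above folds the counters with percpu_counter_sum() before computing df output: the lockless percpu_counter_read() can be off by up to the per-CPU batch size on every CPU, which is fine for heuristics but not for user-visible free-space numbers. The trade-off, one line each:

	s64 fast  = percpu_counter_read_positive(&mp->m_fdblocks); /* approximate */
	s64 exact = percpu_counter_sum(&mp->m_fdblocks);	    /* folds all CPUs */

xfs_init_percpu_counters()/xfs_destroy_percpu_counters() follow the standard reverse-order unwind on failure, and xfs_fs_put_super() moves below xfs_fs_fill_super() unchanged apart from the new unmount notice.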
diff --git a/fs/xfs/xfs_super.h b/fs/xfs/xfs_super.h index 2b830c2f322e..499058fea303 100644 --- a/fs/xfs/xfs_super.h +++ b/fs/xfs/xfs_super.h | |||
@@ -72,6 +72,8 @@ extern const struct export_operations xfs_export_operations; | |||
72 | extern const struct xattr_handler *xfs_xattr_handlers[]; | 72 | extern const struct xattr_handler *xfs_xattr_handlers[]; |
73 | extern const struct quotactl_ops xfs_quotactl_operations; | 73 | extern const struct quotactl_ops xfs_quotactl_operations; |
74 | 74 | ||
75 | extern void xfs_reinit_percpu_counters(struct xfs_mount *mp); | ||
76 | |||
75 | #define XFS_M(sb) ((struct xfs_mount *)((sb)->s_fs_info)) | 77 | #define XFS_M(sb) ((struct xfs_mount *)((sb)->s_fs_info)) |
76 | 78 | ||
77 | #endif /* __XFS_SUPER_H__ */ | 79 | #endif /* __XFS_SUPER_H__ */ |
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c index 25791df6f638..3df411eadb86 100644 --- a/fs/xfs/xfs_symlink.c +++ b/fs/xfs/xfs_symlink.c | |||
@@ -177,7 +177,7 @@ xfs_symlink( | |||
177 | int pathlen; | 177 | int pathlen; |
178 | struct xfs_bmap_free free_list; | 178 | struct xfs_bmap_free free_list; |
179 | xfs_fsblock_t first_block; | 179 | xfs_fsblock_t first_block; |
180 | bool unlock_dp_on_error = false; | 180 | bool unlock_dp_on_error = false; |
181 | uint cancel_flags; | 181 | uint cancel_flags; |
182 | int committed; | 182 | int committed; |
183 | xfs_fileoff_t first_fsb; | 183 | xfs_fileoff_t first_fsb; |
@@ -221,7 +221,7 @@ xfs_symlink( | |||
221 | XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, | 221 | XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, |
222 | &udqp, &gdqp, &pdqp); | 222 | &udqp, &gdqp, &pdqp); |
223 | if (error) | 223 | if (error) |
224 | goto std_return; | 224 | return error; |
225 | 225 | ||
226 | tp = xfs_trans_alloc(mp, XFS_TRANS_SYMLINK); | 226 | tp = xfs_trans_alloc(mp, XFS_TRANS_SYMLINK); |
227 | cancel_flags = XFS_TRANS_RELEASE_LOG_RES; | 227 | cancel_flags = XFS_TRANS_RELEASE_LOG_RES; |
@@ -241,7 +241,7 @@ xfs_symlink( | |||
241 | } | 241 | } |
242 | if (error) { | 242 | if (error) { |
243 | cancel_flags = 0; | 243 | cancel_flags = 0; |
244 | goto error_return; | 244 | goto out_trans_cancel; |
245 | } | 245 | } |
246 | 246 | ||
247 | xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT); | 247 | xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT); |
@@ -252,7 +252,7 @@ xfs_symlink( | |||
252 | */ | 252 | */ |
253 | if (dp->i_d.di_flags & XFS_DIFLAG_NOSYMLINKS) { | 253 | if (dp->i_d.di_flags & XFS_DIFLAG_NOSYMLINKS) { |
254 | error = -EPERM; | 254 | error = -EPERM; |
255 | goto error_return; | 255 | goto out_trans_cancel; |
256 | } | 256 | } |
257 | 257 | ||
258 | /* | 258 | /* |
@@ -261,7 +261,7 @@ xfs_symlink( | |||
261 | error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp, | 261 | error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp, |
262 | pdqp, resblks, 1, 0); | 262 | pdqp, resblks, 1, 0); |
263 | if (error) | 263 | if (error) |
264 | goto error_return; | 264 | goto out_trans_cancel; |
265 | 265 | ||
266 | /* | 266 | /* |
267 | * Check for ability to enter directory entry, if no space reserved. | 267 | * Check for ability to enter directory entry, if no space reserved. |
@@ -269,7 +269,7 @@ xfs_symlink( | |||
269 | if (!resblks) { | 269 | if (!resblks) { |
270 | error = xfs_dir_canenter(tp, dp, link_name); | 270 | error = xfs_dir_canenter(tp, dp, link_name); |
271 | if (error) | 271 | if (error) |
272 | goto error_return; | 272 | goto out_trans_cancel; |
273 | } | 273 | } |
274 | /* | 274 | /* |
275 | * Initialize the bmap freelist prior to calling either | 275 | * Initialize the bmap freelist prior to calling either |
@@ -282,15 +282,14 @@ xfs_symlink( | |||
282 | */ | 282 | */ |
283 | error = xfs_dir_ialloc(&tp, dp, S_IFLNK | (mode & ~S_IFMT), 1, 0, | 283 | error = xfs_dir_ialloc(&tp, dp, S_IFLNK | (mode & ~S_IFMT), 1, 0, |
284 | prid, resblks > 0, &ip, NULL); | 284 | prid, resblks > 0, &ip, NULL); |
285 | if (error) { | 285 | if (error) |
286 | if (error == -ENOSPC) | 286 | goto out_trans_cancel; |
287 | goto error_return; | ||
288 | goto error1; | ||
289 | } | ||
290 | 287 | ||
291 | /* | 288 | /* |
292 | * An error after we've joined dp to the transaction will result in the | 289 | * Now we join the directory inode to the transaction. We do not do it |
293 | * transaction cancel unlocking dp so don't do it explicitly in the | 290 | * earlier because xfs_dir_ialloc might commit the previous transaction |
291 | * (and release all the locks). An error from here on will result in | ||
292 | * the transaction cancel unlocking dp so don't do it explicitly in the | ||
294 | * error path. | 293 | * error path. |
295 | */ | 294 | */ |
296 | xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); | 295 | xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); |
@@ -330,7 +329,7 @@ xfs_symlink( | |||
330 | XFS_BMAPI_METADATA, &first_block, resblks, | 329 | XFS_BMAPI_METADATA, &first_block, resblks, |
331 | mval, &nmaps, &free_list); | 330 | mval, &nmaps, &free_list); |
332 | if (error) | 331 | if (error) |
333 | goto error2; | 332 | goto out_bmap_cancel; |
334 | 333 | ||
335 | if (resblks) | 334 | if (resblks) |
336 | resblks -= fs_blocks; | 335 | resblks -= fs_blocks; |
@@ -348,7 +347,7 @@ xfs_symlink( | |||
348 | BTOBB(byte_cnt), 0); | 347 | BTOBB(byte_cnt), 0); |
349 | if (!bp) { | 348 | if (!bp) { |
350 | error = -ENOMEM; | 349 | error = -ENOMEM; |
351 | goto error2; | 350 | goto out_bmap_cancel; |
352 | } | 351 | } |
353 | bp->b_ops = &xfs_symlink_buf_ops; | 352 | bp->b_ops = &xfs_symlink_buf_ops; |
354 | 353 | ||
@@ -378,7 +377,7 @@ xfs_symlink( | |||
378 | error = xfs_dir_createname(tp, dp, link_name, ip->i_ino, | 377 | error = xfs_dir_createname(tp, dp, link_name, ip->i_ino, |
379 | &first_block, &free_list, resblks); | 378 | &first_block, &free_list, resblks); |
380 | if (error) | 379 | if (error) |
381 | goto error2; | 380 | goto out_bmap_cancel; |
382 | xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); | 381 | xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); |
383 | xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); | 382 | xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); |
384 | 383 | ||
@@ -392,10 +391,13 @@ xfs_symlink( | |||
392 | } | 391 | } |
393 | 392 | ||
394 | error = xfs_bmap_finish(&tp, &free_list, &committed); | 393 | error = xfs_bmap_finish(&tp, &free_list, &committed); |
395 | if (error) { | 394 | if (error) |
396 | goto error2; | 395 | goto out_bmap_cancel; |
397 | } | 396 | |
398 | error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); | 397 | error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); |
398 | if (error) | ||
399 | goto out_release_inode; | ||
400 | |||
399 | xfs_qm_dqrele(udqp); | 401 | xfs_qm_dqrele(udqp); |
400 | xfs_qm_dqrele(gdqp); | 402 | xfs_qm_dqrele(gdqp); |
401 | xfs_qm_dqrele(pdqp); | 403 | xfs_qm_dqrele(pdqp); |
@@ -403,20 +405,28 @@ xfs_symlink( | |||
403 | *ipp = ip; | 405 | *ipp = ip; |
404 | return 0; | 406 | return 0; |
405 | 407 | ||
406 | error2: | 408 | out_bmap_cancel: |
407 | IRELE(ip); | ||
408 | error1: | ||
409 | xfs_bmap_cancel(&free_list); | 409 | xfs_bmap_cancel(&free_list); |
410 | cancel_flags |= XFS_TRANS_ABORT; | 410 | cancel_flags |= XFS_TRANS_ABORT; |
411 | error_return: | 411 | out_trans_cancel: |
412 | xfs_trans_cancel(tp, cancel_flags); | 412 | xfs_trans_cancel(tp, cancel_flags); |
413 | out_release_inode: | ||
414 | /* | ||
415 | * Wait until after the current transaction is aborted to finish the | ||
416 | * setup of the inode and release the inode. This prevents recursive | ||
417 | * transactions and deadlocks from xfs_inactive. | ||
418 | */ | ||
419 | if (ip) { | ||
420 | xfs_finish_inode_setup(ip); | ||
421 | IRELE(ip); | ||
422 | } | ||
423 | |||
413 | xfs_qm_dqrele(udqp); | 424 | xfs_qm_dqrele(udqp); |
414 | xfs_qm_dqrele(gdqp); | 425 | xfs_qm_dqrele(gdqp); |
415 | xfs_qm_dqrele(pdqp); | 426 | xfs_qm_dqrele(pdqp); |
416 | 427 | ||
417 | if (unlock_dp_on_error) | 428 | if (unlock_dp_on_error) |
418 | xfs_iunlock(dp, XFS_ILOCK_EXCL); | 429 | xfs_iunlock(dp, XFS_ILOCK_EXCL); |
419 | std_return: | ||
420 | return error; | 430 | return error; |
421 | } | 431 | } |
422 | 432 | ||
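Besides deferring xfs_finish_inode_setup()/IRELE() until after the transaction is cancelled (so xfs_inactive cannot start a nested transaction), the symlink error paths are renamed from the opaque error1/error2/error_return to descriptive out_* labels. The idiom, reduced to its shape with hypothetical steps:

	error = step_a();
	if (error)
		goto out;
	error = step_b();
	if (error)
		goto out_undo_a;	/* labels name what gets undone */
	return 0;

out_undo_a:
	undo_a();
out:
	return error;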
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 51372e34d988..615781bf4ee5 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h | |||
@@ -115,7 +115,7 @@ DECLARE_EVENT_CLASS(xfs_perag_class, | |||
115 | __entry->refcount = refcount; | 115 | __entry->refcount = refcount; |
116 | __entry->caller_ip = caller_ip; | 116 | __entry->caller_ip = caller_ip; |
117 | ), | 117 | ), |
118 | TP_printk("dev %d:%d agno %u refcount %d caller %pf", | 118 | TP_printk("dev %d:%d agno %u refcount %d caller %ps", |
119 | MAJOR(__entry->dev), MINOR(__entry->dev), | 119 | MAJOR(__entry->dev), MINOR(__entry->dev), |
120 | __entry->agno, | 120 | __entry->agno, |
121 | __entry->refcount, | 121 | __entry->refcount, |
@@ -239,7 +239,7 @@ TRACE_EVENT(xfs_iext_insert, | |||
239 | __entry->caller_ip = caller_ip; | 239 | __entry->caller_ip = caller_ip; |
240 | ), | 240 | ), |
241 | TP_printk("dev %d:%d ino 0x%llx state %s idx %ld " | 241 | TP_printk("dev %d:%d ino 0x%llx state %s idx %ld " |
242 | "offset %lld block %lld count %lld flag %d caller %pf", | 242 | "offset %lld block %lld count %lld flag %d caller %ps", |
243 | MAJOR(__entry->dev), MINOR(__entry->dev), | 243 | MAJOR(__entry->dev), MINOR(__entry->dev), |
244 | __entry->ino, | 244 | __entry->ino, |
245 | __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS), | 245 | __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS), |
@@ -283,7 +283,7 @@ DECLARE_EVENT_CLASS(xfs_bmap_class, | |||
283 | __entry->caller_ip = caller_ip; | 283 | __entry->caller_ip = caller_ip; |
284 | ), | 284 | ), |
285 | TP_printk("dev %d:%d ino 0x%llx state %s idx %ld " | 285 | TP_printk("dev %d:%d ino 0x%llx state %s idx %ld " |
286 | "offset %lld block %lld count %lld flag %d caller %pf", | 286 | "offset %lld block %lld count %lld flag %d caller %ps", |
287 | MAJOR(__entry->dev), MINOR(__entry->dev), | 287 | MAJOR(__entry->dev), MINOR(__entry->dev), |
288 | __entry->ino, | 288 | __entry->ino, |
289 | __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS), | 289 | __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS), |
@@ -329,7 +329,7 @@ DECLARE_EVENT_CLASS(xfs_buf_class, | |||
329 | __entry->caller_ip = caller_ip; | 329 | __entry->caller_ip = caller_ip; |
330 | ), | 330 | ), |
331 | TP_printk("dev %d:%d bno 0x%llx nblks 0x%x hold %d pincount %d " | 331 | TP_printk("dev %d:%d bno 0x%llx nblks 0x%x hold %d pincount %d " |
332 | "lock %d flags %s caller %pf", | 332 | "lock %d flags %s caller %ps", |
333 | MAJOR(__entry->dev), MINOR(__entry->dev), | 333 | MAJOR(__entry->dev), MINOR(__entry->dev), |
334 | (unsigned long long)__entry->bno, | 334 | (unsigned long long)__entry->bno, |
335 | __entry->nblks, | 335 | __entry->nblks, |
@@ -402,7 +402,7 @@ DECLARE_EVENT_CLASS(xfs_buf_flags_class, | |||
402 | __entry->caller_ip = caller_ip; | 402 | __entry->caller_ip = caller_ip; |
403 | ), | 403 | ), |
404 | TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d " | 404 | TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d " |
405 | "lock %d flags %s caller %pf", | 405 | "lock %d flags %s caller %ps", |
406 | MAJOR(__entry->dev), MINOR(__entry->dev), | 406 | MAJOR(__entry->dev), MINOR(__entry->dev), |
407 | (unsigned long long)__entry->bno, | 407 | (unsigned long long)__entry->bno, |
408 | __entry->buffer_length, | 408 | __entry->buffer_length, |
@@ -447,7 +447,7 @@ TRACE_EVENT(xfs_buf_ioerror, | |||
447 | __entry->caller_ip = caller_ip; | 447 | __entry->caller_ip = caller_ip; |
448 | ), | 448 | ), |
449 | TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d " | 449 | TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d " |
450 | "lock %d error %d flags %s caller %pf", | 450 | "lock %d error %d flags %s caller %ps", |
451 | MAJOR(__entry->dev), MINOR(__entry->dev), | 451 | MAJOR(__entry->dev), MINOR(__entry->dev), |
452 | (unsigned long long)__entry->bno, | 452 | (unsigned long long)__entry->bno, |
453 | __entry->buffer_length, | 453 | __entry->buffer_length, |
@@ -613,7 +613,7 @@ DECLARE_EVENT_CLASS(xfs_lock_class, | |||
613 | __entry->lock_flags = lock_flags; | 613 | __entry->lock_flags = lock_flags; |
614 | __entry->caller_ip = caller_ip; | 614 | __entry->caller_ip = caller_ip; |
615 | ), | 615 | ), |
616 | TP_printk("dev %d:%d ino 0x%llx flags %s caller %pf", | 616 | TP_printk("dev %d:%d ino 0x%llx flags %s caller %ps", |
617 | MAJOR(__entry->dev), MINOR(__entry->dev), | 617 | MAJOR(__entry->dev), MINOR(__entry->dev), |
618 | __entry->ino, | 618 | __entry->ino, |
619 | __print_flags(__entry->lock_flags, "|", XFS_LOCK_FLAGS), | 619 | __print_flags(__entry->lock_flags, "|", XFS_LOCK_FLAGS), |
@@ -664,6 +664,7 @@ DEFINE_INODE_EVENT(xfs_alloc_file_space); | |||
664 | DEFINE_INODE_EVENT(xfs_free_file_space); | 664 | DEFINE_INODE_EVENT(xfs_free_file_space); |
665 | DEFINE_INODE_EVENT(xfs_zero_file_space); | 665 | DEFINE_INODE_EVENT(xfs_zero_file_space); |
666 | DEFINE_INODE_EVENT(xfs_collapse_file_space); | 666 | DEFINE_INODE_EVENT(xfs_collapse_file_space); |
667 | DEFINE_INODE_EVENT(xfs_insert_file_space); | ||
667 | DEFINE_INODE_EVENT(xfs_readdir); | 668 | DEFINE_INODE_EVENT(xfs_readdir); |
668 | #ifdef CONFIG_XFS_POSIX_ACL | 669 | #ifdef CONFIG_XFS_POSIX_ACL |
669 | DEFINE_INODE_EVENT(xfs_get_acl); | 670 | DEFINE_INODE_EVENT(xfs_get_acl); |
@@ -685,6 +686,9 @@ DEFINE_INODE_EVENT(xfs_inode_set_eofblocks_tag); | |||
685 | DEFINE_INODE_EVENT(xfs_inode_clear_eofblocks_tag); | 686 | DEFINE_INODE_EVENT(xfs_inode_clear_eofblocks_tag); |
686 | DEFINE_INODE_EVENT(xfs_inode_free_eofblocks_invalid); | 687 | DEFINE_INODE_EVENT(xfs_inode_free_eofblocks_invalid); |
687 | 688 | ||
689 | DEFINE_INODE_EVENT(xfs_filemap_fault); | ||
690 | DEFINE_INODE_EVENT(xfs_filemap_page_mkwrite); | ||
691 | |||
688 | DECLARE_EVENT_CLASS(xfs_iref_class, | 692 | DECLARE_EVENT_CLASS(xfs_iref_class, |
689 | TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip), | 693 | TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip), |
690 | TP_ARGS(ip, caller_ip), | 694 | TP_ARGS(ip, caller_ip), |
@@ -702,7 +706,7 @@ DECLARE_EVENT_CLASS(xfs_iref_class, | |||
702 | __entry->pincount = atomic_read(&ip->i_pincount); | 706 | __entry->pincount = atomic_read(&ip->i_pincount); |
703 | __entry->caller_ip = caller_ip; | 707 | __entry->caller_ip = caller_ip; |
704 | ), | 708 | ), |
705 | TP_printk("dev %d:%d ino 0x%llx count %d pincount %d caller %pf", | 709 | TP_printk("dev %d:%d ino 0x%llx count %d pincount %d caller %ps", |
706 | MAJOR(__entry->dev), MINOR(__entry->dev), | 710 | MAJOR(__entry->dev), MINOR(__entry->dev), |
707 | __entry->ino, | 711 | __entry->ino, |
708 | __entry->count, | 712 | __entry->count, |
@@ -1217,6 +1221,11 @@ DEFINE_IOMAP_EVENT(xfs_map_blocks_found); | |||
1217 | DEFINE_IOMAP_EVENT(xfs_map_blocks_alloc); | 1221 | DEFINE_IOMAP_EVENT(xfs_map_blocks_alloc); |
1218 | DEFINE_IOMAP_EVENT(xfs_get_blocks_found); | 1222 | DEFINE_IOMAP_EVENT(xfs_get_blocks_found); |
1219 | DEFINE_IOMAP_EVENT(xfs_get_blocks_alloc); | 1223 | DEFINE_IOMAP_EVENT(xfs_get_blocks_alloc); |
1224 | DEFINE_IOMAP_EVENT(xfs_gbmap_direct); | ||
1225 | DEFINE_IOMAP_EVENT(xfs_gbmap_direct_new); | ||
1226 | DEFINE_IOMAP_EVENT(xfs_gbmap_direct_update); | ||
1227 | DEFINE_IOMAP_EVENT(xfs_gbmap_direct_none); | ||
1228 | DEFINE_IOMAP_EVENT(xfs_gbmap_direct_endio); | ||
1220 | 1229 | ||
1221 | DECLARE_EVENT_CLASS(xfs_simple_io_class, | 1230 | DECLARE_EVENT_CLASS(xfs_simple_io_class, |
1222 | TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count), | 1231 | TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count), |
@@ -1333,7 +1342,7 @@ TRACE_EVENT(xfs_bunmap, | |||
1333 | __entry->flags = flags; | 1342 | __entry->flags = flags; |
1334 | ), | 1343 | ), |
1335 | TP_printk("dev %d:%d ino 0x%llx size 0x%llx bno 0x%llx len 0x%llx" | 1344 | TP_printk("dev %d:%d ino 0x%llx size 0x%llx bno 0x%llx len 0x%llx" |
1336 | "flags %s caller %pf", | 1345 | "flags %s caller %ps", |
1337 | MAJOR(__entry->dev), MINOR(__entry->dev), | 1346 | MAJOR(__entry->dev), MINOR(__entry->dev), |
1338 | __entry->ino, | 1347 | __entry->ino, |
1339 | __entry->size, | 1348 | __entry->size, |
@@ -1466,7 +1475,7 @@ TRACE_EVENT(xfs_agf, | |||
1466 | ), | 1475 | ), |
1467 | TP_printk("dev %d:%d agno %u flags %s length %u roots b %u c %u " | 1476 | TP_printk("dev %d:%d agno %u flags %s length %u roots b %u c %u " |
1468 | "levels b %u c %u flfirst %u fllast %u flcount %u " | 1477 | "levels b %u c %u flfirst %u fllast %u flcount %u " |
1469 | "freeblks %u longest %u caller %pf", | 1478 | "freeblks %u longest %u caller %ps", |
1470 | MAJOR(__entry->dev), MINOR(__entry->dev), | 1479 | MAJOR(__entry->dev), MINOR(__entry->dev), |
1471 | __entry->agno, | 1480 | __entry->agno, |
1472 | __print_flags(__entry->flags, "|", XFS_AGF_FLAGS), | 1481 | __print_flags(__entry->flags, "|", XFS_AGF_FLAGS), |
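The trace format strings switch from %pf to %ps. Both render a kernel address symbolically, but %pf/%pF expect a function *descriptor* (distinct from a plain pointer only on ia64/ppc64-style ABIs), while the caller_ip these events record is a plain code address from _RET_IP_, for which %ps/%pS is the right specifier:

	pr_debug("called from %pS\n", (void *)_RET_IP_);	/* symbol+offset */
	pr_debug("called from %ps\n", (void *)_RET_IP_);	/* symbol only */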
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index eb90cd59a0ec..220ef2c906b2 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c | |||
@@ -173,7 +173,7 @@ xfs_trans_reserve( | |||
173 | uint rtextents) | 173 | uint rtextents) |
174 | { | 174 | { |
175 | int error = 0; | 175 | int error = 0; |
176 | int rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0; | 176 | bool rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0; |
177 | 177 | ||
178 | /* Mark this thread as being in a transaction */ | 178 | /* Mark this thread as being in a transaction */ |
179 | current_set_flags_nested(&tp->t_pflags, PF_FSTRANS); | 179 | current_set_flags_nested(&tp->t_pflags, PF_FSTRANS); |
@@ -184,8 +184,7 @@ xfs_trans_reserve( | |||
184 | * fail if the count would go below zero. | 184 | * fail if the count would go below zero. |
185 | */ | 185 | */ |
186 | if (blocks > 0) { | 186 | if (blocks > 0) { |
187 | error = xfs_icsb_modify_counters(tp->t_mountp, XFS_SBS_FDBLOCKS, | 187 | error = xfs_mod_fdblocks(tp->t_mountp, -((int64_t)blocks), rsvd); |
188 | -((int64_t)blocks), rsvd); | ||
189 | if (error != 0) { | 188 | if (error != 0) { |
190 | current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); | 189 | current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); |
191 | return -ENOSPC; | 190 | return -ENOSPC; |
@@ -236,8 +235,7 @@ xfs_trans_reserve( | |||
236 | * fail if the count would go below zero. | 235 | * fail if the count would go below zero. |
237 | */ | 236 | */ |
238 | if (rtextents > 0) { | 237 | if (rtextents > 0) { |
239 | error = xfs_mod_incore_sb(tp->t_mountp, XFS_SBS_FREXTENTS, | 238 | error = xfs_mod_frextents(tp->t_mountp, -((int64_t)rtextents)); |
240 | -((int64_t)rtextents), rsvd); | ||
241 | if (error) { | 239 | if (error) { |
242 | error = -ENOSPC; | 240 | error = -ENOSPC; |
243 | goto undo_log; | 241 | goto undo_log; |
@@ -268,8 +266,7 @@ undo_log: | |||
268 | 266 | ||
269 | undo_blocks: | 267 | undo_blocks: |
270 | if (blocks > 0) { | 268 | if (blocks > 0) { |
271 | xfs_icsb_modify_counters(tp->t_mountp, XFS_SBS_FDBLOCKS, | 269 | xfs_mod_fdblocks(tp->t_mountp, (int64_t)blocks, rsvd); |
272 | (int64_t)blocks, rsvd); | ||
273 | tp->t_blk_res = 0; | 270 | tp->t_blk_res = 0; |
274 | } | 271 | } |
275 | 272 | ||
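xfs_trans_reserve() now calls the typed helpers directly, and the sign convention carries the semantics: a negative delta takes space (and may fail with -ENOSPC), a positive delta gives it back, and rsvd allows dipping into the reserved pool. The reserve/undo pairing used above, in isolation:

	error = xfs_mod_fdblocks(mp, -(int64_t)blocks, rsvd);	/* reserve */
	if (error)
		return -ENOSPC;
	/* ... later, on failure of a subsequent step ... */
	xfs_mod_fdblocks(mp, (int64_t)blocks, rsvd);		/* give back */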
@@ -488,6 +485,54 @@ xfs_trans_apply_sb_deltas( | |||
488 | sizeof(sbp->sb_frextents) - 1); | 485 | sizeof(sbp->sb_frextents) - 1); |
489 | } | 486 | } |
490 | 487 | ||
488 | STATIC int | ||
489 | xfs_sb_mod8( | ||
490 | uint8_t *field, | ||
491 | int8_t delta) | ||
492 | { | ||
493 | int8_t counter = *field; | ||
494 | |||
495 | counter += delta; | ||
496 | if (counter < 0) { | ||
497 | ASSERT(0); | ||
498 | return -EINVAL; | ||
499 | } | ||
500 | *field = counter; | ||
501 | return 0; | ||
502 | } | ||
503 | |||
504 | STATIC int | ||
505 | xfs_sb_mod32( | ||
506 | uint32_t *field, | ||
507 | int32_t delta) | ||
508 | { | ||
509 | int32_t counter = *field; | ||
510 | |||
511 | counter += delta; | ||
512 | if (counter < 0) { | ||
513 | ASSERT(0); | ||
514 | return -EINVAL; | ||
515 | } | ||
516 | *field = counter; | ||
517 | return 0; | ||
518 | } | ||
519 | |||
520 | STATIC int | ||
521 | xfs_sb_mod64( | ||
522 | uint64_t *field, | ||
523 | int64_t delta) | ||
524 | { | ||
525 | int64_t counter = *field; | ||
526 | |||
527 | counter += delta; | ||
528 | if (counter < 0) { | ||
529 | ASSERT(0); | ||
530 | return -EINVAL; | ||
531 | } | ||
532 | *field = counter; | ||
533 | return 0; | ||
534 | } | ||
535 | |||
491 | /* | 536 | /* |
492 | * xfs_trans_unreserve_and_mod_sb() is called to release unused reservations | 537 | * xfs_trans_unreserve_and_mod_sb() is called to release unused reservations |
493 | * and apply superblock counter changes to the in-core superblock. The | 538 | * and apply superblock counter changes to the in-core superblock. The |
@@ -495,13 +540,6 @@ xfs_trans_apply_sb_deltas( | |||
495 | * applied to the in-core superblock. The idea is that that has already been | 540 | * applied to the in-core superblock. The idea is that that has already been |
496 | * done. | 541 | * done. |
497 | * | 542 | * |
498 | * This is done efficiently with a single call to xfs_mod_incore_sb_batch(). | ||
499 | * However, we have to ensure that we only modify each superblock field only | ||
500 | * once because the application of the delta values may not be atomic. That can | ||
501 | * lead to ENOSPC races occurring if we have two separate modifications of the | ||
502 | * free space counter to put back the entire reservation and then take away | ||
503 | * what we used. | ||
504 | * | ||
505 | * If we are not logging superblock counters, then the inode allocated/free and | 543 | * If we are not logging superblock counters, then the inode allocated/free and |
506 | * used block counts are not updated in the on disk superblock. In this case, | 544 | * used block counts are not updated in the on disk superblock. In this case, |
507 | * XFS_TRANS_SB_DIRTY will not be set when the transaction is updated but we | 545 | * XFS_TRANS_SB_DIRTY will not be set when the transaction is updated but we |
@@ -509,21 +547,15 @@ xfs_trans_apply_sb_deltas( | |||
509 | */ | 547 | */ |
510 | void | 548 | void |
511 | xfs_trans_unreserve_and_mod_sb( | 549 | xfs_trans_unreserve_and_mod_sb( |
512 | xfs_trans_t *tp) | 550 | struct xfs_trans *tp) |
513 | { | 551 | { |
514 | xfs_mod_sb_t msb[9]; /* If you add cases, add entries */ | 552 | struct xfs_mount *mp = tp->t_mountp; |
515 | xfs_mod_sb_t *msbp; | 553 | bool rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0; |
516 | xfs_mount_t *mp = tp->t_mountp; | 554 | int64_t blkdelta = 0; |
517 | /* REFERENCED */ | 555 | int64_t rtxdelta = 0; |
518 | int error; | 556 | int64_t idelta = 0; |
519 | int rsvd; | 557 | int64_t ifreedelta = 0; |
520 | int64_t blkdelta = 0; | 558 | int error; |
521 | int64_t rtxdelta = 0; | ||
522 | int64_t idelta = 0; | ||
523 | int64_t ifreedelta = 0; | ||
524 | |||
525 | msbp = msb; | ||
526 | rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0; | ||
527 | 559 | ||
528 | /* calculate deltas */ | 560 | /* calculate deltas */ |
529 | if (tp->t_blk_res > 0) | 561 | if (tp->t_blk_res > 0) |
@@ -547,97 +579,115 @@ xfs_trans_unreserve_and_mod_sb( | |||
547 | 579 | ||
548 | /* apply the per-cpu counters */ | 580 | /* apply the per-cpu counters */ |
549 | if (blkdelta) { | 581 | if (blkdelta) { |
550 | error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, | 582 | error = xfs_mod_fdblocks(mp, blkdelta, rsvd); |
551 | blkdelta, rsvd); | ||
552 | if (error) | 583 | if (error) |
553 | goto out; | 584 | goto out; |
554 | } | 585 | } |
555 | 586 | ||
556 | if (idelta) { | 587 | if (idelta) { |
557 | error = xfs_icsb_modify_counters(mp, XFS_SBS_ICOUNT, | 588 | error = xfs_mod_icount(mp, idelta); |
558 | idelta, rsvd); | ||
559 | if (error) | 589 | if (error) |
560 | goto out_undo_fdblocks; | 590 | goto out_undo_fdblocks; |
561 | } | 591 | } |
562 | 592 | ||
563 | if (ifreedelta) { | 593 | if (ifreedelta) { |
564 | error = xfs_icsb_modify_counters(mp, XFS_SBS_IFREE, | 594 | error = xfs_mod_ifree(mp, ifreedelta); |
565 | ifreedelta, rsvd); | ||
566 | if (error) | 595 | if (error) |
567 | goto out_undo_icount; | 596 | goto out_undo_icount; |
568 | } | 597 | } |
569 | 598 | ||
599 | if (rtxdelta == 0 && !(tp->t_flags & XFS_TRANS_SB_DIRTY)) | ||
600 | return; | ||
601 | |||
570 | /* apply remaining deltas */ | 602 | /* apply remaining deltas */ |
571 | if (rtxdelta != 0) { | 603 | spin_lock(&mp->m_sb_lock); |
572 | msbp->msb_field = XFS_SBS_FREXTENTS; | 604 | if (rtxdelta) { |
573 | msbp->msb_delta = rtxdelta; | 605 | error = xfs_sb_mod64(&mp->m_sb.sb_frextents, rtxdelta); |
574 | msbp++; | 606 | if (error) |
607 | goto out_undo_ifree; | ||
575 | } | 608 | } |
576 | 609 | ||
577 | if (tp->t_flags & XFS_TRANS_SB_DIRTY) { | 610 | if (tp->t_dblocks_delta != 0) { |
578 | if (tp->t_dblocks_delta != 0) { | 611 | error = xfs_sb_mod64(&mp->m_sb.sb_dblocks, tp->t_dblocks_delta); |
579 | msbp->msb_field = XFS_SBS_DBLOCKS; | 612 | if (error) |
580 | msbp->msb_delta = tp->t_dblocks_delta; | 613 | goto out_undo_frextents; |
581 | msbp++; | ||
582 | } | ||
583 | if (tp->t_agcount_delta != 0) { | ||
584 | msbp->msb_field = XFS_SBS_AGCOUNT; | ||
585 | msbp->msb_delta = tp->t_agcount_delta; | ||
586 | msbp++; | ||
587 | } | ||
588 | if (tp->t_imaxpct_delta != 0) { | ||
589 | msbp->msb_field = XFS_SBS_IMAX_PCT; | ||
590 | msbp->msb_delta = tp->t_imaxpct_delta; | ||
591 | msbp++; | ||
592 | } | ||
593 | if (tp->t_rextsize_delta != 0) { | ||
594 | msbp->msb_field = XFS_SBS_REXTSIZE; | ||
595 | msbp->msb_delta = tp->t_rextsize_delta; | ||
596 | msbp++; | ||
597 | } | ||
598 | if (tp->t_rbmblocks_delta != 0) { | ||
599 | msbp->msb_field = XFS_SBS_RBMBLOCKS; | ||
600 | msbp->msb_delta = tp->t_rbmblocks_delta; | ||
601 | msbp++; | ||
602 | } | ||
603 | if (tp->t_rblocks_delta != 0) { | ||
604 | msbp->msb_field = XFS_SBS_RBLOCKS; | ||
605 | msbp->msb_delta = tp->t_rblocks_delta; | ||
606 | msbp++; | ||
607 | } | ||
608 | if (tp->t_rextents_delta != 0) { | ||
609 | msbp->msb_field = XFS_SBS_REXTENTS; | ||
610 | msbp->msb_delta = tp->t_rextents_delta; | ||
611 | msbp++; | ||
612 | } | ||
613 | if (tp->t_rextslog_delta != 0) { | ||
614 | msbp->msb_field = XFS_SBS_REXTSLOG; | ||
615 | msbp->msb_delta = tp->t_rextslog_delta; | ||
616 | msbp++; | ||
617 | } | ||
618 | } | 614 | } |
619 | 615 | if (tp->t_agcount_delta != 0) { | |
620 | /* | 616 | error = xfs_sb_mod32(&mp->m_sb.sb_agcount, tp->t_agcount_delta); |
621 | * If we need to change anything, do it. | ||
622 | */ | ||
623 | if (msbp > msb) { | ||
624 | error = xfs_mod_incore_sb_batch(tp->t_mountp, msb, | ||
625 | (uint)(msbp - msb), rsvd); | ||
626 | if (error) | 617 | if (error) |
627 | goto out_undo_ifreecount; | 618 | goto out_undo_dblocks; |
628 | } | 619 | } |
629 | 620 | if (tp->t_imaxpct_delta != 0) { | |
621 | error = xfs_sb_mod8(&mp->m_sb.sb_imax_pct, tp->t_imaxpct_delta); | ||
622 | if (error) | ||
623 | goto out_undo_agcount; | ||
624 | } | ||
625 | if (tp->t_rextsize_delta != 0) { | ||
626 | error = xfs_sb_mod32(&mp->m_sb.sb_rextsize, | ||
627 | tp->t_rextsize_delta); | ||
628 | if (error) | ||
629 | goto out_undo_imaxpct; | ||
630 | } | ||
631 | if (tp->t_rbmblocks_delta != 0) { | ||
632 | error = xfs_sb_mod32(&mp->m_sb.sb_rbmblocks, | ||
633 | tp->t_rbmblocks_delta); | ||
634 | if (error) | ||
635 | goto out_undo_rextsize; | ||
636 | } | ||
637 | if (tp->t_rblocks_delta != 0) { | ||
638 | error = xfs_sb_mod64(&mp->m_sb.sb_rblocks, tp->t_rblocks_delta); | ||
639 | if (error) | ||
640 | goto out_undo_rbmblocks; | ||
641 | } | ||
642 | if (tp->t_rextents_delta != 0) { | ||
643 | error = xfs_sb_mod64(&mp->m_sb.sb_rextents, | ||
644 | tp->t_rextents_delta); | ||
645 | if (error) | ||
646 | goto out_undo_rblocks; | ||
647 | } | ||
648 | if (tp->t_rextslog_delta != 0) { | ||
649 | error = xfs_sb_mod8(&mp->m_sb.sb_rextslog, | ||
650 | tp->t_rextslog_delta); | ||
651 | if (error) | ||
652 | goto out_undo_rextents; | ||
653 | } | ||
654 | spin_unlock(&mp->m_sb_lock); | ||
630 | return; | 655 | return; |
631 | 656 | ||
632 | out_undo_ifreecount: | 657 | out_undo_rextents: |
658 | if (tp->t_rextents_delta) | ||
659 | xfs_sb_mod64(&mp->m_sb.sb_rextents, -tp->t_rextents_delta); | ||
660 | out_undo_rblocks: | ||
661 | if (tp->t_rblocks_delta) | ||
662 | xfs_sb_mod64(&mp->m_sb.sb_rblocks, -tp->t_rblocks_delta); | ||
663 | out_undo_rbmblocks: | ||
664 | if (tp->t_rbmblocks_delta) | ||
665 | xfs_sb_mod32(&mp->m_sb.sb_rbmblocks, -tp->t_rbmblocks_delta); | ||
666 | out_undo_rextsize: | ||
667 | if (tp->t_rextsize_delta) | ||
668 | xfs_sb_mod32(&mp->m_sb.sb_rextsize, -tp->t_rextsize_delta); | ||
669 | out_undo_imaxpct: | ||
670 | if (tp->t_imaxpct_delta) | ||
671 | xfs_sb_mod8(&mp->m_sb.sb_imax_pct, -tp->t_imaxpct_delta); | ||
672 | out_undo_agcount: | ||
673 | if (tp->t_agcount_delta) | ||
674 | xfs_sb_mod32(&mp->m_sb.sb_agcount, -tp->t_agcount_delta); | ||
675 | out_undo_dblocks: | ||
676 | if (tp->t_dblocks_delta) | ||
677 | xfs_sb_mod64(&mp->m_sb.sb_dblocks, -tp->t_dblocks_delta); | ||
678 | out_undo_frextents: | ||
679 | if (rtxdelta) | ||
680 | xfs_sb_mod64(&mp->m_sb.sb_frextents, -rtxdelta); | ||
681 | out_undo_ifree: | ||
682 | spin_unlock(&mp->m_sb_lock); | ||
633 | if (ifreedelta) | 683 | if (ifreedelta) |
634 | xfs_icsb_modify_counters(mp, XFS_SBS_IFREE, -ifreedelta, rsvd); | 684 | xfs_mod_ifree(mp, -ifreedelta); |
635 | out_undo_icount: | 685 | out_undo_icount: |
636 | if (idelta) | 686 | if (idelta) |
637 | xfs_icsb_modify_counters(mp, XFS_SBS_ICOUNT, -idelta, rsvd); | 687 | xfs_mod_icount(mp, -idelta); |
638 | out_undo_fdblocks: | 688 | out_undo_fdblocks: |
639 | if (blkdelta) | 689 | if (blkdelta) |
640 | xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, -blkdelta, rsvd); | 690 | xfs_mod_fdblocks(mp, -blkdelta, rsvd); |
641 | out: | 691 | out: |
642 | ASSERT(error == 0); | 692 | ASSERT(error == 0); |
643 | return; | 693 | return; |
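The rewritten xfs_trans_unreserve_and_mod_sb() replaces the xfs_mod_sb batch array with direct, width-checked modifications under m_sb_lock, unwinding in exact reverse order on failure and dropping the spinlock only before undoing the lock-free percpu counters. Reduced to its shape, with hypothetical apply/undo steps:

	error = apply_a();		/* percpu counter, no lock needed */
	if (error)
		goto out;
	spin_lock(&mp->m_sb_lock);
	error = apply_b();
	if (error)
		goto out_undo_a;
	error = apply_c();
	if (error)
		goto out_undo_b;
	spin_unlock(&mp->m_sb_lock);
	return;

out_undo_b:
	undo_b();			/* still under m_sb_lock */
out_undo_a:
	spin_unlock(&mp->m_sb_lock);
	undo_a();
out:
	ASSERT(error == 0);		/* debug-time trip wire: undo must not fail */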