diff options
Diffstat (limited to 'fs/btrfs/tree-log.c')
-rw-r--r-- | fs/btrfs/tree-log.c | 226 |
1 files changed, 221 insertions, 5 deletions
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 1ce80c1c4eb6..9c45431e69ab 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c | |||
@@ -4117,6 +4117,187 @@ static int logged_inode_size(struct btrfs_root *log, struct inode *inode, | |||
4117 | return 0; | 4117 | return 0; |
4118 | } | 4118 | } |
4119 | 4119 | ||
4120 | /* | ||
4121 | * At the moment we always log all xattrs. This is to figure out at log replay | ||
4122 | * time which xattrs must have their deletion replayed. If a xattr is missing | ||
4123 | * in the log tree and exists in the fs/subvol tree, we delete it. This is | ||
4124 | * because if a xattr is deleted, the inode is fsynced and a power failure | ||
4125 | * happens, causing the log to be replayed the next time the fs is mounted, | ||
4126 | * we want the xattr to not exist anymore (same behaviour as other filesystems | ||
4127 | * with a journal, ext3/4, xfs, f2fs, etc). | ||
4128 | */ | ||
4129 | static int btrfs_log_all_xattrs(struct btrfs_trans_handle *trans, | ||
4130 | struct btrfs_root *root, | ||
4131 | struct inode *inode, | ||
4132 | struct btrfs_path *path, | ||
4133 | struct btrfs_path *dst_path) | ||
4134 | { | ||
4135 | int ret; | ||
4136 | struct btrfs_key key; | ||
4137 | const u64 ino = btrfs_ino(inode); | ||
4138 | int ins_nr = 0; | ||
4139 | int start_slot = 0; | ||
4140 | |||
4141 | key.objectid = ino; | ||
4142 | key.type = BTRFS_XATTR_ITEM_KEY; | ||
4143 | key.offset = 0; | ||
4144 | |||
4145 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | ||
4146 | if (ret < 0) | ||
4147 | return ret; | ||
4148 | |||
4149 | while (true) { | ||
4150 | int slot = path->slots[0]; | ||
4151 | struct extent_buffer *leaf = path->nodes[0]; | ||
4152 | int nritems = btrfs_header_nritems(leaf); | ||
4153 | |||
4154 | if (slot >= nritems) { | ||
4155 | if (ins_nr > 0) { | ||
4156 | u64 last_extent = 0; | ||
4157 | |||
4158 | ret = copy_items(trans, inode, dst_path, path, | ||
4159 | &last_extent, start_slot, | ||
4160 | ins_nr, 1, 0); | ||
4161 | /* can't be 1, extent items aren't processed */ | ||
4162 | ASSERT(ret <= 0); | ||
4163 | if (ret < 0) | ||
4164 | return ret; | ||
4165 | ins_nr = 0; | ||
4166 | } | ||
4167 | ret = btrfs_next_leaf(root, path); | ||
4168 | if (ret < 0) | ||
4169 | return ret; | ||
4170 | else if (ret > 0) | ||
4171 | break; | ||
4172 | continue; | ||
4173 | } | ||
4174 | |||
4175 | btrfs_item_key_to_cpu(leaf, &key, slot); | ||
4176 | if (key.objectid != ino || key.type != BTRFS_XATTR_ITEM_KEY) | ||
4177 | break; | ||
4178 | |||
4179 | if (ins_nr == 0) | ||
4180 | start_slot = slot; | ||
4181 | ins_nr++; | ||
4182 | path->slots[0]++; | ||
4183 | cond_resched(); | ||
4184 | } | ||
4185 | if (ins_nr > 0) { | ||
4186 | u64 last_extent = 0; | ||
4187 | |||
4188 | ret = copy_items(trans, inode, dst_path, path, | ||
4189 | &last_extent, start_slot, | ||
4190 | ins_nr, 1, 0); | ||
4191 | /* can't be 1, extent items aren't processed */ | ||
4192 | ASSERT(ret <= 0); | ||
4193 | if (ret < 0) | ||
4194 | return ret; | ||
4195 | } | ||
4196 | |||
4197 | return 0; | ||
4198 | } | ||
4199 | |||
4200 | /* | ||
4201 | * If the no holes feature is enabled we need to make sure any hole between the | ||
4202 | * last extent and the i_size of our inode is explicitly marked in the log. This | ||
4203 | * is to make sure that doing something like: | ||
4204 | * | ||
4205 | * 1) create file with 128Kb of data | ||
4206 | * 2) truncate file to 64Kb | ||
4207 | * 3) truncate file to 256Kb | ||
4208 | * 4) fsync file | ||
4209 | * 5) <crash/power failure> | ||
4210 | * 6) mount fs and trigger log replay | ||
4211 | * | ||
4212 | * Will give us a file with a size of 256Kb, the first 64Kb of data match what | ||
4213 | * the file had in its first 64Kb of data at step 1 and the last 192Kb of the | ||
4214 | * file correspond to a hole. The presence of explicit holes in a log tree is | ||
4215 | * what guarantees that log replay will remove/adjust file extent items in the | ||
4216 | * fs/subvol tree. | ||
4217 | * | ||
4218 | * Here we do not need to care about holes between extents, that is already done | ||
4219 | * by copy_items(). We also only need to do this in the full sync path, where we | ||
4220 | * lookup for extents from the fs/subvol tree only. In the fast path case, we | ||
4221 | * lookup the list of modified extent maps and if any represents a hole, we | ||
4222 | * insert a corresponding extent representing a hole in the log tree. | ||
4223 | */ | ||
4224 | static int btrfs_log_trailing_hole(struct btrfs_trans_handle *trans, | ||
4225 | struct btrfs_root *root, | ||
4226 | struct inode *inode, | ||
4227 | struct btrfs_path *path) | ||
4228 | { | ||
4229 | int ret; | ||
4230 | struct btrfs_key key; | ||
4231 | u64 hole_start; | ||
4232 | u64 hole_size; | ||
4233 | struct extent_buffer *leaf; | ||
4234 | struct btrfs_root *log = root->log_root; | ||
4235 | const u64 ino = btrfs_ino(inode); | ||
4236 | const u64 i_size = i_size_read(inode); | ||
4237 | |||
4238 | if (!btrfs_fs_incompat(root->fs_info, NO_HOLES)) | ||
4239 | return 0; | ||
4240 | |||
4241 | key.objectid = ino; | ||
4242 | key.type = BTRFS_EXTENT_DATA_KEY; | ||
4243 | key.offset = (u64)-1; | ||
4244 | |||
4245 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | ||
4246 | ASSERT(ret != 0); | ||
4247 | if (ret < 0) | ||
4248 | return ret; | ||
4249 | |||
4250 | ASSERT(path->slots[0] > 0); | ||
4251 | path->slots[0]--; | ||
4252 | leaf = path->nodes[0]; | ||
4253 | btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); | ||
4254 | |||
4255 | if (key.objectid != ino || key.type != BTRFS_EXTENT_DATA_KEY) { | ||
4256 | /* inode does not have any extents */ | ||
4257 | hole_start = 0; | ||
4258 | hole_size = i_size; | ||
4259 | } else { | ||
4260 | struct btrfs_file_extent_item *extent; | ||
4261 | u64 len; | ||
4262 | |||
4263 | /* | ||
4264 | * If there's an extent beyond i_size, an explicit hole was | ||
4265 | * already inserted by copy_items(). | ||
4266 | */ | ||
4267 | if (key.offset >= i_size) | ||
4268 | return 0; | ||
4269 | |||
4270 | extent = btrfs_item_ptr(leaf, path->slots[0], | ||
4271 | struct btrfs_file_extent_item); | ||
4272 | |||
4273 | if (btrfs_file_extent_type(leaf, extent) == | ||
4274 | BTRFS_FILE_EXTENT_INLINE) { | ||
4275 | len = btrfs_file_extent_inline_len(leaf, | ||
4276 | path->slots[0], | ||
4277 | extent); | ||
4278 | ASSERT(len == i_size); | ||
4279 | return 0; | ||
4280 | } | ||
4281 | |||
4282 | len = btrfs_file_extent_num_bytes(leaf, extent); | ||
4283 | /* Last extent goes beyond i_size, no need to log a hole. */ | ||
4284 | if (key.offset + len > i_size) | ||
4285 | return 0; | ||
4286 | hole_start = key.offset + len; | ||
4287 | hole_size = i_size - hole_start; | ||
4288 | } | ||
4289 | btrfs_release_path(path); | ||
4290 | |||
4291 | /* Last extent ends at i_size. */ | ||
4292 | if (hole_size == 0) | ||
4293 | return 0; | ||
4294 | |||
4295 | hole_size = ALIGN(hole_size, root->sectorsize); | ||
4296 | ret = btrfs_insert_file_extent(trans, log, ino, hole_start, 0, 0, | ||
4297 | hole_size, 0, hole_size, 0, 0, 0); | ||
4298 | return ret; | ||
4299 | } | ||
4300 | |||
4120 | /* log a single inode in the tree log. | 4301 | /* log a single inode in the tree log. |
4121 | * At least one parent directory for this inode must exist in the tree | 4302 | * At least one parent directory for this inode must exist in the tree |
4122 | * or be logged already. | 4303 | * or be logged already. |
@@ -4155,6 +4336,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, | |||
4155 | u64 ino = btrfs_ino(inode); | 4336 | u64 ino = btrfs_ino(inode); |
4156 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | 4337 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; |
4157 | u64 logged_isize = 0; | 4338 | u64 logged_isize = 0; |
4339 | bool need_log_inode_item = true; | ||
4158 | 4340 | ||
4159 | path = btrfs_alloc_path(); | 4341 | path = btrfs_alloc_path(); |
4160 | if (!path) | 4342 | if (!path) |
@@ -4263,11 +4445,6 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, | |||
4263 | } else { | 4445 | } else { |
4264 | if (inode_only == LOG_INODE_ALL) | 4446 | if (inode_only == LOG_INODE_ALL) |
4265 | fast_search = true; | 4447 | fast_search = true; |
4266 | ret = log_inode_item(trans, log, dst_path, inode); | ||
4267 | if (ret) { | ||
4268 | err = ret; | ||
4269 | goto out_unlock; | ||
4270 | } | ||
4271 | goto log_extents; | 4448 | goto log_extents; |
4272 | } | 4449 | } |
4273 | 4450 | ||
@@ -4290,6 +4467,28 @@ again: | |||
4290 | if (min_key.type > max_key.type) | 4467 | if (min_key.type > max_key.type) |
4291 | break; | 4468 | break; |
4292 | 4469 | ||
4470 | if (min_key.type == BTRFS_INODE_ITEM_KEY) | ||
4471 | need_log_inode_item = false; | ||
4472 | |||
4473 | /* Skip xattrs, we log them later with btrfs_log_all_xattrs() */ | ||
4474 | if (min_key.type == BTRFS_XATTR_ITEM_KEY) { | ||
4475 | if (ins_nr == 0) | ||
4476 | goto next_slot; | ||
4477 | ret = copy_items(trans, inode, dst_path, path, | ||
4478 | &last_extent, ins_start_slot, | ||
4479 | ins_nr, inode_only, logged_isize); | ||
4480 | if (ret < 0) { | ||
4481 | err = ret; | ||
4482 | goto out_unlock; | ||
4483 | } | ||
4484 | ins_nr = 0; | ||
4485 | if (ret) { | ||
4486 | btrfs_release_path(path); | ||
4487 | continue; | ||
4488 | } | ||
4489 | goto next_slot; | ||
4490 | } | ||
4491 | |||
4293 | src = path->nodes[0]; | 4492 | src = path->nodes[0]; |
4294 | if (ins_nr && ins_start_slot + ins_nr == path->slots[0]) { | 4493 | if (ins_nr && ins_start_slot + ins_nr == path->slots[0]) { |
4295 | ins_nr++; | 4494 | ins_nr++; |
@@ -4357,9 +4556,26 @@ next_slot: | |||
4357 | ins_nr = 0; | 4556 | ins_nr = 0; |
4358 | } | 4557 | } |
4359 | 4558 | ||
4559 | btrfs_release_path(path); | ||
4560 | btrfs_release_path(dst_path); | ||
4561 | err = btrfs_log_all_xattrs(trans, root, inode, path, dst_path); | ||
4562 | if (err) | ||
4563 | goto out_unlock; | ||
4564 | if (max_key.type >= BTRFS_EXTENT_DATA_KEY && !fast_search) { | ||
4565 | btrfs_release_path(path); | ||
4566 | btrfs_release_path(dst_path); | ||
4567 | err = btrfs_log_trailing_hole(trans, root, inode, path); | ||
4568 | if (err) | ||
4569 | goto out_unlock; | ||
4570 | } | ||
4360 | log_extents: | 4571 | log_extents: |
4361 | btrfs_release_path(path); | 4572 | btrfs_release_path(path); |
4362 | btrfs_release_path(dst_path); | 4573 | btrfs_release_path(dst_path); |
4574 | if (need_log_inode_item) { | ||
4575 | err = log_inode_item(trans, log, dst_path, inode); | ||
4576 | if (err) | ||
4577 | goto out_unlock; | ||
4578 | } | ||
4363 | if (fast_search) { | 4579 | if (fast_search) { |
4364 | /* | 4580 | /* |
4365 | * Some ordered extents started by fsync might have completed | 4581 | * Some ordered extents started by fsync might have completed |