summary refs log tree commit diff stats
path: root/fs/btrfs/tree-log.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/tree-log.c')
-rw-r--r-- fs/btrfs/tree-log.c 137
1 files changed, 112 insertions, 25 deletions
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index c1509547c762..8f23a94dab77 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -4320,6 +4320,110 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
4320 return ret; 4320 return ret;
4321} 4321}
4322 4322
4323/*
4324 * Log all prealloc extents beyond the inode's i_size to make sure we do not
4325 * lose them after doing a fast fsync and replaying the log. We scan the
4326 * subvolume's root instead of iterating the inode's extent map tree because
4327 * otherwise we can log incorrect extent items based on extent map conversion.
4328 * That can happen due to the fact that extent maps are merged when they
4329 * are not in the extent map tree's list of modified extents.
     *
     * @trans: transaction handle, handed to copy_items() and
     *         btrfs_truncate_inode_items().
     * @inode: inode whose file extent items at or beyond i_size are scanned in
     *         inode->root.
     * @path:  caller-provided path used to walk inode->root; it is released
     *         before this function returns.
     *
     * Returns 0 right away when the inode does not have BTRFS_INODE_PREALLOC
     * set. Negative values from btrfs_search_slot(), copy_items() and
     * btrfs_next_leaf(), any non-zero value from btrfs_truncate_inode_items(),
     * and -ENOMEM when the destination path cannot be allocated are returned
     * to the caller.
     *
     * NOTE(review): the result of the initial btrfs_search_slot() is only
     * checked for < 0. If that call can return a positive value and the loop
     * is left through the "key.objectid > ino" break with an empty batch, that
     * positive value appears to be returned unchanged - confirm callers
     * tolerate it.
4330 */
4331static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans,
4332 struct btrfs_inode *inode,
4333 struct btrfs_path *path)
4334{
4335 struct btrfs_root *root = inode->root;
4336 struct btrfs_key key;
4337 const u64 i_size = i_size_read(&inode->vfs_inode);
4338 const u64 ino = btrfs_ino(inode);
     /* Allocated lazily, on the first item that qualifies for logging. */
4339 struct btrfs_path *dst_path = NULL;
     /* (u64)-1 doubles as the "no qualifying extent item seen yet" marker. */
4340 u64 last_extent = (u64)-1;
     /* Number of consecutive slots, starting at start_slot, pending a copy. */
4341 int ins_nr = 0;
     /* Only read while ins_nr > 0; assigned whenever a new batch is started. */
4342 int start_slot;
4343 int ret;
4344
     /* Nothing to do unless the inode carries the prealloc flag. */
4345 if (!(inode->flags & BTRFS_INODE_PREALLOC))
4346 return 0;
4347
     /* Begin the scan at this inode's extent data key for offset i_size. */
4348 key.objectid = ino;
4349 key.type = BTRFS_EXTENT_DATA_KEY;
4350 key.offset = i_size;
4351 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
4352 if (ret < 0)
4353 goto out;
4354
4355 while (true) {
4356 struct extent_buffer *leaf = path->nodes[0];
4357 int slot = path->slots[0];
4358
     /*
      * The slot is past the last item of this leaf: copy the batch
      * gathered so far, then advance to the next leaf.
      */
4359 if (slot >= btrfs_header_nritems(leaf)) {
4360 if (ins_nr > 0) {
4361 ret = copy_items(trans, inode, dst_path, path,
4362 &last_extent, start_slot,
4363 ins_nr, 1, 0);
4364 if (ret < 0)
4365 goto out;
4366 ins_nr = 0;
4367 }
4368 ret = btrfs_next_leaf(root, path);
4369 if (ret < 0)
4370 goto out;
     /* Positive presumably means there is no further leaf - finish cleanly. */
4371 if (ret > 0) {
4372 ret = 0;
4373 break;
4374 }
4375 continue;
4376 }
4377
4378 btrfs_item_key_to_cpu(leaf, &key, slot);
     /* Reached an item with a higher objectid than this inode: stop. */
4379 if (key.objectid > ino)
4380 break;
     /*
      * Skip items with a lower objectid (unexpected, warned about once),
      * a type below extent data, or an offset before i_size.
      */
4381 if (WARN_ON_ONCE(key.objectid < ino) ||
4382 key.type < BTRFS_EXTENT_DATA_KEY ||
4383 key.offset < i_size) {
4384 path->slots[0]++;
4385 continue;
4386 }
     /* First qualifying item: runs once, since last_extent is set here. */
4387 if (last_extent == (u64)-1) {
4388 last_extent = key.offset;
4389 /*
4390 * Avoid logging extent items logged in past fsync calls
4391 * and leading to duplicate keys in the log tree.
4392 */
     /* Operates on root->log_root from i_size; retried while -EAGAIN. */
4393 do {
4394 ret = btrfs_truncate_inode_items(trans,
4395 root->log_root,
4396 &inode->vfs_inode,
4397 i_size,
4398 BTRFS_EXTENT_DATA_KEY);
4399 } while (ret == -EAGAIN);
4400 if (ret)
4401 goto out;
4402 }
     /* Start a new batch at this slot, or extend the current one. */
4403 if (ins_nr == 0)
4404 start_slot = slot;
4405 ins_nr++;
4406 path->slots[0]++;
     /* copy_items() is given dst_path, so make sure it exists by now. */
4407 if (!dst_path) {
4408 dst_path = btrfs_alloc_path();
4409 if (!dst_path) {
4410 ret = -ENOMEM;
4411 goto out;
4412 }
4413 }
4414 }
     /* Copy whatever batch is still pending; a positive result becomes 0. */
4415 if (ins_nr > 0) {
4416 ret = copy_items(trans, inode, dst_path, path, &last_extent,
4417 start_slot, ins_nr, 1, 0);
4418 if (ret > 0)
4419 ret = 0;
4420 }
4421out:
     /* Common exit: release the caller's path and free our own (may be NULL). */
4422 btrfs_release_path(path);
4423 btrfs_free_path(dst_path);
4424 return ret;
4425}
4426
4323static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans, 4427static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
4324 struct btrfs_root *root, 4428 struct btrfs_root *root,
4325 struct btrfs_inode *inode, 4429 struct btrfs_inode *inode,
@@ -4362,6 +4466,11 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
4362 if (em->generation <= test_gen) 4466 if (em->generation <= test_gen)
4363 continue; 4467 continue;
4364 4468
4469 /* We log prealloc extents beyond eof later. */
4470 if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) &&
4471 em->start >= i_size_read(&inode->vfs_inode))
4472 continue;
4473
4365 if (em->start < logged_start) 4474 if (em->start < logged_start)
4366 logged_start = em->start; 4475 logged_start = em->start;
4367 if ((em->start + em->len - 1) > logged_end) 4476 if ((em->start + em->len - 1) > logged_end)
@@ -4374,31 +4483,6 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
4374 num++; 4483 num++;
4375 } 4484 }
4376 4485
4377 /*
4378 * Add all prealloc extents beyond the inode's i_size to make sure we
4379 * don't lose them after doing a fast fsync and replaying the log.
4380 */
4381 if (inode->flags & BTRFS_INODE_PREALLOC) {
4382 struct rb_node *node;
4383
4384 for (node = rb_last(&tree->map); node; node = rb_prev(node)) {
4385 em = rb_entry(node, struct extent_map, rb_node);
4386 if (em->start < i_size_read(&inode->vfs_inode))
4387 break;
4388 if (!list_empty(&em->list))
4389 continue;
4390 /* Same as above loop. */
4391 if (++num > 32768) {
4392 list_del_init(&tree->modified_extents);
4393 ret = -EFBIG;
4394 goto process;
4395 }
4396 refcount_inc(&em->refs);
4397 set_bit(EXTENT_FLAG_LOGGING, &em->flags);
4398 list_add_tail(&em->list, &extents);
4399 }
4400 }
4401
4402 list_sort(NULL, &extents, extent_cmp); 4486 list_sort(NULL, &extents, extent_cmp);
4403 btrfs_get_logged_extents(inode, logged_list, logged_start, logged_end); 4487 btrfs_get_logged_extents(inode, logged_list, logged_start, logged_end);
4404 /* 4488 /*
@@ -4443,6 +4527,9 @@ process:
4443 up_write(&inode->dio_sem); 4527 up_write(&inode->dio_sem);
4444 4528
4445 btrfs_release_path(path); 4529 btrfs_release_path(path);
4530 if (!ret)
4531 ret = btrfs_log_prealloc_extents(trans, inode, path);
4532
4446 return ret; 4533 return ret;
4447} 4534}
4448 4535