diff options
Diffstat (limited to 'fs/btrfs/inode.c')
-rw-r--r-- | fs/btrfs/inode.c | 466 |
1 files changed, 400 insertions, 66 deletions
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 6b7fe291a174..91419ef79b00 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -824,6 +824,7 @@ retry: | |||
824 | async_extent->ram_size - 1, 0); | 824 | async_extent->ram_size - 1, 0); |
825 | goto out_free_reserve; | 825 | goto out_free_reserve; |
826 | } | 826 | } |
827 | btrfs_dec_block_group_reservations(root->fs_info, ins.objectid); | ||
827 | 828 | ||
828 | /* | 829 | /* |
829 | * clear dirty, set writeback and unlock the pages. | 830 | * clear dirty, set writeback and unlock the pages. |
@@ -861,6 +862,7 @@ retry: | |||
861 | } | 862 | } |
862 | return; | 863 | return; |
863 | out_free_reserve: | 864 | out_free_reserve: |
865 | btrfs_dec_block_group_reservations(root->fs_info, ins.objectid); | ||
864 | btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1); | 866 | btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1); |
865 | out_free: | 867 | out_free: |
866 | extent_clear_unlock_delalloc(inode, async_extent->start, | 868 | extent_clear_unlock_delalloc(inode, async_extent->start, |
@@ -1038,6 +1040,8 @@ static noinline int cow_file_range(struct inode *inode, | |||
1038 | goto out_drop_extent_cache; | 1040 | goto out_drop_extent_cache; |
1039 | } | 1041 | } |
1040 | 1042 | ||
1043 | btrfs_dec_block_group_reservations(root->fs_info, ins.objectid); | ||
1044 | |||
1041 | if (disk_num_bytes < cur_alloc_size) | 1045 | if (disk_num_bytes < cur_alloc_size) |
1042 | break; | 1046 | break; |
1043 | 1047 | ||
@@ -1066,6 +1070,7 @@ out: | |||
1066 | out_drop_extent_cache: | 1070 | out_drop_extent_cache: |
1067 | btrfs_drop_extent_cache(inode, start, start + ram_size - 1, 0); | 1071 | btrfs_drop_extent_cache(inode, start, start + ram_size - 1, 0); |
1068 | out_reserve: | 1072 | out_reserve: |
1073 | btrfs_dec_block_group_reservations(root->fs_info, ins.objectid); | ||
1069 | btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1); | 1074 | btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1); |
1070 | out_unlock: | 1075 | out_unlock: |
1071 | extent_clear_unlock_delalloc(inode, start, end, locked_page, | 1076 | extent_clear_unlock_delalloc(inode, start, end, locked_page, |
@@ -1377,6 +1382,9 @@ next_slot: | |||
1377 | */ | 1382 | */ |
1378 | if (csum_exist_in_range(root, disk_bytenr, num_bytes)) | 1383 | if (csum_exist_in_range(root, disk_bytenr, num_bytes)) |
1379 | goto out_check; | 1384 | goto out_check; |
1385 | if (!btrfs_inc_nocow_writers(root->fs_info, | ||
1386 | disk_bytenr)) | ||
1387 | goto out_check; | ||
1380 | nocow = 1; | 1388 | nocow = 1; |
1381 | } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) { | 1389 | } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) { |
1382 | extent_end = found_key.offset + | 1390 | extent_end = found_key.offset + |
@@ -1391,6 +1399,9 @@ out_check: | |||
1391 | path->slots[0]++; | 1399 | path->slots[0]++; |
1392 | if (!nolock && nocow) | 1400 | if (!nolock && nocow) |
1393 | btrfs_end_write_no_snapshoting(root); | 1401 | btrfs_end_write_no_snapshoting(root); |
1402 | if (nocow) | ||
1403 | btrfs_dec_nocow_writers(root->fs_info, | ||
1404 | disk_bytenr); | ||
1394 | goto next_slot; | 1405 | goto next_slot; |
1395 | } | 1406 | } |
1396 | if (!nocow) { | 1407 | if (!nocow) { |
@@ -1411,6 +1422,9 @@ out_check: | |||
1411 | if (ret) { | 1422 | if (ret) { |
1412 | if (!nolock && nocow) | 1423 | if (!nolock && nocow) |
1413 | btrfs_end_write_no_snapshoting(root); | 1424 | btrfs_end_write_no_snapshoting(root); |
1425 | if (nocow) | ||
1426 | btrfs_dec_nocow_writers(root->fs_info, | ||
1427 | disk_bytenr); | ||
1414 | goto error; | 1428 | goto error; |
1415 | } | 1429 | } |
1416 | cow_start = (u64)-1; | 1430 | cow_start = (u64)-1; |
@@ -1453,6 +1467,8 @@ out_check: | |||
1453 | 1467 | ||
1454 | ret = btrfs_add_ordered_extent(inode, cur_offset, disk_bytenr, | 1468 | ret = btrfs_add_ordered_extent(inode, cur_offset, disk_bytenr, |
1455 | num_bytes, num_bytes, type); | 1469 | num_bytes, num_bytes, type); |
1470 | if (nocow) | ||
1471 | btrfs_dec_nocow_writers(root->fs_info, disk_bytenr); | ||
1456 | BUG_ON(ret); /* -ENOMEM */ | 1472 | BUG_ON(ret); /* -ENOMEM */ |
1457 | 1473 | ||
1458 | if (root->root_key.objectid == | 1474 | if (root->root_key.objectid == |
@@ -7129,6 +7145,43 @@ out: | |||
7129 | return em; | 7145 | return em; |
7130 | } | 7146 | } |
7131 | 7147 | ||
7148 | static struct extent_map *btrfs_create_dio_extent(struct inode *inode, | ||
7149 | const u64 start, | ||
7150 | const u64 len, | ||
7151 | const u64 orig_start, | ||
7152 | const u64 block_start, | ||
7153 | const u64 block_len, | ||
7154 | const u64 orig_block_len, | ||
7155 | const u64 ram_bytes, | ||
7156 | const int type) | ||
7157 | { | ||
7158 | struct extent_map *em = NULL; | ||
7159 | int ret; | ||
7160 | |||
7161 | down_read(&BTRFS_I(inode)->dio_sem); | ||
7162 | if (type != BTRFS_ORDERED_NOCOW) { | ||
7163 | em = create_pinned_em(inode, start, len, orig_start, | ||
7164 | block_start, block_len, orig_block_len, | ||
7165 | ram_bytes, type); | ||
7166 | if (IS_ERR(em)) | ||
7167 | goto out; | ||
7168 | } | ||
7169 | ret = btrfs_add_ordered_extent_dio(inode, start, block_start, | ||
7170 | len, block_len, type); | ||
7171 | if (ret) { | ||
7172 | if (em) { | ||
7173 | free_extent_map(em); | ||
7174 | btrfs_drop_extent_cache(inode, start, | ||
7175 | start + len - 1, 0); | ||
7176 | } | ||
7177 | em = ERR_PTR(ret); | ||
7178 | } | ||
7179 | out: | ||
7180 | up_read(&BTRFS_I(inode)->dio_sem); | ||
7181 | |||
7182 | return em; | ||
7183 | } | ||
7184 | |||
7132 | static struct extent_map *btrfs_new_extent_direct(struct inode *inode, | 7185 | static struct extent_map *btrfs_new_extent_direct(struct inode *inode, |
7133 | u64 start, u64 len) | 7186 | u64 start, u64 len) |
7134 | { | 7187 | { |
@@ -7144,41 +7197,13 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode, | |||
7144 | if (ret) | 7197 | if (ret) |
7145 | return ERR_PTR(ret); | 7198 | return ERR_PTR(ret); |
7146 | 7199 | ||
7147 | /* | 7200 | em = btrfs_create_dio_extent(inode, start, ins.offset, start, |
7148 | * Create the ordered extent before the extent map. This is to avoid | 7201 | ins.objectid, ins.offset, ins.offset, |
7149 | * races with the fast fsync path that would lead to it logging file | 7202 | ins.offset, 0); |
7150 | * extent items that point to disk extents that were not yet written to. | 7203 | btrfs_dec_block_group_reservations(root->fs_info, ins.objectid); |
7151 | * The fast fsync path collects ordered extents into a local list and | 7204 | if (IS_ERR(em)) |
7152 | * then collects all the new extent maps, so we must create the ordered | ||
7153 | * extent first and make sure the fast fsync path collects any new | ||
7154 | * ordered extents after collecting new extent maps as well. | ||
7155 | * The fsync path simply can not rely on inode_dio_wait() because it | ||
7156 | * causes deadlock with AIO. | ||
7157 | */ | ||
7158 | ret = btrfs_add_ordered_extent_dio(inode, start, ins.objectid, | ||
7159 | ins.offset, ins.offset, 0); | ||
7160 | if (ret) { | ||
7161 | btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1); | 7205 | btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1); |
7162 | return ERR_PTR(ret); | ||
7163 | } | ||
7164 | |||
7165 | em = create_pinned_em(inode, start, ins.offset, start, ins.objectid, | ||
7166 | ins.offset, ins.offset, ins.offset, 0); | ||
7167 | if (IS_ERR(em)) { | ||
7168 | struct btrfs_ordered_extent *oe; | ||
7169 | 7206 | ||
7170 | btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1); | ||
7171 | oe = btrfs_lookup_ordered_extent(inode, start); | ||
7172 | ASSERT(oe); | ||
7173 | if (WARN_ON(!oe)) | ||
7174 | return em; | ||
7175 | set_bit(BTRFS_ORDERED_IOERR, &oe->flags); | ||
7176 | set_bit(BTRFS_ORDERED_IO_DONE, &oe->flags); | ||
7177 | btrfs_remove_ordered_extent(inode, oe); | ||
7178 | /* Once for our lookup and once for the ordered extents tree. */ | ||
7179 | btrfs_put_ordered_extent(oe); | ||
7180 | btrfs_put_ordered_extent(oe); | ||
7181 | } | ||
7182 | return em; | 7207 | return em; |
7183 | } | 7208 | } |
7184 | 7209 | ||
@@ -7650,24 +7675,21 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, | |||
7650 | block_start = em->block_start + (start - em->start); | 7675 | block_start = em->block_start + (start - em->start); |
7651 | 7676 | ||
7652 | if (can_nocow_extent(inode, start, &len, &orig_start, | 7677 | if (can_nocow_extent(inode, start, &len, &orig_start, |
7653 | &orig_block_len, &ram_bytes) == 1) { | 7678 | &orig_block_len, &ram_bytes) == 1 && |
7679 | btrfs_inc_nocow_writers(root->fs_info, block_start)) { | ||
7680 | struct extent_map *em2; | ||
7681 | |||
7682 | em2 = btrfs_create_dio_extent(inode, start, len, | ||
7683 | orig_start, block_start, | ||
7684 | len, orig_block_len, | ||
7685 | ram_bytes, type); | ||
7686 | btrfs_dec_nocow_writers(root->fs_info, block_start); | ||
7654 | if (type == BTRFS_ORDERED_PREALLOC) { | 7687 | if (type == BTRFS_ORDERED_PREALLOC) { |
7655 | free_extent_map(em); | 7688 | free_extent_map(em); |
7656 | em = create_pinned_em(inode, start, len, | 7689 | em = em2; |
7657 | orig_start, | ||
7658 | block_start, len, | ||
7659 | orig_block_len, | ||
7660 | ram_bytes, type); | ||
7661 | if (IS_ERR(em)) { | ||
7662 | ret = PTR_ERR(em); | ||
7663 | goto unlock_err; | ||
7664 | } | ||
7665 | } | 7690 | } |
7666 | 7691 | if (em2 && IS_ERR(em2)) { | |
7667 | ret = btrfs_add_ordered_extent_dio(inode, start, | 7692 | ret = PTR_ERR(em2); |
7668 | block_start, len, len, type); | ||
7669 | if (ret) { | ||
7670 | free_extent_map(em); | ||
7671 | goto unlock_err; | 7693 | goto unlock_err; |
7672 | } | 7694 | } |
7673 | goto unlock; | 7695 | goto unlock; |
@@ -9230,6 +9252,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) | |||
9230 | INIT_LIST_HEAD(&ei->delalloc_inodes); | 9252 | INIT_LIST_HEAD(&ei->delalloc_inodes); |
9231 | INIT_LIST_HEAD(&ei->delayed_iput); | 9253 | INIT_LIST_HEAD(&ei->delayed_iput); |
9232 | RB_CLEAR_NODE(&ei->rb_node); | 9254 | RB_CLEAR_NODE(&ei->rb_node); |
9255 | init_rwsem(&ei->dio_sem); | ||
9233 | 9256 | ||
9234 | return inode; | 9257 | return inode; |
9235 | } | 9258 | } |
@@ -9387,10 +9410,281 @@ static int btrfs_getattr(struct vfsmount *mnt, | |||
9387 | return 0; | 9410 | return 0; |
9388 | } | 9411 | } |
9389 | 9412 | ||
9413 | static int btrfs_rename_exchange(struct inode *old_dir, | ||
9414 | struct dentry *old_dentry, | ||
9415 | struct inode *new_dir, | ||
9416 | struct dentry *new_dentry) | ||
9417 | { | ||
9418 | struct btrfs_trans_handle *trans; | ||
9419 | struct btrfs_root *root = BTRFS_I(old_dir)->root; | ||
9420 | struct btrfs_root *dest = BTRFS_I(new_dir)->root; | ||
9421 | struct inode *new_inode = new_dentry->d_inode; | ||
9422 | struct inode *old_inode = old_dentry->d_inode; | ||
9423 | struct timespec ctime = CURRENT_TIME; | ||
9424 | struct dentry *parent; | ||
9425 | u64 old_ino = btrfs_ino(old_inode); | ||
9426 | u64 new_ino = btrfs_ino(new_inode); | ||
9427 | u64 old_idx = 0; | ||
9428 | u64 new_idx = 0; | ||
9429 | u64 root_objectid; | ||
9430 | int ret; | ||
9431 | bool root_log_pinned = false; | ||
9432 | bool dest_log_pinned = false; | ||
9433 | |||
9434 | /* we only allow rename subvolume link between subvolumes */ | ||
9435 | if (old_ino != BTRFS_FIRST_FREE_OBJECTID && root != dest) | ||
9436 | return -EXDEV; | ||
9437 | |||
9438 | /* close the race window with snapshot create/destroy ioctl */ | ||
9439 | if (old_ino == BTRFS_FIRST_FREE_OBJECTID) | ||
9440 | down_read(&root->fs_info->subvol_sem); | ||
9441 | if (new_ino == BTRFS_FIRST_FREE_OBJECTID) | ||
9442 | down_read(&dest->fs_info->subvol_sem); | ||
9443 | |||
9444 | /* | ||
9445 | * We want to reserve the absolute worst case amount of items. So if | ||
9446 | * both inodes are subvols and we need to unlink them then that would | ||
9447 | * require 4 item modifications, but if they are both normal inodes it | ||
9448 | * would require 5 item modifications, so we'll assume they are normal | ||
9449 | * inodes. So 5 * 2 is 10, plus 2 for the new links, so 12 total items | ||
9450 | * should cover the worst case number of items we'll modify. | ||
9451 | */ | ||
9452 | trans = btrfs_start_transaction(root, 12); | ||
9453 | if (IS_ERR(trans)) { | ||
9454 | ret = PTR_ERR(trans); | ||
9455 | goto out_notrans; | ||
9456 | } | ||
9457 | |||
9458 | /* | ||
9459 | * We need to find a free sequence number both in the source and | ||
9460 | * in the destination directory for the exchange. | ||
9461 | */ | ||
9462 | ret = btrfs_set_inode_index(new_dir, &old_idx); | ||
9463 | if (ret) | ||
9464 | goto out_fail; | ||
9465 | ret = btrfs_set_inode_index(old_dir, &new_idx); | ||
9466 | if (ret) | ||
9467 | goto out_fail; | ||
9468 | |||
9469 | BTRFS_I(old_inode)->dir_index = 0ULL; | ||
9470 | BTRFS_I(new_inode)->dir_index = 0ULL; | ||
9471 | |||
9472 | /* Reference for the source. */ | ||
9473 | if (old_ino == BTRFS_FIRST_FREE_OBJECTID) { | ||
9474 | /* force full log commit if subvolume involved. */ | ||
9475 | btrfs_set_log_full_commit(root->fs_info, trans); | ||
9476 | } else { | ||
9477 | btrfs_pin_log_trans(root); | ||
9478 | root_log_pinned = true; | ||
9479 | ret = btrfs_insert_inode_ref(trans, dest, | ||
9480 | new_dentry->d_name.name, | ||
9481 | new_dentry->d_name.len, | ||
9482 | old_ino, | ||
9483 | btrfs_ino(new_dir), old_idx); | ||
9484 | if (ret) | ||
9485 | goto out_fail; | ||
9486 | } | ||
9487 | |||
9488 | /* And now for the dest. */ | ||
9489 | if (new_ino == BTRFS_FIRST_FREE_OBJECTID) { | ||
9490 | /* force full log commit if subvolume involved. */ | ||
9491 | btrfs_set_log_full_commit(dest->fs_info, trans); | ||
9492 | } else { | ||
9493 | btrfs_pin_log_trans(dest); | ||
9494 | dest_log_pinned = true; | ||
9495 | ret = btrfs_insert_inode_ref(trans, root, | ||
9496 | old_dentry->d_name.name, | ||
9497 | old_dentry->d_name.len, | ||
9498 | new_ino, | ||
9499 | btrfs_ino(old_dir), new_idx); | ||
9500 | if (ret) | ||
9501 | goto out_fail; | ||
9502 | } | ||
9503 | |||
9504 | /* Update inode version and ctime/mtime. */ | ||
9505 | inode_inc_iversion(old_dir); | ||
9506 | inode_inc_iversion(new_dir); | ||
9507 | inode_inc_iversion(old_inode); | ||
9508 | inode_inc_iversion(new_inode); | ||
9509 | old_dir->i_ctime = old_dir->i_mtime = ctime; | ||
9510 | new_dir->i_ctime = new_dir->i_mtime = ctime; | ||
9511 | old_inode->i_ctime = ctime; | ||
9512 | new_inode->i_ctime = ctime; | ||
9513 | |||
9514 | if (old_dentry->d_parent != new_dentry->d_parent) { | ||
9515 | btrfs_record_unlink_dir(trans, old_dir, old_inode, 1); | ||
9516 | btrfs_record_unlink_dir(trans, new_dir, new_inode, 1); | ||
9517 | } | ||
9518 | |||
9519 | /* src is a subvolume */ | ||
9520 | if (old_ino == BTRFS_FIRST_FREE_OBJECTID) { | ||
9521 | root_objectid = BTRFS_I(old_inode)->root->root_key.objectid; | ||
9522 | ret = btrfs_unlink_subvol(trans, root, old_dir, | ||
9523 | root_objectid, | ||
9524 | old_dentry->d_name.name, | ||
9525 | old_dentry->d_name.len); | ||
9526 | } else { /* src is an inode */ | ||
9527 | ret = __btrfs_unlink_inode(trans, root, old_dir, | ||
9528 | old_dentry->d_inode, | ||
9529 | old_dentry->d_name.name, | ||
9530 | old_dentry->d_name.len); | ||
9531 | if (!ret) | ||
9532 | ret = btrfs_update_inode(trans, root, old_inode); | ||
9533 | } | ||
9534 | if (ret) { | ||
9535 | btrfs_abort_transaction(trans, root, ret); | ||
9536 | goto out_fail; | ||
9537 | } | ||
9538 | |||
9539 | /* dest is a subvolume */ | ||
9540 | if (new_ino == BTRFS_FIRST_FREE_OBJECTID) { | ||
9541 | root_objectid = BTRFS_I(new_inode)->root->root_key.objectid; | ||
9542 | ret = btrfs_unlink_subvol(trans, dest, new_dir, | ||
9543 | root_objectid, | ||
9544 | new_dentry->d_name.name, | ||
9545 | new_dentry->d_name.len); | ||
9546 | } else { /* dest is an inode */ | ||
9547 | ret = __btrfs_unlink_inode(trans, dest, new_dir, | ||
9548 | new_dentry->d_inode, | ||
9549 | new_dentry->d_name.name, | ||
9550 | new_dentry->d_name.len); | ||
9551 | if (!ret) | ||
9552 | ret = btrfs_update_inode(trans, dest, new_inode); | ||
9553 | } | ||
9554 | if (ret) { | ||
9555 | btrfs_abort_transaction(trans, root, ret); | ||
9556 | goto out_fail; | ||
9557 | } | ||
9558 | |||
9559 | ret = btrfs_add_link(trans, new_dir, old_inode, | ||
9560 | new_dentry->d_name.name, | ||
9561 | new_dentry->d_name.len, 0, old_idx); | ||
9562 | if (ret) { | ||
9563 | btrfs_abort_transaction(trans, root, ret); | ||
9564 | goto out_fail; | ||
9565 | } | ||
9566 | |||
9567 | ret = btrfs_add_link(trans, old_dir, new_inode, | ||
9568 | old_dentry->d_name.name, | ||
9569 | old_dentry->d_name.len, 0, new_idx); | ||
9570 | if (ret) { | ||
9571 | btrfs_abort_transaction(trans, root, ret); | ||
9572 | goto out_fail; | ||
9573 | } | ||
9574 | |||
9575 | if (old_inode->i_nlink == 1) | ||
9576 | BTRFS_I(old_inode)->dir_index = old_idx; | ||
9577 | if (new_inode->i_nlink == 1) | ||
9578 | BTRFS_I(new_inode)->dir_index = new_idx; | ||
9579 | |||
9580 | if (root_log_pinned) { | ||
9581 | parent = new_dentry->d_parent; | ||
9582 | btrfs_log_new_name(trans, old_inode, old_dir, parent); | ||
9583 | btrfs_end_log_trans(root); | ||
9584 | root_log_pinned = false; | ||
9585 | } | ||
9586 | if (dest_log_pinned) { | ||
9587 | parent = old_dentry->d_parent; | ||
9588 | btrfs_log_new_name(trans, new_inode, new_dir, parent); | ||
9589 | btrfs_end_log_trans(dest); | ||
9590 | dest_log_pinned = false; | ||
9591 | } | ||
9592 | out_fail: | ||
9593 | /* | ||
9594 | * If we have pinned a log and an error happened, we unpin tasks | ||
9595 | * trying to sync the log and force them to fallback to a transaction | ||
9596 | * commit if the log currently contains any of the inodes involved in | ||
9597 | * this rename operation (to ensure we do not persist a log with an | ||
9598 | * inconsistent state for any of these inodes or leading to any | ||
9599 | * inconsistencies when replayed). If the transaction was aborted, the | ||
9600 | * abortion reason is propagated to userspace when attempting to commit | ||
9601 | * the transaction. If the log does not contain any of these inodes, we | ||
9602 | * allow the tasks to sync it. | ||
9603 | */ | ||
9604 | if (ret && (root_log_pinned || dest_log_pinned)) { | ||
9605 | if (btrfs_inode_in_log(old_dir, root->fs_info->generation) || | ||
9606 | btrfs_inode_in_log(new_dir, root->fs_info->generation) || | ||
9607 | btrfs_inode_in_log(old_inode, root->fs_info->generation) || | ||
9608 | (new_inode && | ||
9609 | btrfs_inode_in_log(new_inode, root->fs_info->generation))) | ||
9610 | btrfs_set_log_full_commit(root->fs_info, trans); | ||
9611 | |||
9612 | if (root_log_pinned) { | ||
9613 | btrfs_end_log_trans(root); | ||
9614 | root_log_pinned = false; | ||
9615 | } | ||
9616 | if (dest_log_pinned) { | ||
9617 | btrfs_end_log_trans(dest); | ||
9618 | dest_log_pinned = false; | ||
9619 | } | ||
9620 | } | ||
9621 | ret = btrfs_end_transaction(trans, root); | ||
9622 | out_notrans: | ||
9623 | if (new_ino == BTRFS_FIRST_FREE_OBJECTID) | ||
9624 | up_read(&dest->fs_info->subvol_sem); | ||
9625 | if (old_ino == BTRFS_FIRST_FREE_OBJECTID) | ||
9626 | up_read(&root->fs_info->subvol_sem); | ||
9627 | |||
9628 | return ret; | ||
9629 | } | ||
9630 | |||
9631 | static int btrfs_whiteout_for_rename(struct btrfs_trans_handle *trans, | ||
9632 | struct btrfs_root *root, | ||
9633 | struct inode *dir, | ||
9634 | struct dentry *dentry) | ||
9635 | { | ||
9636 | int ret; | ||
9637 | struct inode *inode; | ||
9638 | u64 objectid; | ||
9639 | u64 index; | ||
9640 | |||
9641 | ret = btrfs_find_free_ino(root, &objectid); | ||
9642 | if (ret) | ||
9643 | return ret; | ||
9644 | |||
9645 | inode = btrfs_new_inode(trans, root, dir, | ||
9646 | dentry->d_name.name, | ||
9647 | dentry->d_name.len, | ||
9648 | btrfs_ino(dir), | ||
9649 | objectid, | ||
9650 | S_IFCHR | WHITEOUT_MODE, | ||
9651 | &index); | ||
9652 | |||
9653 | if (IS_ERR(inode)) { | ||
9654 | ret = PTR_ERR(inode); | ||
9655 | return ret; | ||
9656 | } | ||
9657 | |||
9658 | inode->i_op = &btrfs_special_inode_operations; | ||
9659 | init_special_inode(inode, inode->i_mode, | ||
9660 | WHITEOUT_DEV); | ||
9661 | |||
9662 | ret = btrfs_init_inode_security(trans, inode, dir, | ||
9663 | &dentry->d_name); | ||
9664 | if (ret) | ||
9665 | goto out; | ||
9666 | |||
9667 | ret = btrfs_add_nondir(trans, dir, dentry, | ||
9668 | inode, 0, index); | ||
9669 | if (ret) | ||
9670 | goto out; | ||
9671 | |||
9672 | ret = btrfs_update_inode(trans, root, inode); | ||
9673 | out: | ||
9674 | unlock_new_inode(inode); | ||
9675 | if (ret) | ||
9676 | inode_dec_link_count(inode); | ||
9677 | iput(inode); | ||
9678 | |||
9679 | return ret; | ||
9680 | } | ||
9681 | |||
9390 | static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | 9682 | static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, |
9391 | struct inode *new_dir, struct dentry *new_dentry) | 9683 | struct inode *new_dir, struct dentry *new_dentry, |
9684 | unsigned int flags) | ||
9392 | { | 9685 | { |
9393 | struct btrfs_trans_handle *trans; | 9686 | struct btrfs_trans_handle *trans; |
9687 | unsigned int trans_num_items; | ||
9394 | struct btrfs_root *root = BTRFS_I(old_dir)->root; | 9688 | struct btrfs_root *root = BTRFS_I(old_dir)->root; |
9395 | struct btrfs_root *dest = BTRFS_I(new_dir)->root; | 9689 | struct btrfs_root *dest = BTRFS_I(new_dir)->root; |
9396 | struct inode *new_inode = d_inode(new_dentry); | 9690 | struct inode *new_inode = d_inode(new_dentry); |
@@ -9399,6 +9693,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
9399 | u64 root_objectid; | 9693 | u64 root_objectid; |
9400 | int ret; | 9694 | int ret; |
9401 | u64 old_ino = btrfs_ino(old_inode); | 9695 | u64 old_ino = btrfs_ino(old_inode); |
9696 | bool log_pinned = false; | ||
9402 | 9697 | ||
9403 | if (btrfs_ino(new_dir) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) | 9698 | if (btrfs_ino(new_dir) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) |
9404 | return -EPERM; | 9699 | return -EPERM; |
@@ -9449,15 +9744,21 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
9449 | * We want to reserve the absolute worst case amount of items. So if | 9744 | * We want to reserve the absolute worst case amount of items. So if |
9450 | * both inodes are subvols and we need to unlink them then that would | 9745 | * both inodes are subvols and we need to unlink them then that would |
9451 | * require 4 item modifications, but if they are both normal inodes it | 9746 | * require 4 item modifications, but if they are both normal inodes it |
9452 | * would require 5 item modifications, so we'll assume their normal | 9747 | * would require 5 item modifications, so we'll assume they are normal |
9453 | * inodes. So 5 * 2 is 10, plus 1 for the new link, so 11 total items | 9748 | * inodes. So 5 * 2 is 10, plus 1 for the new link, so 11 total items |
9454 | * should cover the worst case number of items we'll modify. | 9749 | * should cover the worst case number of items we'll modify. |
9750 | * If our rename has the whiteout flag, we need 5 more units for the | ||
9751 | * new inode (1 inode item, 1 inode ref, 2 dir items and 1 xattr item | ||
9752 | * when selinux is enabled). | ||
9455 | */ | 9753 | */ |
9456 | trans = btrfs_start_transaction(root, 11); | 9754 | trans_num_items = 11; |
9755 | if (flags & RENAME_WHITEOUT) | ||
9756 | trans_num_items += 5; | ||
9757 | trans = btrfs_start_transaction(root, trans_num_items); | ||
9457 | if (IS_ERR(trans)) { | 9758 | if (IS_ERR(trans)) { |
9458 | ret = PTR_ERR(trans); | 9759 | ret = PTR_ERR(trans); |
9459 | goto out_notrans; | 9760 | goto out_notrans; |
9460 | } | 9761 | } |
9461 | 9762 | ||
9462 | if (dest != root) | 9763 | if (dest != root) |
9463 | btrfs_record_root_in_trans(trans, dest); | 9764 | btrfs_record_root_in_trans(trans, dest); |
@@ -9471,6 +9772,8 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
9471 | /* force full log commit if subvolume involved. */ | 9772 | /* force full log commit if subvolume involved. */ |
9472 | btrfs_set_log_full_commit(root->fs_info, trans); | 9773 | btrfs_set_log_full_commit(root->fs_info, trans); |
9473 | } else { | 9774 | } else { |
9775 | btrfs_pin_log_trans(root); | ||
9776 | log_pinned = true; | ||
9474 | ret = btrfs_insert_inode_ref(trans, dest, | 9777 | ret = btrfs_insert_inode_ref(trans, dest, |
9475 | new_dentry->d_name.name, | 9778 | new_dentry->d_name.name, |
9476 | new_dentry->d_name.len, | 9779 | new_dentry->d_name.len, |
@@ -9478,14 +9781,6 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
9478 | btrfs_ino(new_dir), index); | 9781 | btrfs_ino(new_dir), index); |
9479 | if (ret) | 9782 | if (ret) |
9480 | goto out_fail; | 9783 | goto out_fail; |
9481 | /* | ||
9482 | * this is an ugly little race, but the rename is required | ||
9483 | * to make sure that if we crash, the inode is either at the | ||
9484 | * old name or the new one. pinning the log transaction lets | ||
9485 | * us make sure we don't allow a log commit to come in after | ||
9486 | * we unlink the name but before we add the new name back in. | ||
9487 | */ | ||
9488 | btrfs_pin_log_trans(root); | ||
9489 | } | 9784 | } |
9490 | 9785 | ||
9491 | inode_inc_iversion(old_dir); | 9786 | inode_inc_iversion(old_dir); |
@@ -9552,12 +9847,46 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
9552 | if (old_inode->i_nlink == 1) | 9847 | if (old_inode->i_nlink == 1) |
9553 | BTRFS_I(old_inode)->dir_index = index; | 9848 | BTRFS_I(old_inode)->dir_index = index; |
9554 | 9849 | ||
9555 | if (old_ino != BTRFS_FIRST_FREE_OBJECTID) { | 9850 | if (log_pinned) { |
9556 | struct dentry *parent = new_dentry->d_parent; | 9851 | struct dentry *parent = new_dentry->d_parent; |
9852 | |||
9557 | btrfs_log_new_name(trans, old_inode, old_dir, parent); | 9853 | btrfs_log_new_name(trans, old_inode, old_dir, parent); |
9558 | btrfs_end_log_trans(root); | 9854 | btrfs_end_log_trans(root); |
9855 | log_pinned = false; | ||
9856 | } | ||
9857 | |||
9858 | if (flags & RENAME_WHITEOUT) { | ||
9859 | ret = btrfs_whiteout_for_rename(trans, root, old_dir, | ||
9860 | old_dentry); | ||
9861 | |||
9862 | if (ret) { | ||
9863 | btrfs_abort_transaction(trans, root, ret); | ||
9864 | goto out_fail; | ||
9865 | } | ||
9559 | } | 9866 | } |
9560 | out_fail: | 9867 | out_fail: |
9868 | /* | ||
9869 | * If we have pinned the log and an error happened, we unpin tasks | ||
9870 | * trying to sync the log and force them to fallback to a transaction | ||
9871 | * commit if the log currently contains any of the inodes involved in | ||
9872 | * this rename operation (to ensure we do not persist a log with an | ||
9873 | * inconsistent state for any of these inodes or leading to any | ||
9874 | * inconsistencies when replayed). If the transaction was aborted, the | ||
9875 | * abortion reason is propagated to userspace when attempting to commit | ||
9876 | * the transaction. If the log does not contain any of these inodes, we | ||
9877 | * allow the tasks to sync it. | ||
9878 | */ | ||
9879 | if (ret && log_pinned) { | ||
9880 | if (btrfs_inode_in_log(old_dir, root->fs_info->generation) || | ||
9881 | btrfs_inode_in_log(new_dir, root->fs_info->generation) || | ||
9882 | btrfs_inode_in_log(old_inode, root->fs_info->generation) || | ||
9883 | (new_inode && | ||
9884 | btrfs_inode_in_log(new_inode, root->fs_info->generation))) | ||
9885 | btrfs_set_log_full_commit(root->fs_info, trans); | ||
9886 | |||
9887 | btrfs_end_log_trans(root); | ||
9888 | log_pinned = false; | ||
9889 | } | ||
9561 | btrfs_end_transaction(trans, root); | 9890 | btrfs_end_transaction(trans, root); |
9562 | out_notrans: | 9891 | out_notrans: |
9563 | if (old_ino == BTRFS_FIRST_FREE_OBJECTID) | 9892 | if (old_ino == BTRFS_FIRST_FREE_OBJECTID) |
@@ -9570,10 +9899,14 @@ static int btrfs_rename2(struct inode *old_dir, struct dentry *old_dentry, | |||
9570 | struct inode *new_dir, struct dentry *new_dentry, | 9899 | struct inode *new_dir, struct dentry *new_dentry, |
9571 | unsigned int flags) | 9900 | unsigned int flags) |
9572 | { | 9901 | { |
9573 | if (flags & ~RENAME_NOREPLACE) | 9902 | if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT)) |
9574 | return -EINVAL; | 9903 | return -EINVAL; |
9575 | 9904 | ||
9576 | return btrfs_rename(old_dir, old_dentry, new_dir, new_dentry); | 9905 | if (flags & RENAME_EXCHANGE) |
9906 | return btrfs_rename_exchange(old_dir, old_dentry, new_dir, | ||
9907 | new_dentry); | ||
9908 | |||
9909 | return btrfs_rename(old_dir, old_dentry, new_dir, new_dentry, flags); | ||
9577 | } | 9910 | } |
9578 | 9911 | ||
9579 | static void btrfs_run_delalloc_work(struct btrfs_work *work) | 9912 | static void btrfs_run_delalloc_work(struct btrfs_work *work) |
@@ -9942,6 +10275,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode, | |||
9942 | btrfs_end_transaction(trans, root); | 10275 | btrfs_end_transaction(trans, root); |
9943 | break; | 10276 | break; |
9944 | } | 10277 | } |
10278 | btrfs_dec_block_group_reservations(root->fs_info, ins.objectid); | ||
9945 | 10279 | ||
9946 | last_alloc = ins.offset; | 10280 | last_alloc = ins.offset; |
9947 | ret = insert_reserved_file_extent(trans, inode, | 10281 | ret = insert_reserved_file_extent(trans, inode, |
@@ -10184,7 +10518,7 @@ static const struct file_operations btrfs_dir_file_operations = { | |||
10184 | .iterate = btrfs_real_readdir, | 10518 | .iterate = btrfs_real_readdir, |
10185 | .unlocked_ioctl = btrfs_ioctl, | 10519 | .unlocked_ioctl = btrfs_ioctl, |
10186 | #ifdef CONFIG_COMPAT | 10520 | #ifdef CONFIG_COMPAT |
10187 | .compat_ioctl = btrfs_ioctl, | 10521 | .compat_ioctl = btrfs_compat_ioctl, |
10188 | #endif | 10522 | #endif |
10189 | .release = btrfs_release_file, | 10523 | .release = btrfs_release_file, |
10190 | .fsync = btrfs_sync_file, | 10524 | .fsync = btrfs_sync_file, |