diff options
author | Sage Weil <sage@newdream.net> | 2008-11-12 14:32:25 -0500 |
---|---|---|
committer | Chris Mason <chris.mason@oracle.com> | 2008-11-12 14:32:25 -0500 |
commit | c5c9cd4d1b827fe545ed2a945e91e3a6909f3886 (patch) | |
tree | 5dae28d8cd871952b105cdc2822ef4e54f1f02f3 /fs/btrfs/ioctl.c | |
parent | 2ed6d66408527be0d1c6131d44cec7e86008ba26 (diff) |
Btrfs: allow clone of an arbitrary file range

This patch adds an additional CLONE_RANGE ioctl to clone an arbitrary
(block-aligned) file range to another file. The original CLONE ioctl
becomes a special case of cloning the entire file range. The logic is a
bit more complex now since ranges may be cloned to different offsets, and
because we may only be cloning the beginning or end of a particular extent
or checksum item.

An additional sanity check ensures the source and destination files aren't
the same (which would previously deadlock), although eventually this could
be extended to allow the duplication of file data at a different offset
within the same file.

Any extents within the destination range in the target file are dropped.

We currently do not cope with the case where a compressed inline extent
needs to be split. This will probably require decompressing the extent
into a temporary address_space, and inserting just the cloned portion as a
new compressed inline extent. For now, just return -EINVAL in this case.
Note that this never comes up in the more common case of cloning an entire
file.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs/ioctl.c')
-rw-r--r-- | fs/btrfs/ioctl.c | 253 |
1 files changed, 212 insertions, 41 deletions
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 9ff2b4e0e922..4d7cc7c504d0 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
@@ -592,7 +592,8 @@ out: | |||
592 | return ret; | 592 | return ret; |
593 | } | 593 | } |
594 | 594 | ||
595 | long btrfs_ioctl_clone(struct file *file, unsigned long src_fd) | 595 | long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, u64 off, |
596 | u64 olen, u64 destoff) | ||
596 | { | 597 | { |
597 | struct inode *inode = fdentry(file)->d_inode; | 598 | struct inode *inode = fdentry(file)->d_inode; |
598 | struct btrfs_root *root = BTRFS_I(inode)->root; | 599 | struct btrfs_root *root = BTRFS_I(inode)->root; |
@@ -606,12 +607,29 @@ long btrfs_ioctl_clone(struct file *file, unsigned long src_fd) | |||
606 | u32 nritems; | 607 | u32 nritems; |
607 | int slot; | 608 | int slot; |
608 | int ret; | 609 | int ret; |
610 | u64 len = olen; | ||
611 | u64 bs = root->fs_info->sb->s_blocksize; | ||
612 | u64 hint_byte; | ||
609 | 613 | ||
610 | src_file = fget(src_fd); | 614 | /* |
615 | * TODO: | ||
616 | * - split compressed inline extents. annoying: we need to | ||
617 | * decompress into destination's address_space (the file offset | ||
618 | * may change, so source mapping won't do), then recompress (or | ||
619 | * otherwise reinsert) a subrange. | ||
620 | * - allow ranges within the same file to be cloned (provided | ||
621 | * they don't overlap)? | ||
622 | */ | ||
623 | |||
624 | src_file = fget(srcfd); | ||
611 | if (!src_file) | 625 | if (!src_file) |
612 | return -EBADF; | 626 | return -EBADF; |
613 | src = src_file->f_dentry->d_inode; | 627 | src = src_file->f_dentry->d_inode; |
614 | 628 | ||
629 | ret = -EINVAL; | ||
630 | if (src == inode) | ||
631 | goto out_fput; | ||
632 | |||
615 | ret = -EISDIR; | 633 | ret = -EISDIR; |
616 | if (S_ISDIR(src->i_mode) || S_ISDIR(inode->i_mode)) | 634 | if (S_ISDIR(src->i_mode) || S_ISDIR(inode->i_mode)) |
617 | goto out_fput; | 635 | goto out_fput; |
@@ -640,27 +658,46 @@ long btrfs_ioctl_clone(struct file *file, unsigned long src_fd) | |||
640 | mutex_lock(&inode->i_mutex); | 658 | mutex_lock(&inode->i_mutex); |
641 | } | 659 | } |
642 | 660 | ||
643 | ret = -ENOTEMPTY; | 661 | /* determine range to clone */ |
644 | if (inode->i_size) | 662 | ret = -EINVAL; |
663 | if (off >= src->i_size || off + len > src->i_size) | ||
645 | goto out_unlock; | 664 | goto out_unlock; |
665 | if (len == 0) | ||
666 | olen = len = src->i_size - off; | ||
667 | /* if we extend to eof, continue to block boundary */ | ||
668 | if (off + len == src->i_size) | ||
669 | len = ((src->i_size + bs-1) & ~(bs-1)) | ||
670 | - off; | ||
671 | |||
672 | /* verify the end result is block aligned */ | ||
673 | if ((off & (bs-1)) || | ||
674 | ((off + len) & (bs-1))) | ||
675 | goto out_unlock; | ||
676 | |||
677 | printk("final src extent is %llu~%llu\n", off, len); | ||
678 | printk("final dst extent is %llu~%llu\n", destoff, len); | ||
646 | 679 | ||
647 | /* do any pending delalloc/csum calc on src, one way or | 680 | /* do any pending delalloc/csum calc on src, one way or |
648 | another, and lock file content */ | 681 | another, and lock file content */ |
649 | while (1) { | 682 | while (1) { |
650 | struct btrfs_ordered_extent *ordered; | 683 | struct btrfs_ordered_extent *ordered; |
651 | lock_extent(&BTRFS_I(src)->io_tree, 0, (u64)-1, GFP_NOFS); | 684 | lock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS); |
652 | ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1); | 685 | ordered = btrfs_lookup_first_ordered_extent(inode, off+len); |
653 | if (BTRFS_I(src)->delalloc_bytes == 0 && !ordered) | 686 | if (BTRFS_I(src)->delalloc_bytes == 0 && !ordered) |
654 | break; | 687 | break; |
655 | unlock_extent(&BTRFS_I(src)->io_tree, 0, (u64)-1, GFP_NOFS); | 688 | unlock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS); |
656 | if (ordered) | 689 | if (ordered) |
657 | btrfs_put_ordered_extent(ordered); | 690 | btrfs_put_ordered_extent(ordered); |
658 | btrfs_wait_ordered_range(src, 0, (u64)-1); | 691 | btrfs_wait_ordered_range(src, off, off+len); |
659 | } | 692 | } |
660 | 693 | ||
661 | trans = btrfs_start_transaction(root, 1); | 694 | trans = btrfs_start_transaction(root, 1); |
662 | BUG_ON(!trans); | 695 | BUG_ON(!trans); |
663 | 696 | ||
697 | /* punch hole in destination first */ | ||
698 | btrfs_drop_extents(trans, root, inode, off, off+len, 0, &hint_byte); | ||
699 | |||
700 | /* clone data */ | ||
664 | key.objectid = src->i_ino; | 701 | key.objectid = src->i_ino; |
665 | key.type = BTRFS_EXTENT_DATA_KEY; | 702 | key.type = BTRFS_EXTENT_DATA_KEY; |
666 | key.offset = 0; | 703 | key.offset = 0; |
@@ -691,56 +728,178 @@ long btrfs_ioctl_clone(struct file *file, unsigned long src_fd) | |||
691 | key.objectid != src->i_ino) | 728 | key.objectid != src->i_ino) |
692 | break; | 729 | break; |
693 | 730 | ||
694 | if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY || | 731 | if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY) { |
695 | btrfs_key_type(&key) == BTRFS_CSUM_ITEM_KEY) { | 732 | struct btrfs_file_extent_item *extent; |
733 | int type; | ||
696 | u32 size; | 734 | u32 size; |
697 | struct btrfs_key new_key; | 735 | struct btrfs_key new_key; |
736 | u64 disko = 0, diskl = 0; | ||
737 | u64 datao = 0, datal = 0; | ||
738 | u8 comp; | ||
698 | 739 | ||
699 | size = btrfs_item_size_nr(leaf, slot); | 740 | size = btrfs_item_size_nr(leaf, slot); |
700 | read_extent_buffer(leaf, buf, | 741 | read_extent_buffer(leaf, buf, |
701 | btrfs_item_ptr_offset(leaf, slot), | 742 | btrfs_item_ptr_offset(leaf, slot), |
702 | size); | 743 | size); |
744 | |||
745 | extent = btrfs_item_ptr(leaf, slot, | ||
746 | struct btrfs_file_extent_item); | ||
747 | comp = btrfs_file_extent_compression(leaf, extent); | ||
748 | type = btrfs_file_extent_type(leaf, extent); | ||
749 | if (type == BTRFS_FILE_EXTENT_REG) { | ||
750 | disko = btrfs_file_extent_disk_bytenr(leaf, extent); | ||
751 | diskl = btrfs_file_extent_disk_num_bytes(leaf, extent); | ||
752 | datao = btrfs_file_extent_offset(leaf, extent); | ||
753 | datal = btrfs_file_extent_num_bytes(leaf, extent); | ||
754 | } else if (type == BTRFS_FILE_EXTENT_INLINE) { | ||
755 | /* take upper bound, may be compressed */ | ||
756 | datal = btrfs_file_extent_ram_bytes(leaf, | ||
757 | extent); | ||
758 | } | ||
703 | btrfs_release_path(root, path); | 759 | btrfs_release_path(root, path); |
704 | 760 | ||
761 | if (key.offset + datal < off || | ||
762 | key.offset >= off+len) | ||
763 | goto next; | ||
764 | |||
705 | memcpy(&new_key, &key, sizeof(new_key)); | 765 | memcpy(&new_key, &key, sizeof(new_key)); |
706 | new_key.objectid = inode->i_ino; | 766 | new_key.objectid = inode->i_ino; |
707 | ret = btrfs_insert_empty_item(trans, root, path, | 767 | new_key.offset = key.offset + destoff - off; |
708 | &new_key, size); | ||
709 | if (ret) | ||
710 | goto out; | ||
711 | 768 | ||
712 | leaf = path->nodes[0]; | 769 | if (type == BTRFS_FILE_EXTENT_REG) { |
713 | slot = path->slots[0]; | 770 | ret = btrfs_insert_empty_item(trans, root, path, |
714 | write_extent_buffer(leaf, buf, | 771 | &new_key, size); |
772 | if (ret) | ||
773 | goto out; | ||
774 | |||
775 | leaf = path->nodes[0]; | ||
776 | slot = path->slots[0]; | ||
777 | write_extent_buffer(leaf, buf, | ||
715 | btrfs_item_ptr_offset(leaf, slot), | 778 | btrfs_item_ptr_offset(leaf, slot), |
716 | size); | 779 | size); |
717 | btrfs_mark_buffer_dirty(leaf); | ||
718 | } | ||
719 | |||
720 | if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY) { | ||
721 | struct btrfs_file_extent_item *extent; | ||
722 | int found_type; | ||
723 | 780 | ||
724 | extent = btrfs_item_ptr(leaf, slot, | 781 | extent = btrfs_item_ptr(leaf, slot, |
725 | struct btrfs_file_extent_item); | 782 | struct btrfs_file_extent_item); |
726 | found_type = btrfs_file_extent_type(leaf, extent); | 783 | printk(" orig disk %llu~%llu data %llu~%llu\n", |
727 | if (found_type == BTRFS_FILE_EXTENT_REG || | 784 | disko, diskl, datao, datal); |
728 | found_type == BTRFS_FILE_EXTENT_PREALLOC) { | 785 | |
729 | u64 ds = btrfs_file_extent_disk_bytenr(leaf, | 786 | if (off > key.offset) { |
730 | extent); | 787 | datao += off - key.offset; |
731 | u64 dl = btrfs_file_extent_disk_num_bytes(leaf, | 788 | datal -= off - key.offset; |
732 | extent); | 789 | } |
733 | /* ds == 0 means there's a hole */ | 790 | if (key.offset + datao + datal + key.offset > |
734 | if (ds != 0) { | 791 | off + len) |
792 | datal = off + len - key.offset - datao; | ||
793 | /* disko == 0 means it's a hole */ | ||
794 | if (!disko) | ||
795 | datao = 0; | ||
796 | printk(" final disk %llu~%llu data %llu~%llu\n", | ||
797 | disko, diskl, datao, datal); | ||
798 | |||
799 | btrfs_set_file_extent_offset(leaf, extent, | ||
800 | datao); | ||
801 | btrfs_set_file_extent_num_bytes(leaf, extent, | ||
802 | datal); | ||
803 | if (disko) { | ||
804 | inode_add_bytes(inode, datal); | ||
735 | ret = btrfs_inc_extent_ref(trans, root, | 805 | ret = btrfs_inc_extent_ref(trans, root, |
736 | ds, dl, leaf->start, | 806 | disko, diskl, leaf->start, |
737 | root->root_key.objectid, | 807 | root->root_key.objectid, |
738 | trans->transid, | 808 | trans->transid, |
739 | inode->i_ino); | 809 | inode->i_ino); |
740 | BUG_ON(ret); | 810 | BUG_ON(ret); |
741 | } | 811 | } |
812 | } else if (type == BTRFS_FILE_EXTENT_INLINE) { | ||
813 | u64 skip = 0; | ||
814 | u64 trim = 0; | ||
815 | if (off > key.offset) { | ||
816 | skip = off - key.offset; | ||
817 | new_key.offset += skip; | ||
818 | } | ||
819 | if (key.offset + datal > off+len) | ||
820 | trim = key.offset + datal - (off+len); | ||
821 | printk("len %lld skip %lld trim %lld\n", | ||
822 | datal, skip, trim); | ||
823 | if (comp && (skip || trim)) { | ||
824 | printk("btrfs clone_range can't split compressed inline extents yet\n"); | ||
825 | ret = -EINVAL; | ||
826 | goto out; | ||
827 | } | ||
828 | size -= skip + trim; | ||
829 | datal -= skip + trim; | ||
830 | ret = btrfs_insert_empty_item(trans, root, path, | ||
831 | &new_key, size); | ||
832 | if (ret) | ||
833 | goto out; | ||
834 | |||
835 | if (skip) { | ||
836 | u32 start = btrfs_file_extent_calc_inline_size(0); | ||
837 | memmove(buf+start, buf+start+skip, | ||
838 | datal); | ||
839 | } | ||
840 | |||
841 | leaf = path->nodes[0]; | ||
842 | slot = path->slots[0]; | ||
843 | write_extent_buffer(leaf, buf, | ||
844 | btrfs_item_ptr_offset(leaf, slot), | ||
845 | size); | ||
846 | inode_add_bytes(inode, datal); | ||
742 | } | 847 | } |
848 | |||
849 | btrfs_mark_buffer_dirty(leaf); | ||
743 | } | 850 | } |
851 | |||
852 | if (btrfs_key_type(&key) == BTRFS_CSUM_ITEM_KEY) { | ||
853 | u32 size; | ||
854 | struct btrfs_key new_key; | ||
855 | u64 coverslen; | ||
856 | int coff, clen; | ||
857 | |||
858 | size = btrfs_item_size_nr(leaf, slot); | ||
859 | coverslen = (size / BTRFS_CRC32_SIZE) << | ||
860 | root->fs_info->sb->s_blocksize_bits; | ||
861 | printk("csums for %llu~%llu\n", | ||
862 | key.offset, coverslen); | ||
863 | if (key.offset + coverslen < off || | ||
864 | key.offset >= off+len) | ||
865 | goto next; | ||
866 | |||
867 | read_extent_buffer(leaf, buf, | ||
868 | btrfs_item_ptr_offset(leaf, slot), | ||
869 | size); | ||
870 | btrfs_release_path(root, path); | ||
871 | |||
872 | coff = 0; | ||
873 | if (off > key.offset) | ||
874 | coff = ((off - key.offset) >> | ||
875 | root->fs_info->sb->s_blocksize_bits) * | ||
876 | BTRFS_CRC32_SIZE; | ||
877 | clen = size - coff; | ||
878 | if (key.offset + coverslen > off+len) | ||
879 | clen -= ((key.offset+coverslen-off-len) >> | ||
880 | root->fs_info->sb->s_blocksize_bits) * | ||
881 | BTRFS_CRC32_SIZE; | ||
882 | printk(" will dup %d~%d of %d\n", | ||
883 | coff, clen, size); | ||
884 | |||
885 | memcpy(&new_key, &key, sizeof(new_key)); | ||
886 | new_key.objectid = inode->i_ino; | ||
887 | new_key.offset = key.offset + destoff - off; | ||
888 | |||
889 | ret = btrfs_insert_empty_item(trans, root, path, | ||
890 | &new_key, clen); | ||
891 | if (ret) | ||
892 | goto out; | ||
893 | |||
894 | leaf = path->nodes[0]; | ||
895 | slot = path->slots[0]; | ||
896 | write_extent_buffer(leaf, buf + coff, | ||
897 | btrfs_item_ptr_offset(leaf, slot), | ||
898 | clen); | ||
899 | btrfs_mark_buffer_dirty(leaf); | ||
900 | } | ||
901 | |||
902 | next: | ||
744 | btrfs_release_path(root, path); | 903 | btrfs_release_path(root, path); |
745 | key.offset++; | 904 | key.offset++; |
746 | } | 905 | } |
@@ -749,13 +908,13 @@ out: | |||
749 | btrfs_release_path(root, path); | 908 | btrfs_release_path(root, path); |
750 | if (ret == 0) { | 909 | if (ret == 0) { |
751 | inode->i_mtime = inode->i_ctime = CURRENT_TIME; | 910 | inode->i_mtime = inode->i_ctime = CURRENT_TIME; |
752 | inode_set_bytes(inode, inode_get_bytes(src)); | 911 | if (destoff + olen > inode->i_size) |
753 | btrfs_i_size_write(inode, src->i_size); | 912 | btrfs_i_size_write(inode, destoff + olen); |
754 | BTRFS_I(inode)->flags = BTRFS_I(src)->flags; | 913 | BTRFS_I(inode)->flags = BTRFS_I(src)->flags; |
755 | ret = btrfs_update_inode(trans, root, inode); | 914 | ret = btrfs_update_inode(trans, root, inode); |
756 | } | 915 | } |
757 | btrfs_end_transaction(trans, root); | 916 | btrfs_end_transaction(trans, root); |
758 | unlock_extent(&BTRFS_I(src)->io_tree, 0, (u64)-1, GFP_NOFS); | 917 | unlock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS); |
759 | if (ret) | 918 | if (ret) |
760 | vmtruncate(inode, 0); | 919 | vmtruncate(inode, 0); |
761 | out_unlock: | 920 | out_unlock: |
@@ -768,6 +927,16 @@ out_fput: | |||
768 | return ret; | 927 | return ret; |
769 | } | 928 | } |
770 | 929 | ||
930 | long btrfs_ioctl_clone_range(struct file *file, unsigned long argptr) | ||
931 | { | ||
932 | struct btrfs_ioctl_clone_range_args args; | ||
933 | |||
934 | if (copy_from_user(&args, (void *)argptr, sizeof(args))) | ||
935 | return -EFAULT; | ||
936 | return btrfs_ioctl_clone(file, args.src_fd, args.src_offset, | ||
937 | args.src_length, args.dest_offset); | ||
938 | } | ||
939 | |||
771 | /* | 940 | /* |
772 | * there are many ways the trans_start and trans_end ioctls can lead | 941 | * there are many ways the trans_start and trans_end ioctls can lead |
773 | * to deadlocks. They should only be used by applications that | 942 | * to deadlocks. They should only be used by applications that |
@@ -851,7 +1020,9 @@ long btrfs_ioctl(struct file *file, unsigned int | |||
851 | case BTRFS_IOC_BALANCE: | 1020 | case BTRFS_IOC_BALANCE: |
852 | return btrfs_balance(root->fs_info->dev_root); | 1021 | return btrfs_balance(root->fs_info->dev_root); |
853 | case BTRFS_IOC_CLONE: | 1022 | case BTRFS_IOC_CLONE: |
854 | return btrfs_ioctl_clone(file, arg); | 1023 | return btrfs_ioctl_clone(file, arg, 0, 0, 0); |
1024 | case BTRFS_IOC_CLONE_RANGE: | ||
1025 | return btrfs_ioctl_clone_range(file, arg); | ||
855 | case BTRFS_IOC_TRANS_START: | 1026 | case BTRFS_IOC_TRANS_START: |
856 | return btrfs_ioctl_trans_start(file); | 1027 | return btrfs_ioctl_trans_start(file); |
857 | case BTRFS_IOC_TRANS_END: | 1028 | case BTRFS_IOC_TRANS_END: |