aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/ioctl.c
diff options
context:
space:
mode:
author: Sage Weil <sage@newdream.net> 2008-11-12 14:32:25 -0500
committer: Chris Mason <chris.mason@oracle.com> 2008-11-12 14:32:25 -0500
commit: c5c9cd4d1b827fe545ed2a945e91e3a6909f3886 (patch)
tree: 5dae28d8cd871952b105cdc2822ef4e54f1f02f3 /fs/btrfs/ioctl.c
parent: 2ed6d66408527be0d1c6131d44cec7e86008ba26 (diff)
Btrfs: allow clone of an arbitrary file range
This patch adds an additional CLONE_RANGE ioctl to clone an arbitrary (block-aligned) file range to another file. The original CLONE ioctl becomes a special case of cloning the entire file range. The logic is a bit more complex now since ranges may be cloned to different offsets, and because we may only be cloning the beginning or end of a particular extent or checksum item.

An additional sanity check ensures the source and destination files aren't the same (which would previously deadlock), although eventually this could be extended to allow the duplication of file data at a different offset within the same file.

Any extents within the destination range in the target file are dropped.

We currently do not cope with the case where a compressed inline extent needs to be split. This will probably require decompressing the extent into a temporary address_space, and inserting just the cloned portion as a new compressed inline extent. For now, just return -EINVAL in this case. Note that this never comes up in the more common case of cloning an entire file.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs/ioctl.c')
-rw-r--r-- fs/btrfs/ioctl.c 253
1 files changed, 212 insertions, 41 deletions
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 9ff2b4e0e922..4d7cc7c504d0 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -592,7 +592,8 @@ out:
592 return ret; 592 return ret;
593} 593}
594 594
595long btrfs_ioctl_clone(struct file *file, unsigned long src_fd) 595long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, u64 off,
596 u64 olen, u64 destoff)
596{ 597{
597 struct inode *inode = fdentry(file)->d_inode; 598 struct inode *inode = fdentry(file)->d_inode;
598 struct btrfs_root *root = BTRFS_I(inode)->root; 599 struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -606,12 +607,29 @@ long btrfs_ioctl_clone(struct file *file, unsigned long src_fd)
606 u32 nritems; 607 u32 nritems;
607 int slot; 608 int slot;
608 int ret; 609 int ret;
610 u64 len = olen;
611 u64 bs = root->fs_info->sb->s_blocksize;
612 u64 hint_byte;
609 613
610 src_file = fget(src_fd); 614 /*
615 * TODO:
616 * - split compressed inline extents. annoying: we need to
617 * decompress into destination's address_space (the file offset
618 * may change, so source mapping won't do), then recompress (or
619 * otherwise reinsert) a subrange.
620 * - allow ranges within the same file to be cloned (provided
621 * they don't overlap)?
622 */
623
624 src_file = fget(srcfd);
611 if (!src_file) 625 if (!src_file)
612 return -EBADF; 626 return -EBADF;
613 src = src_file->f_dentry->d_inode; 627 src = src_file->f_dentry->d_inode;
614 628
629 ret = -EINVAL;
630 if (src == inode)
631 goto out_fput;
632
615 ret = -EISDIR; 633 ret = -EISDIR;
616 if (S_ISDIR(src->i_mode) || S_ISDIR(inode->i_mode)) 634 if (S_ISDIR(src->i_mode) || S_ISDIR(inode->i_mode))
617 goto out_fput; 635 goto out_fput;
@@ -640,27 +658,46 @@ long btrfs_ioctl_clone(struct file *file, unsigned long src_fd)
640 mutex_lock(&inode->i_mutex); 658 mutex_lock(&inode->i_mutex);
641 } 659 }
642 660
643 ret = -ENOTEMPTY; 661 /* determine range to clone */
644 if (inode->i_size) 662 ret = -EINVAL;
663 if (off >= src->i_size || off + len > src->i_size)
645 goto out_unlock; 664 goto out_unlock;
665 if (len == 0)
666 olen = len = src->i_size - off;
667 /* if we extend to eof, continue to block boundary */
668 if (off + len == src->i_size)
669 len = ((src->i_size + bs-1) & ~(bs-1))
670 - off;
671
672 /* verify the end result is block aligned */
673 if ((off & (bs-1)) ||
674 ((off + len) & (bs-1)))
675 goto out_unlock;
676
677 printk("final src extent is %llu~%llu\n", off, len);
678 printk("final dst extent is %llu~%llu\n", destoff, len);
646 679
647 /* do any pending delalloc/csum calc on src, one way or 680 /* do any pending delalloc/csum calc on src, one way or
648 another, and lock file content */ 681 another, and lock file content */
649 while (1) { 682 while (1) {
650 struct btrfs_ordered_extent *ordered; 683 struct btrfs_ordered_extent *ordered;
651 lock_extent(&BTRFS_I(src)->io_tree, 0, (u64)-1, GFP_NOFS); 684 lock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS);
652 ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1); 685 ordered = btrfs_lookup_first_ordered_extent(inode, off+len);
653 if (BTRFS_I(src)->delalloc_bytes == 0 && !ordered) 686 if (BTRFS_I(src)->delalloc_bytes == 0 && !ordered)
654 break; 687 break;
655 unlock_extent(&BTRFS_I(src)->io_tree, 0, (u64)-1, GFP_NOFS); 688 unlock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS);
656 if (ordered) 689 if (ordered)
657 btrfs_put_ordered_extent(ordered); 690 btrfs_put_ordered_extent(ordered);
658 btrfs_wait_ordered_range(src, 0, (u64)-1); 691 btrfs_wait_ordered_range(src, off, off+len);
659 } 692 }
660 693
661 trans = btrfs_start_transaction(root, 1); 694 trans = btrfs_start_transaction(root, 1);
662 BUG_ON(!trans); 695 BUG_ON(!trans);
663 696
697 /* punch hole in destination first */
698 btrfs_drop_extents(trans, root, inode, off, off+len, 0, &hint_byte);
699
700 /* clone data */
664 key.objectid = src->i_ino; 701 key.objectid = src->i_ino;
665 key.type = BTRFS_EXTENT_DATA_KEY; 702 key.type = BTRFS_EXTENT_DATA_KEY;
666 key.offset = 0; 703 key.offset = 0;
@@ -691,56 +728,178 @@ long btrfs_ioctl_clone(struct file *file, unsigned long src_fd)
691 key.objectid != src->i_ino) 728 key.objectid != src->i_ino)
692 break; 729 break;
693 730
694 if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY || 731 if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY) {
695 btrfs_key_type(&key) == BTRFS_CSUM_ITEM_KEY) { 732 struct btrfs_file_extent_item *extent;
733 int type;
696 u32 size; 734 u32 size;
697 struct btrfs_key new_key; 735 struct btrfs_key new_key;
736 u64 disko = 0, diskl = 0;
737 u64 datao = 0, datal = 0;
738 u8 comp;
698 739
699 size = btrfs_item_size_nr(leaf, slot); 740 size = btrfs_item_size_nr(leaf, slot);
700 read_extent_buffer(leaf, buf, 741 read_extent_buffer(leaf, buf,
701 btrfs_item_ptr_offset(leaf, slot), 742 btrfs_item_ptr_offset(leaf, slot),
702 size); 743 size);
744
745 extent = btrfs_item_ptr(leaf, slot,
746 struct btrfs_file_extent_item);
747 comp = btrfs_file_extent_compression(leaf, extent);
748 type = btrfs_file_extent_type(leaf, extent);
749 if (type == BTRFS_FILE_EXTENT_REG) {
750 disko = btrfs_file_extent_disk_bytenr(leaf, extent);
751 diskl = btrfs_file_extent_disk_num_bytes(leaf, extent);
752 datao = btrfs_file_extent_offset(leaf, extent);
753 datal = btrfs_file_extent_num_bytes(leaf, extent);
754 } else if (type == BTRFS_FILE_EXTENT_INLINE) {
755 /* take upper bound, may be compressed */
756 datal = btrfs_file_extent_ram_bytes(leaf,
757 extent);
758 }
703 btrfs_release_path(root, path); 759 btrfs_release_path(root, path);
704 760
761 if (key.offset + datal < off ||
762 key.offset >= off+len)
763 goto next;
764
705 memcpy(&new_key, &key, sizeof(new_key)); 765 memcpy(&new_key, &key, sizeof(new_key));
706 new_key.objectid = inode->i_ino; 766 new_key.objectid = inode->i_ino;
707 ret = btrfs_insert_empty_item(trans, root, path, 767 new_key.offset = key.offset + destoff - off;
708 &new_key, size);
709 if (ret)
710 goto out;
711 768
712 leaf = path->nodes[0]; 769 if (type == BTRFS_FILE_EXTENT_REG) {
713 slot = path->slots[0]; 770 ret = btrfs_insert_empty_item(trans, root, path,
714 write_extent_buffer(leaf, buf, 771 &new_key, size);
772 if (ret)
773 goto out;
774
775 leaf = path->nodes[0];
776 slot = path->slots[0];
777 write_extent_buffer(leaf, buf,
715 btrfs_item_ptr_offset(leaf, slot), 778 btrfs_item_ptr_offset(leaf, slot),
716 size); 779 size);
717 btrfs_mark_buffer_dirty(leaf);
718 }
719
720 if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY) {
721 struct btrfs_file_extent_item *extent;
722 int found_type;
723 780
724 extent = btrfs_item_ptr(leaf, slot, 781 extent = btrfs_item_ptr(leaf, slot,
725 struct btrfs_file_extent_item); 782 struct btrfs_file_extent_item);
726 found_type = btrfs_file_extent_type(leaf, extent); 783 printk(" orig disk %llu~%llu data %llu~%llu\n",
727 if (found_type == BTRFS_FILE_EXTENT_REG || 784 disko, diskl, datao, datal);
728 found_type == BTRFS_FILE_EXTENT_PREALLOC) { 785
729 u64 ds = btrfs_file_extent_disk_bytenr(leaf, 786 if (off > key.offset) {
730 extent); 787 datao += off - key.offset;
731 u64 dl = btrfs_file_extent_disk_num_bytes(leaf, 788 datal -= off - key.offset;
732 extent); 789 }
733 /* ds == 0 means there's a hole */ 790 if (key.offset + datao + datal + key.offset >
734 if (ds != 0) { 791 off + len)
792 datal = off + len - key.offset - datao;
793 /* disko == 0 means it's a hole */
794 if (!disko)
795 datao = 0;
796 printk(" final disk %llu~%llu data %llu~%llu\n",
797 disko, diskl, datao, datal);
798
799 btrfs_set_file_extent_offset(leaf, extent,
800 datao);
801 btrfs_set_file_extent_num_bytes(leaf, extent,
802 datal);
803 if (disko) {
804 inode_add_bytes(inode, datal);
735 ret = btrfs_inc_extent_ref(trans, root, 805 ret = btrfs_inc_extent_ref(trans, root,
736 ds, dl, leaf->start, 806 disko, diskl, leaf->start,
737 root->root_key.objectid, 807 root->root_key.objectid,
738 trans->transid, 808 trans->transid,
739 inode->i_ino); 809 inode->i_ino);
740 BUG_ON(ret); 810 BUG_ON(ret);
741 } 811 }
812 } else if (type == BTRFS_FILE_EXTENT_INLINE) {
813 u64 skip = 0;
814 u64 trim = 0;
815 if (off > key.offset) {
816 skip = off - key.offset;
817 new_key.offset += skip;
818 }
819 if (key.offset + datal > off+len)
820 trim = key.offset + datal - (off+len);
821 printk("len %lld skip %lld trim %lld\n",
822 datal, skip, trim);
823 if (comp && (skip || trim)) {
824 printk("btrfs clone_range can't split compressed inline extents yet\n");
825 ret = -EINVAL;
826 goto out;
827 }
828 size -= skip + trim;
829 datal -= skip + trim;
830 ret = btrfs_insert_empty_item(trans, root, path,
831 &new_key, size);
832 if (ret)
833 goto out;
834
835 if (skip) {
836 u32 start = btrfs_file_extent_calc_inline_size(0);
837 memmove(buf+start, buf+start+skip,
838 datal);
839 }
840
841 leaf = path->nodes[0];
842 slot = path->slots[0];
843 write_extent_buffer(leaf, buf,
844 btrfs_item_ptr_offset(leaf, slot),
845 size);
846 inode_add_bytes(inode, datal);
742 } 847 }
848
849 btrfs_mark_buffer_dirty(leaf);
743 } 850 }
851
852 if (btrfs_key_type(&key) == BTRFS_CSUM_ITEM_KEY) {
853 u32 size;
854 struct btrfs_key new_key;
855 u64 coverslen;
856 int coff, clen;
857
858 size = btrfs_item_size_nr(leaf, slot);
859 coverslen = (size / BTRFS_CRC32_SIZE) <<
860 root->fs_info->sb->s_blocksize_bits;
861 printk("csums for %llu~%llu\n",
862 key.offset, coverslen);
863 if (key.offset + coverslen < off ||
864 key.offset >= off+len)
865 goto next;
866
867 read_extent_buffer(leaf, buf,
868 btrfs_item_ptr_offset(leaf, slot),
869 size);
870 btrfs_release_path(root, path);
871
872 coff = 0;
873 if (off > key.offset)
874 coff = ((off - key.offset) >>
875 root->fs_info->sb->s_blocksize_bits) *
876 BTRFS_CRC32_SIZE;
877 clen = size - coff;
878 if (key.offset + coverslen > off+len)
879 clen -= ((key.offset+coverslen-off-len) >>
880 root->fs_info->sb->s_blocksize_bits) *
881 BTRFS_CRC32_SIZE;
882 printk(" will dup %d~%d of %d\n",
883 coff, clen, size);
884
885 memcpy(&new_key, &key, sizeof(new_key));
886 new_key.objectid = inode->i_ino;
887 new_key.offset = key.offset + destoff - off;
888
889 ret = btrfs_insert_empty_item(trans, root, path,
890 &new_key, clen);
891 if (ret)
892 goto out;
893
894 leaf = path->nodes[0];
895 slot = path->slots[0];
896 write_extent_buffer(leaf, buf + coff,
897 btrfs_item_ptr_offset(leaf, slot),
898 clen);
899 btrfs_mark_buffer_dirty(leaf);
900 }
901
902 next:
744 btrfs_release_path(root, path); 903 btrfs_release_path(root, path);
745 key.offset++; 904 key.offset++;
746 } 905 }
@@ -749,13 +908,13 @@ out:
749 btrfs_release_path(root, path); 908 btrfs_release_path(root, path);
750 if (ret == 0) { 909 if (ret == 0) {
751 inode->i_mtime = inode->i_ctime = CURRENT_TIME; 910 inode->i_mtime = inode->i_ctime = CURRENT_TIME;
752 inode_set_bytes(inode, inode_get_bytes(src)); 911 if (destoff + olen > inode->i_size)
753 btrfs_i_size_write(inode, src->i_size); 912 btrfs_i_size_write(inode, destoff + olen);
754 BTRFS_I(inode)->flags = BTRFS_I(src)->flags; 913 BTRFS_I(inode)->flags = BTRFS_I(src)->flags;
755 ret = btrfs_update_inode(trans, root, inode); 914 ret = btrfs_update_inode(trans, root, inode);
756 } 915 }
757 btrfs_end_transaction(trans, root); 916 btrfs_end_transaction(trans, root);
758 unlock_extent(&BTRFS_I(src)->io_tree, 0, (u64)-1, GFP_NOFS); 917 unlock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS);
759 if (ret) 918 if (ret)
760 vmtruncate(inode, 0); 919 vmtruncate(inode, 0);
761out_unlock: 920out_unlock:
@@ -768,6 +927,16 @@ out_fput:
768 return ret; 927 return ret;
769} 928}
770 929
930long btrfs_ioctl_clone_range(struct file *file, unsigned long argptr)
931{
932 struct btrfs_ioctl_clone_range_args args;
933
934 if (copy_from_user(&args, (void *)argptr, sizeof(args)))
935 return -EFAULT;
936 return btrfs_ioctl_clone(file, args.src_fd, args.src_offset,
937 args.src_length, args.dest_offset);
938}
939
771/* 940/*
772 * there are many ways the trans_start and trans_end ioctls can lead 941 * there are many ways the trans_start and trans_end ioctls can lead
773 * to deadlocks. They should only be used by applications that 942 * to deadlocks. They should only be used by applications that
@@ -851,7 +1020,9 @@ long btrfs_ioctl(struct file *file, unsigned int
851 case BTRFS_IOC_BALANCE: 1020 case BTRFS_IOC_BALANCE:
852 return btrfs_balance(root->fs_info->dev_root); 1021 return btrfs_balance(root->fs_info->dev_root);
853 case BTRFS_IOC_CLONE: 1022 case BTRFS_IOC_CLONE:
854 return btrfs_ioctl_clone(file, arg); 1023 return btrfs_ioctl_clone(file, arg, 0, 0, 0);
1024 case BTRFS_IOC_CLONE_RANGE:
1025 return btrfs_ioctl_clone_range(file, arg);
855 case BTRFS_IOC_TRANS_START: 1026 case BTRFS_IOC_TRANS_START:
856 return btrfs_ioctl_trans_start(file); 1027 return btrfs_ioctl_trans_start(file);
857 case BTRFS_IOC_TRANS_END: 1028 case BTRFS_IOC_TRANS_END: