aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/ioctl.c
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2011-05-27 16:57:12 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2011-05-27 16:57:12 -0400
commit a0c3061093c8b49facef95dc09a618c6e0d17cb5 (patch)
tree 1d6ff7c06134b71a8bd0721395386e82e46e60c8 /fs/btrfs/ioctl.c
parent 10799db60cbc4f990dd69eb49883477095c66af7 (diff)
parent 174ba50915b08dcfd07c8b5fb795b46a165fa09a (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable: (58 commits) Btrfs: use the device_list_mutex during write_dev_supers Btrfs: setup free ino caching in a more asynchronous way btrfs scrub: don't coalesce pages that are logically discontiguous Btrfs: return -ENOMEM in clear_extent_bit Btrfs: add mount -o auto_defrag Btrfs: using rcu lock in the reader side of devices list Btrfs: drop unnecessary device lock Btrfs: fix the race between remove dev and alloc chunk Btrfs: fix the race between reading and updating devices Btrfs: fix bh leak on __btrfs_open_devices path Btrfs: fix unsafe usage of merge_state Btrfs: allocate extent state and check the result properly fs/btrfs: Add missing btrfs_free_path Btrfs: check return value of btrfs_inc_extent_ref() Btrfs: return error to caller if read_one_inode() fails Btrfs: BUG_ON is deleted from the caller of btrfs_truncate_item & btrfs_extend_item Btrfs: return error code to caller when btrfs_del_item fails Btrfs: return error code to caller when btrfs_previous_item fails btrfs: fix typo 'testeing' -> 'testing' btrfs: typo: 'btrfS' -> 'btrfs' ...
Diffstat (limited to 'fs/btrfs/ioctl.c')
-rw-r--r-- fs/btrfs/ioctl.c 624
1 files changed, 499 insertions, 125 deletions
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 2616f7ed4799..85e818ce00c5 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -50,6 +50,7 @@
50#include "print-tree.h" 50#include "print-tree.h"
51#include "volumes.h" 51#include "volumes.h"
52#include "locking.h" 52#include "locking.h"
53#include "inode-map.h"
53 54
54/* Mask out flags that are inappropriate for the given type of inode. */ 55/* Mask out flags that are inappropriate for the given type of inode. */
55static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags) 56static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags)
@@ -281,8 +282,9 @@ static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg)
281 if (!capable(CAP_SYS_ADMIN)) 282 if (!capable(CAP_SYS_ADMIN))
282 return -EPERM; 283 return -EPERM;
283 284
284 mutex_lock(&fs_info->fs_devices->device_list_mutex); 285 rcu_read_lock();
285 list_for_each_entry(device, &fs_info->fs_devices->devices, dev_list) { 286 list_for_each_entry_rcu(device, &fs_info->fs_devices->devices,
287 dev_list) {
286 if (!device->bdev) 288 if (!device->bdev)
287 continue; 289 continue;
288 q = bdev_get_queue(device->bdev); 290 q = bdev_get_queue(device->bdev);
@@ -292,7 +294,7 @@ static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg)
292 minlen); 294 minlen);
293 } 295 }
294 } 296 }
295 mutex_unlock(&fs_info->fs_devices->device_list_mutex); 297 rcu_read_unlock();
296 if (!num_devices) 298 if (!num_devices)
297 return -EOPNOTSUPP; 299 return -EOPNOTSUPP;
298 300
@@ -329,8 +331,7 @@ static noinline int create_subvol(struct btrfs_root *root,
329 u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID; 331 u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID;
330 u64 index = 0; 332 u64 index = 0;
331 333
332 ret = btrfs_find_free_objectid(NULL, root->fs_info->tree_root, 334 ret = btrfs_find_free_objectid(root->fs_info->tree_root, &objectid);
333 0, &objectid);
334 if (ret) { 335 if (ret) {
335 dput(parent); 336 dput(parent);
336 return ret; 337 return ret;
@@ -422,7 +423,7 @@ static noinline int create_subvol(struct btrfs_root *root,
422 BUG_ON(ret); 423 BUG_ON(ret);
423 424
424 ret = btrfs_insert_dir_item(trans, root, 425 ret = btrfs_insert_dir_item(trans, root,
425 name, namelen, dir->i_ino, &key, 426 name, namelen, dir, &key,
426 BTRFS_FT_DIR, index); 427 BTRFS_FT_DIR, index);
427 if (ret) 428 if (ret)
428 goto fail; 429 goto fail;
@@ -433,7 +434,7 @@ static noinline int create_subvol(struct btrfs_root *root,
433 434
434 ret = btrfs_add_root_ref(trans, root->fs_info->tree_root, 435 ret = btrfs_add_root_ref(trans, root->fs_info->tree_root,
435 objectid, root->root_key.objectid, 436 objectid, root->root_key.objectid,
436 dir->i_ino, index, name, namelen); 437 btrfs_ino(dir), index, name, namelen);
437 438
438 BUG_ON(ret); 439 BUG_ON(ret);
439 440
@@ -655,6 +656,106 @@ out_unlock:
655 return error; 656 return error;
656} 657}
657 658
659/*
660 * When we're defragging a range, we don't want to kick it off again
661 * if it is really just waiting for delalloc to send it down.
662 * If we find a nice big extent or delalloc range for the bytes in the
663 * file you want to defrag, we return 0 to let you know to skip this
664 * part of the file
665 */
666static int check_defrag_in_cache(struct inode *inode, u64 offset, int thresh)
667{
668 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
669 struct extent_map *em = NULL;
670 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
671 u64 end;
672
673 read_lock(&em_tree->lock);
674 em = lookup_extent_mapping(em_tree, offset, PAGE_CACHE_SIZE);
675 read_unlock(&em_tree->lock);
676
677 if (em) {
678 end = extent_map_end(em);
679 free_extent_map(em);
680 if (end - offset > thresh)
681 return 0;
682 }
683 /* if we already have a nice delalloc here, just stop */
684 thresh /= 2;
685 end = count_range_bits(io_tree, &offset, offset + thresh,
686 thresh, EXTENT_DELALLOC, 1);
687 if (end >= thresh)
688 return 0;
689 return 1;
690}
691
692/*
693 * helper function to walk through a file and find extents
694 * newer than a specific transid, and smaller than thresh.
695 *
696 * This is used by the defragging code to find new and small
697 * extents
698 */
699static int find_new_extents(struct btrfs_root *root,
700 struct inode *inode, u64 newer_than,
701 u64 *off, int thresh)
702{
703 struct btrfs_path *path;
704 struct btrfs_key min_key;
705 struct btrfs_key max_key;
706 struct extent_buffer *leaf;
707 struct btrfs_file_extent_item *extent;
708 int type;
709 int ret;
710
711 path = btrfs_alloc_path();
712 if (!path)
713 return -ENOMEM;
714
715 min_key.objectid = inode->i_ino;
716 min_key.type = BTRFS_EXTENT_DATA_KEY;
717 min_key.offset = *off;
718
719 max_key.objectid = inode->i_ino;
720 max_key.type = (u8)-1;
721 max_key.offset = (u64)-1;
722
723 path->keep_locks = 1;
724
725 while(1) {
726 ret = btrfs_search_forward(root, &min_key, &max_key,
727 path, 0, newer_than);
728 if (ret != 0)
729 goto none;
730 if (min_key.objectid != inode->i_ino)
731 goto none;
732 if (min_key.type != BTRFS_EXTENT_DATA_KEY)
733 goto none;
734
735 leaf = path->nodes[0];
736 extent = btrfs_item_ptr(leaf, path->slots[0],
737 struct btrfs_file_extent_item);
738
739 type = btrfs_file_extent_type(leaf, extent);
740 if (type == BTRFS_FILE_EXTENT_REG &&
741 btrfs_file_extent_num_bytes(leaf, extent) < thresh &&
742 check_defrag_in_cache(inode, min_key.offset, thresh)) {
743 *off = min_key.offset;
744 btrfs_free_path(path);
745 return 0;
746 }
747
748 if (min_key.offset == (u64)-1)
749 goto none;
750
751 min_key.offset++;
752 btrfs_release_path(path);
753 }
754none:
755 btrfs_free_path(path);
756 return -ENOENT;
757}
758
658static int should_defrag_range(struct inode *inode, u64 start, u64 len, 759static int should_defrag_range(struct inode *inode, u64 start, u64 len,
659 int thresh, u64 *last_len, u64 *skip, 760 int thresh, u64 *last_len, u64 *skip,
660 u64 *defrag_end) 761 u64 *defrag_end)
@@ -664,10 +765,6 @@ static int should_defrag_range(struct inode *inode, u64 start, u64 len,
664 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; 765 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
665 int ret = 1; 766 int ret = 1;
666 767
667
668 if (thresh == 0)
669 thresh = 256 * 1024;
670
671 /* 768 /*
672 * make sure that once we start defragging and extent, we keep on 769 * make sure that once we start defragging and extent, we keep on
673 * defragging it 770 * defragging it
@@ -726,27 +823,176 @@ static int should_defrag_range(struct inode *inode, u64 start, u64 len,
726 return ret; 823 return ret;
727} 824}
728 825
729static int btrfs_defrag_file(struct file *file, 826/*
730 struct btrfs_ioctl_defrag_range_args *range) 827 * it doesn't do much good to defrag one or two pages
828 * at a time. This pulls in a nice chunk of pages
829 * to COW and defrag.
830 *
831 * It also makes sure the delalloc code has enough
832 * dirty data to avoid making new small extents as part
833 * of the defrag
834 *
835 * It's a good idea to start RA on this range
836 * before calling this.
837 */
838static int cluster_pages_for_defrag(struct inode *inode,
839 struct page **pages,
840 unsigned long start_index,
841 int num_pages)
731{ 842{
732 struct inode *inode = fdentry(file)->d_inode; 843 unsigned long file_end;
733 struct btrfs_root *root = BTRFS_I(inode)->root; 844 u64 isize = i_size_read(inode);
734 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 845 u64 page_start;
846 u64 page_end;
847 int ret;
848 int i;
849 int i_done;
735 struct btrfs_ordered_extent *ordered; 850 struct btrfs_ordered_extent *ordered;
736 struct page *page; 851 struct extent_state *cached_state = NULL;
852
853 if (isize == 0)
854 return 0;
855 file_end = (isize - 1) >> PAGE_CACHE_SHIFT;
856
857 ret = btrfs_delalloc_reserve_space(inode,
858 num_pages << PAGE_CACHE_SHIFT);
859 if (ret)
860 return ret;
861again:
862 ret = 0;
863 i_done = 0;
864
865 /* step one, lock all the pages */
866 for (i = 0; i < num_pages; i++) {
867 struct page *page;
868 page = grab_cache_page(inode->i_mapping,
869 start_index + i);
870 if (!page)
871 break;
872
873 if (!PageUptodate(page)) {
874 btrfs_readpage(NULL, page);
875 lock_page(page);
876 if (!PageUptodate(page)) {
877 unlock_page(page);
878 page_cache_release(page);
879 ret = -EIO;
880 break;
881 }
882 }
883 isize = i_size_read(inode);
884 file_end = (isize - 1) >> PAGE_CACHE_SHIFT;
885 if (!isize || page->index > file_end ||
886 page->mapping != inode->i_mapping) {
887 /* whoops, we blew past eof, skip this page */
888 unlock_page(page);
889 page_cache_release(page);
890 break;
891 }
892 pages[i] = page;
893 i_done++;
894 }
895 if (!i_done || ret)
896 goto out;
897
898 if (!(inode->i_sb->s_flags & MS_ACTIVE))
899 goto out;
900
901 /*
902 * so now we have a nice long stream of locked
903 * and up to date pages, lets wait on them
904 */
905 for (i = 0; i < i_done; i++)
906 wait_on_page_writeback(pages[i]);
907
908 page_start = page_offset(pages[0]);
909 page_end = page_offset(pages[i_done - 1]) + PAGE_CACHE_SIZE;
910
911 lock_extent_bits(&BTRFS_I(inode)->io_tree,
912 page_start, page_end - 1, 0, &cached_state,
913 GFP_NOFS);
914 ordered = btrfs_lookup_first_ordered_extent(inode, page_end - 1);
915 if (ordered &&
916 ordered->file_offset + ordered->len > page_start &&
917 ordered->file_offset < page_end) {
918 btrfs_put_ordered_extent(ordered);
919 unlock_extent_cached(&BTRFS_I(inode)->io_tree,
920 page_start, page_end - 1,
921 &cached_state, GFP_NOFS);
922 for (i = 0; i < i_done; i++) {
923 unlock_page(pages[i]);
924 page_cache_release(pages[i]);
925 }
926 btrfs_wait_ordered_range(inode, page_start,
927 page_end - page_start);
928 goto again;
929 }
930 if (ordered)
931 btrfs_put_ordered_extent(ordered);
932
933 clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start,
934 page_end - 1, EXTENT_DIRTY | EXTENT_DELALLOC |
935 EXTENT_DO_ACCOUNTING, 0, 0, &cached_state,
936 GFP_NOFS);
937
938 if (i_done != num_pages) {
939 atomic_inc(&BTRFS_I(inode)->outstanding_extents);
940 btrfs_delalloc_release_space(inode,
941 (num_pages - i_done) << PAGE_CACHE_SHIFT);
942 }
943
944
945 btrfs_set_extent_delalloc(inode, page_start, page_end - 1,
946 &cached_state);
947
948 unlock_extent_cached(&BTRFS_I(inode)->io_tree,
949 page_start, page_end - 1, &cached_state,
950 GFP_NOFS);
951
952 for (i = 0; i < i_done; i++) {
953 clear_page_dirty_for_io(pages[i]);
954 ClearPageChecked(pages[i]);
955 set_page_extent_mapped(pages[i]);
956 set_page_dirty(pages[i]);
957 unlock_page(pages[i]);
958 page_cache_release(pages[i]);
959 }
960 return i_done;
961out:
962 for (i = 0; i < i_done; i++) {
963 unlock_page(pages[i]);
964 page_cache_release(pages[i]);
965 }
966 btrfs_delalloc_release_space(inode, num_pages << PAGE_CACHE_SHIFT);
967 return ret;
968
969}
970
971int btrfs_defrag_file(struct inode *inode, struct file *file,
972 struct btrfs_ioctl_defrag_range_args *range,
973 u64 newer_than, unsigned long max_to_defrag)
974{
975 struct btrfs_root *root = BTRFS_I(inode)->root;
737 struct btrfs_super_block *disk_super; 976 struct btrfs_super_block *disk_super;
977 struct file_ra_state *ra = NULL;
738 unsigned long last_index; 978 unsigned long last_index;
739 unsigned long ra_pages = root->fs_info->bdi.ra_pages;
740 unsigned long total_read = 0;
741 u64 features; 979 u64 features;
742 u64 page_start;
743 u64 page_end;
744 u64 last_len = 0; 980 u64 last_len = 0;
745 u64 skip = 0; 981 u64 skip = 0;
746 u64 defrag_end = 0; 982 u64 defrag_end = 0;
983 u64 newer_off = range->start;
984 int newer_left = 0;
747 unsigned long i; 985 unsigned long i;
748 int ret; 986 int ret;
987 int defrag_count = 0;
749 int compress_type = BTRFS_COMPRESS_ZLIB; 988 int compress_type = BTRFS_COMPRESS_ZLIB;
989 int extent_thresh = range->extent_thresh;
990 int newer_cluster = (256 * 1024) >> PAGE_CACHE_SHIFT;
991 u64 new_align = ~((u64)128 * 1024 - 1);
992 struct page **pages = NULL;
993
994 if (extent_thresh == 0)
995 extent_thresh = 256 * 1024;
750 996
751 if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) { 997 if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) {
752 if (range->compress_type > BTRFS_COMPRESS_TYPES) 998 if (range->compress_type > BTRFS_COMPRESS_TYPES)
@@ -758,6 +1004,27 @@ static int btrfs_defrag_file(struct file *file,
758 if (inode->i_size == 0) 1004 if (inode->i_size == 0)
759 return 0; 1005 return 0;
760 1006
1007 /*
1008 * if we were not given a file, allocate a readahead
1009 * context
1010 */
1011 if (!file) {
1012 ra = kzalloc(sizeof(*ra), GFP_NOFS);
1013 if (!ra)
1014 return -ENOMEM;
1015 file_ra_state_init(ra, inode->i_mapping);
1016 } else {
1017 ra = &file->f_ra;
1018 }
1019
1020 pages = kmalloc(sizeof(struct page *) * newer_cluster,
1021 GFP_NOFS);
1022 if (!pages) {
1023 ret = -ENOMEM;
1024 goto out_ra;
1025 }
1026
1027 /* find the last page to defrag */
761 if (range->start + range->len > range->start) { 1028 if (range->start + range->len > range->start) {
762 last_index = min_t(u64, inode->i_size - 1, 1029 last_index = min_t(u64, inode->i_size - 1,
763 range->start + range->len - 1) >> PAGE_CACHE_SHIFT; 1030 range->start + range->len - 1) >> PAGE_CACHE_SHIFT;
@@ -765,11 +1032,37 @@ static int btrfs_defrag_file(struct file *file,
765 last_index = (inode->i_size - 1) >> PAGE_CACHE_SHIFT; 1032 last_index = (inode->i_size - 1) >> PAGE_CACHE_SHIFT;
766 } 1033 }
767 1034
768 i = range->start >> PAGE_CACHE_SHIFT; 1035 if (newer_than) {
769 while (i <= last_index) { 1036 ret = find_new_extents(root, inode, newer_than,
770 if (!should_defrag_range(inode, (u64)i << PAGE_CACHE_SHIFT, 1037 &newer_off, 64 * 1024);
1038 if (!ret) {
1039 range->start = newer_off;
1040 /*
1041 * we always align our defrag to help keep
1042 * the extents in the file evenly spaced
1043 */
1044 i = (newer_off & new_align) >> PAGE_CACHE_SHIFT;
1045 newer_left = newer_cluster;
1046 } else
1047 goto out_ra;
1048 } else {
1049 i = range->start >> PAGE_CACHE_SHIFT;
1050 }
1051 if (!max_to_defrag)
1052 max_to_defrag = last_index - 1;
1053
1054 while (i <= last_index && defrag_count < max_to_defrag) {
1055 /*
1056 * make sure we stop running if someone unmounts
1057 * the FS
1058 */
1059 if (!(inode->i_sb->s_flags & MS_ACTIVE))
1060 break;
1061
1062 if (!newer_than &&
1063 !should_defrag_range(inode, (u64)i << PAGE_CACHE_SHIFT,
771 PAGE_CACHE_SIZE, 1064 PAGE_CACHE_SIZE,
772 range->extent_thresh, 1065 extent_thresh,
773 &last_len, &skip, 1066 &last_len, &skip,
774 &defrag_end)) { 1067 &defrag_end)) {
775 unsigned long next; 1068 unsigned long next;
@@ -781,92 +1074,39 @@ static int btrfs_defrag_file(struct file *file,
781 i = max(i + 1, next); 1074 i = max(i + 1, next);
782 continue; 1075 continue;
783 } 1076 }
784
785 if (total_read % ra_pages == 0) {
786 btrfs_force_ra(inode->i_mapping, &file->f_ra, file, i,
787 min(last_index, i + ra_pages - 1));
788 }
789 total_read++;
790 mutex_lock(&inode->i_mutex);
791 if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) 1077 if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)
792 BTRFS_I(inode)->force_compress = compress_type; 1078 BTRFS_I(inode)->force_compress = compress_type;
793 1079
794 ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE); 1080 btrfs_force_ra(inode->i_mapping, ra, file, i, newer_cluster);
795 if (ret)
796 goto err_unlock;
797again:
798 if (inode->i_size == 0 ||
799 i > ((inode->i_size - 1) >> PAGE_CACHE_SHIFT)) {
800 ret = 0;
801 goto err_reservations;
802 }
803 1081
804 page = grab_cache_page(inode->i_mapping, i); 1082 ret = cluster_pages_for_defrag(inode, pages, i, newer_cluster);
805 if (!page) { 1083 if (ret < 0)
806 ret = -ENOMEM; 1084 goto out_ra;
807 goto err_reservations;
808 }
809
810 if (!PageUptodate(page)) {
811 btrfs_readpage(NULL, page);
812 lock_page(page);
813 if (!PageUptodate(page)) {
814 unlock_page(page);
815 page_cache_release(page);
816 ret = -EIO;
817 goto err_reservations;
818 }
819 }
820
821 if (page->mapping != inode->i_mapping) {
822 unlock_page(page);
823 page_cache_release(page);
824 goto again;
825 }
826 1085
827 wait_on_page_writeback(page); 1086 defrag_count += ret;
1087 balance_dirty_pages_ratelimited_nr(inode->i_mapping, ret);
1088 i += ret;
828 1089
829 if (PageDirty(page)) { 1090 if (newer_than) {
830 btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE); 1091 if (newer_off == (u64)-1)
831 goto loop_unlock; 1092 break;
832 }
833
834 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
835 page_end = page_start + PAGE_CACHE_SIZE - 1;
836 lock_extent(io_tree, page_start, page_end, GFP_NOFS);
837 1093
838 ordered = btrfs_lookup_ordered_extent(inode, page_start); 1094 newer_off = max(newer_off + 1,
839 if (ordered) { 1095 (u64)i << PAGE_CACHE_SHIFT);
840 unlock_extent(io_tree, page_start, page_end, GFP_NOFS); 1096
841 unlock_page(page); 1097 ret = find_new_extents(root, inode,
842 page_cache_release(page); 1098 newer_than, &newer_off,
843 btrfs_start_ordered_extent(inode, ordered, 1); 1099 64 * 1024);
844 btrfs_put_ordered_extent(ordered); 1100 if (!ret) {
845 goto again; 1101 range->start = newer_off;
1102 i = (newer_off & new_align) >> PAGE_CACHE_SHIFT;
1103 newer_left = newer_cluster;
1104 } else {
1105 break;
1106 }
1107 } else {
1108 i++;
846 } 1109 }
847 set_page_extent_mapped(page);
848
849 /*
850 * this makes sure page_mkwrite is called on the
851 * page if it is dirtied again later
852 */
853 clear_page_dirty_for_io(page);
854 clear_extent_bits(&BTRFS_I(inode)->io_tree, page_start,
855 page_end, EXTENT_DIRTY | EXTENT_DELALLOC |
856 EXTENT_DO_ACCOUNTING, GFP_NOFS);
857
858 btrfs_set_extent_delalloc(inode, page_start, page_end, NULL);
859 ClearPageChecked(page);
860 set_page_dirty(page);
861 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
862
863loop_unlock:
864 unlock_page(page);
865 page_cache_release(page);
866 mutex_unlock(&inode->i_mutex);
867
868 balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1);
869 i++;
870 } 1110 }
871 1111
872 if ((range->flags & BTRFS_DEFRAG_RANGE_START_IO)) 1112 if ((range->flags & BTRFS_DEFRAG_RANGE_START_IO))
@@ -898,12 +1138,14 @@ loop_unlock:
898 btrfs_set_super_incompat_flags(disk_super, features); 1138 btrfs_set_super_incompat_flags(disk_super, features);
899 } 1139 }
900 1140
901 return 0; 1141 if (!file)
1142 kfree(ra);
1143 return defrag_count;
902 1144
903err_reservations: 1145out_ra:
904 btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE); 1146 if (!file)
905err_unlock: 1147 kfree(ra);
906 mutex_unlock(&inode->i_mutex); 1148 kfree(pages);
907 return ret; 1149 return ret;
908} 1150}
909 1151
@@ -1129,7 +1371,7 @@ static noinline int btrfs_ioctl_subvol_getflags(struct file *file,
1129 int ret = 0; 1371 int ret = 0;
1130 u64 flags = 0; 1372 u64 flags = 0;
1131 1373
1132 if (inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) 1374 if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID)
1133 return -EINVAL; 1375 return -EINVAL;
1134 1376
1135 down_read(&root->fs_info->subvol_sem); 1377 down_read(&root->fs_info->subvol_sem);
@@ -1156,7 +1398,7 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
1156 if (root->fs_info->sb->s_flags & MS_RDONLY) 1398 if (root->fs_info->sb->s_flags & MS_RDONLY)
1157 return -EROFS; 1399 return -EROFS;
1158 1400
1159 if (inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) 1401 if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID)
1160 return -EINVAL; 1402 return -EINVAL;
1161 1403
1162 if (copy_from_user(&flags, arg, sizeof(flags))) 1404 if (copy_from_user(&flags, arg, sizeof(flags)))
@@ -1279,7 +1521,6 @@ static noinline int copy_to_sk(struct btrfs_root *root,
1279 int nritems; 1521 int nritems;
1280 int i; 1522 int i;
1281 int slot; 1523 int slot;
1282 int found = 0;
1283 int ret = 0; 1524 int ret = 0;
1284 1525
1285 leaf = path->nodes[0]; 1526 leaf = path->nodes[0];
@@ -1326,7 +1567,7 @@ static noinline int copy_to_sk(struct btrfs_root *root,
1326 item_off, item_len); 1567 item_off, item_len);
1327 *sk_offset += item_len; 1568 *sk_offset += item_len;
1328 } 1569 }
1329 found++; 1570 (*num_found)++;
1330 1571
1331 if (*num_found >= sk->nr_items) 1572 if (*num_found >= sk->nr_items)
1332 break; 1573 break;
@@ -1345,7 +1586,6 @@ advance_key:
1345 } else 1586 } else
1346 ret = 1; 1587 ret = 1;
1347overflow: 1588overflow:
1348 *num_found += found;
1349 return ret; 1589 return ret;
1350} 1590}
1351 1591
@@ -1402,7 +1642,7 @@ static noinline int search_ioctl(struct inode *inode,
1402 } 1642 }
1403 ret = copy_to_sk(root, path, &key, sk, args->buf, 1643 ret = copy_to_sk(root, path, &key, sk, args->buf,
1404 &sk_offset, &num_found); 1644 &sk_offset, &num_found);
1405 btrfs_release_path(root, path); 1645 btrfs_release_path(path);
1406 if (ret || num_found >= sk->nr_items) 1646 if (ret || num_found >= sk->nr_items)
1407 break; 1647 break;
1408 1648
@@ -1509,7 +1749,7 @@ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info,
1509 if (key.offset == BTRFS_FIRST_FREE_OBJECTID) 1749 if (key.offset == BTRFS_FIRST_FREE_OBJECTID)
1510 break; 1750 break;
1511 1751
1512 btrfs_release_path(root, path); 1752 btrfs_release_path(path);
1513 key.objectid = key.offset; 1753 key.objectid = key.offset;
1514 key.offset = (u64)-1; 1754 key.offset = (u64)-1;
1515 dirid = key.objectid; 1755 dirid = key.objectid;
@@ -1639,7 +1879,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
1639 goto out_dput; 1879 goto out_dput;
1640 } 1880 }
1641 1881
1642 if (inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) { 1882 if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID) {
1643 err = -EINVAL; 1883 err = -EINVAL;
1644 goto out_dput; 1884 goto out_dput;
1645 } 1885 }
@@ -1757,7 +1997,10 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
1757 /* the rest are all set to zero by kzalloc */ 1997 /* the rest are all set to zero by kzalloc */
1758 range->len = (u64)-1; 1998 range->len = (u64)-1;
1759 } 1999 }
1760 ret = btrfs_defrag_file(file, range); 2000 ret = btrfs_defrag_file(fdentry(file)->d_inode, file,
2001 range, 0, 0);
2002 if (ret > 0)
2003 ret = 0;
1761 kfree(range); 2004 kfree(range);
1762 break; 2005 break;
1763 default: 2006 default:
@@ -1809,6 +2052,75 @@ static long btrfs_ioctl_rm_dev(struct btrfs_root *root, void __user *arg)
1809 return ret; 2052 return ret;
1810} 2053}
1811 2054
2055static long btrfs_ioctl_fs_info(struct btrfs_root *root, void __user *arg)
2056{
2057 struct btrfs_ioctl_fs_info_args fi_args;
2058 struct btrfs_device *device;
2059 struct btrfs_device *next;
2060 struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
2061
2062 if (!capable(CAP_SYS_ADMIN))
2063 return -EPERM;
2064
2065 fi_args.num_devices = fs_devices->num_devices;
2066 fi_args.max_id = 0;
2067 memcpy(&fi_args.fsid, root->fs_info->fsid, sizeof(fi_args.fsid));
2068
2069 mutex_lock(&fs_devices->device_list_mutex);
2070 list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) {
2071 if (device->devid > fi_args.max_id)
2072 fi_args.max_id = device->devid;
2073 }
2074 mutex_unlock(&fs_devices->device_list_mutex);
2075
2076 if (copy_to_user(arg, &fi_args, sizeof(fi_args)))
2077 return -EFAULT;
2078
2079 return 0;
2080}
2081
2082static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg)
2083{
2084 struct btrfs_ioctl_dev_info_args *di_args;
2085 struct btrfs_device *dev;
2086 struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
2087 int ret = 0;
2088 char *s_uuid = NULL;
2089 char empty_uuid[BTRFS_UUID_SIZE] = {0};
2090
2091 if (!capable(CAP_SYS_ADMIN))
2092 return -EPERM;
2093
2094 di_args = memdup_user(arg, sizeof(*di_args));
2095 if (IS_ERR(di_args))
2096 return PTR_ERR(di_args);
2097
2098 if (memcmp(empty_uuid, di_args->uuid, BTRFS_UUID_SIZE) != 0)
2099 s_uuid = di_args->uuid;
2100
2101 mutex_lock(&fs_devices->device_list_mutex);
2102 dev = btrfs_find_device(root, di_args->devid, s_uuid, NULL);
2103 mutex_unlock(&fs_devices->device_list_mutex);
2104
2105 if (!dev) {
2106 ret = -ENODEV;
2107 goto out;
2108 }
2109
2110 di_args->devid = dev->devid;
2111 di_args->bytes_used = dev->bytes_used;
2112 di_args->total_bytes = dev->total_bytes;
2113 memcpy(di_args->uuid, dev->uuid, sizeof(di_args->uuid));
2114 strncpy(di_args->path, dev->name, sizeof(di_args->path));
2115
2116out:
2117 if (ret == 0 && copy_to_user(arg, di_args, sizeof(*di_args)))
2118 ret = -EFAULT;
2119
2120 kfree(di_args);
2121 return ret;
2122}
2123
1812static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, 2124static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
1813 u64 off, u64 olen, u64 destoff) 2125 u64 off, u64 olen, u64 destoff)
1814{ 2126{
@@ -1925,7 +2237,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
1925 } 2237 }
1926 2238
1927 /* clone data */ 2239 /* clone data */
1928 key.objectid = src->i_ino; 2240 key.objectid = btrfs_ino(src);
1929 key.type = BTRFS_EXTENT_DATA_KEY; 2241 key.type = BTRFS_EXTENT_DATA_KEY;
1930 key.offset = 0; 2242 key.offset = 0;
1931 2243
@@ -1952,7 +2264,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
1952 2264
1953 btrfs_item_key_to_cpu(leaf, &key, slot); 2265 btrfs_item_key_to_cpu(leaf, &key, slot);
1954 if (btrfs_key_type(&key) > BTRFS_EXTENT_DATA_KEY || 2266 if (btrfs_key_type(&key) > BTRFS_EXTENT_DATA_KEY ||
1955 key.objectid != src->i_ino) 2267 key.objectid != btrfs_ino(src))
1956 break; 2268 break;
1957 2269
1958 if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY) { 2270 if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY) {
@@ -1988,14 +2300,14 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
1988 datal = btrfs_file_extent_ram_bytes(leaf, 2300 datal = btrfs_file_extent_ram_bytes(leaf,
1989 extent); 2301 extent);
1990 } 2302 }
1991 btrfs_release_path(root, path); 2303 btrfs_release_path(path);
1992 2304
1993 if (key.offset + datal <= off || 2305 if (key.offset + datal <= off ||
1994 key.offset >= off+len) 2306 key.offset >= off+len)
1995 goto next; 2307 goto next;
1996 2308
1997 memcpy(&new_key, &key, sizeof(new_key)); 2309 memcpy(&new_key, &key, sizeof(new_key));
1998 new_key.objectid = inode->i_ino; 2310 new_key.objectid = btrfs_ino(inode);
1999 if (off <= key.offset) 2311 if (off <= key.offset)
2000 new_key.offset = key.offset + destoff - off; 2312 new_key.offset = key.offset + destoff - off;
2001 else 2313 else
@@ -2049,7 +2361,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
2049 ret = btrfs_inc_extent_ref(trans, root, 2361 ret = btrfs_inc_extent_ref(trans, root,
2050 disko, diskl, 0, 2362 disko, diskl, 0,
2051 root->root_key.objectid, 2363 root->root_key.objectid,
2052 inode->i_ino, 2364 btrfs_ino(inode),
2053 new_key.offset - datao); 2365 new_key.offset - datao);
2054 BUG_ON(ret); 2366 BUG_ON(ret);
2055 } 2367 }
@@ -2098,7 +2410,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
2098 } 2410 }
2099 2411
2100 btrfs_mark_buffer_dirty(leaf); 2412 btrfs_mark_buffer_dirty(leaf);
2101 btrfs_release_path(root, path); 2413 btrfs_release_path(path);
2102 2414
2103 inode->i_mtime = inode->i_ctime = CURRENT_TIME; 2415 inode->i_mtime = inode->i_ctime = CURRENT_TIME;
2104 2416
@@ -2119,12 +2431,12 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
2119 btrfs_end_transaction(trans, root); 2431 btrfs_end_transaction(trans, root);
2120 } 2432 }
2121next: 2433next:
2122 btrfs_release_path(root, path); 2434 btrfs_release_path(path);
2123 key.offset++; 2435 key.offset++;
2124 } 2436 }
2125 ret = 0; 2437 ret = 0;
2126out: 2438out:
2127 btrfs_release_path(root, path); 2439 btrfs_release_path(path);
2128 unlock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS); 2440 unlock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS);
2129out_unlock: 2441out_unlock:
2130 mutex_unlock(&src->i_mutex); 2442 mutex_unlock(&src->i_mutex);
@@ -2471,6 +2783,58 @@ static noinline long btrfs_ioctl_wait_sync(struct file *file, void __user *argp)
2471 return btrfs_wait_for_commit(root, transid); 2783 return btrfs_wait_for_commit(root, transid);
2472} 2784}
2473 2785
2786static long btrfs_ioctl_scrub(struct btrfs_root *root, void __user *arg)
2787{
2788 int ret;
2789 struct btrfs_ioctl_scrub_args *sa;
2790
2791 if (!capable(CAP_SYS_ADMIN))
2792 return -EPERM;
2793
2794 sa = memdup_user(arg, sizeof(*sa));
2795 if (IS_ERR(sa))
2796 return PTR_ERR(sa);
2797
2798 ret = btrfs_scrub_dev(root, sa->devid, sa->start, sa->end,
2799 &sa->progress, sa->flags & BTRFS_SCRUB_READONLY);
2800
2801 if (copy_to_user(arg, sa, sizeof(*sa)))
2802 ret = -EFAULT;
2803
2804 kfree(sa);
2805 return ret;
2806}
2807
2808static long btrfs_ioctl_scrub_cancel(struct btrfs_root *root, void __user *arg)
2809{
2810 if (!capable(CAP_SYS_ADMIN))
2811 return -EPERM;
2812
2813 return btrfs_scrub_cancel(root);
2814}
2815
2816static long btrfs_ioctl_scrub_progress(struct btrfs_root *root,
2817 void __user *arg)
2818{
2819 struct btrfs_ioctl_scrub_args *sa;
2820 int ret;
2821
2822 if (!capable(CAP_SYS_ADMIN))
2823 return -EPERM;
2824
2825 sa = memdup_user(arg, sizeof(*sa));
2826 if (IS_ERR(sa))
2827 return PTR_ERR(sa);
2828
2829 ret = btrfs_scrub_progress(root, sa->devid, &sa->progress);
2830
2831 if (copy_to_user(arg, sa, sizeof(*sa)))
2832 ret = -EFAULT;
2833
2834 kfree(sa);
2835 return ret;
2836}
2837
2474long btrfs_ioctl(struct file *file, unsigned int 2838long btrfs_ioctl(struct file *file, unsigned int
2475 cmd, unsigned long arg) 2839 cmd, unsigned long arg)
2476{ 2840{
@@ -2510,6 +2874,10 @@ long btrfs_ioctl(struct file *file, unsigned int
2510 return btrfs_ioctl_add_dev(root, argp); 2874 return btrfs_ioctl_add_dev(root, argp);
2511 case BTRFS_IOC_RM_DEV: 2875 case BTRFS_IOC_RM_DEV:
2512 return btrfs_ioctl_rm_dev(root, argp); 2876 return btrfs_ioctl_rm_dev(root, argp);
2877 case BTRFS_IOC_FS_INFO:
2878 return btrfs_ioctl_fs_info(root, argp);
2879 case BTRFS_IOC_DEV_INFO:
2880 return btrfs_ioctl_dev_info(root, argp);
2513 case BTRFS_IOC_BALANCE: 2881 case BTRFS_IOC_BALANCE:
2514 return btrfs_balance(root->fs_info->dev_root); 2882 return btrfs_balance(root->fs_info->dev_root);
2515 case BTRFS_IOC_CLONE: 2883 case BTRFS_IOC_CLONE:
@@ -2533,6 +2901,12 @@ long btrfs_ioctl(struct file *file, unsigned int
2533 return btrfs_ioctl_start_sync(file, argp); 2901 return btrfs_ioctl_start_sync(file, argp);
2534 case BTRFS_IOC_WAIT_SYNC: 2902 case BTRFS_IOC_WAIT_SYNC:
2535 return btrfs_ioctl_wait_sync(file, argp); 2903 return btrfs_ioctl_wait_sync(file, argp);
2904 case BTRFS_IOC_SCRUB:
2905 return btrfs_ioctl_scrub(root, argp);
2906 case BTRFS_IOC_SCRUB_CANCEL:
2907 return btrfs_ioctl_scrub_cancel(root, argp);
2908 case BTRFS_IOC_SCRUB_PROGRESS:
2909 return btrfs_ioctl_scrub_progress(root, argp);
2536 } 2910 }
2537 2911
2538 return -ENOTTY; 2912 return -ENOTTY;