Diffstat (limited to 'fs/btrfs/ioctl.c')
 fs/btrfs/ioctl.c | 448 ++++++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 346 insertions(+), 102 deletions(-)
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index c4f17e4e2c9c..85e818ce00c5 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -656,6 +656,106 @@ out_unlock:
 	return error;
 }
 
+/*
+ * When we're defragging a range, we don't want to kick it off again
+ * if it is really just waiting for delalloc to send it down.
+ * If we find a nice big extent or delalloc range for the bytes in the
+ * file you want to defrag, we return 0 to let you know to skip this
+ * part of the file
+ */
+static int check_defrag_in_cache(struct inode *inode, u64 offset, int thresh)
+{
+	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
+	struct extent_map *em = NULL;
+	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
+	u64 end;
+
+	read_lock(&em_tree->lock);
+	em = lookup_extent_mapping(em_tree, offset, PAGE_CACHE_SIZE);
+	read_unlock(&em_tree->lock);
+
+	if (em) {
+		end = extent_map_end(em);
+		free_extent_map(em);
+		if (end - offset > thresh)
+			return 0;
+	}
+	/* if we already have a nice delalloc here, just stop */
+	thresh /= 2;
+	end = count_range_bits(io_tree, &offset, offset + thresh,
+			       thresh, EXTENT_DELALLOC, 1);
+	if (end >= thresh)
+		return 0;
+	return 1;
+}
+
+/*
+ * helper function to walk through a file and find extents
+ * newer than a specific transid, and smaller than thresh.
+ *
+ * This is used by the defragging code to find new and small
+ * extents
+ */
+static int find_new_extents(struct btrfs_root *root,
+			    struct inode *inode, u64 newer_than,
+			    u64 *off, int thresh)
+{
+	struct btrfs_path *path;
+	struct btrfs_key min_key;
+	struct btrfs_key max_key;
+	struct extent_buffer *leaf;
+	struct btrfs_file_extent_item *extent;
+	int type;
+	int ret;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	min_key.objectid = inode->i_ino;
+	min_key.type = BTRFS_EXTENT_DATA_KEY;
+	min_key.offset = *off;
+
+	max_key.objectid = inode->i_ino;
+	max_key.type = (u8)-1;
+	max_key.offset = (u64)-1;
+
+	path->keep_locks = 1;
+
+	while (1) {
+		ret = btrfs_search_forward(root, &min_key, &max_key,
+					   path, 0, newer_than);
+		if (ret != 0)
+			goto none;
+		if (min_key.objectid != inode->i_ino)
+			goto none;
+		if (min_key.type != BTRFS_EXTENT_DATA_KEY)
+			goto none;
+
+		leaf = path->nodes[0];
+		extent = btrfs_item_ptr(leaf, path->slots[0],
+					struct btrfs_file_extent_item);
+
+		type = btrfs_file_extent_type(leaf, extent);
+		if (type == BTRFS_FILE_EXTENT_REG &&
+		    btrfs_file_extent_num_bytes(leaf, extent) < thresh &&
+		    check_defrag_in_cache(inode, min_key.offset, thresh)) {
+			*off = min_key.offset;
+			btrfs_free_path(path);
+			return 0;
+		}
+
+		if (min_key.offset == (u64)-1)
+			goto none;
+
+		min_key.offset++;
+		btrfs_release_path(path);
+	}
+none:
+	btrfs_free_path(path);
+	return -ENOENT;
+}
+
 static int should_defrag_range(struct inode *inode, u64 start, u64 len,
 			       int thresh, u64 *last_len, u64 *skip,
 			       u64 *defrag_end)
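
A note on the two helpers above: find_new_extents() walks the file's extent items newer than a given transid and returns 0 with *off pointing at a small extent worth defragging, or -ENOENT when the walk runs out; check_defrag_in_cache() filters out offsets that already sit in a big cached extent or a pending delalloc range. A minimal caller sketch (hypothetical, with a made-up defrag_one() helper; not patch code):

	u64 off = 0;

	/* keep asking for the next small extent newer than 'transid' */
	while (find_new_extents(root, inode, transid, &off, 64 * 1024) == 0) {
		defrag_one(inode, off);	/* hypothetical: defrag around 'off' */
		off++;			/* step past this key so the search advances */
	}
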
@@ -665,10 +765,6 @@ static int should_defrag_range(struct inode *inode, u64 start, u64 len,
 	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
 	int ret = 1;
 
-
-	if (thresh == 0)
-		thresh = 256 * 1024;
-
 	/*
 	 * make sure that once we start defragging and extent, we keep on
 	 * defragging it
@@ -727,27 +823,176 @@ static int should_defrag_range(struct inode *inode, u64 start, u64 len,
 	return ret;
 }
 
-static int btrfs_defrag_file(struct file *file,
-			     struct btrfs_ioctl_defrag_range_args *range)
+/*
+ * it doesn't do much good to defrag one or two pages
+ * at a time.  This pulls in a nice chunk of pages
+ * to COW and defrag.
+ *
+ * It also makes sure the delalloc code has enough
+ * dirty data to avoid making new small extents as part
+ * of the defrag
+ *
+ * It's a good idea to start RA on this range
+ * before calling this.
+ */
+static int cluster_pages_for_defrag(struct inode *inode,
+				    struct page **pages,
+				    unsigned long start_index,
+				    int num_pages)
 {
-	struct inode *inode = fdentry(file)->d_inode;
-	struct btrfs_root *root = BTRFS_I(inode)->root;
-	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
+	unsigned long file_end;
+	u64 isize = i_size_read(inode);
+	u64 page_start;
+	u64 page_end;
+	int ret;
+	int i;
+	int i_done;
 	struct btrfs_ordered_extent *ordered;
-	struct page *page;
+	struct extent_state *cached_state = NULL;
+
+	if (isize == 0)
+		return 0;
+	file_end = (isize - 1) >> PAGE_CACHE_SHIFT;
+
+	ret = btrfs_delalloc_reserve_space(inode,
+					   num_pages << PAGE_CACHE_SHIFT);
+	if (ret)
+		return ret;
+again:
+	ret = 0;
+	i_done = 0;
+
+	/* step one, lock all the pages */
+	for (i = 0; i < num_pages; i++) {
+		struct page *page;
+		page = grab_cache_page(inode->i_mapping,
+				       start_index + i);
+		if (!page)
+			break;
+
+		if (!PageUptodate(page)) {
+			btrfs_readpage(NULL, page);
+			lock_page(page);
+			if (!PageUptodate(page)) {
+				unlock_page(page);
+				page_cache_release(page);
+				ret = -EIO;
+				break;
+			}
+		}
+		isize = i_size_read(inode);
+		file_end = (isize - 1) >> PAGE_CACHE_SHIFT;
+		if (!isize || page->index > file_end ||
+		    page->mapping != inode->i_mapping) {
+			/* whoops, we blew past eof, skip this page */
+			unlock_page(page);
+			page_cache_release(page);
+			break;
+		}
+		pages[i] = page;
+		i_done++;
+	}
+	if (!i_done || ret)
+		goto out;
+
+	if (!(inode->i_sb->s_flags & MS_ACTIVE))
+		goto out;
+
+	/*
+	 * so now we have a nice long stream of locked
+	 * and up to date pages, lets wait on them
+	 */
+	for (i = 0; i < i_done; i++)
+		wait_on_page_writeback(pages[i]);
+
+	page_start = page_offset(pages[0]);
+	page_end = page_offset(pages[i_done - 1]) + PAGE_CACHE_SIZE;
+
+	lock_extent_bits(&BTRFS_I(inode)->io_tree,
+			 page_start, page_end - 1, 0, &cached_state,
+			 GFP_NOFS);
+	ordered = btrfs_lookup_first_ordered_extent(inode, page_end - 1);
+	if (ordered &&
+	    ordered->file_offset + ordered->len > page_start &&
+	    ordered->file_offset < page_end) {
+		btrfs_put_ordered_extent(ordered);
+		unlock_extent_cached(&BTRFS_I(inode)->io_tree,
+				     page_start, page_end - 1,
+				     &cached_state, GFP_NOFS);
+		for (i = 0; i < i_done; i++) {
+			unlock_page(pages[i]);
+			page_cache_release(pages[i]);
+		}
+		btrfs_wait_ordered_range(inode, page_start,
+					 page_end - page_start);
+		goto again;
+	}
+	if (ordered)
+		btrfs_put_ordered_extent(ordered);
+
+	clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start,
+			 page_end - 1, EXTENT_DIRTY | EXTENT_DELALLOC |
+			 EXTENT_DO_ACCOUNTING, 0, 0, &cached_state,
+			 GFP_NOFS);
+
+	if (i_done != num_pages) {
+		atomic_inc(&BTRFS_I(inode)->outstanding_extents);
+		btrfs_delalloc_release_space(inode,
+				(num_pages - i_done) << PAGE_CACHE_SHIFT);
+	}
+
+
+	btrfs_set_extent_delalloc(inode, page_start, page_end - 1,
+				  &cached_state);
+
+	unlock_extent_cached(&BTRFS_I(inode)->io_tree,
+			     page_start, page_end - 1, &cached_state,
+			     GFP_NOFS);
+
+	for (i = 0; i < i_done; i++) {
+		clear_page_dirty_for_io(pages[i]);
+		ClearPageChecked(pages[i]);
+		set_page_extent_mapped(pages[i]);
+		set_page_dirty(pages[i]);
+		unlock_page(pages[i]);
+		page_cache_release(pages[i]);
+	}
+	return i_done;
+out:
+	for (i = 0; i < i_done; i++) {
+		unlock_page(pages[i]);
+		page_cache_release(pages[i]);
+	}
+	btrfs_delalloc_release_space(inode, num_pages << PAGE_CACHE_SHIFT);
+	return ret;
+
+}
+
+int btrfs_defrag_file(struct inode *inode, struct file *file,
+		      struct btrfs_ioctl_defrag_range_args *range,
+		      u64 newer_than, unsigned long max_to_defrag)
+{
+	struct btrfs_root *root = BTRFS_I(inode)->root;
 	struct btrfs_super_block *disk_super;
+	struct file_ra_state *ra = NULL;
 	unsigned long last_index;
-	unsigned long ra_pages = root->fs_info->bdi.ra_pages;
-	unsigned long total_read = 0;
 	u64 features;
-	u64 page_start;
-	u64 page_end;
 	u64 last_len = 0;
 	u64 skip = 0;
 	u64 defrag_end = 0;
+	u64 newer_off = range->start;
+	int newer_left = 0;
 	unsigned long i;
 	int ret;
+	int defrag_count = 0;
 	int compress_type = BTRFS_COMPRESS_ZLIB;
+	int extent_thresh = range->extent_thresh;
+	int newer_cluster = (256 * 1024) >> PAGE_CACHE_SHIFT;
+	u64 new_align = ~((u64)128 * 1024 - 1);
+	struct page **pages = NULL;
+
+	if (extent_thresh == 0)
+		extent_thresh = 256 * 1024;
 
 	if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) {
 		if (range->compress_type > BTRFS_COMPRESS_TYPES)
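
The accounting in cluster_pages_for_defrag() is all-or-nothing up front: it reserves delalloc space for the whole cluster, then hands back the tail it could not lock once it runs past EOF. A standalone illustration of that arithmetic (a sketch; the 4K page size via PAGE_CACHE_SHIFT is an assumption here):

	#include <stdio.h>

	#define PAGE_CACHE_SHIFT 12	/* assumption: 4K pages */

	int main(void)
	{
		int num_pages = 16;	/* cluster size requested */
		int i_done = 10;	/* pages actually locked before EOF */
		unsigned long reserved = (unsigned long)num_pages << PAGE_CACHE_SHIFT;
		unsigned long returned = (unsigned long)(num_pages - i_done)
								<< PAGE_CACHE_SHIFT;

		/* prints: reserved 65536, gave back 24576, kept 40960 */
		printf("reserved %lu bytes up front, gave back %lu, kept %lu\n",
		       reserved, returned, reserved - returned);
		return 0;
	}
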
@@ -759,6 +1004,27 @@ static int btrfs_defrag_file(struct file *file,
 	if (inode->i_size == 0)
 		return 0;
 
+	/*
+	 * if we were not given a file, allocate a readahead
+	 * context
+	 */
+	if (!file) {
+		ra = kzalloc(sizeof(*ra), GFP_NOFS);
+		if (!ra)
+			return -ENOMEM;
+		file_ra_state_init(ra, inode->i_mapping);
+	} else {
+		ra = &file->f_ra;
+	}
+
+	pages = kmalloc(sizeof(struct page *) * newer_cluster,
+			GFP_NOFS);
+	if (!pages) {
+		ret = -ENOMEM;
+		goto out_ra;
+	}
+
+	/* find the last page to defrag */
 	if (range->start + range->len > range->start) {
 		last_index = min_t(u64, inode->i_size - 1,
 			 range->start + range->len - 1) >> PAGE_CACHE_SHIFT;
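
The range->start + range->len > range->start test in the context above is a u64 wraparound check: the caller-supplied length is only trusted when it is nonzero and the sum does not overflow; otherwise the code falls back to defragging through EOF. A self-contained demonstration of why the comparison works (unsigned overflow is well defined in C):

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint64_t start = 4096;
		uint64_t len = UINT64_MAX;	/* start + len wraps around */

		if (start + len > start)
			printf("bounded range: trust the caller's length\n");
		else
			printf("wrapped or zero length: defrag through EOF\n");
		return 0;
	}
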
@@ -766,11 +1032,37 @@ static int btrfs_defrag_file(struct file *file,
 		last_index = (inode->i_size - 1) >> PAGE_CACHE_SHIFT;
 	}
 
-	i = range->start >> PAGE_CACHE_SHIFT;
-	while (i <= last_index) {
-		if (!should_defrag_range(inode, (u64)i << PAGE_CACHE_SHIFT,
+	if (newer_than) {
+		ret = find_new_extents(root, inode, newer_than,
+				       &newer_off, 64 * 1024);
+		if (!ret) {
+			range->start = newer_off;
+			/*
+			 * we always align our defrag to help keep
+			 * the extents in the file evenly spaced
+			 */
+			i = (newer_off & new_align) >> PAGE_CACHE_SHIFT;
+			newer_left = newer_cluster;
+		} else
+			goto out_ra;
+	} else {
+		i = range->start >> PAGE_CACHE_SHIFT;
+	}
+	if (!max_to_defrag)
+		max_to_defrag = last_index - 1;
+
+	while (i <= last_index && defrag_count < max_to_defrag) {
+		/*
+		 * make sure we stop running if someone unmounts
+		 * the FS
+		 */
+		if (!(inode->i_sb->s_flags & MS_ACTIVE))
+			break;
+
+		if (!newer_than &&
+		    !should_defrag_range(inode, (u64)i << PAGE_CACHE_SHIFT,
 					PAGE_CACHE_SIZE,
-					range->extent_thresh,
+					extent_thresh,
 					&last_len, &skip,
 					&defrag_end)) {
 			unsigned long next;
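
The new_align mask used above, ~((u64)128 * 1024 - 1), rounds an offset down to the previous 128K boundary so that each pass starts on an even cluster edge. A worked example (standalone sketch; the 4K page size for the final index is an assumption):

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint64_t new_align = ~((uint64_t)128 * 1024 - 1);
		uint64_t newer_off = 200 * 1024;	/* small extent found at 200K */
		uint64_t aligned = newer_off & new_align;

		/* prints 131072, the 128K boundary below 200K */
		printf("defrag pass starts at %llu (page index %llu)\n",
		       (unsigned long long)aligned,
		       (unsigned long long)(aligned >> 12));
		return 0;
	}
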
@@ -782,92 +1074,39 @@ static int btrfs_defrag_file(struct file *file,
 			i = max(i + 1, next);
 			continue;
 		}
-
-		if (total_read % ra_pages == 0) {
-			btrfs_force_ra(inode->i_mapping, &file->f_ra, file, i,
-				       min(last_index, i + ra_pages - 1));
-		}
-		total_read++;
-		mutex_lock(&inode->i_mutex);
 		if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)
 			BTRFS_I(inode)->force_compress = compress_type;
 
-		ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE);
-		if (ret)
-			goto err_unlock;
-again:
-		if (inode->i_size == 0 ||
-		    i > ((inode->i_size - 1) >> PAGE_CACHE_SHIFT)) {
-			ret = 0;
-			goto err_reservations;
-		}
+		btrfs_force_ra(inode->i_mapping, ra, file, i, newer_cluster);
 
-		page = grab_cache_page(inode->i_mapping, i);
-		if (!page) {
-			ret = -ENOMEM;
-			goto err_reservations;
-		}
-
-		if (!PageUptodate(page)) {
-			btrfs_readpage(NULL, page);
-			lock_page(page);
-			if (!PageUptodate(page)) {
-				unlock_page(page);
-				page_cache_release(page);
-				ret = -EIO;
-				goto err_reservations;
-			}
-		}
-
-		if (page->mapping != inode->i_mapping) {
-			unlock_page(page);
-			page_cache_release(page);
-			goto again;
-		}
-
-		wait_on_page_writeback(page);
+		ret = cluster_pages_for_defrag(inode, pages, i, newer_cluster);
+		if (ret < 0)
+			goto out_ra;
 
-		if (PageDirty(page)) {
-			btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE);
-			goto loop_unlock;
-		}
+		defrag_count += ret;
+		balance_dirty_pages_ratelimited_nr(inode->i_mapping, ret);
+		i += ret;
 
-		page_start = (u64)page->index << PAGE_CACHE_SHIFT;
-		page_end = page_start + PAGE_CACHE_SIZE - 1;
-		lock_extent(io_tree, page_start, page_end, GFP_NOFS);
+		if (newer_than) {
+			if (newer_off == (u64)-1)
+				break;
 
-		ordered = btrfs_lookup_ordered_extent(inode, page_start);
-		if (ordered) {
-			unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
-			unlock_page(page);
-			page_cache_release(page);
-			btrfs_start_ordered_extent(inode, ordered, 1);
-			btrfs_put_ordered_extent(ordered);
-			goto again;
+			newer_off = max(newer_off + 1,
+					(u64)i << PAGE_CACHE_SHIFT);
+
+			ret = find_new_extents(root, inode,
+					       newer_than, &newer_off,
+					       64 * 1024);
+			if (!ret) {
+				range->start = newer_off;
+				i = (newer_off & new_align) >> PAGE_CACHE_SHIFT;
+				newer_left = newer_cluster;
+			} else {
+				break;
+			}
+		} else {
+			i++;
 		}
-		set_page_extent_mapped(page);
-
-		/*
-		 * this makes sure page_mkwrite is called on the
-		 * page if it is dirtied again later
-		 */
-		clear_page_dirty_for_io(page);
-		clear_extent_bits(&BTRFS_I(inode)->io_tree, page_start,
-				  page_end, EXTENT_DIRTY | EXTENT_DELALLOC |
-				  EXTENT_DO_ACCOUNTING, GFP_NOFS);
-
-		btrfs_set_extent_delalloc(inode, page_start, page_end, NULL);
-		ClearPageChecked(page);
-		set_page_dirty(page);
-		unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
-
-loop_unlock:
-		unlock_page(page);
-		page_cache_release(page);
-		mutex_unlock(&inode->i_mutex);
-
-		balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1);
-		i++;
 	}
 
 	if ((range->flags & BTRFS_DEFRAG_RANGE_START_IO))
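
Stripped of the newer_than bookkeeping, the rewritten loop reduces to the shape below (a simplified sketch of this hunk, not literal patch code): cluster_pages_for_defrag() returns how many pages it redirtied, and that single count advances the cursor, feeds the defrag budget, and throttles writeback.

	while (i <= last_index && defrag_count < max_to_defrag) {
		int got = cluster_pages_for_defrag(inode, pages, i,
						   newer_cluster);
		if (got < 0)
			break;			/* hard error */
		defrag_count += got;		/* pages actually redirtied */
		/* throttle by exactly how much we just dirtied */
		balance_dirty_pages_ratelimited_nr(inode->i_mapping, got);
		i += got;			/* resume after the cluster */
	}
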
@@ -899,12 +1138,14 @@ loop_unlock:
 		btrfs_set_super_incompat_flags(disk_super, features);
 	}
 
-	return 0;
+	if (!file)
+		kfree(ra);
+	return defrag_count;
 
-err_reservations:
-	btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE);
-err_unlock:
-	mutex_unlock(&inode->i_mutex);
+out_ra:
+	if (!file)
+		kfree(ra);
+	kfree(pages);
 	return ret;
 }
 
@@ -1756,7 +1997,10 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
 		/* the rest are all set to zero by kzalloc */
 		range->len = (u64)-1;
 	}
-	ret = btrfs_defrag_file(file, range);
+	ret = btrfs_defrag_file(fdentry(file)->d_inode, file,
+				range, 0, 0);
+	if (ret > 0)
+		ret = 0;
 	kfree(range);
 	break;
 default:
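
With this change btrfs_defrag_file() returns the number of pages defragged on success rather than 0, which is why the ioctl path above clamps positive values. Any other in-kernel caller, an auto-defrag worker for instance, would need the same treatment; a hedged sketch of that convention:

	ret = btrfs_defrag_file(inode, NULL, range, newer_than, max_pages);
	if (ret < 0)
		return ret;	/* a real error, propagate it */
	/* a positive return is a page count, not a failure */
	return 0;
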