diff options
| -rw-r--r-- | fs/ocfs2/ioctl.c | 5 | ||||
| -rw-r--r-- | fs/ocfs2/move_extents.c | 308 | ||||
| -rw-r--r-- | fs/ocfs2/move_extents.h | 2 |
3 files changed, 315 insertions, 0 deletions
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c index fd248ed53df7..59100598b0cb 100644 --- a/fs/ocfs2/ioctl.c +++ b/fs/ocfs2/ioctl.c | |||
| @@ -26,6 +26,7 @@ | |||
| 26 | #include "dir.h" | 26 | #include "dir.h" |
| 27 | #include "buffer_head_io.h" | 27 | #include "buffer_head_io.h" |
| 28 | #include "suballoc.h" | 28 | #include "suballoc.h" |
| 29 | #include "move_extents.h" | ||
| 29 | 30 | ||
| 30 | #include <linux/ext2_fs.h> | 31 | #include <linux/ext2_fs.h> |
| 31 | 32 | ||
| @@ -951,6 +952,8 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
| 951 | return -EFAULT; | 952 | return -EFAULT; |
| 952 | 953 | ||
| 953 | return ocfs2_info_handle(inode, &info, 0); | 954 | return ocfs2_info_handle(inode, &info, 0); |
| 955 | case OCFS2_IOC_MOVE_EXT: | ||
| 956 | return ocfs2_ioctl_move_extents(filp, (void __user *)arg); | ||
| 954 | default: | 957 | default: |
| 955 | return -ENOTTY; | 958 | return -ENOTTY; |
| 956 | } | 959 | } |
| @@ -993,6 +996,8 @@ long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg) | |||
| 993 | return -EFAULT; | 996 | return -EFAULT; |
| 994 | 997 | ||
| 995 | return ocfs2_info_handle(inode, &info, 1); | 998 | return ocfs2_info_handle(inode, &info, 1); |
| 999 | case OCFS2_IOC_MOVE_EXT: | ||
| 1000 | break; | ||
| 996 | default: | 1001 | default: |
| 997 | return -ENOIOCTLCMD; | 1002 | return -ENOIOCTLCMD; |
| 998 | } | 1003 | } |
diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c index 1c822e08fea0..800552168d8a 100644 --- a/fs/ocfs2/move_extents.c +++ b/fs/ocfs2/move_extents.c | |||
| @@ -827,3 +827,311 @@ static void ocfs2_calc_extent_defrag_len(u32 *alloc_size, u32 *len_defraged, | |||
| 827 | *len_defraged = 0; | 827 | *len_defraged = 0; |
| 828 | } | 828 | } |
| 829 | } | 829 | } |
| 830 | |||
| 831 | static int __ocfs2_move_extents_range(struct buffer_head *di_bh, | ||
| 832 | struct ocfs2_move_extents_context *context) | ||
| 833 | { | ||
| 834 | int ret = 0, flags, do_defrag, skip = 0; | ||
| 835 | u32 cpos, phys_cpos, move_start, len_to_move, alloc_size; | ||
| 836 | u32 len_defraged = 0, defrag_thresh = 0, new_phys_cpos = 0; | ||
| 837 | |||
| 838 | struct inode *inode = context->inode; | ||
| 839 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; | ||
| 840 | struct ocfs2_move_extents *range = context->range; | ||
| 841 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
| 842 | |||
| 843 | if ((inode->i_size == 0) || (range->me_len == 0)) | ||
| 844 | return 0; | ||
| 845 | |||
| 846 | if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) | ||
| 847 | return 0; | ||
| 848 | |||
| 849 | context->refcount_loc = le64_to_cpu(di->i_refcount_loc); | ||
| 850 | |||
| 851 | ocfs2_init_dinode_extent_tree(&context->et, INODE_CACHE(inode), di_bh); | ||
| 852 | ocfs2_init_dealloc_ctxt(&context->dealloc); | ||
| 853 | |||
| 854 | /* | ||
| 855 | * TO-DO XXX: | ||
| 856 | * | ||
| 857 | * - xattr extents. | ||
| 858 | */ | ||
| 859 | |||
| 860 | do_defrag = context->auto_defrag; | ||
| 861 | |||
| 862 | /* | ||
| 863 | * extents moving happens in unit of clusters, for the sake | ||
| 864 | * of simplicity, we may ignore two clusters where 'byte_start' | ||
| 865 | * and 'byte_start + len' were within. | ||
| 866 | */ | ||
| 867 | move_start = ocfs2_clusters_for_bytes(osb->sb, range->me_start); | ||
| 868 | len_to_move = (range->me_start + range->me_len) >> | ||
| 869 | osb->s_clustersize_bits; | ||
| 870 | if (len_to_move >= move_start) | ||
| 871 | len_to_move -= move_start; | ||
| 872 | else | ||
| 873 | len_to_move = 0; | ||
| 874 | |||
| 875 | if (do_defrag) | ||
| 876 | defrag_thresh = range->me_threshold >> osb->s_clustersize_bits; | ||
| 877 | else | ||
| 878 | new_phys_cpos = ocfs2_blocks_to_clusters(inode->i_sb, | ||
| 879 | range->me_goal); | ||
| 880 | |||
| 881 | mlog(0, "Inode: %llu, start: %llu, len: %llu, cstart: %u, clen: %u, " | ||
| 882 | "thresh: %u\n", | ||
| 883 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | ||
| 884 | (unsigned long long)range->me_start, | ||
| 885 | (unsigned long long)range->me_len, | ||
| 886 | move_start, len_to_move, defrag_thresh); | ||
| 887 | |||
| 888 | cpos = move_start; | ||
| 889 | while (len_to_move) { | ||
| 890 | ret = ocfs2_get_clusters(inode, cpos, &phys_cpos, &alloc_size, | ||
| 891 | &flags); | ||
| 892 | if (ret) { | ||
| 893 | mlog_errno(ret); | ||
| 894 | goto out; | ||
| 895 | } | ||
| 896 | |||
| 897 | if (alloc_size > len_to_move) | ||
| 898 | alloc_size = len_to_move; | ||
| 899 | |||
| 900 | /* | ||
| 901 | * XXX: how to deal with a hole: | ||
| 902 | * | ||
| 903 | * - skip the hole of course | ||
| 904 | * - force a new defragmentation | ||
| 905 | */ | ||
| 906 | if (!phys_cpos) { | ||
| 907 | if (do_defrag) | ||
| 908 | len_defraged = 0; | ||
| 909 | |||
| 910 | goto next; | ||
| 911 | } | ||
| 912 | |||
| 913 | if (do_defrag) { | ||
| 914 | ocfs2_calc_extent_defrag_len(&alloc_size, &len_defraged, | ||
| 915 | defrag_thresh, &skip); | ||
| 916 | /* | ||
| 917 | * skip large extents | ||
| 918 | */ | ||
| 919 | if (skip) { | ||
| 920 | skip = 0; | ||
| 921 | goto next; | ||
| 922 | } | ||
| 923 | |||
| 924 | mlog(0, "#Defrag: cpos: %u, phys_cpos: %u, " | ||
| 925 | "alloc_size: %u, len_defraged: %u\n", | ||
| 926 | cpos, phys_cpos, alloc_size, len_defraged); | ||
| 927 | |||
| 928 | ret = ocfs2_defrag_extent(context, cpos, phys_cpos, | ||
| 929 | alloc_size, flags); | ||
| 930 | } else { | ||
| 931 | ret = ocfs2_move_extent(context, cpos, phys_cpos, | ||
| 932 | &new_phys_cpos, alloc_size, | ||
| 933 | flags); | ||
| 934 | |||
| 935 | new_phys_cpos += alloc_size; | ||
| 936 | } | ||
| 937 | |||
| 938 | if (ret < 0) { | ||
| 939 | mlog_errno(ret); | ||
| 940 | goto out; | ||
| 941 | } | ||
| 942 | |||
| 943 | context->clusters_moved += alloc_size; | ||
| 944 | next: | ||
| 945 | cpos += alloc_size; | ||
| 946 | len_to_move -= alloc_size; | ||
| 947 | } | ||
| 948 | |||
| 949 | range->me_flags |= OCFS2_MOVE_EXT_FL_COMPLETE; | ||
| 950 | |||
| 951 | out: | ||
| 952 | range->me_moved_len = ocfs2_clusters_to_bytes(osb->sb, | ||
| 953 | context->clusters_moved); | ||
| 954 | range->me_new_offset = ocfs2_clusters_to_bytes(osb->sb, | ||
| 955 | context->new_phys_cpos); | ||
| 956 | |||
| 957 | ocfs2_schedule_truncate_log_flush(osb, 1); | ||
| 958 | ocfs2_run_deallocs(osb, &context->dealloc); | ||
| 959 | |||
| 960 | return ret; | ||
| 961 | } | ||
| 962 | |||
| 963 | static int ocfs2_move_extents(struct ocfs2_move_extents_context *context) | ||
| 964 | { | ||
| 965 | int status; | ||
| 966 | handle_t *handle; | ||
| 967 | struct inode *inode = context->inode; | ||
| 968 | struct ocfs2_dinode *di; | ||
| 969 | struct buffer_head *di_bh = NULL; | ||
| 970 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
| 971 | |||
| 972 | if (!inode) | ||
| 973 | return -ENOENT; | ||
| 974 | |||
| 975 | if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb)) | ||
| 976 | return -EROFS; | ||
| 977 | |||
| 978 | mutex_lock(&inode->i_mutex); | ||
| 979 | |||
| 980 | /* | ||
| 981 | * This prevents concurrent writes from other nodes | ||
| 982 | */ | ||
| 983 | status = ocfs2_rw_lock(inode, 1); | ||
| 984 | if (status) { | ||
| 985 | mlog_errno(status); | ||
| 986 | goto out; | ||
| 987 | } | ||
| 988 | |||
| 989 | status = ocfs2_inode_lock(inode, &di_bh, 1); | ||
| 990 | if (status) { | ||
| 991 | mlog_errno(status); | ||
| 992 | goto out_rw_unlock; | ||
| 993 | } | ||
| 994 | |||
| 995 | /* | ||
| 996 | * rememer ip_xattr_sem also needs to be held if necessary | ||
| 997 | */ | ||
| 998 | down_write(&OCFS2_I(inode)->ip_alloc_sem); | ||
| 999 | |||
| 1000 | status = __ocfs2_move_extents_range(di_bh, context); | ||
| 1001 | |||
| 1002 | up_write(&OCFS2_I(inode)->ip_alloc_sem); | ||
| 1003 | if (status) { | ||
| 1004 | mlog_errno(status); | ||
| 1005 | goto out_inode_unlock; | ||
| 1006 | } | ||
| 1007 | |||
| 1008 | /* | ||
| 1009 | * We update ctime for these changes | ||
| 1010 | */ | ||
| 1011 | handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); | ||
| 1012 | if (IS_ERR(handle)) { | ||
| 1013 | status = PTR_ERR(handle); | ||
| 1014 | mlog_errno(status); | ||
| 1015 | goto out_inode_unlock; | ||
| 1016 | } | ||
| 1017 | |||
| 1018 | status = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh, | ||
| 1019 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
| 1020 | if (status) { | ||
| 1021 | mlog_errno(status); | ||
| 1022 | goto out_commit; | ||
| 1023 | } | ||
| 1024 | |||
| 1025 | di = (struct ocfs2_dinode *)di_bh->b_data; | ||
| 1026 | inode->i_ctime = CURRENT_TIME; | ||
| 1027 | di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec); | ||
| 1028 | di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); | ||
| 1029 | |||
| 1030 | ocfs2_journal_dirty(handle, di_bh); | ||
| 1031 | |||
| 1032 | out_commit: | ||
| 1033 | ocfs2_commit_trans(osb, handle); | ||
| 1034 | |||
| 1035 | out_inode_unlock: | ||
| 1036 | brelse(di_bh); | ||
| 1037 | ocfs2_inode_unlock(inode, 1); | ||
| 1038 | out_rw_unlock: | ||
| 1039 | ocfs2_rw_unlock(inode, 1); | ||
| 1040 | out: | ||
| 1041 | mutex_unlock(&inode->i_mutex); | ||
| 1042 | |||
| 1043 | return status; | ||
| 1044 | } | ||
| 1045 | |||
| 1046 | int ocfs2_ioctl_move_extents(struct file *filp, void __user *argp) | ||
| 1047 | { | ||
| 1048 | int status; | ||
| 1049 | |||
| 1050 | struct inode *inode = filp->f_path.dentry->d_inode; | ||
| 1051 | struct ocfs2_move_extents range; | ||
| 1052 | struct ocfs2_move_extents_context *context = NULL; | ||
| 1053 | |||
| 1054 | status = mnt_want_write(filp->f_path.mnt); | ||
| 1055 | if (status) | ||
| 1056 | return status; | ||
| 1057 | |||
| 1058 | if ((!S_ISREG(inode->i_mode)) || !(filp->f_mode & FMODE_WRITE)) | ||
| 1059 | goto out; | ||
| 1060 | |||
| 1061 | if (inode->i_flags & (S_IMMUTABLE|S_APPEND)) { | ||
| 1062 | status = -EPERM; | ||
| 1063 | goto out; | ||
| 1064 | } | ||
| 1065 | |||
| 1066 | context = kzalloc(sizeof(struct ocfs2_move_extents_context), GFP_NOFS); | ||
| 1067 | if (!context) { | ||
| 1068 | status = -ENOMEM; | ||
| 1069 | mlog_errno(status); | ||
| 1070 | goto out; | ||
| 1071 | } | ||
| 1072 | |||
| 1073 | context->inode = inode; | ||
| 1074 | context->file = filp; | ||
| 1075 | |||
| 1076 | if (argp) { | ||
| 1077 | if (copy_from_user(&range, (struct ocfs2_move_extents *)argp, | ||
| 1078 | sizeof(range))) { | ||
| 1079 | status = -EFAULT; | ||
| 1080 | goto out; | ||
| 1081 | } | ||
| 1082 | } else { | ||
| 1083 | status = -EINVAL; | ||
| 1084 | goto out; | ||
| 1085 | } | ||
| 1086 | |||
| 1087 | if (range.me_start > i_size_read(inode)) | ||
| 1088 | goto out; | ||
| 1089 | |||
| 1090 | if (range.me_start + range.me_len > i_size_read(inode)) | ||
| 1091 | range.me_len = i_size_read(inode) - range.me_start; | ||
| 1092 | |||
| 1093 | context->range = ⦥ | ||
| 1094 | |||
| 1095 | if (range.me_flags & OCFS2_MOVE_EXT_FL_AUTO_DEFRAG) { | ||
| 1096 | context->auto_defrag = 1; | ||
| 1097 | if (!range.me_threshold) | ||
| 1098 | /* | ||
| 1099 | * ok, the default theshold for the defragmentation | ||
| 1100 | * is 1M, since our maximum clustersize was 1M also. | ||
| 1101 | * any thought? | ||
| 1102 | */ | ||
| 1103 | range.me_threshold = 1024 * 1024; | ||
| 1104 | } else { | ||
| 1105 | /* | ||
| 1106 | * first best-effort attempt to validate and adjust the goal | ||
| 1107 | * (physical address in block), while it can't guarantee later | ||
| 1108 | * operation can succeed all the time since global_bitmap may | ||
| 1109 | * change a bit over time. | ||
| 1110 | */ | ||
| 1111 | |||
| 1112 | status = ocfs2_validate_and_adjust_move_goal(inode, &range); | ||
| 1113 | if (status) | ||
| 1114 | goto out; | ||
| 1115 | } | ||
| 1116 | |||
| 1117 | status = ocfs2_move_extents(context); | ||
| 1118 | if (status) | ||
| 1119 | mlog_errno(status); | ||
| 1120 | out: | ||
| 1121 | /* | ||
| 1122 | * movement/defragmentation may end up being partially completed, | ||
| 1123 | * that's the reason why we need to return userspace the finished | ||
| 1124 | * length and new_offset even if failure happens somewhere. | ||
| 1125 | */ | ||
| 1126 | if (argp) { | ||
| 1127 | if (copy_to_user((struct ocfs2_move_extents *)argp, &range, | ||
| 1128 | sizeof(range))) | ||
| 1129 | status = -EFAULT; | ||
| 1130 | } | ||
| 1131 | |||
| 1132 | kfree(context); | ||
| 1133 | |||
| 1134 | mnt_drop_write(filp->f_path.mnt); | ||
| 1135 | |||
| 1136 | return status; | ||
| 1137 | } | ||
diff --git a/fs/ocfs2/move_extents.h b/fs/ocfs2/move_extents.h index 27570f7f6909..4e143e811441 100644 --- a/fs/ocfs2/move_extents.h +++ b/fs/ocfs2/move_extents.h | |||
| @@ -17,4 +17,6 @@ | |||
| 17 | #ifndef OCFS2_MOVE_EXTENTS_H | 17 | #ifndef OCFS2_MOVE_EXTENTS_H |
| 18 | #define OCFS2_MOVE_EXTENTS_H | 18 | #define OCFS2_MOVE_EXTENTS_H |
| 19 | 19 | ||
| 20 | int ocfs2_ioctl_move_extents(struct file *filp, void __user *argp); | ||
| 21 | |||
| 20 | #endif /* OCFS2_MOVE_EXTENTS_H */ | 22 | #endif /* OCFS2_MOVE_EXTENTS_H */ |
