aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTristan Ye <tristan.ye@oracle.com>2011-05-25 02:23:43 -0400
committerTristan Ye <tristan.ye@oracle.com>2011-05-25 03:17:12 -0400
commit53069d4e76954e2e63c1b3c501051c6fbcf7298c (patch)
tree6b906eb22fef78636c7d4db7120370d7715f5aef
parentee16cc037e255801892481a2d0b7c1fff2adf1aa (diff)
Ocfs2/move_extents: move/defrag extents within a certain range.
The basic logic of moving extents for a file is much like the hole-punching sequence: walk the extents within the range the user specified, calculate an appropriate length to defrag/move, then let ocfs2_defrag_extent()/ocfs2_move_extent() do the actual moving. This function ends up reporting 'OCFS2_MOVE_EXT_FL_COMPLETE' to userspace if the operation completes successfully. Signed-off-by: Tristan Ye <tristan.ye@oracle.com>
-rw-r--r--fs/ocfs2/ioctl.c5
-rw-r--r--fs/ocfs2/move_extents.c308
-rw-r--r--fs/ocfs2/move_extents.h2
3 files changed, 315 insertions, 0 deletions
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c
index fd248ed53df7..59100598b0cb 100644
--- a/fs/ocfs2/ioctl.c
+++ b/fs/ocfs2/ioctl.c
@@ -26,6 +26,7 @@
26#include "dir.h" 26#include "dir.h"
27#include "buffer_head_io.h" 27#include "buffer_head_io.h"
28#include "suballoc.h" 28#include "suballoc.h"
29#include "move_extents.h"
29 30
30#include <linux/ext2_fs.h> 31#include <linux/ext2_fs.h>
31 32
@@ -951,6 +952,8 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
951 return -EFAULT; 952 return -EFAULT;
952 953
953 return ocfs2_info_handle(inode, &info, 0); 954 return ocfs2_info_handle(inode, &info, 0);
955 case OCFS2_IOC_MOVE_EXT:
956 return ocfs2_ioctl_move_extents(filp, (void __user *)arg);
954 default: 957 default:
955 return -ENOTTY; 958 return -ENOTTY;
956 } 959 }
@@ -993,6 +996,8 @@ long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg)
993 return -EFAULT; 996 return -EFAULT;
994 997
995 return ocfs2_info_handle(inode, &info, 1); 998 return ocfs2_info_handle(inode, &info, 1);
999 case OCFS2_IOC_MOVE_EXT:
1000 break;
996 default: 1001 default:
997 return -ENOIOCTLCMD; 1002 return -ENOIOCTLCMD;
998 } 1003 }
diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c
index 1c822e08fea0..800552168d8a 100644
--- a/fs/ocfs2/move_extents.c
+++ b/fs/ocfs2/move_extents.c
@@ -827,3 +827,311 @@ static void ocfs2_calc_extent_defrag_len(u32 *alloc_size, u32 *len_defraged,
827 *len_defraged = 0; 827 *len_defraged = 0;
828 } 828 }
829} 829}
830
831static int __ocfs2_move_extents_range(struct buffer_head *di_bh,
832 struct ocfs2_move_extents_context *context)
833{
834 int ret = 0, flags, do_defrag, skip = 0;
835 u32 cpos, phys_cpos, move_start, len_to_move, alloc_size;
836 u32 len_defraged = 0, defrag_thresh = 0, new_phys_cpos = 0;
837
838 struct inode *inode = context->inode;
839 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
840 struct ocfs2_move_extents *range = context->range;
841 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
842
843 if ((inode->i_size == 0) || (range->me_len == 0))
844 return 0;
845
846 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
847 return 0;
848
849 context->refcount_loc = le64_to_cpu(di->i_refcount_loc);
850
851 ocfs2_init_dinode_extent_tree(&context->et, INODE_CACHE(inode), di_bh);
852 ocfs2_init_dealloc_ctxt(&context->dealloc);
853
854 /*
855 * TO-DO XXX:
856 *
857 * - xattr extents.
858 */
859
860 do_defrag = context->auto_defrag;
861
862 /*
863 * extents moving happens in unit of clusters, for the sake
864 * of simplicity, we may ignore two clusters where 'byte_start'
865 * and 'byte_start + len' were within.
866 */
867 move_start = ocfs2_clusters_for_bytes(osb->sb, range->me_start);
868 len_to_move = (range->me_start + range->me_len) >>
869 osb->s_clustersize_bits;
870 if (len_to_move >= move_start)
871 len_to_move -= move_start;
872 else
873 len_to_move = 0;
874
875 if (do_defrag)
876 defrag_thresh = range->me_threshold >> osb->s_clustersize_bits;
877 else
878 new_phys_cpos = ocfs2_blocks_to_clusters(inode->i_sb,
879 range->me_goal);
880
881 mlog(0, "Inode: %llu, start: %llu, len: %llu, cstart: %u, clen: %u, "
882 "thresh: %u\n",
883 (unsigned long long)OCFS2_I(inode)->ip_blkno,
884 (unsigned long long)range->me_start,
885 (unsigned long long)range->me_len,
886 move_start, len_to_move, defrag_thresh);
887
888 cpos = move_start;
889 while (len_to_move) {
890 ret = ocfs2_get_clusters(inode, cpos, &phys_cpos, &alloc_size,
891 &flags);
892 if (ret) {
893 mlog_errno(ret);
894 goto out;
895 }
896
897 if (alloc_size > len_to_move)
898 alloc_size = len_to_move;
899
900 /*
901 * XXX: how to deal with a hole:
902 *
903 * - skip the hole of course
904 * - force a new defragmentation
905 */
906 if (!phys_cpos) {
907 if (do_defrag)
908 len_defraged = 0;
909
910 goto next;
911 }
912
913 if (do_defrag) {
914 ocfs2_calc_extent_defrag_len(&alloc_size, &len_defraged,
915 defrag_thresh, &skip);
916 /*
917 * skip large extents
918 */
919 if (skip) {
920 skip = 0;
921 goto next;
922 }
923
924 mlog(0, "#Defrag: cpos: %u, phys_cpos: %u, "
925 "alloc_size: %u, len_defraged: %u\n",
926 cpos, phys_cpos, alloc_size, len_defraged);
927
928 ret = ocfs2_defrag_extent(context, cpos, phys_cpos,
929 alloc_size, flags);
930 } else {
931 ret = ocfs2_move_extent(context, cpos, phys_cpos,
932 &new_phys_cpos, alloc_size,
933 flags);
934
935 new_phys_cpos += alloc_size;
936 }
937
938 if (ret < 0) {
939 mlog_errno(ret);
940 goto out;
941 }
942
943 context->clusters_moved += alloc_size;
944next:
945 cpos += alloc_size;
946 len_to_move -= alloc_size;
947 }
948
949 range->me_flags |= OCFS2_MOVE_EXT_FL_COMPLETE;
950
951out:
952 range->me_moved_len = ocfs2_clusters_to_bytes(osb->sb,
953 context->clusters_moved);
954 range->me_new_offset = ocfs2_clusters_to_bytes(osb->sb,
955 context->new_phys_cpos);
956
957 ocfs2_schedule_truncate_log_flush(osb, 1);
958 ocfs2_run_deallocs(osb, &context->dealloc);
959
960 return ret;
961}
962
963static int ocfs2_move_extents(struct ocfs2_move_extents_context *context)
964{
965 int status;
966 handle_t *handle;
967 struct inode *inode = context->inode;
968 struct ocfs2_dinode *di;
969 struct buffer_head *di_bh = NULL;
970 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
971
972 if (!inode)
973 return -ENOENT;
974
975 if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb))
976 return -EROFS;
977
978 mutex_lock(&inode->i_mutex);
979
980 /*
981 * This prevents concurrent writes from other nodes
982 */
983 status = ocfs2_rw_lock(inode, 1);
984 if (status) {
985 mlog_errno(status);
986 goto out;
987 }
988
989 status = ocfs2_inode_lock(inode, &di_bh, 1);
990 if (status) {
991 mlog_errno(status);
992 goto out_rw_unlock;
993 }
994
995 /*
996 * rememer ip_xattr_sem also needs to be held if necessary
997 */
998 down_write(&OCFS2_I(inode)->ip_alloc_sem);
999
1000 status = __ocfs2_move_extents_range(di_bh, context);
1001
1002 up_write(&OCFS2_I(inode)->ip_alloc_sem);
1003 if (status) {
1004 mlog_errno(status);
1005 goto out_inode_unlock;
1006 }
1007
1008 /*
1009 * We update ctime for these changes
1010 */
1011 handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
1012 if (IS_ERR(handle)) {
1013 status = PTR_ERR(handle);
1014 mlog_errno(status);
1015 goto out_inode_unlock;
1016 }
1017
1018 status = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
1019 OCFS2_JOURNAL_ACCESS_WRITE);
1020 if (status) {
1021 mlog_errno(status);
1022 goto out_commit;
1023 }
1024
1025 di = (struct ocfs2_dinode *)di_bh->b_data;
1026 inode->i_ctime = CURRENT_TIME;
1027 di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
1028 di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
1029
1030 ocfs2_journal_dirty(handle, di_bh);
1031
1032out_commit:
1033 ocfs2_commit_trans(osb, handle);
1034
1035out_inode_unlock:
1036 brelse(di_bh);
1037 ocfs2_inode_unlock(inode, 1);
1038out_rw_unlock:
1039 ocfs2_rw_unlock(inode, 1);
1040out:
1041 mutex_unlock(&inode->i_mutex);
1042
1043 return status;
1044}
1045
1046int ocfs2_ioctl_move_extents(struct file *filp, void __user *argp)
1047{
1048 int status;
1049
1050 struct inode *inode = filp->f_path.dentry->d_inode;
1051 struct ocfs2_move_extents range;
1052 struct ocfs2_move_extents_context *context = NULL;
1053
1054 status = mnt_want_write(filp->f_path.mnt);
1055 if (status)
1056 return status;
1057
1058 if ((!S_ISREG(inode->i_mode)) || !(filp->f_mode & FMODE_WRITE))
1059 goto out;
1060
1061 if (inode->i_flags & (S_IMMUTABLE|S_APPEND)) {
1062 status = -EPERM;
1063 goto out;
1064 }
1065
1066 context = kzalloc(sizeof(struct ocfs2_move_extents_context), GFP_NOFS);
1067 if (!context) {
1068 status = -ENOMEM;
1069 mlog_errno(status);
1070 goto out;
1071 }
1072
1073 context->inode = inode;
1074 context->file = filp;
1075
1076 if (argp) {
1077 if (copy_from_user(&range, (struct ocfs2_move_extents *)argp,
1078 sizeof(range))) {
1079 status = -EFAULT;
1080 goto out;
1081 }
1082 } else {
1083 status = -EINVAL;
1084 goto out;
1085 }
1086
1087 if (range.me_start > i_size_read(inode))
1088 goto out;
1089
1090 if (range.me_start + range.me_len > i_size_read(inode))
1091 range.me_len = i_size_read(inode) - range.me_start;
1092
1093 context->range = &range;
1094
1095 if (range.me_flags & OCFS2_MOVE_EXT_FL_AUTO_DEFRAG) {
1096 context->auto_defrag = 1;
1097 if (!range.me_threshold)
1098 /*
1099 * ok, the default theshold for the defragmentation
1100 * is 1M, since our maximum clustersize was 1M also.
1101 * any thought?
1102 */
1103 range.me_threshold = 1024 * 1024;
1104 } else {
1105 /*
1106 * first best-effort attempt to validate and adjust the goal
1107 * (physical address in block), while it can't guarantee later
1108 * operation can succeed all the time since global_bitmap may
1109 * change a bit over time.
1110 */
1111
1112 status = ocfs2_validate_and_adjust_move_goal(inode, &range);
1113 if (status)
1114 goto out;
1115 }
1116
1117 status = ocfs2_move_extents(context);
1118 if (status)
1119 mlog_errno(status);
1120out:
1121 /*
1122 * movement/defragmentation may end up being partially completed,
1123 * that's the reason why we need to return userspace the finished
1124 * length and new_offset even if failure happens somewhere.
1125 */
1126 if (argp) {
1127 if (copy_to_user((struct ocfs2_move_extents *)argp, &range,
1128 sizeof(range)))
1129 status = -EFAULT;
1130 }
1131
1132 kfree(context);
1133
1134 mnt_drop_write(filp->f_path.mnt);
1135
1136 return status;
1137}
diff --git a/fs/ocfs2/move_extents.h b/fs/ocfs2/move_extents.h
index 27570f7f6909..4e143e811441 100644
--- a/fs/ocfs2/move_extents.h
+++ b/fs/ocfs2/move_extents.h
@@ -17,4 +17,6 @@
17#ifndef OCFS2_MOVE_EXTENTS_H 17#ifndef OCFS2_MOVE_EXTENTS_H
18#define OCFS2_MOVE_EXTENTS_H 18#define OCFS2_MOVE_EXTENTS_H
19 19
20int ocfs2_ioctl_move_extents(struct file *filp, void __user *argp);
21
20#endif /* OCFS2_MOVE_EXTENTS_H */ 22#endif /* OCFS2_MOVE_EXTENTS_H */