aboutsummaryrefslogtreecommitdiffstats
path: root/fs/xfs/xfs_reflink.c
diff options
context:
space:
mode:
author: Darrick J. Wong <darrick.wong@oracle.com> 2016-10-03 12:11:40 -0400
committer: Darrick J. Wong <darrick.wong@oracle.com> 2016-10-05 19:26:05 -0400
commit: 862bb360ef569f625bcf700ae4b162a9c8fa9bba (patch)
tree: d643c7e7bb8850a33a74cfc8051bda55620b5265 /fs/xfs/xfs_reflink.c
parent: 174edb0e46e520230791a1a894397b7c824cefc4 (diff)
xfs: reflink extents from one file to another
Reflink extents from one file to another; that is to say, iteratively remove the mappings from the destination file, copy the mappings from the source file to the destination file, and increment the reference count of all the blocks that got remapped.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Diffstat (limited to 'fs/xfs/xfs_reflink.c')
-rw-r--r--fs/xfs/xfs_reflink.c428
1 files changed, 428 insertions, 0 deletions
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 0aac26208a82..c1e98a43a937 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -792,3 +792,431 @@ xfs_reflink_recover_cow(
792 792
793 return error; 793 return error;
794} 794}
795
796/*
797 * Reflinking (Block) Ranges of Two Files Together
798 *
799 * First, ensure that the reflink flag is set on both inodes. The flag is an
800 * optimization to avoid unnecessary refcount btree lookups in the write path.
801 *
802 * Now we can iteratively remap the range of extents (and holes) in src to the
803 * corresponding ranges in dest. Let drange and srange denote the ranges of
804 * logical blocks in dest and src touched by the reflink operation.
805 *
806 * While the length of drange is greater than zero,
807 * - Read src's bmbt at the start of srange ("imap")
808 * - If imap doesn't exist, make imap appear to start at the end of srange
809 * with zero length.
810 * - If imap starts before srange, advance imap to start at srange.
811 * - If imap goes beyond srange, truncate imap to end at the end of srange.
812 * - Punch (imap start - srange start + imap len) blocks from dest at
813 * offset (drange start).
814 * - If imap points to a real range of pblks,
815 * > Increase the refcount of the imap's pblks
816 * > Map imap's pblks into dest at the offset
817 * (drange start + imap start - srange start)
818 * - Advance drange and srange by (imap start - srange start + imap len)
819 *
820 * Finally, if the reflink made dest longer, update both the in-core and
821 * on-disk file sizes.
822 *
823 * ASCII Art Demonstration:
824 *
825 * Let's say we want to reflink this source file:
826 *
827 * ----SSSSSSS-SSSSS----SSSSSS (src file)
828 * <-------------------->
829 *
830 * into this destination file:
831 *
832 * --DDDDDDDDDDDDDDDDDDD--DDD (dest file)
833 * <-------------------->
834 * '-' means a hole, and 'S' and 'D' are written blocks in the src and dest.
835 * Observe that the range has different logical offsets in either file.
836 *
837 * Consider that the first extent in the source file doesn't line up with our
838 * reflink range. Unmapping and remapping are separate operations, so we can
839 * unmap more blocks from the destination file than we remap.
840 *
841 * ----SSSSSSS-SSSSS----SSSSSS
842 * <------->
843 * --DDDDD---------DDDDD--DDD
844 * <------->
845 *
846 * Now remap the source extent into the destination file:
847 *
848 * ----SSSSSSS-SSSSS----SSSSSS
849 * <------->
850 * --DDDDD--SSSSSSSDDDDD--DDD
851 * <------->
852 *
853 * Do likewise with the second hole and extent in our range. Holes in the
854 * unmap range don't affect our operation.
855 *
856 * ----SSSSSSS-SSSSS----SSSSSS
857 * <---->
858 * --DDDDD--SSSSSSS-SSSSS-DDD
859 * <---->
860 *
861 * Finally, unmap and remap part of the third extent. This will increase the
862 * size of the destination file.
863 *
864 * ----SSSSSSS-SSSSS----SSSSSS
865 * <----->
866 * --DDDDD--SSSSSSS-SSSSS----SSS
867 * <----->
868 *
869 * Once we update the destination file's i_size, we're done.
870 */
871
872/*
873 * Ensure the reflink bit is set in both inodes.
874 */
875STATIC int
876xfs_reflink_set_inode_flag(
877 struct xfs_inode *src,
878 struct xfs_inode *dest)
879{
880 struct xfs_mount *mp = src->i_mount;
881 int error;
882 struct xfs_trans *tp;
883
884 if (xfs_is_reflink_inode(src) && xfs_is_reflink_inode(dest))
885 return 0;
886
887 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp);
888 if (error)
889 goto out_error;
890
891 /* Lock both files against IO */
892 if (src->i_ino == dest->i_ino)
893 xfs_ilock(src, XFS_ILOCK_EXCL);
894 else
895 xfs_lock_two_inodes(src, dest, XFS_ILOCK_EXCL);
896
897 if (!xfs_is_reflink_inode(src)) {
898 trace_xfs_reflink_set_inode_flag(src);
899 xfs_trans_ijoin(tp, src, XFS_ILOCK_EXCL);
900 src->i_d.di_flags2 |= XFS_DIFLAG2_REFLINK;
901 xfs_trans_log_inode(tp, src, XFS_ILOG_CORE);
902 xfs_ifork_init_cow(src);
903 } else
904 xfs_iunlock(src, XFS_ILOCK_EXCL);
905
906 if (src->i_ino == dest->i_ino)
907 goto commit_flags;
908
909 if (!xfs_is_reflink_inode(dest)) {
910 trace_xfs_reflink_set_inode_flag(dest);
911 xfs_trans_ijoin(tp, dest, XFS_ILOCK_EXCL);
912 dest->i_d.di_flags2 |= XFS_DIFLAG2_REFLINK;
913 xfs_trans_log_inode(tp, dest, XFS_ILOG_CORE);
914 xfs_ifork_init_cow(dest);
915 } else
916 xfs_iunlock(dest, XFS_ILOCK_EXCL);
917
918commit_flags:
919 error = xfs_trans_commit(tp);
920 if (error)
921 goto out_error;
922 return error;
923
924out_error:
925 trace_xfs_reflink_set_inode_flag_error(dest, error, _RET_IP_);
926 return error;
927}
928
929/*
930 * Update destination inode size, if necessary.
931 */
932STATIC int
933xfs_reflink_update_dest(
934 struct xfs_inode *dest,
935 xfs_off_t newlen)
936{
937 struct xfs_mount *mp = dest->i_mount;
938 struct xfs_trans *tp;
939 int error;
940
941 if (newlen <= i_size_read(VFS_I(dest)))
942 return 0;
943
944 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp);
945 if (error)
946 goto out_error;
947
948 xfs_ilock(dest, XFS_ILOCK_EXCL);
949 xfs_trans_ijoin(tp, dest, XFS_ILOCK_EXCL);
950
951 trace_xfs_reflink_update_inode_size(dest, newlen);
952 i_size_write(VFS_I(dest), newlen);
953 dest->i_d.di_size = newlen;
954 xfs_trans_log_inode(tp, dest, XFS_ILOG_CORE);
955
956 error = xfs_trans_commit(tp);
957 if (error)
958 goto out_error;
959 return error;
960
961out_error:
962 trace_xfs_reflink_update_inode_size_error(dest, error, _RET_IP_);
963 return error;
964}
965
966/*
967 * Unmap a range of blocks from a file, then map other blocks into the hole.
968 * The range to unmap is (destoff : destoff + srcioff + irec->br_blockcount).
969 * The extent irec is mapped into dest at irec->br_startoff.
970 */
971STATIC int
972xfs_reflink_remap_extent(
973 struct xfs_inode *ip,
974 struct xfs_bmbt_irec *irec,
975 xfs_fileoff_t destoff,
976 xfs_off_t new_isize)
977{
978 struct xfs_mount *mp = ip->i_mount;
979 struct xfs_trans *tp;
980 xfs_fsblock_t firstfsb;
981 unsigned int resblks;
982 struct xfs_defer_ops dfops;
983 struct xfs_bmbt_irec uirec;
984 bool real_extent;
985 xfs_filblks_t rlen;
986 xfs_filblks_t unmap_len;
987 xfs_off_t newlen;
988 int error;
989
990 unmap_len = irec->br_startoff + irec->br_blockcount - destoff;
991 trace_xfs_reflink_punch_range(ip, destoff, unmap_len);
992
993 /* Only remap normal extents. */
994 real_extent = (irec->br_startblock != HOLESTARTBLOCK &&
995 irec->br_startblock != DELAYSTARTBLOCK &&
996 !ISUNWRITTEN(irec));
997
998 /* Start a rolling transaction to switch the mappings */
999 resblks = XFS_EXTENTADD_SPACE_RES(ip->i_mount, XFS_DATA_FORK);
1000 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, &tp);
1001 if (error)
1002 goto out;
1003
1004 xfs_ilock(ip, XFS_ILOCK_EXCL);
1005 xfs_trans_ijoin(tp, ip, 0);
1006
1007 /* If we're not just clearing space, then do we have enough quota? */
1008 if (real_extent) {
1009 error = xfs_trans_reserve_quota_nblks(tp, ip,
1010 irec->br_blockcount, 0, XFS_QMOPT_RES_REGBLKS);
1011 if (error)
1012 goto out_cancel;
1013 }
1014
1015 trace_xfs_reflink_remap(ip, irec->br_startoff,
1016 irec->br_blockcount, irec->br_startblock);
1017
1018 /* Unmap the old blocks in the data fork. */
1019 rlen = unmap_len;
1020 while (rlen) {
1021 xfs_defer_init(&dfops, &firstfsb);
1022 error = __xfs_bunmapi(tp, ip, destoff, &rlen, 0, 1,
1023 &firstfsb, &dfops);
1024 if (error)
1025 goto out_defer;
1026
1027 /*
1028 * Trim the extent to whatever got unmapped.
1029 * Remember, bunmapi works backwards.
1030 */
1031 uirec.br_startblock = irec->br_startblock + rlen;
1032 uirec.br_startoff = irec->br_startoff + rlen;
1033 uirec.br_blockcount = unmap_len - rlen;
1034 unmap_len = rlen;
1035
1036 /* If this isn't a real mapping, we're done. */
1037 if (!real_extent || uirec.br_blockcount == 0)
1038 goto next_extent;
1039
1040 trace_xfs_reflink_remap(ip, uirec.br_startoff,
1041 uirec.br_blockcount, uirec.br_startblock);
1042
1043 /* Update the refcount tree */
1044 error = xfs_refcount_increase_extent(mp, &dfops, &uirec);
1045 if (error)
1046 goto out_defer;
1047
1048 /* Map the new blocks into the data fork. */
1049 error = xfs_bmap_map_extent(mp, &dfops, ip, &uirec);
1050 if (error)
1051 goto out_defer;
1052
1053 /* Update quota accounting. */
1054 xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT,
1055 uirec.br_blockcount);
1056
1057 /* Update dest isize if needed. */
1058 newlen = XFS_FSB_TO_B(mp,
1059 uirec.br_startoff + uirec.br_blockcount);
1060 newlen = min_t(xfs_off_t, newlen, new_isize);
1061 if (newlen > i_size_read(VFS_I(ip))) {
1062 trace_xfs_reflink_update_inode_size(ip, newlen);
1063 i_size_write(VFS_I(ip), newlen);
1064 ip->i_d.di_size = newlen;
1065 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
1066 }
1067
1068next_extent:
1069 /* Process all the deferred stuff. */
1070 error = xfs_defer_finish(&tp, &dfops, ip);
1071 if (error)
1072 goto out_defer;
1073 }
1074
1075 error = xfs_trans_commit(tp);
1076 xfs_iunlock(ip, XFS_ILOCK_EXCL);
1077 if (error)
1078 goto out;
1079 return 0;
1080
1081out_defer:
1082 xfs_defer_cancel(&dfops);
1083out_cancel:
1084 xfs_trans_cancel(tp);
1085 xfs_iunlock(ip, XFS_ILOCK_EXCL);
1086out:
1087 trace_xfs_reflink_remap_extent_error(ip, error, _RET_IP_);
1088 return error;
1089}
1090
1091/*
1092 * Iteratively remap one file's extents (and holes) to another's.
1093 */
1094STATIC int
1095xfs_reflink_remap_blocks(
1096 struct xfs_inode *src,
1097 xfs_fileoff_t srcoff,
1098 struct xfs_inode *dest,
1099 xfs_fileoff_t destoff,
1100 xfs_filblks_t len,
1101 xfs_off_t new_isize)
1102{
1103 struct xfs_bmbt_irec imap;
1104 int nimaps;
1105 int error = 0;
1106 xfs_filblks_t range_len;
1107
1108 /* drange = (destoff, destoff + len); srange = (srcoff, srcoff + len) */
1109 while (len) {
1110 trace_xfs_reflink_remap_blocks_loop(src, srcoff, len,
1111 dest, destoff);
1112 /* Read extent from the source file */
1113 nimaps = 1;
1114 xfs_ilock(src, XFS_ILOCK_EXCL);
1115 error = xfs_bmapi_read(src, srcoff, len, &imap, &nimaps, 0);
1116 xfs_iunlock(src, XFS_ILOCK_EXCL);
1117 if (error)
1118 goto err;
1119 ASSERT(nimaps == 1);
1120
1121 trace_xfs_reflink_remap_imap(src, srcoff, len, XFS_IO_OVERWRITE,
1122 &imap);
1123
1124 /* Translate imap into the destination file. */
1125 range_len = imap.br_startoff + imap.br_blockcount - srcoff;
1126 imap.br_startoff += destoff - srcoff;
1127
1128 /* Clear dest from destoff to the end of imap and map it in. */
1129 error = xfs_reflink_remap_extent(dest, &imap, destoff,
1130 new_isize);
1131 if (error)
1132 goto err;
1133
1134 if (fatal_signal_pending(current)) {
1135 error = -EINTR;
1136 goto err;
1137 }
1138
1139 /* Advance drange/srange */
1140 srcoff += range_len;
1141 destoff += range_len;
1142 len -= range_len;
1143 }
1144
1145 return 0;
1146
1147err:
1148 trace_xfs_reflink_remap_blocks_error(dest, error, _RET_IP_);
1149 return error;
1150}
1151
1152/*
1153 * Link a range of blocks from one file to another.
1154 */
1155int
1156xfs_reflink_remap_range(
1157 struct xfs_inode *src,
1158 xfs_off_t srcoff,
1159 struct xfs_inode *dest,
1160 xfs_off_t destoff,
1161 xfs_off_t len)
1162{
1163 struct xfs_mount *mp = src->i_mount;
1164 xfs_fileoff_t sfsbno, dfsbno;
1165 xfs_filblks_t fsblen;
1166 int error;
1167
1168 if (!xfs_sb_version_hasreflink(&mp->m_sb))
1169 return -EOPNOTSUPP;
1170
1171 if (XFS_FORCED_SHUTDOWN(mp))
1172 return -EIO;
1173
1174 /* Don't reflink realtime inodes */
1175 if (XFS_IS_REALTIME_INODE(src) || XFS_IS_REALTIME_INODE(dest))
1176 return -EINVAL;
1177
1178 trace_xfs_reflink_remap_range(src, srcoff, len, dest, destoff);
1179
1180 /* Lock both files against IO */
1181 if (src->i_ino == dest->i_ino) {
1182 xfs_ilock(src, XFS_IOLOCK_EXCL);
1183 xfs_ilock(src, XFS_MMAPLOCK_EXCL);
1184 } else {
1185 xfs_lock_two_inodes(src, dest, XFS_IOLOCK_EXCL);
1186 xfs_lock_two_inodes(src, dest, XFS_MMAPLOCK_EXCL);
1187 }
1188
1189 error = xfs_reflink_set_inode_flag(src, dest);
1190 if (error)
1191 goto out_error;
1192
1193 /*
1194 * Invalidate the page cache so that we can clear any CoW mappings
1195 * in the destination file.
1196 */
1197 truncate_inode_pages_range(&VFS_I(dest)->i_data, destoff,
1198 PAGE_ALIGN(destoff + len) - 1);
1199
1200 dfsbno = XFS_B_TO_FSBT(mp, destoff);
1201 sfsbno = XFS_B_TO_FSBT(mp, srcoff);
1202 fsblen = XFS_B_TO_FSB(mp, len);
1203 error = xfs_reflink_remap_blocks(src, sfsbno, dest, dfsbno, fsblen,
1204 destoff + len);
1205 if (error)
1206 goto out_error;
1207
1208 error = xfs_reflink_update_dest(dest, destoff + len);
1209 if (error)
1210 goto out_error;
1211
1212out_error:
1213 xfs_iunlock(src, XFS_MMAPLOCK_EXCL);
1214 xfs_iunlock(src, XFS_IOLOCK_EXCL);
1215 if (src->i_ino != dest->i_ino) {
1216 xfs_iunlock(dest, XFS_MMAPLOCK_EXCL);
1217 xfs_iunlock(dest, XFS_IOLOCK_EXCL);
1218 }
1219 if (error)
1220 trace_xfs_reflink_remap_range_error(dest, error, _RET_IP_);
1221 return error;
1222}