diff options
author | Brian Foster <bfoster@redhat.com> | 2014-04-24 02:00:53 -0400 |
---|---|---|
committer | Dave Chinner <david@fromorbit.com> | 2014-04-24 02:00:53 -0400 |
commit | 6dd8638e4e8764e0d6557fc62840a815a99c136d (patch) | |
tree | ec6653b8440c548bf78fe65c9d87ab920049d63a /fs/xfs/xfs_ialloc.c | |
parent | 0aa0a756ec255cfc8b733fe0d8993c1758b1240c (diff) |
xfs: use and update the finobt on inode allocation
Replace xfs_dialloc_ag() with an implementation that looks for a
record in the finobt. The finobt only tracks records with at least
one free inode. This eliminates the need for the intra-ag scan in
the original algorithm. Once the inode is allocated, update the
finobt appropriately (possibly removing the record) as well as the
inobt.
Move the original xfs_dialloc_ag() algorithm to
xfs_dialloc_ag_inobt() and fall back as such if finobt support is
not enabled.
Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
Diffstat (limited to 'fs/xfs/xfs_ialloc.c')
-rw-r--r-- | fs/xfs/xfs_ialloc.c | 295 |
1 files changed, 290 insertions, 5 deletions
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index 203db98f0bc9..ab3540b675d5 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c | |||
@@ -722,13 +722,10 @@ xfs_ialloc_get_rec( | |||
722 | } | 722 | } |
723 | 723 | ||
724 | /* | 724 | /* |
725 | * Allocate an inode. | 725 | * Allocate an inode using the inobt-only algorithm. |
726 | * | ||
727 | * The caller selected an AG for us, and made sure that free inodes are | ||
728 | * available. | ||
729 | */ | 726 | */ |
730 | STATIC int | 727 | STATIC int |
731 | xfs_dialloc_ag( | 728 | xfs_dialloc_ag_inobt( |
732 | struct xfs_trans *tp, | 729 | struct xfs_trans *tp, |
733 | struct xfs_buf *agbp, | 730 | struct xfs_buf *agbp, |
734 | xfs_ino_t parent, | 731 | xfs_ino_t parent, |
@@ -987,6 +984,294 @@ error0: | |||
987 | } | 984 | } |
988 | 985 | ||
989 | /* | 986 | /* |
987 | * Use the free inode btree to allocate an inode based on distance from the | ||
988 | * parent. Note that the provided cursor may be deleted and replaced. | ||
989 | */ | ||
990 | STATIC int | ||
991 | xfs_dialloc_ag_finobt_near( | ||
992 | xfs_agino_t pagino, | ||
993 | struct xfs_btree_cur **ocur, | ||
994 | struct xfs_inobt_rec_incore *rec) | ||
995 | { | ||
996 | struct xfs_btree_cur *lcur = *ocur; /* left search cursor */ | ||
997 | struct xfs_btree_cur *rcur; /* right search cursor */ | ||
998 | struct xfs_inobt_rec_incore rrec; | ||
999 | int error; | ||
1000 | int i, j; | ||
1001 | |||
1002 | error = xfs_inobt_lookup(lcur, pagino, XFS_LOOKUP_LE, &i); | ||
1003 | if (error) | ||
1004 | return error; | ||
1005 | |||
1006 | if (i == 1) { | ||
1007 | error = xfs_inobt_get_rec(lcur, rec, &i); | ||
1008 | if (error) | ||
1009 | return error; | ||
1010 | XFS_WANT_CORRUPTED_RETURN(i == 1); | ||
1011 | |||
1012 | /* | ||
1013 | * See if we've landed in the parent inode record. The finobt | ||
1014 | * only tracks chunks with at least one free inode, so record | ||
1015 | * existence is enough. | ||
1016 | */ | ||
1017 | if (pagino >= rec->ir_startino && | ||
1018 | pagino < (rec->ir_startino + XFS_INODES_PER_CHUNK)) | ||
1019 | return 0; | ||
1020 | } | ||
1021 | |||
1022 | error = xfs_btree_dup_cursor(lcur, &rcur); | ||
1023 | if (error) | ||
1024 | return error; | ||
1025 | |||
1026 | error = xfs_inobt_lookup(rcur, pagino, XFS_LOOKUP_GE, &j); | ||
1027 | if (error) | ||
1028 | goto error_rcur; | ||
1029 | if (j == 1) { | ||
1030 | error = xfs_inobt_get_rec(rcur, &rrec, &j); | ||
1031 | if (error) | ||
1032 | goto error_rcur; | ||
1033 | XFS_WANT_CORRUPTED_GOTO(j == 1, error_rcur); | ||
1034 | } | ||
1035 | |||
1036 | XFS_WANT_CORRUPTED_GOTO(i == 1 || j == 1, error_rcur); | ||
1037 | if (i == 1 && j == 1) { | ||
1038 | /* | ||
1039 | * Both the left and right records are valid. Choose the closer | ||
1040 | * inode chunk to the target. | ||
1041 | */ | ||
1042 | if ((pagino - rec->ir_startino + XFS_INODES_PER_CHUNK - 1) > | ||
1043 | (rrec.ir_startino - pagino)) { | ||
1044 | *rec = rrec; | ||
1045 | xfs_btree_del_cursor(lcur, XFS_BTREE_NOERROR); | ||
1046 | *ocur = rcur; | ||
1047 | } else { | ||
1048 | xfs_btree_del_cursor(rcur, XFS_BTREE_NOERROR); | ||
1049 | } | ||
1050 | } else if (j == 1) { | ||
1051 | /* only the right record is valid */ | ||
1052 | *rec = rrec; | ||
1053 | xfs_btree_del_cursor(lcur, XFS_BTREE_NOERROR); | ||
1054 | *ocur = rcur; | ||
1055 | } else if (i == 1) { | ||
1056 | /* only the left record is valid */ | ||
1057 | xfs_btree_del_cursor(rcur, XFS_BTREE_NOERROR); | ||
1058 | } | ||
1059 | |||
1060 | return 0; | ||
1061 | |||
1062 | error_rcur: | ||
1063 | xfs_btree_del_cursor(rcur, XFS_BTREE_ERROR); | ||
1064 | return error; | ||
1065 | } | ||
1066 | |||
1067 | /* | ||
1068 | * Use the free inode btree to find a free inode based on a newino hint. If | ||
1069 | * the hint is NULL, find the first free inode in the AG. | ||
1070 | */ | ||
1071 | STATIC int | ||
1072 | xfs_dialloc_ag_finobt_newino( | ||
1073 | struct xfs_agi *agi, | ||
1074 | struct xfs_btree_cur *cur, | ||
1075 | struct xfs_inobt_rec_incore *rec) | ||
1076 | { | ||
1077 | int error; | ||
1078 | int i; | ||
1079 | |||
1080 | if (agi->agi_newino != cpu_to_be32(NULLAGINO)) { | ||
1081 | error = xfs_inobt_lookup(cur, agi->agi_newino, XFS_LOOKUP_EQ, | ||
1082 | &i); | ||
1083 | if (error) | ||
1084 | return error; | ||
1085 | if (i == 1) { | ||
1086 | error = xfs_inobt_get_rec(cur, rec, &i); | ||
1087 | if (error) | ||
1088 | return error; | ||
1089 | XFS_WANT_CORRUPTED_RETURN(i == 1); | ||
1090 | |||
1091 | return 0; | ||
1092 | } | ||
1093 | } | ||
1094 | |||
1095 | /* | ||
1096 | * Find the first inode available in the AG. | ||
1097 | */ | ||
1098 | error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i); | ||
1099 | if (error) | ||
1100 | return error; | ||
1101 | XFS_WANT_CORRUPTED_RETURN(i == 1); | ||
1102 | |||
1103 | error = xfs_inobt_get_rec(cur, rec, &i); | ||
1104 | if (error) | ||
1105 | return error; | ||
1106 | XFS_WANT_CORRUPTED_RETURN(i == 1); | ||
1107 | |||
1108 | return 0; | ||
1109 | } | ||
1110 | |||
1111 | /* | ||
1112 | * Update the inobt based on a modification made to the finobt. Also ensure that | ||
1113 | * the records from both trees are equivalent post-modification. | ||
1114 | */ | ||
1115 | STATIC int | ||
1116 | xfs_dialloc_ag_update_inobt( | ||
1117 | struct xfs_btree_cur *cur, /* inobt cursor */ | ||
1118 | struct xfs_inobt_rec_incore *frec, /* finobt record */ | ||
1119 | int offset) /* inode offset */ | ||
1120 | { | ||
1121 | struct xfs_inobt_rec_incore rec; | ||
1122 | int error; | ||
1123 | int i; | ||
1124 | |||
1125 | error = xfs_inobt_lookup(cur, frec->ir_startino, XFS_LOOKUP_EQ, &i); | ||
1126 | if (error) | ||
1127 | return error; | ||
1128 | XFS_WANT_CORRUPTED_RETURN(i == 1); | ||
1129 | |||
1130 | error = xfs_inobt_get_rec(cur, &rec, &i); | ||
1131 | if (error) | ||
1132 | return error; | ||
1133 | XFS_WANT_CORRUPTED_RETURN(i == 1); | ||
1134 | ASSERT((XFS_AGINO_TO_OFFSET(cur->bc_mp, rec.ir_startino) % | ||
1135 | XFS_INODES_PER_CHUNK) == 0); | ||
1136 | |||
1137 | rec.ir_free &= ~XFS_INOBT_MASK(offset); | ||
1138 | rec.ir_freecount--; | ||
1139 | |||
1140 | XFS_WANT_CORRUPTED_RETURN((rec.ir_free == frec->ir_free) && | ||
1141 | (rec.ir_freecount == frec->ir_freecount)); | ||
1142 | |||
1143 | error = xfs_inobt_update(cur, &rec); | ||
1144 | if (error) | ||
1145 | return error; | ||
1146 | |||
1147 | return 0; | ||
1148 | } | ||
1149 | |||
1150 | /* | ||
1151 | * Allocate an inode using the free inode btree, if available. Otherwise, fall | ||
1152 | * back to the inobt search algorithm. | ||
1153 | * | ||
1154 | * The caller selected an AG for us, and made sure that free inodes are | ||
1155 | * available. | ||
1156 | */ | ||
1157 | STATIC int | ||
1158 | xfs_dialloc_ag( | ||
1159 | struct xfs_trans *tp, | ||
1160 | struct xfs_buf *agbp, | ||
1161 | xfs_ino_t parent, | ||
1162 | xfs_ino_t *inop) | ||
1163 | { | ||
1164 | struct xfs_mount *mp = tp->t_mountp; | ||
1165 | struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp); | ||
1166 | xfs_agnumber_t agno = be32_to_cpu(agi->agi_seqno); | ||
1167 | xfs_agnumber_t pagno = XFS_INO_TO_AGNO(mp, parent); | ||
1168 | xfs_agino_t pagino = XFS_INO_TO_AGINO(mp, parent); | ||
1169 | struct xfs_perag *pag; | ||
1170 | struct xfs_btree_cur *cur; /* finobt cursor */ | ||
1171 | struct xfs_btree_cur *icur; /* inobt cursor */ | ||
1172 | struct xfs_inobt_rec_incore rec; | ||
1173 | xfs_ino_t ino; | ||
1174 | int error; | ||
1175 | int offset; | ||
1176 | int i; | ||
1177 | |||
1178 | if (!xfs_sb_version_hasfinobt(&mp->m_sb)) | ||
1179 | return xfs_dialloc_ag_inobt(tp, agbp, parent, inop); | ||
1180 | |||
1181 | pag = xfs_perag_get(mp, agno); | ||
1182 | |||
1183 | /* | ||
1184 | * If pagino is 0 (this is the root inode allocation) use newino. | ||
1185 | * This must work because we've just allocated some. | ||
1186 | */ | ||
1187 | if (!pagino) | ||
1188 | pagino = be32_to_cpu(agi->agi_newino); | ||
1189 | |||
1190 | cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_FINO); | ||
1191 | |||
1192 | error = xfs_check_agi_freecount(cur, agi); | ||
1193 | if (error) | ||
1194 | goto error_cur; | ||
1195 | |||
1196 | /* | ||
1197 | * The search algorithm depends on whether we're in the same AG as the | ||
1198 | * parent. If so, find the closest available inode to the parent. If | ||
1199 | * not, consider the agi hint or find the first free inode in the AG. | ||
1200 | */ | ||
1201 | if (agno == pagno) | ||
1202 | error = xfs_dialloc_ag_finobt_near(pagino, &cur, &rec); | ||
1203 | else | ||
1204 | error = xfs_dialloc_ag_finobt_newino(agi, cur, &rec); | ||
1205 | if (error) | ||
1206 | goto error_cur; | ||
1207 | |||
1208 | offset = xfs_lowbit64(rec.ir_free); | ||
1209 | ASSERT(offset >= 0); | ||
1210 | ASSERT(offset < XFS_INODES_PER_CHUNK); | ||
1211 | ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) % | ||
1212 | XFS_INODES_PER_CHUNK) == 0); | ||
1213 | ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino + offset); | ||
1214 | |||
1215 | /* | ||
1216 | * Modify or remove the finobt record. | ||
1217 | */ | ||
1218 | rec.ir_free &= ~XFS_INOBT_MASK(offset); | ||
1219 | rec.ir_freecount--; | ||
1220 | if (rec.ir_freecount) | ||
1221 | error = xfs_inobt_update(cur, &rec); | ||
1222 | else | ||
1223 | error = xfs_btree_delete(cur, &i); | ||
1224 | if (error) | ||
1225 | goto error_cur; | ||
1226 | |||
1227 | /* | ||
1228 | * The finobt has now been updated appropriately. We haven't updated the | ||
1229 | * agi and superblock yet, so we can create an inobt cursor and validate | ||
1230 | * the original freecount. If all is well, make the equivalent update to | ||
1231 | * the inobt using the finobt record and offset information. | ||
1232 | */ | ||
1233 | icur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO); | ||
1234 | |||
1235 | error = xfs_check_agi_freecount(icur, agi); | ||
1236 | if (error) | ||
1237 | goto error_icur; | ||
1238 | |||
1239 | error = xfs_dialloc_ag_update_inobt(icur, &rec, offset); | ||
1240 | if (error) | ||
1241 | goto error_icur; | ||
1242 | |||
1243 | /* | ||
1244 | * Both trees have now been updated. We must update the perag and | ||
1245 | * superblock before we can check the freecount for each btree. | ||
1246 | */ | ||
1247 | be32_add_cpu(&agi->agi_freecount, -1); | ||
1248 | xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT); | ||
1249 | pag->pagi_freecount--; | ||
1250 | |||
1251 | xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -1); | ||
1252 | |||
1253 | error = xfs_check_agi_freecount(icur, agi); | ||
1254 | if (error) | ||
1255 | goto error_icur; | ||
1256 | error = xfs_check_agi_freecount(cur, agi); | ||
1257 | if (error) | ||
1258 | goto error_icur; | ||
1259 | |||
1260 | xfs_btree_del_cursor(icur, XFS_BTREE_NOERROR); | ||
1261 | xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); | ||
1262 | xfs_perag_put(pag); | ||
1263 | *inop = ino; | ||
1264 | return 0; | ||
1265 | |||
1266 | error_icur: | ||
1267 | xfs_btree_del_cursor(icur, XFS_BTREE_ERROR); | ||
1268 | error_cur: | ||
1269 | xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); | ||
1270 | xfs_perag_put(pag); | ||
1271 | return error; | ||
1272 | } | ||
1273 | |||
1274 | /* | ||
990 | * Allocate an inode on disk. | 1275 | * Allocate an inode on disk. |
991 | * | 1276 | * |
992 | * Mode is used to tell whether the new inode will need space, and whether it | 1277 | * Mode is used to tell whether the new inode will need space, and whether it |