diff options
| author | Brian Foster <bfoster@redhat.com> | 2014-04-24 02:00:53 -0400 |
|---|---|---|
| committer | Dave Chinner <david@fromorbit.com> | 2014-04-24 02:00:53 -0400 |
| commit | 6dd8638e4e8764e0d6557fc62840a815a99c136d (patch) | |
| tree | ec6653b8440c548bf78fe65c9d87ab920049d63a | |
| parent | 0aa0a756ec255cfc8b733fe0d8993c1758b1240c (diff) | |
xfs: use and update the finobt on inode allocation
Replace xfs_dialloc_ag() with an implementation that looks for a
record in the finobt. The finobt only tracks records with at least
one free inode. This eliminates the need for the intra-ag scan in
the original algorithm. Once the inode is allocated, update the
finobt appropriately (possibly removing the record) as well as the
inobt.
Move the original xfs_dialloc_ag() algorithm to
xfs_dialloc_ag_inobt() and fall back as such if finobt support is
not enabled.
Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
| -rw-r--r-- | fs/xfs/xfs_ialloc.c | 295 |
1 files changed, 290 insertions, 5 deletions
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index 203db98f0bc9..ab3540b675d5 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c | |||
| @@ -722,13 +722,10 @@ xfs_ialloc_get_rec( | |||
| 722 | } | 722 | } |
| 723 | 723 | ||
| 724 | /* | 724 | /* |
| 725 | * Allocate an inode. | 725 | * Allocate an inode using the inobt-only algorithm. |
| 726 | * | ||
| 727 | * The caller selected an AG for us, and made sure that free inodes are | ||
| 728 | * available. | ||
| 729 | */ | 726 | */ |
| 730 | STATIC int | 727 | STATIC int |
| 731 | xfs_dialloc_ag( | 728 | xfs_dialloc_ag_inobt( |
| 732 | struct xfs_trans *tp, | 729 | struct xfs_trans *tp, |
| 733 | struct xfs_buf *agbp, | 730 | struct xfs_buf *agbp, |
| 734 | xfs_ino_t parent, | 731 | xfs_ino_t parent, |
| @@ -987,6 +984,294 @@ error0: | |||
| 987 | } | 984 | } |
| 988 | 985 | ||
| 989 | /* | 986 | /* |
| 987 | * Use the free inode btree to allocate an inode based on distance from the | ||
| 988 | * parent. Note that the provided cursor may be deleted and replaced. | ||
| 989 | */ | ||
| 990 | STATIC int | ||
| 991 | xfs_dialloc_ag_finobt_near( | ||
| 992 | xfs_agino_t pagino, | ||
| 993 | struct xfs_btree_cur **ocur, | ||
| 994 | struct xfs_inobt_rec_incore *rec) | ||
| 995 | { | ||
| 996 | struct xfs_btree_cur *lcur = *ocur; /* left search cursor */ | ||
| 997 | struct xfs_btree_cur *rcur; /* right search cursor */ | ||
| 998 | struct xfs_inobt_rec_incore rrec; | ||
| 999 | int error; | ||
| 1000 | int i, j; | ||
| 1001 | |||
| 1002 | error = xfs_inobt_lookup(lcur, pagino, XFS_LOOKUP_LE, &i); | ||
| 1003 | if (error) | ||
| 1004 | return error; | ||
| 1005 | |||
| 1006 | if (i == 1) { | ||
| 1007 | error = xfs_inobt_get_rec(lcur, rec, &i); | ||
| 1008 | if (error) | ||
| 1009 | return error; | ||
| 1010 | XFS_WANT_CORRUPTED_RETURN(i == 1); | ||
| 1011 | |||
| 1012 | /* | ||
| 1013 | * See if we've landed in the parent inode record. The finobt | ||
| 1014 | * only tracks chunks with at least one free inode, so record | ||
| 1015 | * existence is enough. | ||
| 1016 | */ | ||
| 1017 | if (pagino >= rec->ir_startino && | ||
| 1018 | pagino < (rec->ir_startino + XFS_INODES_PER_CHUNK)) | ||
| 1019 | return 0; | ||
| 1020 | } | ||
| 1021 | |||
| 1022 | error = xfs_btree_dup_cursor(lcur, &rcur); | ||
| 1023 | if (error) | ||
| 1024 | return error; | ||
| 1025 | |||
| 1026 | error = xfs_inobt_lookup(rcur, pagino, XFS_LOOKUP_GE, &j); | ||
| 1027 | if (error) | ||
| 1028 | goto error_rcur; | ||
| 1029 | if (j == 1) { | ||
| 1030 | error = xfs_inobt_get_rec(rcur, &rrec, &j); | ||
| 1031 | if (error) | ||
| 1032 | goto error_rcur; | ||
| 1033 | XFS_WANT_CORRUPTED_GOTO(j == 1, error_rcur); | ||
| 1034 | } | ||
| 1035 | |||
| 1036 | XFS_WANT_CORRUPTED_GOTO(i == 1 || j == 1, error_rcur); | ||
| 1037 | if (i == 1 && j == 1) { | ||
| 1038 | /* | ||
| 1039 | * Both the left and right records are valid. Choose the closer | ||
| 1040 | * inode chunk to the target. | ||
| 1041 | */ | ||
| 1042 | if ((pagino - rec->ir_startino + XFS_INODES_PER_CHUNK - 1) > | ||
| 1043 | (rrec.ir_startino - pagino)) { | ||
| 1044 | *rec = rrec; | ||
| 1045 | xfs_btree_del_cursor(lcur, XFS_BTREE_NOERROR); | ||
| 1046 | *ocur = rcur; | ||
| 1047 | } else { | ||
| 1048 | xfs_btree_del_cursor(rcur, XFS_BTREE_NOERROR); | ||
| 1049 | } | ||
| 1050 | } else if (j == 1) { | ||
| 1051 | /* only the right record is valid */ | ||
| 1052 | *rec = rrec; | ||
| 1053 | xfs_btree_del_cursor(lcur, XFS_BTREE_NOERROR); | ||
| 1054 | *ocur = rcur; | ||
| 1055 | } else if (i == 1) { | ||
| 1056 | /* only the left record is valid */ | ||
| 1057 | xfs_btree_del_cursor(rcur, XFS_BTREE_NOERROR); | ||
| 1058 | } | ||
| 1059 | |||
| 1060 | return 0; | ||
| 1061 | |||
| 1062 | error_rcur: | ||
| 1063 | xfs_btree_del_cursor(rcur, XFS_BTREE_ERROR); | ||
| 1064 | return error; | ||
| 1065 | } | ||
| 1066 | |||
| 1067 | /* | ||
| 1068 | * Use the free inode btree to find a free inode based on a newino hint. If | ||
| 1069 | * the hint is NULL, find the first free inode in the AG. | ||
| 1070 | */ | ||
| 1071 | STATIC int | ||
| 1072 | xfs_dialloc_ag_finobt_newino( | ||
| 1073 | struct xfs_agi *agi, | ||
| 1074 | struct xfs_btree_cur *cur, | ||
| 1075 | struct xfs_inobt_rec_incore *rec) | ||
| 1076 | { | ||
| 1077 | int error; | ||
| 1078 | int i; | ||
| 1079 | |||
| 1080 | if (agi->agi_newino != cpu_to_be32(NULLAGINO)) { | ||
| 1081 | error = xfs_inobt_lookup(cur, agi->agi_newino, XFS_LOOKUP_EQ, | ||
| 1082 | &i); | ||
| 1083 | if (error) | ||
| 1084 | return error; | ||
| 1085 | if (i == 1) { | ||
| 1086 | error = xfs_inobt_get_rec(cur, rec, &i); | ||
| 1087 | if (error) | ||
| 1088 | return error; | ||
| 1089 | XFS_WANT_CORRUPTED_RETURN(i == 1); | ||
| 1090 | |||
| 1091 | return 0; | ||
| 1092 | } | ||
| 1093 | } | ||
| 1094 | |||
| 1095 | /* | ||
| 1096 | * Find the first inode available in the AG. | ||
| 1097 | */ | ||
| 1098 | error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i); | ||
| 1099 | if (error) | ||
| 1100 | return error; | ||
| 1101 | XFS_WANT_CORRUPTED_RETURN(i == 1); | ||
| 1102 | |||
| 1103 | error = xfs_inobt_get_rec(cur, rec, &i); | ||
| 1104 | if (error) | ||
| 1105 | return error; | ||
| 1106 | XFS_WANT_CORRUPTED_RETURN(i == 1); | ||
| 1107 | |||
| 1108 | return 0; | ||
| 1109 | } | ||
| 1110 | |||
| 1111 | /* | ||
| 1112 | * Update the inobt based on a modification made to the finobt. Also ensure that | ||
| 1113 | * the records from both trees are equivalent post-modification. | ||
| 1114 | */ | ||
| 1115 | STATIC int | ||
| 1116 | xfs_dialloc_ag_update_inobt( | ||
| 1117 | struct xfs_btree_cur *cur, /* inobt cursor */ | ||
| 1118 | struct xfs_inobt_rec_incore *frec, /* finobt record */ | ||
| 1119 | int offset) /* inode offset */ | ||
| 1120 | { | ||
| 1121 | struct xfs_inobt_rec_incore rec; | ||
| 1122 | int error; | ||
| 1123 | int i; | ||
| 1124 | |||
| 1125 | error = xfs_inobt_lookup(cur, frec->ir_startino, XFS_LOOKUP_EQ, &i); | ||
| 1126 | if (error) | ||
| 1127 | return error; | ||
| 1128 | XFS_WANT_CORRUPTED_RETURN(i == 1); | ||
| 1129 | |||
| 1130 | error = xfs_inobt_get_rec(cur, &rec, &i); | ||
| 1131 | if (error) | ||
| 1132 | return error; | ||
| 1133 | XFS_WANT_CORRUPTED_RETURN(i == 1); | ||
| 1134 | ASSERT((XFS_AGINO_TO_OFFSET(cur->bc_mp, rec.ir_startino) % | ||
| 1135 | XFS_INODES_PER_CHUNK) == 0); | ||
| 1136 | |||
| 1137 | rec.ir_free &= ~XFS_INOBT_MASK(offset); | ||
| 1138 | rec.ir_freecount--; | ||
| 1139 | |||
| 1140 | XFS_WANT_CORRUPTED_RETURN((rec.ir_free == frec->ir_free) && | ||
| 1141 | (rec.ir_freecount == frec->ir_freecount)); | ||
| 1142 | |||
| 1143 | error = xfs_inobt_update(cur, &rec); | ||
| 1144 | if (error) | ||
| 1145 | return error; | ||
| 1146 | |||
| 1147 | return 0; | ||
| 1148 | } | ||
| 1149 | |||
| 1150 | /* | ||
| 1151 | * Allocate an inode using the free inode btree, if available. Otherwise, fall | ||
| 1152 | * back to the inobt search algorithm. | ||
| 1153 | * | ||
| 1154 | * The caller selected an AG for us, and made sure that free inodes are | ||
| 1155 | * available. | ||
| 1156 | */ | ||
| 1157 | STATIC int | ||
| 1158 | xfs_dialloc_ag( | ||
| 1159 | struct xfs_trans *tp, | ||
| 1160 | struct xfs_buf *agbp, | ||
| 1161 | xfs_ino_t parent, | ||
| 1162 | xfs_ino_t *inop) | ||
| 1163 | { | ||
| 1164 | struct xfs_mount *mp = tp->t_mountp; | ||
| 1165 | struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp); | ||
| 1166 | xfs_agnumber_t agno = be32_to_cpu(agi->agi_seqno); | ||
| 1167 | xfs_agnumber_t pagno = XFS_INO_TO_AGNO(mp, parent); | ||
| 1168 | xfs_agino_t pagino = XFS_INO_TO_AGINO(mp, parent); | ||
| 1169 | struct xfs_perag *pag; | ||
| 1170 | struct xfs_btree_cur *cur; /* finobt cursor */ | ||
| 1171 | struct xfs_btree_cur *icur; /* inobt cursor */ | ||
| 1172 | struct xfs_inobt_rec_incore rec; | ||
| 1173 | xfs_ino_t ino; | ||
| 1174 | int error; | ||
| 1175 | int offset; | ||
| 1176 | int i; | ||
| 1177 | |||
| 1178 | if (!xfs_sb_version_hasfinobt(&mp->m_sb)) | ||
| 1179 | return xfs_dialloc_ag_inobt(tp, agbp, parent, inop); | ||
| 1180 | |||
| 1181 | pag = xfs_perag_get(mp, agno); | ||
| 1182 | |||
| 1183 | /* | ||
| 1184 | * If pagino is 0 (this is the root inode allocation) use newino. | ||
| 1185 | * This must work because we've just allocated some. | ||
| 1186 | */ | ||
| 1187 | if (!pagino) | ||
| 1188 | pagino = be32_to_cpu(agi->agi_newino); | ||
| 1189 | |||
| 1190 | cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_FINO); | ||
| 1191 | |||
| 1192 | error = xfs_check_agi_freecount(cur, agi); | ||
| 1193 | if (error) | ||
| 1194 | goto error_cur; | ||
| 1195 | |||
| 1196 | /* | ||
| 1197 | * The search algorithm depends on whether we're in the same AG as the | ||
| 1198 | * parent. If so, find the closest available inode to the parent. If | ||
| 1199 | * not, consider the agi hint or find the first free inode in the AG. | ||
| 1200 | */ | ||
| 1201 | if (agno == pagno) | ||
| 1202 | error = xfs_dialloc_ag_finobt_near(pagino, &cur, &rec); | ||
| 1203 | else | ||
| 1204 | error = xfs_dialloc_ag_finobt_newino(agi, cur, &rec); | ||
| 1205 | if (error) | ||
| 1206 | goto error_cur; | ||
| 1207 | |||
| 1208 | offset = xfs_lowbit64(rec.ir_free); | ||
| 1209 | ASSERT(offset >= 0); | ||
| 1210 | ASSERT(offset < XFS_INODES_PER_CHUNK); | ||
| 1211 | ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) % | ||
| 1212 | XFS_INODES_PER_CHUNK) == 0); | ||
| 1213 | ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino + offset); | ||
| 1214 | |||
| 1215 | /* | ||
| 1216 | * Modify or remove the finobt record. | ||
| 1217 | */ | ||
| 1218 | rec.ir_free &= ~XFS_INOBT_MASK(offset); | ||
| 1219 | rec.ir_freecount--; | ||
| 1220 | if (rec.ir_freecount) | ||
| 1221 | error = xfs_inobt_update(cur, &rec); | ||
| 1222 | else | ||
| 1223 | error = xfs_btree_delete(cur, &i); | ||
| 1224 | if (error) | ||
| 1225 | goto error_cur; | ||
| 1226 | |||
| 1227 | /* | ||
| 1228 | * The finobt has now been updated appropriately. We haven't updated the | ||
| 1229 | * agi and superblock yet, so we can create an inobt cursor and validate | ||
| 1230 | * the original freecount. If all is well, make the equivalent update to | ||
| 1231 | * the inobt using the finobt record and offset information. | ||
| 1232 | */ | ||
| 1233 | icur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO); | ||
| 1234 | |||
| 1235 | error = xfs_check_agi_freecount(icur, agi); | ||
| 1236 | if (error) | ||
| 1237 | goto error_icur; | ||
| 1238 | |||
| 1239 | error = xfs_dialloc_ag_update_inobt(icur, &rec, offset); | ||
| 1240 | if (error) | ||
| 1241 | goto error_icur; | ||
| 1242 | |||
| 1243 | /* | ||
| 1244 | * Both trees have now been updated. We must update the perag and | ||
| 1245 | * superblock before we can check the freecount for each btree. | ||
| 1246 | */ | ||
| 1247 | be32_add_cpu(&agi->agi_freecount, -1); | ||
| 1248 | xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT); | ||
| 1249 | pag->pagi_freecount--; | ||
| 1250 | |||
| 1251 | xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -1); | ||
| 1252 | |||
| 1253 | error = xfs_check_agi_freecount(icur, agi); | ||
| 1254 | if (error) | ||
| 1255 | goto error_icur; | ||
| 1256 | error = xfs_check_agi_freecount(cur, agi); | ||
| 1257 | if (error) | ||
| 1258 | goto error_icur; | ||
| 1259 | |||
| 1260 | xfs_btree_del_cursor(icur, XFS_BTREE_NOERROR); | ||
| 1261 | xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); | ||
| 1262 | xfs_perag_put(pag); | ||
| 1263 | *inop = ino; | ||
| 1264 | return 0; | ||
| 1265 | |||
| 1266 | error_icur: | ||
| 1267 | xfs_btree_del_cursor(icur, XFS_BTREE_ERROR); | ||
| 1268 | error_cur: | ||
| 1269 | xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); | ||
| 1270 | xfs_perag_put(pag); | ||
| 1271 | return error; | ||
| 1272 | } | ||
| 1273 | |||
| 1274 | /* | ||
| 990 | * Allocate an inode on disk. | 1275 | * Allocate an inode on disk. |
| 991 | * | 1276 | * |
| 992 | * Mode is used to tell whether the new inode will need space, and whether it | 1277 | * Mode is used to tell whether the new inode will need space, and whether it |
