Diffstat (limited to 'fs/xfs/xfs_mount.c')
-rw-r--r--	fs/xfs/xfs_mount.c	918
1 file changed, 108 insertions(+), 810 deletions(-)
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 4fa80e63eea2..2ce7ee3b4ec1 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -43,18 +43,6 @@
 #include "xfs_sysfs.h"
 
 
-#ifdef HAVE_PERCPU_SB
-STATIC void	xfs_icsb_balance_counter(xfs_mount_t *, xfs_sb_field_t,
-						int);
-STATIC void	xfs_icsb_balance_counter_locked(xfs_mount_t *, xfs_sb_field_t,
-						int);
-STATIC void	xfs_icsb_disable_counter(xfs_mount_t *, xfs_sb_field_t);
-#else
-
-#define xfs_icsb_balance_counter(mp, a, b)		do { } while (0)
-#define xfs_icsb_balance_counter_locked(mp, a, b)	do { } while (0)
-#endif
-
 static DEFINE_MUTEX(xfs_uuid_table_mutex);
 static int xfs_uuid_table_size;
 static uuid_t *xfs_uuid_table;
@@ -347,8 +335,7 @@ reread:
 		goto reread;
 	}
 
-	/* Initialize per-cpu counters */
-	xfs_icsb_reinit_counters(mp);
+	xfs_reinit_percpu_counters(mp);
 
 	/* no need to be quiet anymore, so reset the buf ops */
 	bp->b_ops = &xfs_sb_buf_ops;
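
Note: xfs_reinit_percpu_counters() is not defined in this file (in this series it lives outside xfs_mount.c). A plausible sketch of what such a helper does with the generic API, assuming the m_icount, m_ifree and m_fdblocks percpu counters exist on struct xfs_mount:

static void
xfs_reinit_percpu_counters(
	struct xfs_mount	*mp)
{
	/* push the just-read on-disk superblock values into the counters */
	percpu_counter_set(&mp->m_icount, mp->m_sb.sb_icount);
	percpu_counter_set(&mp->m_ifree, mp->m_sb.sb_ifree);
	percpu_counter_set(&mp->m_fdblocks, mp->m_sb.sb_fdblocks);
}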
@@ -1087,8 +1074,6 @@ xfs_log_sbcount(xfs_mount_t *mp)
 	if (!xfs_fs_writable(mp, SB_FREEZE_COMPLETE))
 		return 0;
 
-	xfs_icsb_sync_counters(mp, 0);
-
 	/*
 	 * we don't need to do this if we are updating the superblock
 	 * counters on every modification.
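
The explicit xfs_icsb_sync_counters() step can go away because free-running percpu counters are summed on demand; the equivalent fold-back happens when the superblock is actually written, outside this hunk. A sketch of that fold under those assumptions (the helper name is hypothetical):

static void
sketch_fold_counters(
	struct xfs_mount	*mp)
{
	/* percpu_counter_sum_positive() takes the counter's internal lock
	 * and adds every CPU's delta to the base count for an accurate total */
	mp->m_sb.sb_icount = percpu_counter_sum_positive(&mp->m_icount);
	mp->m_sb.sb_ifree = percpu_counter_sum_positive(&mp->m_ifree);
	mp->m_sb.sb_fdblocks = percpu_counter_sum_positive(&mp->m_fdblocks);
}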
@@ -1099,253 +1084,136 @@ xfs_log_sbcount(xfs_mount_t *mp)
 	return xfs_sync_sb(mp, true);
 }
 
-/*
- * xfs_mod_incore_sb_unlocked() is a utility routine commonly used to apply
- * a delta to a specified field in the in-core superblock. Simply
- * switch on the field indicated and apply the delta to that field.
- * Fields are not allowed to dip below zero, so if the delta would
- * do this do not apply it and return EINVAL.
- *
- * The m_sb_lock must be held when this routine is called.
- */
-STATIC int
-xfs_mod_incore_sb_unlocked(
-	xfs_mount_t	*mp,
-	xfs_sb_field_t	field,
-	int64_t		delta,
-	int		rsvd)
+int
+xfs_mod_icount(
+	struct xfs_mount	*mp,
+	int64_t			delta)
 {
-	int		scounter;	/* short counter for 32 bit fields */
-	long long	lcounter;	/* long counter for 64 bit fields */
-	long long	res_used, rem;
-
-	/*
-	 * With the in-core superblock spin lock held, switch
-	 * on the indicated field. Apply the delta to the
-	 * proper field. If the fields value would dip below
-	 * 0, then do not apply the delta and return EINVAL.
-	 */
-	switch (field) {
-	case XFS_SBS_ICOUNT:
-		lcounter = (long long)mp->m_sb.sb_icount;
-		lcounter += delta;
-		if (lcounter < 0) {
-			ASSERT(0);
-			return -EINVAL;
-		}
-		mp->m_sb.sb_icount = lcounter;
-		return 0;
-	case XFS_SBS_IFREE:
-		lcounter = (long long)mp->m_sb.sb_ifree;
-		lcounter += delta;
-		if (lcounter < 0) {
-			ASSERT(0);
-			return -EINVAL;
-		}
-		mp->m_sb.sb_ifree = lcounter;
-		return 0;
-	case XFS_SBS_FDBLOCKS:
-		lcounter = (long long)
-			mp->m_sb.sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
-		res_used = (long long)(mp->m_resblks - mp->m_resblks_avail);
-
-		if (delta > 0) {		/* Putting blocks back */
-			if (res_used > delta) {
-				mp->m_resblks_avail += delta;
-			} else {
-				rem = delta - res_used;
-				mp->m_resblks_avail = mp->m_resblks;
-				lcounter += rem;
-			}
-		} else {			/* Taking blocks away */
-			lcounter += delta;
-			if (lcounter >= 0) {
-				mp->m_sb.sb_fdblocks = lcounter +
-							XFS_ALLOC_SET_ASIDE(mp);
-				return 0;
-			}
-
-			/*
-			 * We are out of blocks, use any available reserved
-			 * blocks if were allowed to.
-			 */
-			if (!rsvd)
-				return -ENOSPC;
-
-			lcounter = (long long)mp->m_resblks_avail + delta;
-			if (lcounter >= 0) {
-				mp->m_resblks_avail = lcounter;
-				return 0;
-			}
-			printk_once(KERN_WARNING
-				"Filesystem \"%s\": reserve blocks depleted! "
-				"Consider increasing reserve pool size.",
-				mp->m_fsname);
-			return -ENOSPC;
-		}
-
-		mp->m_sb.sb_fdblocks = lcounter + XFS_ALLOC_SET_ASIDE(mp);
-		return 0;
-	case XFS_SBS_FREXTENTS:
-		lcounter = (long long)mp->m_sb.sb_frextents;
-		lcounter += delta;
-		if (lcounter < 0) {
-			return -ENOSPC;
-		}
-		mp->m_sb.sb_frextents = lcounter;
-		return 0;
-	case XFS_SBS_DBLOCKS:
-		lcounter = (long long)mp->m_sb.sb_dblocks;
-		lcounter += delta;
-		if (lcounter < 0) {
-			ASSERT(0);
-			return -EINVAL;
-		}
-		mp->m_sb.sb_dblocks = lcounter;
-		return 0;
-	case XFS_SBS_AGCOUNT:
-		scounter = mp->m_sb.sb_agcount;
-		scounter += delta;
-		if (scounter < 0) {
-			ASSERT(0);
-			return -EINVAL;
-		}
-		mp->m_sb.sb_agcount = scounter;
-		return 0;
-	case XFS_SBS_IMAX_PCT:
-		scounter = mp->m_sb.sb_imax_pct;
-		scounter += delta;
-		if (scounter < 0) {
-			ASSERT(0);
-			return -EINVAL;
-		}
-		mp->m_sb.sb_imax_pct = scounter;
-		return 0;
-	case XFS_SBS_REXTSIZE:
-		scounter = mp->m_sb.sb_rextsize;
-		scounter += delta;
-		if (scounter < 0) {
-			ASSERT(0);
-			return -EINVAL;
-		}
-		mp->m_sb.sb_rextsize = scounter;
-		return 0;
-	case XFS_SBS_RBMBLOCKS:
-		scounter = mp->m_sb.sb_rbmblocks;
-		scounter += delta;
-		if (scounter < 0) {
-			ASSERT(0);
-			return -EINVAL;
-		}
-		mp->m_sb.sb_rbmblocks = scounter;
-		return 0;
-	case XFS_SBS_RBLOCKS:
-		lcounter = (long long)mp->m_sb.sb_rblocks;
-		lcounter += delta;
-		if (lcounter < 0) {
-			ASSERT(0);
-			return -EINVAL;
-		}
-		mp->m_sb.sb_rblocks = lcounter;
-		return 0;
-	case XFS_SBS_REXTENTS:
-		lcounter = (long long)mp->m_sb.sb_rextents;
-		lcounter += delta;
-		if (lcounter < 0) {
-			ASSERT(0);
-			return -EINVAL;
-		}
-		mp->m_sb.sb_rextents = lcounter;
-		return 0;
-	case XFS_SBS_REXTSLOG:
-		scounter = mp->m_sb.sb_rextslog;
-		scounter += delta;
-		if (scounter < 0) {
-			ASSERT(0);
-			return -EINVAL;
-		}
-		mp->m_sb.sb_rextslog = scounter;
-		return 0;
-	default:
+	/* deltas are +/-64, hence the large batch size of 128. */
+	__percpu_counter_add(&mp->m_icount, delta, 128);
+	if (percpu_counter_compare(&mp->m_icount, 0) < 0) {
 		ASSERT(0);
+		percpu_counter_add(&mp->m_icount, -delta);
 		return -EINVAL;
 	}
+	return 0;
 }
 
-/*
- * xfs_mod_incore_sb() is used to change a field in the in-core
- * superblock structure by the specified delta. This modification
- * is protected by the m_sb_lock. Just use the xfs_mod_incore_sb_unlocked()
- * routine to do the work.
- */
 int
-xfs_mod_incore_sb(
+xfs_mod_ifree(
 	struct xfs_mount	*mp,
-	xfs_sb_field_t		field,
-	int64_t			delta,
-	int			rsvd)
+	int64_t			delta)
 {
-	int			status;
-
-#ifdef HAVE_PERCPU_SB
-	ASSERT(field < XFS_SBS_ICOUNT || field > XFS_SBS_FDBLOCKS);
-#endif
-	spin_lock(&mp->m_sb_lock);
-	status = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd);
-	spin_unlock(&mp->m_sb_lock);
-
-	return status;
+	percpu_counter_add(&mp->m_ifree, delta);
+	if (percpu_counter_compare(&mp->m_ifree, 0) < 0) {
+		ASSERT(0);
+		percpu_counter_add(&mp->m_ifree, -delta);
+		return -EINVAL;
+	}
+	return 0;
 }
 
-/*
- * Change more than one field in the in-core superblock structure at a time.
- *
- * The fields and changes to those fields are specified in the array of
- * xfs_mod_sb structures passed in. Either all of the specified deltas
- * will be applied or none of them will. If any modified field dips below 0,
- * then all modifications will be backed out and EINVAL will be returned.
- *
- * Note that this function may not be used for the superblock values that
- * are tracked with the in-memory per-cpu counters - a direct call to
- * xfs_icsb_modify_counters is required for these.
- */
 int
-xfs_mod_incore_sb_batch(
+xfs_mod_fdblocks(
 	struct xfs_mount	*mp,
-	xfs_mod_sb_t		*msb,
-	uint			nmsb,
-	int			rsvd)
+	int64_t			delta,
+	bool			rsvd)
 {
-	xfs_mod_sb_t		*msbp;
-	int			error = 0;
+	int64_t			lcounter;
+	long long		res_used;
+	s32			batch;
+
+	if (delta > 0) {
+		/*
+		 * If the reserve pool is depleted, put blocks back into it
+		 * first. Most of the time the pool is full.
+		 */
+		if (likely(mp->m_resblks == mp->m_resblks_avail)) {
+			percpu_counter_add(&mp->m_fdblocks, delta);
+			return 0;
+		}
+
+		spin_lock(&mp->m_sb_lock);
+		res_used = (long long)(mp->m_resblks - mp->m_resblks_avail);
+
+		if (res_used > delta) {
+			mp->m_resblks_avail += delta;
+		} else {
+			delta -= res_used;
+			mp->m_resblks_avail = mp->m_resblks;
+			percpu_counter_add(&mp->m_fdblocks, delta);
+		}
+		spin_unlock(&mp->m_sb_lock);
+		return 0;
+	}
 
 	/*
-	 * Loop through the array of mod structures and apply each individually.
-	 * If any fail, then back out all those which have already been applied.
-	 * Do all of this within the scope of the m_sb_lock so that all of the
-	 * changes will be atomic.
+	 * Taking blocks away, need to be more accurate the closer we
+	 * are to zero.
+	 *
+	 * batch size is set to a maximum of 1024 blocks - if we are
+	 * allocating of freeing extents larger than this then we aren't
+	 * going to be hammering the counter lock so a lock per update
+	 * is not a problem.
+	 *
+	 * If the counter has a value of less than 2 * max batch size,
+	 * then make everything serialise as we are real close to
+	 * ENOSPC.
+	 */
+#define __BATCH	1024
+	if (percpu_counter_compare(&mp->m_fdblocks, 2 * __BATCH) < 0)
+		batch = 1;
+	else
+		batch = __BATCH;
+#undef __BATCH
+
+	__percpu_counter_add(&mp->m_fdblocks, delta, batch);
+	if (percpu_counter_compare(&mp->m_fdblocks,
+				   XFS_ALLOC_SET_ASIDE(mp)) >= 0) {
+		/* we had space! */
+		return 0;
+	}
+
+	/*
+	 * lock up the sb for dipping into reserves before releasing the space
+	 * that took us to ENOSPC.
 	 */
 	spin_lock(&mp->m_sb_lock);
-	for (msbp = msb; msbp < (msb + nmsb); msbp++) {
-		ASSERT(msbp->msb_field < XFS_SBS_ICOUNT ||
-		       msbp->msb_field > XFS_SBS_FDBLOCKS);
+	percpu_counter_add(&mp->m_fdblocks, -delta);
+	if (!rsvd)
+		goto fdblocks_enospc;
 
-		error = xfs_mod_incore_sb_unlocked(mp, msbp->msb_field,
-						   msbp->msb_delta, rsvd);
-		if (error)
-			goto unwind;
+	lcounter = (long long)mp->m_resblks_avail + delta;
+	if (lcounter >= 0) {
+		mp->m_resblks_avail = lcounter;
+		spin_unlock(&mp->m_sb_lock);
+		return 0;
 	}
+	printk_once(KERN_WARNING
+		"Filesystem \"%s\": reserve blocks depleted! "
+		"Consider increasing reserve pool size.",
+		mp->m_fsname);
+fdblocks_enospc:
 	spin_unlock(&mp->m_sb_lock);
-	return 0;
+	return -ENOSPC;
+}
 
-unwind:
-	while (--msbp >= msb) {
-		error = xfs_mod_incore_sb_unlocked(mp, msbp->msb_field,
-						   -msbp->msb_delta, rsvd);
-		ASSERT(error == 0);
-	}
+int
+xfs_mod_frextents(
+	struct xfs_mount	*mp,
+	int64_t			delta)
+{
+	int64_t			lcounter;
+	int			ret = 0;
+
+	spin_lock(&mp->m_sb_lock);
+	lcounter = mp->m_sb.sb_frextents + delta;
+	if (lcounter < 0)
+		ret = -ENOSPC;
+	else
+		mp->m_sb.sb_frextents = lcounter;
 	spin_unlock(&mp->m_sb_lock);
-	return error;
+	return ret;
 }
 
 /*
@@ -1407,573 +1275,3 @@ xfs_dev_is_read_only(
 	}
 	return 0;
 }
-
-#ifdef HAVE_PERCPU_SB
-/*
- * Per-cpu incore superblock counters
- *
- * Simple concept, difficult implementation
- *
- * Basically, replace the incore superblock counters with a distributed per cpu
- * counter for contended fields (e.g. free block count).
- *
- * Difficulties arise in that the incore sb is used for ENOSPC checking, and
- * hence needs to be accurately read when we are running low on space. Hence
- * there is a method to enable and disable the per-cpu counters based on how
- * much "stuff" is available in them.
- *
- * Basically, a counter is enabled if there is enough free resource to justify
- * running a per-cpu fast-path. If the per-cpu counter runs out (i.e. a local
- * ENOSPC), then we disable the counters to synchronise all callers and
- * re-distribute the available resources.
- *
- * If, once we redistributed the available resources, we still get a failure,
- * we disable the per-cpu counter and go through the slow path.
- *
- * The slow path is the current xfs_mod_incore_sb() function. This means that
- * when we disable a per-cpu counter, we need to drain its resources back to
- * the global superblock. We do this after disabling the counter to prevent
- * more threads from queueing up on the counter.
- *
- * Essentially, this means that we still need a lock in the fast path to enable
- * synchronisation between the global counters and the per-cpu counters. This
- * is not a problem because the lock will be local to a CPU almost all the time
- * and have little contention except when we get to ENOSPC conditions.
- *
- * Basically, this lock becomes a barrier that enables us to lock out the fast
- * path while we do things like enabling and disabling counters and
- * synchronising the counters.
- *
- * Locking rules:
- *
- *	1. m_sb_lock before picking up per-cpu locks
- *	2. per-cpu locks always picked up via for_each_online_cpu() order
- *	3. accurate counter sync requires m_sb_lock + per cpu locks
- *	4. modifying per-cpu counters requires holding per-cpu lock
- *	5. modifying global counters requires holding m_sb_lock
- *	6. enabling or disabling a counter requires holding the m_sb_lock
- *	   and _none_ of the per-cpu locks.
- *
- * Disabled counters are only ever re-enabled by a balance operation
- * that results in more free resources per CPU than a given threshold.
- * To ensure counters don't remain disabled, they are rebalanced when
- * the global resource goes above a higher threshold (i.e. some hysteresis
- * is present to prevent thrashing).
- */
-
-#ifdef CONFIG_HOTPLUG_CPU
-/*
- * hot-plug CPU notifier support.
- *
- * We need a notifier per filesystem as we need to be able to identify
- * the filesystem to balance the counters out. This is achieved by
- * having a notifier block embedded in the xfs_mount_t and doing pointer
- * magic to get the mount pointer from the notifier block address.
- */
-STATIC int
-xfs_icsb_cpu_notify(
-	struct notifier_block *nfb,
-	unsigned long action,
-	void *hcpu)
-{
-	xfs_icsb_cnts_t *cntp;
-	xfs_mount_t	*mp;
-
-	mp = (xfs_mount_t *)container_of(nfb, xfs_mount_t, m_icsb_notifier);
-	cntp = (xfs_icsb_cnts_t *)
-			per_cpu_ptr(mp->m_sb_cnts, (unsigned long)hcpu);
-	switch (action) {
-	case CPU_UP_PREPARE:
-	case CPU_UP_PREPARE_FROZEN:
-		/* Easy Case - initialize the area and locks, and
-		 * then rebalance when online does everything else for us. */
-		memset(cntp, 0, sizeof(xfs_icsb_cnts_t));
-		break;
-	case CPU_ONLINE:
-	case CPU_ONLINE_FROZEN:
-		xfs_icsb_lock(mp);
-		xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0);
-		xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0);
-		xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0);
-		xfs_icsb_unlock(mp);
-		break;
-	case CPU_DEAD:
-	case CPU_DEAD_FROZEN:
-		/* Disable all the counters, then fold the dead cpu's
-		 * count into the total on the global superblock and
-		 * re-enable the counters. */
-		xfs_icsb_lock(mp);
-		spin_lock(&mp->m_sb_lock);
-		xfs_icsb_disable_counter(mp, XFS_SBS_ICOUNT);
-		xfs_icsb_disable_counter(mp, XFS_SBS_IFREE);
-		xfs_icsb_disable_counter(mp, XFS_SBS_FDBLOCKS);
-
-		mp->m_sb.sb_icount += cntp->icsb_icount;
-		mp->m_sb.sb_ifree += cntp->icsb_ifree;
-		mp->m_sb.sb_fdblocks += cntp->icsb_fdblocks;
-
-		memset(cntp, 0, sizeof(xfs_icsb_cnts_t));
-
-		xfs_icsb_balance_counter_locked(mp, XFS_SBS_ICOUNT, 0);
-		xfs_icsb_balance_counter_locked(mp, XFS_SBS_IFREE, 0);
-		xfs_icsb_balance_counter_locked(mp, XFS_SBS_FDBLOCKS, 0);
-		spin_unlock(&mp->m_sb_lock);
-		xfs_icsb_unlock(mp);
-		break;
-	}
-
-	return NOTIFY_OK;
-}
-#endif /* CONFIG_HOTPLUG_CPU */
-
-int
-xfs_icsb_init_counters(
-	xfs_mount_t	*mp)
-{
-	xfs_icsb_cnts_t *cntp;
-	int		i;
-
-	mp->m_sb_cnts = alloc_percpu(xfs_icsb_cnts_t);
-	if (mp->m_sb_cnts == NULL)
-		return -ENOMEM;
-
-	for_each_online_cpu(i) {
-		cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
-		memset(cntp, 0, sizeof(xfs_icsb_cnts_t));
-	}
-
-	mutex_init(&mp->m_icsb_mutex);
-
-	/*
-	 * start with all counters disabled so that the
-	 * initial balance kicks us off correctly
-	 */
-	mp->m_icsb_counters = -1;
-
-#ifdef CONFIG_HOTPLUG_CPU
-	mp->m_icsb_notifier.notifier_call = xfs_icsb_cpu_notify;
-	mp->m_icsb_notifier.priority = 0;
-	register_hotcpu_notifier(&mp->m_icsb_notifier);
-#endif /* CONFIG_HOTPLUG_CPU */
-
-	return 0;
-}
-
-void
-xfs_icsb_reinit_counters(
-	xfs_mount_t	*mp)
-{
-	xfs_icsb_lock(mp);
-	/*
-	 * start with all counters disabled so that the
-	 * initial balance kicks us off correctly
-	 */
-	mp->m_icsb_counters = -1;
-	xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0);
-	xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0);
-	xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0);
-	xfs_icsb_unlock(mp);
-}
-
-void
-xfs_icsb_destroy_counters(
-	xfs_mount_t	*mp)
-{
-	if (mp->m_sb_cnts) {
-		unregister_hotcpu_notifier(&mp->m_icsb_notifier);
-		free_percpu(mp->m_sb_cnts);
-	}
-	mutex_destroy(&mp->m_icsb_mutex);
-}
-
-STATIC void
-xfs_icsb_lock_cntr(
-	xfs_icsb_cnts_t	*icsbp)
-{
-	while (test_and_set_bit(XFS_ICSB_FLAG_LOCK, &icsbp->icsb_flags)) {
-		ndelay(1000);
-	}
-}
-
-STATIC void
-xfs_icsb_unlock_cntr(
-	xfs_icsb_cnts_t	*icsbp)
-{
-	clear_bit(XFS_ICSB_FLAG_LOCK, &icsbp->icsb_flags);
-}
-
-
-STATIC void
-xfs_icsb_lock_all_counters(
-	xfs_mount_t	*mp)
-{
-	xfs_icsb_cnts_t *cntp;
-	int		i;
-
-	for_each_online_cpu(i) {
-		cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
-		xfs_icsb_lock_cntr(cntp);
-	}
-}
-
-STATIC void
-xfs_icsb_unlock_all_counters(
-	xfs_mount_t	*mp)
-{
-	xfs_icsb_cnts_t *cntp;
-	int		i;
-
-	for_each_online_cpu(i) {
-		cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
-		xfs_icsb_unlock_cntr(cntp);
-	}
-}
-
-STATIC void
-xfs_icsb_count(
-	xfs_mount_t	*mp,
-	xfs_icsb_cnts_t	*cnt,
-	int		flags)
-{
-	xfs_icsb_cnts_t *cntp;
-	int		i;
-
-	memset(cnt, 0, sizeof(xfs_icsb_cnts_t));
-
-	if (!(flags & XFS_ICSB_LAZY_COUNT))
-		xfs_icsb_lock_all_counters(mp);
-
-	for_each_online_cpu(i) {
-		cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
-		cnt->icsb_icount += cntp->icsb_icount;
-		cnt->icsb_ifree += cntp->icsb_ifree;
-		cnt->icsb_fdblocks += cntp->icsb_fdblocks;
-	}
-
-	if (!(flags & XFS_ICSB_LAZY_COUNT))
-		xfs_icsb_unlock_all_counters(mp);
-}
-
-STATIC int
-xfs_icsb_counter_disabled(
-	xfs_mount_t	*mp,
-	xfs_sb_field_t	field)
-{
-	ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS));
-	return test_bit(field, &mp->m_icsb_counters);
-}
-
-STATIC void
-xfs_icsb_disable_counter(
-	xfs_mount_t	*mp,
-	xfs_sb_field_t	field)
-{
-	xfs_icsb_cnts_t	cnt;
-
-	ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS));
-
-	/*
-	 * If we are already disabled, then there is nothing to do
-	 * here. We check before locking all the counters to avoid
-	 * the expensive lock operation when being called in the
-	 * slow path and the counter is already disabled. This is
-	 * safe because the only time we set or clear this state is under
-	 * the m_icsb_mutex.
-	 */
-	if (xfs_icsb_counter_disabled(mp, field))
-		return;
-
-	xfs_icsb_lock_all_counters(mp);
-	if (!test_and_set_bit(field, &mp->m_icsb_counters)) {
-		/* drain back to superblock */
-
-		xfs_icsb_count(mp, &cnt, XFS_ICSB_LAZY_COUNT);
-		switch(field) {
-		case XFS_SBS_ICOUNT:
-			mp->m_sb.sb_icount = cnt.icsb_icount;
-			break;
-		case XFS_SBS_IFREE:
-			mp->m_sb.sb_ifree = cnt.icsb_ifree;
-			break;
-		case XFS_SBS_FDBLOCKS:
-			mp->m_sb.sb_fdblocks = cnt.icsb_fdblocks;
-			break;
-		default:
-			BUG();
-		}
-	}
-
-	xfs_icsb_unlock_all_counters(mp);
-}
-
-STATIC void
-xfs_icsb_enable_counter(
-	xfs_mount_t	*mp,
-	xfs_sb_field_t	field,
-	uint64_t	count,
-	uint64_t	resid)
-{
-	xfs_icsb_cnts_t	*cntp;
-	int		i;
-
-	ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS));
-
-	xfs_icsb_lock_all_counters(mp);
-	for_each_online_cpu(i) {
-		cntp = per_cpu_ptr(mp->m_sb_cnts, i);
-		switch (field) {
-		case XFS_SBS_ICOUNT:
-			cntp->icsb_icount = count + resid;
-			break;
-		case XFS_SBS_IFREE:
-			cntp->icsb_ifree = count + resid;
-			break;
-		case XFS_SBS_FDBLOCKS:
-			cntp->icsb_fdblocks = count + resid;
-			break;
-		default:
-			BUG();
-			break;
-		}
-		resid = 0;
-	}
-	clear_bit(field, &mp->m_icsb_counters);
-	xfs_icsb_unlock_all_counters(mp);
-}
-
-void
-xfs_icsb_sync_counters_locked(
-	xfs_mount_t	*mp,
-	int		flags)
-{
-	xfs_icsb_cnts_t	cnt;
-
-	xfs_icsb_count(mp, &cnt, flags);
-
-	if (!xfs_icsb_counter_disabled(mp, XFS_SBS_ICOUNT))
-		mp->m_sb.sb_icount = cnt.icsb_icount;
-	if (!xfs_icsb_counter_disabled(mp, XFS_SBS_IFREE))
-		mp->m_sb.sb_ifree = cnt.icsb_ifree;
-	if (!xfs_icsb_counter_disabled(mp, XFS_SBS_FDBLOCKS))
-		mp->m_sb.sb_fdblocks = cnt.icsb_fdblocks;
-}
-
-/*
- * Accurate update of per-cpu counters to incore superblock
- */
-void
-xfs_icsb_sync_counters(
-	xfs_mount_t	*mp,
-	int		flags)
-{
-	spin_lock(&mp->m_sb_lock);
-	xfs_icsb_sync_counters_locked(mp, flags);
-	spin_unlock(&mp->m_sb_lock);
-}
-
-/*
- * Balance and enable/disable counters as necessary.
- *
- * Thresholds for re-enabling counters are somewhat magic. inode counts are
- * chosen to be the same number as single on disk allocation chunk per CPU, and
- * free blocks is something far enough zero that we aren't going thrash when we
- * get near ENOSPC. We also need to supply a minimum we require per cpu to
- * prevent looping endlessly when xfs_alloc_space asks for more than will
- * be distributed to a single CPU but each CPU has enough blocks to be
- * reenabled.
- *
- * Note that we can be called when counters are already disabled.
- * xfs_icsb_disable_counter() optimises the counter locking in this case to
- * prevent locking every per-cpu counter needlessly.
- */
-
-#define XFS_ICSB_INO_CNTR_REENABLE	(uint64_t)64
-#define XFS_ICSB_FDBLK_CNTR_REENABLE(mp) \
-		(uint64_t)(512 + XFS_ALLOC_SET_ASIDE(mp))
-STATIC void
-xfs_icsb_balance_counter_locked(
-	xfs_mount_t	*mp,
-	xfs_sb_field_t	field,
-	int		min_per_cpu)
-{
-	uint64_t	count, resid;
-	int		weight = num_online_cpus();
-	uint64_t	min = (uint64_t)min_per_cpu;
-
-	/* disable counter and sync counter */
-	xfs_icsb_disable_counter(mp, field);
-
-	/* update counters - first CPU gets residual*/
-	switch (field) {
-	case XFS_SBS_ICOUNT:
-		count = mp->m_sb.sb_icount;
-		resid = do_div(count, weight);
-		if (count < max(min, XFS_ICSB_INO_CNTR_REENABLE))
-			return;
-		break;
-	case XFS_SBS_IFREE:
-		count = mp->m_sb.sb_ifree;
-		resid = do_div(count, weight);
-		if (count < max(min, XFS_ICSB_INO_CNTR_REENABLE))
-			return;
-		break;
-	case XFS_SBS_FDBLOCKS:
-		count = mp->m_sb.sb_fdblocks;
-		resid = do_div(count, weight);
-		if (count < max(min, XFS_ICSB_FDBLK_CNTR_REENABLE(mp)))
-			return;
-		break;
-	default:
-		BUG();
-		count = resid = 0;	/* quiet, gcc */
-		break;
-	}
-
-	xfs_icsb_enable_counter(mp, field, count, resid);
-}
-
-STATIC void
-xfs_icsb_balance_counter(
-	xfs_mount_t	*mp,
-	xfs_sb_field_t	fields,
-	int		min_per_cpu)
-{
-	spin_lock(&mp->m_sb_lock);
-	xfs_icsb_balance_counter_locked(mp, fields, min_per_cpu);
-	spin_unlock(&mp->m_sb_lock);
-}
-
-int
-xfs_icsb_modify_counters(
-	xfs_mount_t	*mp,
-	xfs_sb_field_t	field,
-	int64_t		delta,
-	int		rsvd)
-{
-	xfs_icsb_cnts_t	*icsbp;
-	long long	lcounter;	/* long counter for 64 bit fields */
-	int		ret = 0;
-
-	might_sleep();
-again:
-	preempt_disable();
-	icsbp = this_cpu_ptr(mp->m_sb_cnts);
-
-	/*
-	 * if the counter is disabled, go to slow path
-	 */
-	if (unlikely(xfs_icsb_counter_disabled(mp, field)))
-		goto slow_path;
-	xfs_icsb_lock_cntr(icsbp);
-	if (unlikely(xfs_icsb_counter_disabled(mp, field))) {
-		xfs_icsb_unlock_cntr(icsbp);
-		goto slow_path;
-	}
-
-	switch (field) {
-	case XFS_SBS_ICOUNT:
-		lcounter = icsbp->icsb_icount;
-		lcounter += delta;
-		if (unlikely(lcounter < 0))
-			goto balance_counter;
-		icsbp->icsb_icount = lcounter;
-		break;
-
-	case XFS_SBS_IFREE:
-		lcounter = icsbp->icsb_ifree;
-		lcounter += delta;
-		if (unlikely(lcounter < 0))
-			goto balance_counter;
-		icsbp->icsb_ifree = lcounter;
-		break;
-
-	case XFS_SBS_FDBLOCKS:
-		BUG_ON((mp->m_resblks - mp->m_resblks_avail) != 0);
-
-		lcounter = icsbp->icsb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
-		lcounter += delta;
-		if (unlikely(lcounter < 0))
-			goto balance_counter;
-		icsbp->icsb_fdblocks = lcounter + XFS_ALLOC_SET_ASIDE(mp);
-		break;
-	default:
-		BUG();
-		break;
-	}
-	xfs_icsb_unlock_cntr(icsbp);
-	preempt_enable();
-	return 0;
-
-slow_path:
-	preempt_enable();
-
-	/*
-	 * serialise with a mutex so we don't burn lots of cpu on
-	 * the superblock lock. We still need to hold the superblock
-	 * lock, however, when we modify the global structures.
-	 */
-	xfs_icsb_lock(mp);
-
-	/*
-	 * Now running atomically.
-	 *
-	 * If the counter is enabled, someone has beaten us to rebalancing.
-	 * Drop the lock and try again in the fast path....
-	 */
-	if (!(xfs_icsb_counter_disabled(mp, field))) {
-		xfs_icsb_unlock(mp);
-		goto again;
-	}
-
-	/*
-	 * The counter is currently disabled. Because we are
-	 * running atomically here, we know a rebalance cannot
-	 * be in progress. Hence we can go straight to operating
-	 * on the global superblock. We do not call xfs_mod_incore_sb()
-	 * here even though we need to get the m_sb_lock. Doing so
-	 * will cause us to re-enter this function and deadlock.
-	 * Hence we get the m_sb_lock ourselves and then call
-	 * xfs_mod_incore_sb_unlocked() as the unlocked path operates
-	 * directly on the global counters.
-	 */
-	spin_lock(&mp->m_sb_lock);
-	ret = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd);
-	spin_unlock(&mp->m_sb_lock);
-
-	/*
-	 * Now that we've modified the global superblock, we
-	 * may be able to re-enable the distributed counters
-	 * (e.g. lots of space just got freed). After that
-	 * we are done.
-	 */
-	if (ret != -ENOSPC)
-		xfs_icsb_balance_counter(mp, field, 0);
-	xfs_icsb_unlock(mp);
-	return ret;
-
-balance_counter:
-	xfs_icsb_unlock_cntr(icsbp);
-	preempt_enable();
-
-	/*
-	 * We may have multiple threads here if multiple per-cpu
-	 * counters run dry at the same time. This will mean we can
-	 * do more balances than strictly necessary but it is not
-	 * the common slowpath case.
-	 */
-	xfs_icsb_lock(mp);
-
-	/*
-	 * running atomically.
-	 *
-	 * This will leave the counter in the correct state for future
-	 * accesses. After the rebalance, we simply try again and our retry
-	 * will either succeed through the fast path or slow path without
-	 * another balance operation being required.
-	 */
-	xfs_icsb_balance_counter(mp, field, delta);
-	xfs_icsb_unlock(mp);
-	goto again;
-}
-
-#endif
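
Everything deleted above (hand-rolled per-cpu storage, per-counter bit-spinlocks, the CPU hotplug notifier, and the enable/disable/rebalance state machine) is subsumed by the generic lib/percpu_counter infrastructure, which handles CPU hotplug and accurate summation internally. A minimal sketch of that lifecycle, assuming the signatures of the era of this commit (percpu_counter_init() takes a gfp argument; the real XFS init/destroy call sites live outside this file):

#include <linux/percpu_counter.h>

static int example_counter_lifecycle(void)
{
	struct percpu_counter	count;
	int			error;

	/* one allocation of per-cpu storage; no hotplug notifier needed */
	error = percpu_counter_init(&count, 0, GFP_KERNEL);
	if (error)
		return error;

	percpu_counter_add(&count, 64);			/* cheap, usually CPU-local */
	WARN_ON(percpu_counter_sum(&count) != 64);	/* accurate, locked sum */

	percpu_counter_destroy(&count);			/* frees the per-cpu storage */
	return 0;
}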