diff options
author | Andrey Sidorov <qrxd43@motorola.com> | 2013-04-09 12:22:29 -0400 |
---|---|---|
committer | Theodore Ts'o <tytso@mit.edu> | 2013-04-09 12:22:29 -0400 |
commit | eabe0444df90b0cdfa7fdc4f0b4b253f0a498ff7 (patch) | |
tree | cde3d8ee1419d56910ca3fb34745124c880ac2d4 /fs/ext4 | |
parent | 5c1ff33640293499f9b16450029c9fb4dc06543e (diff) |
ext4: speed-up releasing blocks on commit
Improve mb_free_blocks speed by clearing entire range at once instead
of iterating over each bit. Freeing block-by-block also makes buddy
bitmap subtree flip twice making most of the work a no-op. Very few
bits in buddy bitmap require change, e.g. freeing entire group is a 1
bit flip only. As a result, releasing blocks of 60G file now takes
5ms instead of 2.7s. This is especially good for non-preemptive
kernels as there is no rescheduling during release.
Signed-off-by: Andrey Sidorov <qrxd43@motorola.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Diffstat (limited to 'fs/ext4')
-rw-r--r-- | fs/ext4/mballoc.c | 233 |
1 files changed, 172 insertions, 61 deletions
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 8c8d05218021..6a87f7217474 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c | |||
@@ -405,6 +405,12 @@ static inline void mb_clear_bit(int bit, void *addr) | |||
405 | ext4_clear_bit(bit, addr); | 405 | ext4_clear_bit(bit, addr); |
406 | } | 406 | } |
407 | 407 | ||
408 | static inline int mb_test_and_clear_bit(int bit, void *addr) | ||
409 | { | ||
410 | addr = mb_correct_addr_and_bit(&bit, addr); | ||
411 | return ext4_test_and_clear_bit(bit, addr); | ||
412 | } | ||
413 | |||
408 | static inline int mb_find_next_zero_bit(void *addr, int max, int start) | 414 | static inline int mb_find_next_zero_bit(void *addr, int max, int start) |
409 | { | 415 | { |
410 | int fix = 0, ret, tmpmax; | 416 | int fix = 0, ret, tmpmax; |
@@ -764,6 +770,24 @@ void ext4_mb_generate_buddy(struct super_block *sb, | |||
764 | spin_unlock(&EXT4_SB(sb)->s_bal_lock); | 770 | spin_unlock(&EXT4_SB(sb)->s_bal_lock); |
765 | } | 771 | } |
766 | 772 | ||
773 | static void mb_regenerate_buddy(struct ext4_buddy *e4b) | ||
774 | { | ||
775 | int count; | ||
776 | int order = 1; | ||
777 | void *buddy; | ||
778 | |||
779 | while ((buddy = mb_find_buddy(e4b, order++, &count))) { | ||
780 | ext4_set_bits(buddy, 0, count); | ||
781 | } | ||
782 | e4b->bd_info->bb_fragments = 0; | ||
783 | memset(e4b->bd_info->bb_counters, 0, | ||
784 | sizeof(*e4b->bd_info->bb_counters) * | ||
785 | (e4b->bd_sb->s_blocksize_bits + 2)); | ||
786 | |||
787 | ext4_mb_generate_buddy(e4b->bd_sb, e4b->bd_buddy, | ||
788 | e4b->bd_bitmap, e4b->bd_group); | ||
789 | } | ||
790 | |||
767 | /* The buddy information is attached the buddy cache inode | 791 | /* The buddy information is attached the buddy cache inode |
768 | * for convenience. The information regarding each group | 792 | * for convenience. The information regarding each group |
769 | * is loaded via ext4_mb_load_buddy. The information involve | 793 | * is loaded via ext4_mb_load_buddy. The information involve |
@@ -1246,6 +1270,33 @@ static void mb_clear_bits(void *bm, int cur, int len) | |||
1246 | } | 1270 | } |
1247 | } | 1271 | } |
1248 | 1272 | ||
1273 | /* clear bits in given range | ||
1274 | * will return first found zero bit if any, -1 otherwise | ||
1275 | */ | ||
1276 | static int mb_test_and_clear_bits(void *bm, int cur, int len) | ||
1277 | { | ||
1278 | __u32 *addr; | ||
1279 | int zero_bit = -1; | ||
1280 | |||
1281 | len = cur + len; | ||
1282 | while (cur < len) { | ||
1283 | if ((cur & 31) == 0 && (len - cur) >= 32) { | ||
1284 | /* fast path: clear whole word at once */ | ||
1285 | addr = bm + (cur >> 3); | ||
1286 | if (*addr != (__u32)(-1) && zero_bit == -1) | ||
1287 | zero_bit = cur + mb_find_next_zero_bit(addr, 32, 0); | ||
1288 | *addr = 0; | ||
1289 | cur += 32; | ||
1290 | continue; | ||
1291 | } | ||
1292 | if (!mb_test_and_clear_bit(cur, bm) && zero_bit == -1) | ||
1293 | zero_bit = cur; | ||
1294 | cur++; | ||
1295 | } | ||
1296 | |||
1297 | return zero_bit; | ||
1298 | } | ||
1299 | |||
1249 | void ext4_set_bits(void *bm, int cur, int len) | 1300 | void ext4_set_bits(void *bm, int cur, int len) |
1250 | { | 1301 | { |
1251 | __u32 *addr; | 1302 | __u32 *addr; |
@@ -1264,17 +1315,90 @@ void ext4_set_bits(void *bm, int cur, int len) | |||
1264 | } | 1315 | } |
1265 | } | 1316 | } |
1266 | 1317 | ||
1318 | /* | ||
1319 | * _________________________________________________________________ */ | ||
1320 | |||
1321 | static inline int mb_buddy_adjust_border(int* bit, void* bitmap, int side) | ||
1322 | { | ||
1323 | if (mb_test_bit(*bit + side, bitmap)) { | ||
1324 | mb_clear_bit(*bit, bitmap); | ||
1325 | (*bit) -= side; | ||
1326 | return 1; | ||
1327 | } | ||
1328 | else { | ||
1329 | (*bit) += side; | ||
1330 | mb_set_bit(*bit, bitmap); | ||
1331 | return -1; | ||
1332 | } | ||
1333 | } | ||
1334 | |||
1335 | static void mb_buddy_mark_free(struct ext4_buddy *e4b, int first, int last) | ||
1336 | { | ||
1337 | int max; | ||
1338 | int order = 1; | ||
1339 | void *buddy = mb_find_buddy(e4b, order, &max); | ||
1340 | |||
1341 | while (buddy) { | ||
1342 | void *buddy2; | ||
1343 | |||
1344 | /* Bits in range [first; last] are known to be set since | ||
1345 | * corresponding blocks were allocated. Bits in range | ||
1346 | * (first; last) will stay set because they form buddies on | ||
1347 | * upper layer. We just deal with borders if they don't | ||
1348 | * align with upper layer and then go up. | ||
1349 | * Releasing entire group is all about clearing | ||
1350 | * single bit of highest order buddy. | ||
1351 | */ | ||
1352 | |||
1353 | /* Example: | ||
1354 | * --------------------------------- | ||
1355 | * | 1 | 1 | 1 | 1 | | ||
1356 | * --------------------------------- | ||
1357 | * | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | | ||
1358 | * --------------------------------- | ||
1359 | * 0 1 2 3 4 5 6 7 | ||
1360 | * \_____________________/ | ||
1361 | * | ||
1362 | * Neither [1] nor [6] is aligned to above layer. | ||
1363 | * Left neighbour [0] is free, so mark it busy, | ||
1364 | * decrease bb_counters and extend range to | ||
1365 | * [0; 6] | ||
1366 | * Right neighbour [7] is busy. It can't be coaleasced with [6], so | ||
1367 | * mark [6] free, increase bb_counters and shrink range to | ||
1368 | * [0; 5]. | ||
1369 | * Then shift range to [0; 2], go up and do the same. | ||
1370 | */ | ||
1371 | |||
1372 | |||
1373 | if (first & 1) | ||
1374 | e4b->bd_info->bb_counters[order] += mb_buddy_adjust_border(&first, buddy, -1); | ||
1375 | if (!(last & 1)) | ||
1376 | e4b->bd_info->bb_counters[order] += mb_buddy_adjust_border(&last, buddy, 1); | ||
1377 | if (first > last) | ||
1378 | break; | ||
1379 | order++; | ||
1380 | |||
1381 | if (first == last || !(buddy2 = mb_find_buddy(e4b, order, &max))) { | ||
1382 | mb_clear_bits(buddy, first, last - first + 1); | ||
1383 | e4b->bd_info->bb_counters[order - 1] += last - first + 1; | ||
1384 | break; | ||
1385 | } | ||
1386 | first >>= 1; | ||
1387 | last >>= 1; | ||
1388 | buddy = buddy2; | ||
1389 | } | ||
1390 | } | ||
1391 | |||
1267 | static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, | 1392 | static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, |
1268 | int first, int count) | 1393 | int first, int count) |
1269 | { | 1394 | { |
1270 | int block = 0; | 1395 | int left_is_free = 0; |
1271 | int max = 0; | 1396 | int right_is_free = 0; |
1272 | int order; | 1397 | int block; |
1273 | void *buddy; | 1398 | int last = first + count - 1; |
1274 | void *buddy2; | ||
1275 | struct super_block *sb = e4b->bd_sb; | 1399 | struct super_block *sb = e4b->bd_sb; |
1276 | 1400 | ||
1277 | BUG_ON(first + count > (sb->s_blocksize << 3)); | 1401 | BUG_ON(last >= (sb->s_blocksize << 3)); |
1278 | assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group)); | 1402 | assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group)); |
1279 | mb_check_buddy(e4b); | 1403 | mb_check_buddy(e4b); |
1280 | mb_free_blocks_double(inode, e4b, first, count); | 1404 | mb_free_blocks_double(inode, e4b, first, count); |
@@ -1283,67 +1407,54 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, | |||
1283 | if (first < e4b->bd_info->bb_first_free) | 1407 | if (first < e4b->bd_info->bb_first_free) |
1284 | e4b->bd_info->bb_first_free = first; | 1408 | e4b->bd_info->bb_first_free = first; |
1285 | 1409 | ||
1286 | /* let's maintain fragments counter */ | 1410 | /* access memory sequentially: check left neighbour, |
1411 | * clear range and then check right neighbour | ||
1412 | */ | ||
1287 | if (first != 0) | 1413 | if (first != 0) |
1288 | block = !mb_test_bit(first - 1, e4b->bd_bitmap); | 1414 | left_is_free = !mb_test_bit(first - 1, e4b->bd_bitmap); |
1289 | if (first + count < EXT4_SB(sb)->s_mb_maxs[0]) | 1415 | block = mb_test_and_clear_bits(e4b->bd_bitmap, first, count); |
1290 | max = !mb_test_bit(first + count, e4b->bd_bitmap); | 1416 | if (last + 1 < EXT4_SB(sb)->s_mb_maxs[0]) |
1291 | if (block && max) | 1417 | right_is_free = !mb_test_bit(last + 1, e4b->bd_bitmap); |
1292 | e4b->bd_info->bb_fragments--; | ||
1293 | else if (!block && !max) | ||
1294 | e4b->bd_info->bb_fragments++; | ||
1295 | |||
1296 | /* let's maintain buddy itself */ | ||
1297 | while (count-- > 0) { | ||
1298 | block = first++; | ||
1299 | order = 0; | ||
1300 | |||
1301 | if (!mb_test_bit(block, e4b->bd_bitmap)) { | ||
1302 | ext4_fsblk_t blocknr; | ||
1303 | |||
1304 | blocknr = ext4_group_first_block_no(sb, e4b->bd_group); | ||
1305 | blocknr += EXT4_C2B(EXT4_SB(sb), block); | ||
1306 | ext4_grp_locked_error(sb, e4b->bd_group, | ||
1307 | inode ? inode->i_ino : 0, | ||
1308 | blocknr, | ||
1309 | "freeing already freed block " | ||
1310 | "(bit %u)", block); | ||
1311 | } | ||
1312 | mb_clear_bit(block, e4b->bd_bitmap); | ||
1313 | e4b->bd_info->bb_counters[order]++; | ||
1314 | |||
1315 | /* start of the buddy */ | ||
1316 | buddy = mb_find_buddy(e4b, order, &max); | ||
1317 | 1418 | ||
1318 | do { | 1419 | if (unlikely(block != -1)) { |
1319 | block &= ~1UL; | 1420 | ext4_fsblk_t blocknr; |
1320 | if (mb_test_bit(block, buddy) || | ||
1321 | mb_test_bit(block + 1, buddy)) | ||
1322 | break; | ||
1323 | 1421 | ||
1324 | /* both the buddies are free, try to coalesce them */ | 1422 | blocknr = ext4_group_first_block_no(sb, e4b->bd_group); |
1325 | buddy2 = mb_find_buddy(e4b, order + 1, &max); | 1423 | blocknr += EXT4_C2B(EXT4_SB(sb), block); |
1424 | ext4_grp_locked_error(sb, e4b->bd_group, | ||
1425 | inode ? inode->i_ino : 0, | ||
1426 | blocknr, | ||
1427 | "freeing already freed block " | ||
1428 | "(bit %u)", block); | ||
1429 | mb_regenerate_buddy(e4b); | ||
1430 | goto done; | ||
1431 | } | ||
1326 | 1432 | ||
1327 | if (!buddy2) | 1433 | /* let's maintain fragments counter */ |
1328 | break; | 1434 | if (left_is_free && right_is_free) |
1435 | e4b->bd_info->bb_fragments--; | ||
1436 | else if (!left_is_free && !right_is_free) | ||
1437 | e4b->bd_info->bb_fragments++; | ||
1329 | 1438 | ||
1330 | if (order > 0) { | 1439 | /* buddy[0] == bd_bitmap is a special case, so handle |
1331 | /* for special purposes, we don't set | 1440 | * it right away and let mb_buddy_mark_free stay free of |
1332 | * free bits in bitmap */ | 1441 | * zero order checks. |
1333 | mb_set_bit(block, buddy); | 1442 | * Check if neighbours are to be coaleasced, |
1334 | mb_set_bit(block + 1, buddy); | 1443 | * adjust bitmap bb_counters and borders appropriately. |
1335 | } | 1444 | */ |
1336 | e4b->bd_info->bb_counters[order]--; | 1445 | if (first & 1) { |
1337 | e4b->bd_info->bb_counters[order]--; | 1446 | first += !left_is_free; |
1447 | e4b->bd_info->bb_counters[0] += left_is_free ? -1 : 1; | ||
1448 | } | ||
1449 | if (!(last & 1)) { | ||
1450 | last -= !right_is_free; | ||
1451 | e4b->bd_info->bb_counters[0] += right_is_free ? -1 : 1; | ||
1452 | } | ||
1338 | 1453 | ||
1339 | block = block >> 1; | 1454 | if (first <= last) |
1340 | order++; | 1455 | mb_buddy_mark_free(e4b, first >> 1, last >> 1); |
1341 | e4b->bd_info->bb_counters[order]++; | ||
1342 | 1456 | ||
1343 | mb_clear_bit(block, buddy2); | 1457 | done: |
1344 | buddy = buddy2; | ||
1345 | } while (1); | ||
1346 | } | ||
1347 | mb_set_largest_free_order(sb, e4b->bd_info); | 1458 | mb_set_largest_free_order(sb, e4b->bd_info); |
1348 | mb_check_buddy(e4b); | 1459 | mb_check_buddy(e4b); |
1349 | } | 1460 | } |