aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ext4
diff options
context:
space:
mode:
authorAndrey Sidorov <qrxd43@motorola.com>2013-04-09 12:22:29 -0400
committerTheodore Ts'o <tytso@mit.edu>2013-04-09 12:22:29 -0400
commiteabe0444df90b0cdfa7fdc4f0b4b253f0a498ff7 (patch)
treecde3d8ee1419d56910ca3fb34745124c880ac2d4 /fs/ext4
parent5c1ff33640293499f9b16450029c9fb4dc06543e (diff)
ext4: speed-up releasing blocks on commit
Improve mb_free_blocks speed by clearing entire range at once instead of iterating over each bit. Freeing block-by-block also makes buddy bitmap subtree flip twice making most of the work a no-op. Very few bits in buddy bitmap require change, e.g. freeing entire group is a 1 bit flip only. As a result, releasing blocks of 60G file now takes 5ms instead of 2.7s. This is especially good for non-preemptive kernels as there is no rescheduling during release. Signed-off-by: Andrey Sidorov <qrxd43@motorola.com> Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Diffstat (limited to 'fs/ext4')
-rw-r--r--fs/ext4/mballoc.c233
1 files changed, 172 insertions, 61 deletions
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 8c8d05218021..6a87f7217474 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -405,6 +405,12 @@ static inline void mb_clear_bit(int bit, void *addr)
405 ext4_clear_bit(bit, addr); 405 ext4_clear_bit(bit, addr);
406} 406}
407 407
408static inline int mb_test_and_clear_bit(int bit, void *addr)
409{
410 addr = mb_correct_addr_and_bit(&bit, addr);
411 return ext4_test_and_clear_bit(bit, addr);
412}
413
408static inline int mb_find_next_zero_bit(void *addr, int max, int start) 414static inline int mb_find_next_zero_bit(void *addr, int max, int start)
409{ 415{
410 int fix = 0, ret, tmpmax; 416 int fix = 0, ret, tmpmax;
@@ -764,6 +770,24 @@ void ext4_mb_generate_buddy(struct super_block *sb,
764 spin_unlock(&EXT4_SB(sb)->s_bal_lock); 770 spin_unlock(&EXT4_SB(sb)->s_bal_lock);
765} 771}
766 772
773static void mb_regenerate_buddy(struct ext4_buddy *e4b)
774{
775 int count;
776 int order = 1;
777 void *buddy;
778
779 while ((buddy = mb_find_buddy(e4b, order++, &count))) {
780 ext4_set_bits(buddy, 0, count);
781 }
782 e4b->bd_info->bb_fragments = 0;
783 memset(e4b->bd_info->bb_counters, 0,
784 sizeof(*e4b->bd_info->bb_counters) *
785 (e4b->bd_sb->s_blocksize_bits + 2));
786
787 ext4_mb_generate_buddy(e4b->bd_sb, e4b->bd_buddy,
788 e4b->bd_bitmap, e4b->bd_group);
789}
790
767/* The buddy information is attached the buddy cache inode 791/* The buddy information is attached the buddy cache inode
768 * for convenience. The information regarding each group 792 * for convenience. The information regarding each group
769 * is loaded via ext4_mb_load_buddy. The information involve 793 * is loaded via ext4_mb_load_buddy. The information involve
@@ -1246,6 +1270,33 @@ static void mb_clear_bits(void *bm, int cur, int len)
1246 } 1270 }
1247} 1271}
1248 1272
1273/* clear bits in given range
1274 * will return first found zero bit if any, -1 otherwise
1275 */
1276static int mb_test_and_clear_bits(void *bm, int cur, int len)
1277{
1278 __u32 *addr;
1279 int zero_bit = -1;
1280
1281 len = cur + len;
1282 while (cur < len) {
1283 if ((cur & 31) == 0 && (len - cur) >= 32) {
1284 /* fast path: clear whole word at once */
1285 addr = bm + (cur >> 3);
1286 if (*addr != (__u32)(-1) && zero_bit == -1)
1287 zero_bit = cur + mb_find_next_zero_bit(addr, 32, 0);
1288 *addr = 0;
1289 cur += 32;
1290 continue;
1291 }
1292 if (!mb_test_and_clear_bit(cur, bm) && zero_bit == -1)
1293 zero_bit = cur;
1294 cur++;
1295 }
1296
1297 return zero_bit;
1298}
1299
1249void ext4_set_bits(void *bm, int cur, int len) 1300void ext4_set_bits(void *bm, int cur, int len)
1250{ 1301{
1251 __u32 *addr; 1302 __u32 *addr;
@@ -1264,17 +1315,90 @@ void ext4_set_bits(void *bm, int cur, int len)
1264 } 1315 }
1265} 1316}
1266 1317
1318/*
1319 * _________________________________________________________________ */
1320
1321static inline int mb_buddy_adjust_border(int* bit, void* bitmap, int side)
1322{
1323 if (mb_test_bit(*bit + side, bitmap)) {
1324 mb_clear_bit(*bit, bitmap);
1325 (*bit) -= side;
1326 return 1;
1327 }
1328 else {
1329 (*bit) += side;
1330 mb_set_bit(*bit, bitmap);
1331 return -1;
1332 }
1333}
1334
1335static void mb_buddy_mark_free(struct ext4_buddy *e4b, int first, int last)
1336{
1337 int max;
1338 int order = 1;
1339 void *buddy = mb_find_buddy(e4b, order, &max);
1340
1341 while (buddy) {
1342 void *buddy2;
1343
1344 /* Bits in range [first; last] are known to be set since
1345 * corresponding blocks were allocated. Bits in range
1346 * (first; last) will stay set because they form buddies on
1347 * upper layer. We just deal with borders if they don't
1348 * align with upper layer and then go up.
1349 * Releasing entire group is all about clearing
1350 * single bit of highest order buddy.
1351 */
1352
1353 /* Example:
1354 * ---------------------------------
1355 * | 1 | 1 | 1 | 1 |
1356 * ---------------------------------
1357 * | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
1358 * ---------------------------------
1359 * 0 1 2 3 4 5 6 7
1360 * \_____________________/
1361 *
1362 * Neither [1] nor [6] is aligned to above layer.
1363 * Left neighbour [0] is free, so mark it busy,
1364 * decrease bb_counters and extend range to
1365 * [0; 6]
1366 * Right neighbour [7] is busy. It can't be coaleasced with [6], so
1367 * mark [6] free, increase bb_counters and shrink range to
1368 * [0; 5].
1369 * Then shift range to [0; 2], go up and do the same.
1370 */
1371
1372
1373 if (first & 1)
1374 e4b->bd_info->bb_counters[order] += mb_buddy_adjust_border(&first, buddy, -1);
1375 if (!(last & 1))
1376 e4b->bd_info->bb_counters[order] += mb_buddy_adjust_border(&last, buddy, 1);
1377 if (first > last)
1378 break;
1379 order++;
1380
1381 if (first == last || !(buddy2 = mb_find_buddy(e4b, order, &max))) {
1382 mb_clear_bits(buddy, first, last - first + 1);
1383 e4b->bd_info->bb_counters[order - 1] += last - first + 1;
1384 break;
1385 }
1386 first >>= 1;
1387 last >>= 1;
1388 buddy = buddy2;
1389 }
1390}
1391
1267static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, 1392static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
1268 int first, int count) 1393 int first, int count)
1269{ 1394{
1270 int block = 0; 1395 int left_is_free = 0;
1271 int max = 0; 1396 int right_is_free = 0;
1272 int order; 1397 int block;
1273 void *buddy; 1398 int last = first + count - 1;
1274 void *buddy2;
1275 struct super_block *sb = e4b->bd_sb; 1399 struct super_block *sb = e4b->bd_sb;
1276 1400
1277 BUG_ON(first + count > (sb->s_blocksize << 3)); 1401 BUG_ON(last >= (sb->s_blocksize << 3));
1278 assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group)); 1402 assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group));
1279 mb_check_buddy(e4b); 1403 mb_check_buddy(e4b);
1280 mb_free_blocks_double(inode, e4b, first, count); 1404 mb_free_blocks_double(inode, e4b, first, count);
@@ -1283,67 +1407,54 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
1283 if (first < e4b->bd_info->bb_first_free) 1407 if (first < e4b->bd_info->bb_first_free)
1284 e4b->bd_info->bb_first_free = first; 1408 e4b->bd_info->bb_first_free = first;
1285 1409
1286 /* let's maintain fragments counter */ 1410 /* access memory sequentially: check left neighbour,
1411 * clear range and then check right neighbour
1412 */
1287 if (first != 0) 1413 if (first != 0)
1288 block = !mb_test_bit(first - 1, e4b->bd_bitmap); 1414 left_is_free = !mb_test_bit(first - 1, e4b->bd_bitmap);
1289 if (first + count < EXT4_SB(sb)->s_mb_maxs[0]) 1415 block = mb_test_and_clear_bits(e4b->bd_bitmap, first, count);
1290 max = !mb_test_bit(first + count, e4b->bd_bitmap); 1416 if (last + 1 < EXT4_SB(sb)->s_mb_maxs[0])
1291 if (block && max) 1417 right_is_free = !mb_test_bit(last + 1, e4b->bd_bitmap);
1292 e4b->bd_info->bb_fragments--;
1293 else if (!block && !max)
1294 e4b->bd_info->bb_fragments++;
1295
1296 /* let's maintain buddy itself */
1297 while (count-- > 0) {
1298 block = first++;
1299 order = 0;
1300
1301 if (!mb_test_bit(block, e4b->bd_bitmap)) {
1302 ext4_fsblk_t blocknr;
1303
1304 blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
1305 blocknr += EXT4_C2B(EXT4_SB(sb), block);
1306 ext4_grp_locked_error(sb, e4b->bd_group,
1307 inode ? inode->i_ino : 0,
1308 blocknr,
1309 "freeing already freed block "
1310 "(bit %u)", block);
1311 }
1312 mb_clear_bit(block, e4b->bd_bitmap);
1313 e4b->bd_info->bb_counters[order]++;
1314
1315 /* start of the buddy */
1316 buddy = mb_find_buddy(e4b, order, &max);
1317 1418
1318 do { 1419 if (unlikely(block != -1)) {
1319 block &= ~1UL; 1420 ext4_fsblk_t blocknr;
1320 if (mb_test_bit(block, buddy) ||
1321 mb_test_bit(block + 1, buddy))
1322 break;
1323 1421
1324 /* both the buddies are free, try to coalesce them */ 1422 blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
1325 buddy2 = mb_find_buddy(e4b, order + 1, &max); 1423 blocknr += EXT4_C2B(EXT4_SB(sb), block);
1424 ext4_grp_locked_error(sb, e4b->bd_group,
1425 inode ? inode->i_ino : 0,
1426 blocknr,
1427 "freeing already freed block "
1428 "(bit %u)", block);
1429 mb_regenerate_buddy(e4b);
1430 goto done;
1431 }
1326 1432
1327 if (!buddy2) 1433 /* let's maintain fragments counter */
1328 break; 1434 if (left_is_free && right_is_free)
1435 e4b->bd_info->bb_fragments--;
1436 else if (!left_is_free && !right_is_free)
1437 e4b->bd_info->bb_fragments++;
1329 1438
1330 if (order > 0) { 1439 /* buddy[0] == bd_bitmap is a special case, so handle
1331 /* for special purposes, we don't set 1440 * it right away and let mb_buddy_mark_free stay free of
1332 * free bits in bitmap */ 1441 * zero order checks.
1333 mb_set_bit(block, buddy); 1442 * Check if neighbours are to be coaleasced,
1334 mb_set_bit(block + 1, buddy); 1443 * adjust bitmap bb_counters and borders appropriately.
1335 } 1444 */
1336 e4b->bd_info->bb_counters[order]--; 1445 if (first & 1) {
1337 e4b->bd_info->bb_counters[order]--; 1446 first += !left_is_free;
1447 e4b->bd_info->bb_counters[0] += left_is_free ? -1 : 1;
1448 }
1449 if (!(last & 1)) {
1450 last -= !right_is_free;
1451 e4b->bd_info->bb_counters[0] += right_is_free ? -1 : 1;
1452 }
1338 1453
1339 block = block >> 1; 1454 if (first <= last)
1340 order++; 1455 mb_buddy_mark_free(e4b, first >> 1, last >> 1);
1341 e4b->bd_info->bb_counters[order]++;
1342 1456
1343 mb_clear_bit(block, buddy2); 1457done:
1344 buddy = buddy2;
1345 } while (1);
1346 }
1347 mb_set_largest_free_order(sb, e4b->bd_info); 1458 mb_set_largest_free_order(sb, e4b->bd_info);
1348 mb_check_buddy(e4b); 1459 mb_check_buddy(e4b);
1349} 1460}