aboutsummaryrefslogtreecommitdiffstats
path: root/fs/gfs2/rgrp.c
diff options
context:
space:
mode:
authorBob Peterson <rpeterso@redhat.com>2012-07-19 08:12:40 -0400
committerSteven Whitehouse <swhiteho@redhat.com>2012-07-19 09:51:08 -0400
commit8e2e00473598dd5379d8408cb974dade000acafc (patch)
tree1f7bfdf0d07b6c0315bbd11ffee174742d66a459 /fs/gfs2/rgrp.c
parent294f2ad5a545eb71d397623743ddd8201131bdad (diff)
GFS2: Reduce file fragmentation
This patch reduces GFS2 file fragmentation by pre-reserving blocks. The resulting improved on disk layout greatly speeds up operations in cases which would have resulted in interlaced allocation of blocks previously. A typical example of this is 10 parallel dd processes, each writing to a file in a common dirctory. The implementation uses an rbtree of reservations attached to each resource group (and each inode). Signed-off-by: Bob Peterson <rpeterso@redhat.com> Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
Diffstat (limited to 'fs/gfs2/rgrp.c')
-rw-r--r--fs/gfs2/rgrp.c578
1 files changed, 523 insertions, 55 deletions
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index fb7079263ea7..4d34887a601d 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -35,6 +35,9 @@
35#define BFITNOENT ((u32)~0) 35#define BFITNOENT ((u32)~0)
36#define NO_BLOCK ((u64)~0) 36#define NO_BLOCK ((u64)~0)
37 37
38#define RSRV_CONTENTION_FACTOR 4
39#define RGRP_RSRV_MAX_CONTENDERS 2
40
38#if BITS_PER_LONG == 32 41#if BITS_PER_LONG == 32
39#define LBITMASK (0x55555555UL) 42#define LBITMASK (0x55555555UL)
40#define LBITSKIP55 (0x55555555UL) 43#define LBITSKIP55 (0x55555555UL)
@@ -178,6 +181,57 @@ static inline u64 gfs2_bit_search(const __le64 *ptr, u64 mask, u8 state)
178} 181}
179 182
180/** 183/**
184 * rs_cmp - multi-block reservation range compare
185 * @blk: absolute file system block number of the new reservation
186 * @len: number of blocks in the new reservation
187 * @rs: existing reservation to compare against
188 *
189 * returns: 1 if the block range is beyond the reach of the reservation
190 * -1 if the block range is before the start of the reservation
191 * 0 if the block range overlaps with the reservation
192 */
193static inline int rs_cmp(u64 blk, u32 len, struct gfs2_blkreserv *rs)
194{
195 u64 startblk = gfs2_rs_startblk(rs);
196
197 if (blk >= startblk + rs->rs_free)
198 return 1;
199 if (blk + len - 1 < startblk)
200 return -1;
201 return 0;
202}
203
204/**
205 * rs_find - Find a rgrp multi-block reservation that contains a given block
206 * @rgd: The rgrp
207 * @rgblk: The block we're looking for, relative to the rgrp
208 */
209static struct gfs2_blkreserv *rs_find(struct gfs2_rgrpd *rgd, u32 rgblk)
210{
211 struct rb_node **newn;
212 int rc;
213 u64 fsblk = rgblk + rgd->rd_data0;
214
215 spin_lock(&rgd->rd_rsspin);
216 newn = &rgd->rd_rstree.rb_node;
217 while (*newn) {
218 struct gfs2_blkreserv *cur =
219 rb_entry(*newn, struct gfs2_blkreserv, rs_node);
220 rc = rs_cmp(fsblk, 1, cur);
221 if (rc < 0)
222 newn = &((*newn)->rb_left);
223 else if (rc > 0)
224 newn = &((*newn)->rb_right);
225 else {
226 spin_unlock(&rgd->rd_rsspin);
227 return cur;
228 }
229 }
230 spin_unlock(&rgd->rd_rsspin);
231 return NULL;
232}
233
234/**
181 * gfs2_bitfit - Search an rgrp's bitmap buffer to find a bit-pair representing 235 * gfs2_bitfit - Search an rgrp's bitmap buffer to find a bit-pair representing
182 * a block in a given allocation state. 236 * a block in a given allocation state.
183 * @buf: the buffer that holds the bitmaps 237 * @buf: the buffer that holds the bitmaps
@@ -424,19 +478,93 @@ void gfs2_free_clones(struct gfs2_rgrpd *rgd)
424int gfs2_rs_alloc(struct gfs2_inode *ip) 478int gfs2_rs_alloc(struct gfs2_inode *ip)
425{ 479{
426 int error = 0; 480 int error = 0;
481 struct gfs2_blkreserv *res;
482
483 if (ip->i_res)
484 return 0;
485
486 res = kmem_cache_zalloc(gfs2_rsrv_cachep, GFP_NOFS);
487 if (!res)
488 error = -ENOMEM;
427 489
428 down_write(&ip->i_rw_mutex); 490 down_write(&ip->i_rw_mutex);
429 if (!ip->i_res) { 491 if (ip->i_res)
430 ip->i_res = kmem_cache_zalloc(gfs2_rsrv_cachep, GFP_NOFS); 492 kmem_cache_free(gfs2_rsrv_cachep, res);
431 if (!ip->i_res) 493 else
432 error = -ENOMEM; 494 ip->i_res = res;
433 }
434 up_write(&ip->i_rw_mutex); 495 up_write(&ip->i_rw_mutex);
435 return error; 496 return error;
436} 497}
437 498
499static void dump_rs(struct seq_file *seq, struct gfs2_blkreserv *rs)
500{
501 gfs2_print_dbg(seq, " r: %llu s:%llu b:%u f:%u\n",
502 rs->rs_rgd->rd_addr, gfs2_rs_startblk(rs), rs->rs_biblk,
503 rs->rs_free);
504}
505
438/** 506/**
439 * gfs2_rs_delete - delete a reservation 507 * __rs_deltree - remove a multi-block reservation from the rgd tree
508 * @rs: The reservation to remove
509 *
510 */
511static void __rs_deltree(struct gfs2_blkreserv *rs)
512{
513 struct gfs2_rgrpd *rgd;
514
515 if (!gfs2_rs_active(rs))
516 return;
517
518 rgd = rs->rs_rgd;
519 /* We can't do this: The reason is that when the rgrp is invalidated,
520 it's in the "middle" of acquiring the glock, but the HOLDER bit
521 isn't set yet:
522 BUG_ON(!gfs2_glock_is_locked_by_me(rs->rs_rgd->rd_gl));*/
523 trace_gfs2_rs(NULL, rs, TRACE_RS_TREEDEL);
524
525 if (!RB_EMPTY_ROOT(&rgd->rd_rstree))
526 rb_erase(&rs->rs_node, &rgd->rd_rstree);
527 BUG_ON(!rgd->rd_rs_cnt);
528 rgd->rd_rs_cnt--;
529
530 if (rs->rs_free) {
531 /* return reserved blocks to the rgrp and the ip */
532 BUG_ON(rs->rs_rgd->rd_reserved < rs->rs_free);
533 rs->rs_rgd->rd_reserved -= rs->rs_free;
534 rs->rs_free = 0;
535 clear_bit(GBF_FULL, &rs->rs_bi->bi_flags);
536 smp_mb__after_clear_bit();
537 }
538 /* We can't change any of the step 1 or step 2 components of the rs.
539 E.g. We can't set rs_rgd to NULL because the rgd glock is held and
540 dequeued through this pointer.
541 Can't: atomic_set(&rs->rs_sizehint, 0);
542 Can't: rs->rs_requested = 0;
543 Can't: rs->rs_rgd = NULL;*/
544 rs->rs_bi = NULL;
545 rs->rs_biblk = 0;
546}
547
548/**
549 * gfs2_rs_deltree - remove a multi-block reservation from the rgd tree
550 * @rs: The reservation to remove
551 *
552 */
553void gfs2_rs_deltree(struct gfs2_blkreserv *rs)
554{
555 struct gfs2_rgrpd *rgd;
556
557 if (!gfs2_rs_active(rs))
558 return;
559
560 rgd = rs->rs_rgd;
561 spin_lock(&rgd->rd_rsspin);
562 __rs_deltree(rs);
563 spin_unlock(&rgd->rd_rsspin);
564}
565
566/**
567 * gfs2_rs_delete - delete a multi-block reservation
440 * @ip: The inode for this reservation 568 * @ip: The inode for this reservation
441 * 569 *
442 */ 570 */
@@ -444,12 +572,36 @@ void gfs2_rs_delete(struct gfs2_inode *ip)
444{ 572{
445 down_write(&ip->i_rw_mutex); 573 down_write(&ip->i_rw_mutex);
446 if (ip->i_res) { 574 if (ip->i_res) {
575 gfs2_rs_deltree(ip->i_res);
576 trace_gfs2_rs(ip, ip->i_res, TRACE_RS_DELETE);
577 BUG_ON(ip->i_res->rs_free);
447 kmem_cache_free(gfs2_rsrv_cachep, ip->i_res); 578 kmem_cache_free(gfs2_rsrv_cachep, ip->i_res);
448 ip->i_res = NULL; 579 ip->i_res = NULL;
449 } 580 }
450 up_write(&ip->i_rw_mutex); 581 up_write(&ip->i_rw_mutex);
451} 582}
452 583
584/**
585 * return_all_reservations - return all reserved blocks back to the rgrp.
586 * @rgd: the rgrp that needs its space back
587 *
588 * We previously reserved a bunch of blocks for allocation. Now we need to
589 * give them back. This leaves the reservation structures intact, but removes
590 * all of their corresponding "no-fly zones".
591 */
592static void return_all_reservations(struct gfs2_rgrpd *rgd)
593{
594 struct rb_node *n;
595 struct gfs2_blkreserv *rs;
596
597 spin_lock(&rgd->rd_rsspin);
598 while ((n = rb_first(&rgd->rd_rstree))) {
599 rs = rb_entry(n, struct gfs2_blkreserv, rs_node);
600 __rs_deltree(rs);
601 }
602 spin_unlock(&rgd->rd_rsspin);
603}
604
453void gfs2_clear_rgrpd(struct gfs2_sbd *sdp) 605void gfs2_clear_rgrpd(struct gfs2_sbd *sdp)
454{ 606{
455 struct rb_node *n; 607 struct rb_node *n;
@@ -472,6 +624,7 @@ void gfs2_clear_rgrpd(struct gfs2_sbd *sdp)
472 624
473 gfs2_free_clones(rgd); 625 gfs2_free_clones(rgd);
474 kfree(rgd->rd_bits); 626 kfree(rgd->rd_bits);
627 return_all_reservations(rgd);
475 kmem_cache_free(gfs2_rgrpd_cachep, rgd); 628 kmem_cache_free(gfs2_rgrpd_cachep, rgd);
476 } 629 }
477} 630}
@@ -649,6 +802,7 @@ static int read_rindex_entry(struct gfs2_inode *ip)
649 rgd->rd_data0 = be64_to_cpu(buf.ri_data0); 802 rgd->rd_data0 = be64_to_cpu(buf.ri_data0);
650 rgd->rd_data = be32_to_cpu(buf.ri_data); 803 rgd->rd_data = be32_to_cpu(buf.ri_data);
651 rgd->rd_bitbytes = be32_to_cpu(buf.ri_bitbytes); 804 rgd->rd_bitbytes = be32_to_cpu(buf.ri_bitbytes);
805 spin_lock_init(&rgd->rd_rsspin);
652 806
653 error = compute_bitstructs(rgd); 807 error = compute_bitstructs(rgd);
654 if (error) 808 if (error)
@@ -1115,29 +1269,212 @@ out:
1115} 1269}
1116 1270
1117/** 1271/**
1272 * rs_insert - insert a new multi-block reservation into the rgrp's rb_tree
1273 * @bi: the bitmap with the blocks
1274 * @ip: the inode structure
1275 * @biblk: the 32-bit block number relative to the start of the bitmap
1276 * @amount: the number of blocks to reserve
1277 *
1278 * Returns: NULL - reservation was already taken, so not inserted
1279 * pointer to the inserted reservation
1280 */
1281static struct gfs2_blkreserv *rs_insert(struct gfs2_bitmap *bi,
1282 struct gfs2_inode *ip, u32 biblk,
1283 int amount)
1284{
1285 struct rb_node **newn, *parent = NULL;
1286 int rc;
1287 struct gfs2_blkreserv *rs = ip->i_res;
1288 struct gfs2_rgrpd *rgd = rs->rs_rgd;
1289 u64 fsblock = gfs2_bi2rgd_blk(bi, biblk) + rgd->rd_data0;
1290
1291 spin_lock(&rgd->rd_rsspin);
1292 newn = &rgd->rd_rstree.rb_node;
1293 BUG_ON(!ip->i_res);
1294 BUG_ON(gfs2_rs_active(rs));
1295 /* Figure out where to put new node */
1296 /*BUG_ON(!gfs2_glock_is_locked_by_me(rgd->rd_gl));*/
1297 while (*newn) {
1298 struct gfs2_blkreserv *cur =
1299 rb_entry(*newn, struct gfs2_blkreserv, rs_node);
1300
1301 parent = *newn;
1302 rc = rs_cmp(fsblock, amount, cur);
1303 if (rc > 0)
1304 newn = &((*newn)->rb_right);
1305 else if (rc < 0)
1306 newn = &((*newn)->rb_left);
1307 else {
1308 spin_unlock(&rgd->rd_rsspin);
1309 return NULL; /* reservation already in use */
1310 }
1311 }
1312
1313 /* Do our reservation work */
1314 rs = ip->i_res;
1315 rs->rs_free = amount;
1316 rs->rs_biblk = biblk;
1317 rs->rs_bi = bi;
1318 rb_link_node(&rs->rs_node, parent, newn);
1319 rb_insert_color(&rs->rs_node, &rgd->rd_rstree);
1320
1321 /* Do our inode accounting for the reservation */
1322 /*BUG_ON(!gfs2_glock_is_locked_by_me(ip->i_gl));*/
1323
1324 /* Do our rgrp accounting for the reservation */
1325 rgd->rd_reserved += amount; /* blocks reserved */
1326 rgd->rd_rs_cnt++; /* number of in-tree reservations */
1327 spin_unlock(&rgd->rd_rsspin);
1328 trace_gfs2_rs(ip, rs, TRACE_RS_INSERT);
1329 return rs;
1330}
1331
1332/**
1333 * unclaimed_blocks - return number of blocks that aren't spoken for
1334 */
1335static u32 unclaimed_blocks(struct gfs2_rgrpd *rgd)
1336{
1337 return rgd->rd_free_clone - rgd->rd_reserved;
1338}
1339
1340/**
1341 * rg_mblk_search - find a group of multiple free blocks
1342 * @rgd: the resource group descriptor
1343 * @rs: the block reservation
1344 * @ip: pointer to the inode for which we're reserving blocks
1345 *
1346 * This is very similar to rgblk_search, except we're looking for whole
1347 * 64-bit words that represent a chunk of 32 free blocks. I'm only focusing
1348 * on aligned dwords for speed's sake.
1349 *
1350 * Returns: 0 if successful or BFITNOENT if there isn't enough free space
1351 */
1352
1353static int rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
1354{
1355 struct gfs2_bitmap *bi = rgd->rd_bits;
1356 const u32 length = rgd->rd_length;
1357 u32 blk;
1358 unsigned int buf, x, search_bytes;
1359 u8 *buffer = NULL;
1360 u8 *ptr, *end, *nonzero;
1361 u32 goal, rsv_bytes;
1362 struct gfs2_blkreserv *rs;
1363 u32 best_rs_bytes, unclaimed;
1364 int best_rs_blocks;
1365
1366 /* Find bitmap block that contains bits for goal block */
1367 if (rgrp_contains_block(rgd, ip->i_goal))
1368 goal = ip->i_goal - rgd->rd_data0;
1369 else
1370 goal = rgd->rd_last_alloc;
1371 for (buf = 0; buf < length; buf++) {
1372 bi = rgd->rd_bits + buf;
1373 /* Convert scope of "goal" from rgrp-wide to within
1374 found bit block */
1375 if (goal < (bi->bi_start + bi->bi_len) * GFS2_NBBY) {
1376 goal -= bi->bi_start * GFS2_NBBY;
1377 goto do_search;
1378 }
1379 }
1380 buf = 0;
1381 goal = 0;
1382
1383do_search:
1384 best_rs_blocks = max_t(int, atomic_read(&ip->i_res->rs_sizehint),
1385 (RGRP_RSRV_MINBLKS * rgd->rd_length));
1386 best_rs_bytes = (best_rs_blocks *
1387 (1 + (RSRV_CONTENTION_FACTOR * rgd->rd_rs_cnt))) /
1388 GFS2_NBBY; /* 1 + is for our not-yet-created reservation */
1389 best_rs_bytes = ALIGN(best_rs_bytes, sizeof(u64));
1390 unclaimed = unclaimed_blocks(rgd);
1391 if (best_rs_bytes * GFS2_NBBY > unclaimed)
1392 best_rs_bytes = unclaimed >> GFS2_BIT_SIZE;
1393
1394 for (x = 0; x <= length; x++) {
1395 bi = rgd->rd_bits + buf;
1396
1397 if (test_bit(GBF_FULL, &bi->bi_flags))
1398 goto skip;
1399
1400 WARN_ON(!buffer_uptodate(bi->bi_bh));
1401 if (bi->bi_clone)
1402 buffer = bi->bi_clone + bi->bi_offset;
1403 else
1404 buffer = bi->bi_bh->b_data + bi->bi_offset;
1405
1406 /* We have to keep the reservations aligned on u64 boundaries
1407 otherwise we could get situations where a byte can't be
1408 used because it's after a reservation, but a free bit still
1409 is within the reservation's area. */
1410 ptr = buffer + ALIGN(goal >> GFS2_BIT_SIZE, sizeof(u64));
1411 end = (buffer + bi->bi_len);
1412 while (ptr < end) {
1413 rsv_bytes = 0;
1414 if ((ptr + best_rs_bytes) <= end)
1415 search_bytes = best_rs_bytes;
1416 else
1417 search_bytes = end - ptr;
1418 BUG_ON(!search_bytes);
1419 nonzero = memchr_inv(ptr, 0, search_bytes);
1420 /* If the lot is all zeroes, reserve the whole size. If
1421 there's enough zeroes to satisfy the request, use
1422 what we can. If there's not enough, keep looking. */
1423 if (nonzero == NULL)
1424 rsv_bytes = search_bytes;
1425 else if ((nonzero - ptr) * GFS2_NBBY >=
1426 ip->i_res->rs_requested)
1427 rsv_bytes = (nonzero - ptr);
1428
1429 if (rsv_bytes) {
1430 blk = ((ptr - buffer) * GFS2_NBBY);
1431 BUG_ON(blk >= bi->bi_len * GFS2_NBBY);
1432 rs = rs_insert(bi, ip, blk,
1433 rsv_bytes * GFS2_NBBY);
1434 if (IS_ERR(rs))
1435 return PTR_ERR(rs);
1436 if (rs)
1437 return 0;
1438 }
1439 ptr += ALIGN(search_bytes, sizeof(u64));
1440 }
1441skip:
1442 /* Try next bitmap block (wrap back to rgrp header
1443 if at end) */
1444 buf++;
1445 buf %= length;
1446 goal = 0;
1447 }
1448
1449 return BFITNOENT;
1450}
1451
1452/**
1118 * try_rgrp_fit - See if a given reservation will fit in a given RG 1453 * try_rgrp_fit - See if a given reservation will fit in a given RG
1119 * @rgd: the RG data 1454 * @rgd: the RG data
1120 * @ip: the inode 1455 * @ip: the inode
1121 * 1456 *
1122 * If there's room for the requested blocks to be allocated from the RG: 1457 * If there's room for the requested blocks to be allocated from the RG:
1458 * This will try to get a multi-block reservation first, and if that doesn't
1459 * fit, it will take what it can.
1123 * 1460 *
1124 * Returns: 1 on success (it fits), 0 on failure (it doesn't fit) 1461 * Returns: 1 on success (it fits), 0 on failure (it doesn't fit)
1125 */ 1462 */
1126 1463
1127static int try_rgrp_fit(const struct gfs2_rgrpd *rgd, const struct gfs2_inode *ip) 1464static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
1128{ 1465{
1129 const struct gfs2_blkreserv *rs = ip->i_res; 1466 struct gfs2_blkreserv *rs = ip->i_res;
1130 1467
1131 if (rgd->rd_flags & (GFS2_RGF_NOALLOC | GFS2_RDF_ERROR)) 1468 if (rgd->rd_flags & (GFS2_RGF_NOALLOC | GFS2_RDF_ERROR))
1132 return 0; 1469 return 0;
1133 if (rgd->rd_free_clone >= rs->rs_requested) 1470 /* Look for a multi-block reservation. */
1471 if (unclaimed_blocks(rgd) >= RGRP_RSRV_MINBLKS &&
1472 rg_mblk_search(rgd, ip) != BFITNOENT)
1473 return 1;
1474 if (unclaimed_blocks(rgd) >= rs->rs_requested)
1134 return 1; 1475 return 1;
1135 return 0;
1136}
1137 1476
1138static inline u32 gfs2_bi2rgd_blk(struct gfs2_bitmap *bi, u32 blk) 1477 return 0;
1139{
1140 return (bi->bi_start * GFS2_NBBY) + blk;
1141} 1478}
1142 1479
1143/** 1480/**
@@ -1217,7 +1554,7 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip
1217int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested) 1554int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested)
1218{ 1555{
1219 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1556 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1220 struct gfs2_rgrpd *rgd, *begin = NULL; 1557 struct gfs2_rgrpd *begin = NULL;
1221 struct gfs2_blkreserv *rs = ip->i_res; 1558 struct gfs2_blkreserv *rs = ip->i_res;
1222 int error = 0, rg_locked, flags = LM_FLAG_TRY; 1559 int error = 0, rg_locked, flags = LM_FLAG_TRY;
1223 u64 last_unlinked = NO_BLOCK; 1560 u64 last_unlinked = NO_BLOCK;
@@ -1225,32 +1562,40 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested)
1225 1562
1226 if (sdp->sd_args.ar_rgrplvb) 1563 if (sdp->sd_args.ar_rgrplvb)
1227 flags |= GL_SKIP; 1564 flags |= GL_SKIP;
1228 rs = ip->i_res;
1229 rs->rs_requested = requested; 1565 rs->rs_requested = requested;
1230 if (gfs2_assert_warn(sdp, requested)) { 1566 if (gfs2_assert_warn(sdp, requested)) {
1231 error = -EINVAL; 1567 error = -EINVAL;
1232 goto out; 1568 goto out;
1233 } 1569 }
1234 1570 if (gfs2_rs_active(rs)) {
1235 if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal)) 1571 begin = rs->rs_rgd;
1236 rgd = begin = ip->i_rgd; 1572 flags = 0; /* Yoda: Do or do not. There is no try */
1237 else 1573 } else if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal)) {
1238 rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1); 1574 rs->rs_rgd = begin = ip->i_rgd;
1239 1575 } else {
1240 if (rgd == NULL) 1576 rs->rs_rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1);
1577 }
1578 if (rs->rs_rgd == NULL)
1241 return -EBADSLT; 1579 return -EBADSLT;
1242 1580
1243 while (loops < 3) { 1581 while (loops < 3) {
1244 rg_locked = 0; 1582 rg_locked = 0;
1245 1583
1246 if (gfs2_glock_is_locked_by_me(rgd->rd_gl)) { 1584 if (gfs2_glock_is_locked_by_me(rs->rs_rgd->rd_gl)) {
1247 rg_locked = 1; 1585 rg_locked = 1;
1248 error = 0; 1586 error = 0;
1587 } else if (!loops && !gfs2_rs_active(rs) &&
1588 rs->rs_rgd->rd_rs_cnt > RGRP_RSRV_MAX_CONTENDERS) {
1589 /* If the rgrp already is maxed out for contenders,
1590 we can eliminate it as a "first pass" without even
1591 requesting the rgrp glock. */
1592 error = GLR_TRYFAILED;
1249 } else { 1593 } else {
1250 error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 1594 error = gfs2_glock_nq_init(rs->rs_rgd->rd_gl,
1251 flags, &rs->rs_rgd_gh); 1595 LM_ST_EXCLUSIVE, flags,
1596 &rs->rs_rgd_gh);
1252 if (!error && sdp->sd_args.ar_rgrplvb) { 1597 if (!error && sdp->sd_args.ar_rgrplvb) {
1253 error = update_rgrp_lvb(rgd); 1598 error = update_rgrp_lvb(rs->rs_rgd);
1254 if (error) { 1599 if (error) {
1255 gfs2_glock_dq_uninit(&rs->rs_rgd_gh); 1600 gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
1256 return error; 1601 return error;
@@ -1259,25 +1604,37 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested)
1259 } 1604 }
1260 switch (error) { 1605 switch (error) {
1261 case 0: 1606 case 0:
1262 if (try_rgrp_fit(rgd, ip)) { 1607 if (gfs2_rs_active(rs)) {
1608 if (unclaimed_blocks(rs->rs_rgd) +
1609 rs->rs_free >= rs->rs_requested) {
1610 ip->i_rgd = rs->rs_rgd;
1611 return 0;
1612 }
1613 /* We have a multi-block reservation, but the
1614 rgrp doesn't have enough free blocks to
1615 satisfy the request. Free the reservation
1616 and look for a suitable rgrp. */
1617 gfs2_rs_deltree(rs);
1618 }
1619 if (try_rgrp_fit(rs->rs_rgd, ip)) {
1263 if (sdp->sd_args.ar_rgrplvb) 1620 if (sdp->sd_args.ar_rgrplvb)
1264 gfs2_rgrp_bh_get(rgd); 1621 gfs2_rgrp_bh_get(rs->rs_rgd);
1265 ip->i_rgd = rgd; 1622 ip->i_rgd = rs->rs_rgd;
1266 return 0; 1623 return 0;
1267 } 1624 }
1268 if (rgd->rd_flags & GFS2_RDF_CHECK) { 1625 if (rs->rs_rgd->rd_flags & GFS2_RDF_CHECK) {
1269 if (sdp->sd_args.ar_rgrplvb) 1626 if (sdp->sd_args.ar_rgrplvb)
1270 gfs2_rgrp_bh_get(rgd); 1627 gfs2_rgrp_bh_get(rs->rs_rgd);
1271 try_rgrp_unlink(rgd, &last_unlinked, 1628 try_rgrp_unlink(rs->rs_rgd, &last_unlinked,
1272 ip->i_no_addr); 1629 ip->i_no_addr);
1273 } 1630 }
1274 if (!rg_locked) 1631 if (!rg_locked)
1275 gfs2_glock_dq_uninit(&rs->rs_rgd_gh); 1632 gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
1276 /* fall through */ 1633 /* fall through */
1277 case GLR_TRYFAILED: 1634 case GLR_TRYFAILED:
1278 rgd = gfs2_rgrpd_get_next(rgd); 1635 rs->rs_rgd = gfs2_rgrpd_get_next(rs->rs_rgd);
1279 rgd = rgd ? : begin; /* if NULL, wrap */ 1636 rs->rs_rgd = rs->rs_rgd ? : begin; /* if NULL, wrap */
1280 if (rgd != begin) /* If we didn't wrap */ 1637 if (rs->rs_rgd != begin) /* If we didn't wrap */
1281 break; 1638 break;
1282 1639
1283 flags &= ~LM_FLAG_TRY; 1640 flags &= ~LM_FLAG_TRY;
@@ -1315,6 +1672,12 @@ void gfs2_inplace_release(struct gfs2_inode *ip)
1315{ 1672{
1316 struct gfs2_blkreserv *rs = ip->i_res; 1673 struct gfs2_blkreserv *rs = ip->i_res;
1317 1674
1675 if (!rs)
1676 return;
1677
1678 if (!rs->rs_free)
1679 gfs2_rs_deltree(rs);
1680
1318 if (rs->rs_rgd_gh.gh_gl) 1681 if (rs->rs_rgd_gh.gh_gl)
1319 gfs2_glock_dq_uninit(&rs->rs_rgd_gh); 1682 gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
1320 rs->rs_requested = 0; 1683 rs->rs_requested = 0;
@@ -1413,7 +1776,27 @@ do_search:
1413 if (state != GFS2_BLKST_UNLINKED && bi->bi_clone) 1776 if (state != GFS2_BLKST_UNLINKED && bi->bi_clone)
1414 buffer = bi->bi_clone + bi->bi_offset; 1777 buffer = bi->bi_clone + bi->bi_offset;
1415 1778
1416 biblk = gfs2_bitfit(buffer, bi->bi_len, goal, state); 1779 while (1) {
1780 struct gfs2_blkreserv *rs;
1781 u32 rgblk;
1782
1783 biblk = gfs2_bitfit(buffer, bi->bi_len, goal, state);
1784 if (biblk == BFITNOENT)
1785 break;
1786 /* Check if this block is reserved() */
1787 rgblk = gfs2_bi2rgd_blk(bi, biblk);
1788 rs = rs_find(rgd, rgblk);
1789 if (rs == NULL)
1790 break;
1791
1792 BUG_ON(rs->rs_bi != bi);
1793 biblk = BFITNOENT;
1794 /* This should jump to the first block after the
1795 reservation. */
1796 goal = rs->rs_biblk + rs->rs_free;
1797 if (goal >= bi->bi_len * GFS2_NBBY)
1798 break;
1799 }
1417 if (biblk != BFITNOENT) 1800 if (biblk != BFITNOENT)
1418 break; 1801 break;
1419 1802
@@ -1449,8 +1832,9 @@ static u64 gfs2_alloc_extent(struct gfs2_rgrpd *rgd, struct gfs2_bitmap *bi,
1449 u32 blk, bool dinode, unsigned int *n) 1832 u32 blk, bool dinode, unsigned int *n)
1450{ 1833{
1451 const unsigned int elen = *n; 1834 const unsigned int elen = *n;
1452 u32 goal; 1835 u32 goal, rgblk;
1453 const u8 *buffer = NULL; 1836 const u8 *buffer = NULL;
1837 struct gfs2_blkreserv *rs;
1454 1838
1455 *n = 0; 1839 *n = 0;
1456 buffer = bi->bi_bh->b_data + bi->bi_offset; 1840 buffer = bi->bi_bh->b_data + bi->bi_offset;
@@ -1463,6 +1847,10 @@ static u64 gfs2_alloc_extent(struct gfs2_rgrpd *rgd, struct gfs2_bitmap *bi,
1463 goal++; 1847 goal++;
1464 if (goal >= (bi->bi_len * GFS2_NBBY)) 1848 if (goal >= (bi->bi_len * GFS2_NBBY))
1465 break; 1849 break;
1850 rgblk = gfs2_bi2rgd_blk(bi, goal);
1851 rs = rs_find(rgd, rgblk);
1852 if (rs) /* Oops, we bumped into someone's reservation */
1853 break;
1466 if (gfs2_testbit(rgd, buffer, bi->bi_len, goal) != 1854 if (gfs2_testbit(rgd, buffer, bi->bi_len, goal) !=
1467 GFS2_BLKST_FREE) 1855 GFS2_BLKST_FREE)
1468 break; 1856 break;
@@ -1538,12 +1926,22 @@ static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart,
1538 1926
1539int gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl) 1927int gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl)
1540{ 1928{
1541 const struct gfs2_rgrpd *rgd = gl->gl_object; 1929 struct gfs2_rgrpd *rgd = gl->gl_object;
1930 struct gfs2_blkreserv *trs;
1931 const struct rb_node *n;
1932
1542 if (rgd == NULL) 1933 if (rgd == NULL)
1543 return 0; 1934 return 0;
1544 gfs2_print_dbg(seq, " R: n:%llu f:%02x b:%u/%u i:%u\n", 1935 gfs2_print_dbg(seq, " R: n:%llu f:%02x b:%u/%u i:%u r:%u\n",
1545 (unsigned long long)rgd->rd_addr, rgd->rd_flags, 1936 (unsigned long long)rgd->rd_addr, rgd->rd_flags,
1546 rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes); 1937 rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes,
1938 rgd->rd_reserved);
1939 spin_lock(&rgd->rd_rsspin);
1940 for (n = rb_first(&rgd->rd_rstree); n; n = rb_next(&trs->rs_node)) {
1941 trs = rb_entry(n, struct gfs2_blkreserv, rs_node);
1942 dump_rs(seq, trs);
1943 }
1944 spin_unlock(&rgd->rd_rsspin);
1547 return 0; 1945 return 0;
1548} 1946}
1549 1947
@@ -1558,10 +1956,63 @@ static void gfs2_rgrp_error(struct gfs2_rgrpd *rgd)
1558} 1956}
1559 1957
1560/** 1958/**
1959 * claim_reserved_blks - Claim previously reserved blocks
1960 * @ip: the inode that's claiming the reservation
1961 * @dinode: 1 if this block is a dinode block, otherwise data block
1962 * @nblocks: desired extent length
1963 *
1964 * Lay claim to previously allocated block reservation blocks.
1965 * Returns: Starting block number of the blocks claimed.
1966 * Sets *nblocks to the actual extent length allocated.
1967 */
1968static u64 claim_reserved_blks(struct gfs2_inode *ip, bool dinode,
1969 unsigned int *nblocks)
1970{
1971 struct gfs2_blkreserv *rs = ip->i_res;
1972 struct gfs2_rgrpd *rgd = rs->rs_rgd;
1973 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1974 struct gfs2_bitmap *bi;
1975 u64 start_block = gfs2_rs_startblk(rs);
1976 const unsigned int elen = *nblocks;
1977
1978 /*BUG_ON(!gfs2_glock_is_locked_by_me(ip->i_gl));*/
1979 gfs2_assert_withdraw(sdp, rgd);
1980 /*BUG_ON(!gfs2_glock_is_locked_by_me(rgd->rd_gl));*/
1981 bi = rs->rs_bi;
1982 gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1);
1983
1984 for (*nblocks = 0; *nblocks < elen && rs->rs_free; (*nblocks)++) {
1985 /* Make sure the bitmap hasn't changed */
1986 gfs2_setbit(rgd, bi->bi_clone, bi, rs->rs_biblk,
1987 dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
1988 rs->rs_biblk++;
1989 rs->rs_free--;
1990
1991 BUG_ON(!rgd->rd_reserved);
1992 rgd->rd_reserved--;
1993 dinode = false;
1994 trace_gfs2_rs(ip, rs, TRACE_RS_CLAIM);
1995 }
1996
1997 if (!rs->rs_free) {
1998 struct gfs2_rgrpd *rgd = ip->i_res->rs_rgd;
1999
2000 gfs2_rs_deltree(rs);
2001 /* -nblocks because we haven't returned to do the math yet.
2002 I'm doing the math backwards to prevent negative numbers,
2003 but think of it as:
2004 if (unclaimed_blocks(rgd) - *nblocks >= RGRP_RSRV_MINBLKS */
2005 if (unclaimed_blocks(rgd) >= RGRP_RSRV_MINBLKS + *nblocks)
2006 rg_mblk_search(rgd, ip);
2007 }
2008 return start_block;
2009}
2010
2011/**
1561 * gfs2_alloc_blocks - Allocate one or more blocks of data and/or a dinode 2012 * gfs2_alloc_blocks - Allocate one or more blocks of data and/or a dinode
1562 * @ip: the inode to allocate the block for 2013 * @ip: the inode to allocate the block for
1563 * @bn: Used to return the starting block number 2014 * @bn: Used to return the starting block number
1564 * @ndata: requested number of blocks/extent length (value/result) 2015 * @nblocks: requested number of blocks/extent length (value/result)
1565 * @dinode: 1 if we're allocating a dinode block, else 0 2016 * @dinode: 1 if we're allocating a dinode block, else 0
1566 * @generation: the generation number of the inode 2017 * @generation: the generation number of the inode
1567 * 2018 *
@@ -1586,20 +2037,34 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
1586 if (ip->i_res->rs_requested == 0) 2037 if (ip->i_res->rs_requested == 0)
1587 return -ECANCELED; 2038 return -ECANCELED;
1588 2039
1589 rgd = ip->i_rgd; 2040 /* Check if we have a multi-block reservation, and if so, claim the
1590 2041 next free block from it. */
1591 if (!dinode && rgrp_contains_block(rgd, ip->i_goal)) 2042 if (gfs2_rs_active(ip->i_res)) {
1592 goal = ip->i_goal - rgd->rd_data0; 2043 BUG_ON(!ip->i_res->rs_free);
1593 else 2044 rgd = ip->i_res->rs_rgd;
1594 goal = rgd->rd_last_alloc; 2045 block = claim_reserved_blks(ip, dinode, nblocks);
1595 2046 } else {
1596 blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, &bi); 2047 rgd = ip->i_rgd;
1597 2048
1598 /* Since all blocks are reserved in advance, this shouldn't happen */ 2049 if (!dinode && rgrp_contains_block(rgd, ip->i_goal))
1599 if (blk == BFITNOENT) 2050 goal = ip->i_goal - rgd->rd_data0;
1600 goto rgrp_error; 2051 else
2052 goal = rgd->rd_last_alloc;
2053
2054 blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, &bi);
2055
2056 /* Since all blocks are reserved in advance, this shouldn't
2057 happen */
2058 if (blk == BFITNOENT) {
2059 printk(KERN_WARNING "BFITNOENT, nblocks=%u\n",
2060 *nblocks);
2061 printk(KERN_WARNING "FULL=%d\n",
2062 test_bit(GBF_FULL, &rgd->rd_bits->bi_flags));
2063 goto rgrp_error;
2064 }
1601 2065
1602 block = gfs2_alloc_extent(rgd, bi, blk, dinode, nblocks); 2066 block = gfs2_alloc_extent(rgd, bi, blk, dinode, nblocks);
2067 }
1603 ndata = *nblocks; 2068 ndata = *nblocks;
1604 if (dinode) 2069 if (dinode)
1605 ndata--; 2070 ndata--;
@@ -1616,8 +2081,10 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
1616 brelse(dibh); 2081 brelse(dibh);
1617 } 2082 }
1618 } 2083 }
1619 if (rgd->rd_free < *nblocks) 2084 if (rgd->rd_free < *nblocks) {
2085 printk(KERN_WARNING "nblocks=%u\n", *nblocks);
1620 goto rgrp_error; 2086 goto rgrp_error;
2087 }
1621 2088
1622 rgd->rd_free -= *nblocks; 2089 rgd->rd_free -= *nblocks;
1623 if (dinode) { 2090 if (dinode) {
@@ -1877,6 +2344,7 @@ void gfs2_rlist_free(struct gfs2_rgrp_list *rlist)
1877 for (x = 0; x < rlist->rl_rgrps; x++) 2344 for (x = 0; x < rlist->rl_rgrps; x++)
1878 gfs2_holder_uninit(&rlist->rl_ghs[x]); 2345 gfs2_holder_uninit(&rlist->rl_ghs[x]);
1879 kfree(rlist->rl_ghs); 2346 kfree(rlist->rl_ghs);
2347 rlist->rl_ghs = NULL;
1880 } 2348 }
1881} 2349}
1882 2350