aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
author: Dave Chinner <david@fromorbit.com> 2010-01-11 06:47:44 -0500
committer: Alex Elder <aelder@sgi.com> 2010-01-15 16:33:52 -0500
commit: 1c1c6ebcf5284aee4910f3b906ac90c20e510c82 (patch)
tree: bbcf74752bf7bc058a5c5bdd6bd03090c845b041 /fs
parent: 44b56e0a1aed522a10051645e85d300e10926fd3 (diff)
xfs: Replace per-ag array with a radix tree
The use of an array for the per-ag structures requires reallocation of the array when growing the filesystem. This requires locking access to the array to avoid use-after-free situations, and the locking is difficult to get right. To avoid needing to reallocate an array, change the per-ag structures to an allocated object per ag and index them using a tree structure. The AGs are always densely indexed (hence the use of an array), but the number supported is 2^32 and lookups tend to be random and hence indexing needs to scale. A simple choice is a radix tree - it works well with this sort of index. This change also removes another large contiguous allocation from the mount/growfs path in XFS. The growing process now needs to change to only initialise the new AGs required for the extra space, and as such only needs to exclusively lock the tree for inserts. The rest of the code only needs to lock the tree while doing lookups, and hence this will remove all the deadlocks that currently occur on the m_perag_lock as it is now an innermost lock. The lock is also changed to a spinlock from a read/write lock as the hold time is now extremely short. To complete the picture, the per-ag structures will need to be reference counted to ensure that we don't free/modify them while they are still in use. This will be done in a subsequent patch. Signed-off-by: Dave Chinner <david@fromorbit.com> Reviewed-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Alex Elder <aelder@sgi.com>
Diffstat (limited to 'fs')
-rw-r--r-- fs/xfs/xfs_alloc.c 8
-rw-r--r-- fs/xfs/xfs_bmap.c 7
-rw-r--r-- fs/xfs/xfs_filestream.c 13
-rw-r--r-- fs/xfs/xfs_fsops.c 42
-rw-r--r-- fs/xfs/xfs_ialloc.c 25
-rw-r--r-- fs/xfs/xfs_itable.c 4
-rw-r--r-- fs/xfs/xfs_mount.c 63
-rw-r--r-- fs/xfs/xfs_mount.h 14
8 files changed, 86 insertions, 90 deletions
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index 84070f2e0ba4..4d66bb75579c 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -2276,7 +2276,6 @@ xfs_alloc_vextent(
2276 * These three force us into a single a.g. 2276 * These three force us into a single a.g.
2277 */ 2277 */
2278 args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno); 2278 args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
2279 down_read(&mp->m_peraglock);
2280 args->pag = xfs_perag_get(mp, args->agno); 2279 args->pag = xfs_perag_get(mp, args->agno);
2281 args->minleft = 0; 2280 args->minleft = 0;
2282 error = xfs_alloc_fix_freelist(args, 0); 2281 error = xfs_alloc_fix_freelist(args, 0);
@@ -2286,14 +2285,12 @@ xfs_alloc_vextent(
2286 goto error0; 2285 goto error0;
2287 } 2286 }
2288 if (!args->agbp) { 2287 if (!args->agbp) {
2289 up_read(&mp->m_peraglock);
2290 trace_xfs_alloc_vextent_noagbp(args); 2288 trace_xfs_alloc_vextent_noagbp(args);
2291 break; 2289 break;
2292 } 2290 }
2293 args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno); 2291 args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
2294 if ((error = xfs_alloc_ag_vextent(args))) 2292 if ((error = xfs_alloc_ag_vextent(args)))
2295 goto error0; 2293 goto error0;
2296 up_read(&mp->m_peraglock);
2297 break; 2294 break;
2298 case XFS_ALLOCTYPE_START_BNO: 2295 case XFS_ALLOCTYPE_START_BNO:
2299 /* 2296 /*
@@ -2345,7 +2342,6 @@ xfs_alloc_vextent(
2345 * Loop over allocation groups twice; first time with 2342 * Loop over allocation groups twice; first time with
2346 * trylock set, second time without. 2343 * trylock set, second time without.
2347 */ 2344 */
2348 down_read(&mp->m_peraglock);
2349 for (;;) { 2345 for (;;) {
2350 args->pag = xfs_perag_get(mp, args->agno); 2346 args->pag = xfs_perag_get(mp, args->agno);
2351 if (no_min) args->minleft = 0; 2347 if (no_min) args->minleft = 0;
@@ -2408,7 +2404,6 @@ xfs_alloc_vextent(
2408 } 2404 }
2409 xfs_perag_put(args->pag); 2405 xfs_perag_put(args->pag);
2410 } 2406 }
2411 up_read(&mp->m_peraglock);
2412 if (bump_rotor || (type == XFS_ALLOCTYPE_ANY_AG)) { 2407 if (bump_rotor || (type == XFS_ALLOCTYPE_ANY_AG)) {
2413 if (args->agno == sagno) 2408 if (args->agno == sagno)
2414 mp->m_agfrotor = (mp->m_agfrotor + 1) % 2409 mp->m_agfrotor = (mp->m_agfrotor + 1) %
@@ -2438,7 +2433,6 @@ xfs_alloc_vextent(
2438 return 0; 2433 return 0;
2439error0: 2434error0:
2440 xfs_perag_put(args->pag); 2435 xfs_perag_put(args->pag);
2441 up_read(&mp->m_peraglock);
2442 return error; 2436 return error;
2443} 2437}
2444 2438
@@ -2463,7 +2457,6 @@ xfs_free_extent(
2463 args.agno = XFS_FSB_TO_AGNO(args.mp, bno); 2457 args.agno = XFS_FSB_TO_AGNO(args.mp, bno);
2464 ASSERT(args.agno < args.mp->m_sb.sb_agcount); 2458 ASSERT(args.agno < args.mp->m_sb.sb_agcount);
2465 args.agbno = XFS_FSB_TO_AGBNO(args.mp, bno); 2459 args.agbno = XFS_FSB_TO_AGBNO(args.mp, bno);
2466 down_read(&args.mp->m_peraglock);
2467 args.pag = xfs_perag_get(args.mp, args.agno); 2460 args.pag = xfs_perag_get(args.mp, args.agno);
2468 if ((error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING))) 2461 if ((error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING)))
2469 goto error0; 2462 goto error0;
@@ -2475,7 +2468,6 @@ xfs_free_extent(
2475 error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0); 2468 error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0);
2476error0: 2469error0:
2477 xfs_perag_put(args.pag); 2470 xfs_perag_put(args.pag);
2478 up_read(&args.mp->m_peraglock);
2479 return error; 2471 return error;
2480} 2472}
2481 2473
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index a9b95d9cf2ad..7c6d9acd7154 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -2629,14 +2629,12 @@ xfs_bmap_btalloc(
2629 if (startag == NULLAGNUMBER) 2629 if (startag == NULLAGNUMBER)
2630 startag = ag = 0; 2630 startag = ag = 0;
2631 notinit = 0; 2631 notinit = 0;
2632 down_read(&mp->m_peraglock);
2633 pag = xfs_perag_get(mp, ag); 2632 pag = xfs_perag_get(mp, ag);
2634 while (blen < ap->alen) { 2633 while (blen < ap->alen) {
2635 if (!pag->pagf_init && 2634 if (!pag->pagf_init &&
2636 (error = xfs_alloc_pagf_init(mp, args.tp, 2635 (error = xfs_alloc_pagf_init(mp, args.tp,
2637 ag, XFS_ALLOC_FLAG_TRYLOCK))) { 2636 ag, XFS_ALLOC_FLAG_TRYLOCK))) {
2638 xfs_perag_put(pag); 2637 xfs_perag_put(pag);
2639 up_read(&mp->m_peraglock);
2640 return error; 2638 return error;
2641 } 2639 }
2642 /* 2640 /*
@@ -2669,10 +2667,8 @@ xfs_bmap_btalloc(
2669 2667
2670 error = xfs_filestream_new_ag(ap, &ag); 2668 error = xfs_filestream_new_ag(ap, &ag);
2671 xfs_perag_put(pag); 2669 xfs_perag_put(pag);
2672 if (error) { 2670 if (error)
2673 up_read(&mp->m_peraglock);
2674 return error; 2671 return error;
2675 }
2676 2672
2677 /* loop again to set 'blen'*/ 2673 /* loop again to set 'blen'*/
2678 startag = NULLAGNUMBER; 2674 startag = NULLAGNUMBER;
@@ -2688,7 +2684,6 @@ xfs_bmap_btalloc(
2688 pag = xfs_perag_get(mp, ag); 2684 pag = xfs_perag_get(mp, ag);
2689 } 2685 }
2690 xfs_perag_put(pag); 2686 xfs_perag_put(pag);
2691 up_read(&mp->m_peraglock);
2692 /* 2687 /*
2693 * Since the above loop did a BUF_TRYLOCK, it is 2688 * Since the above loop did a BUF_TRYLOCK, it is
2694 * possible that there is space for this request. 2689 * possible that there is space for this request.
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
index e61f2aa088a9..914d00d0f119 100644
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -253,8 +253,7 @@ next_ag:
253 253
254/* 254/*
255 * Set the allocation group number for a file or a directory, updating inode 255 * Set the allocation group number for a file or a directory, updating inode
256 * references and per-AG references as appropriate. Must be called with the 256 * references and per-AG references as appropriate.
257 * m_peraglock held in read mode.
258 */ 257 */
259static int 258static int
260_xfs_filestream_update_ag( 259_xfs_filestream_update_ag(
@@ -456,10 +455,10 @@ xfs_filestream_unmount(
456} 455}
457 456
458/* 457/*
459 * If the mount point's m_perag array is going to be reallocated, all 458 * If the mount point's m_perag tree is going to be modified, all
460 * outstanding cache entries must be flushed to avoid accessing reference count 459 * outstanding cache entries must be flushed to avoid accessing reference count
461 * addresses that have been freed. The call to xfs_filestream_flush() must be 460 * addresses that have been freed. The call to xfs_filestream_flush() must be
462 * made inside the block that holds the m_peraglock in write mode to do the 461 * made inside the block that holds the m_perag_lock in write mode to do the
463 * reallocation. 462 * reallocation.
464 */ 463 */
465void 464void
@@ -531,7 +530,6 @@ xfs_filestream_associate(
531 530
532 mp = pip->i_mount; 531 mp = pip->i_mount;
533 cache = mp->m_filestream; 532 cache = mp->m_filestream;
534 down_read(&mp->m_peraglock);
535 533
536 /* 534 /*
537 * We have a problem, Houston. 535 * We have a problem, Houston.
@@ -548,10 +546,8 @@ xfs_filestream_associate(
548 * 546 *
549 * So, if we can't get the iolock without sleeping then just give up 547 * So, if we can't get the iolock without sleeping then just give up
550 */ 548 */
551 if (!xfs_ilock_nowait(pip, XFS_IOLOCK_EXCL)) { 549 if (!xfs_ilock_nowait(pip, XFS_IOLOCK_EXCL))
552 up_read(&mp->m_peraglock);
553 return 1; 550 return 1;
554 }
555 551
556 /* If the parent directory is already in the cache, use its AG. */ 552 /* If the parent directory is already in the cache, use its AG. */
557 item = xfs_mru_cache_lookup(cache, pip->i_ino); 553 item = xfs_mru_cache_lookup(cache, pip->i_ino);
@@ -606,7 +602,6 @@ exit_did_pick:
606 602
607exit: 603exit:
608 xfs_iunlock(pip, XFS_IOLOCK_EXCL); 604 xfs_iunlock(pip, XFS_IOLOCK_EXCL);
609 up_read(&mp->m_peraglock);
610 return -err; 605 return -err;
611} 606}
612 607
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index a13919a6a364..37a6f62c57b6 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -167,27 +167,14 @@ xfs_growfs_data_private(
167 } 167 }
168 new = nb - mp->m_sb.sb_dblocks; 168 new = nb - mp->m_sb.sb_dblocks;
169 oagcount = mp->m_sb.sb_agcount; 169 oagcount = mp->m_sb.sb_agcount;
170 if (nagcount > oagcount) {
171 void *new_perag, *old_perag;
172
173 xfs_filestream_flush(mp);
174
175 new_perag = kmem_zalloc(sizeof(xfs_perag_t) * nagcount,
176 KM_MAYFAIL);
177 if (!new_perag)
178 return XFS_ERROR(ENOMEM);
179
180 down_write(&mp->m_peraglock);
181 memcpy(new_perag, mp->m_perag, sizeof(xfs_perag_t) * oagcount);
182 old_perag = mp->m_perag;
183 mp->m_perag = new_perag;
184
185 mp->m_flags |= XFS_MOUNT_32BITINODES;
186 nagimax = xfs_initialize_perag(mp, nagcount);
187 up_write(&mp->m_peraglock);
188 170
189 kmem_free(old_perag); 171 /* allocate the new per-ag structures */
172 if (nagcount > oagcount) {
173 error = xfs_initialize_perag(mp, nagcount, &nagimax);
174 if (error)
175 return error;
190 } 176 }
177
191 tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFS); 178 tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFS);
192 tp->t_flags |= XFS_TRANS_RESERVE; 179 tp->t_flags |= XFS_TRANS_RESERVE;
193 if ((error = xfs_trans_reserve(tp, XFS_GROWFS_SPACE_RES(mp), 180 if ((error = xfs_trans_reserve(tp, XFS_GROWFS_SPACE_RES(mp),
@@ -196,6 +183,11 @@ xfs_growfs_data_private(
196 return error; 183 return error;
197 } 184 }
198 185
186 /*
187 * Write new AG headers to disk. Non-transactional, but written
188 * synchronously so they are completed prior to the growfs transaction
189 * being logged.
190 */
199 nfree = 0; 191 nfree = 0;
200 for (agno = nagcount - 1; agno >= oagcount; agno--, new -= agsize) { 192 for (agno = nagcount - 1; agno >= oagcount; agno--, new -= agsize) {
201 /* 193 /*
@@ -359,6 +351,12 @@ xfs_growfs_data_private(
359 goto error0; 351 goto error0;
360 } 352 }
361 } 353 }
354
355 /*
356 * Update changed superblock fields transactionally. These are not
357 * seen by the rest of the world until the transaction commit applies
358 * them atomically to the superblock.
359 */
362 if (nagcount > oagcount) 360 if (nagcount > oagcount)
363 xfs_trans_mod_sb(tp, XFS_TRANS_SB_AGCOUNT, nagcount - oagcount); 361 xfs_trans_mod_sb(tp, XFS_TRANS_SB_AGCOUNT, nagcount - oagcount);
364 if (nb > mp->m_sb.sb_dblocks) 362 if (nb > mp->m_sb.sb_dblocks)
@@ -369,9 +367,9 @@ xfs_growfs_data_private(
369 if (dpct) 367 if (dpct)
370 xfs_trans_mod_sb(tp, XFS_TRANS_SB_IMAXPCT, dpct); 368 xfs_trans_mod_sb(tp, XFS_TRANS_SB_IMAXPCT, dpct);
371 error = xfs_trans_commit(tp, 0); 369 error = xfs_trans_commit(tp, 0);
372 if (error) { 370 if (error)
373 return error; 371 return error;
374 } 372
375 /* New allocation groups fully initialized, so update mount struct */ 373 /* New allocation groups fully initialized, so update mount struct */
376 if (nagimax) 374 if (nagimax)
377 mp->m_maxagi = nagimax; 375 mp->m_maxagi = nagimax;
@@ -381,6 +379,8 @@ xfs_growfs_data_private(
381 mp->m_maxicount = icount << mp->m_sb.sb_inopblog; 379 mp->m_maxicount = icount << mp->m_sb.sb_inopblog;
382 } else 380 } else
383 mp->m_maxicount = 0; 381 mp->m_maxicount = 0;
382
383 /* update secondary superblocks. */
384 for (agno = 1; agno < nagcount; agno++) { 384 for (agno = 1; agno < nagcount; agno++) {
385 error = xfs_read_buf(mp, mp->m_ddev_targp, 385 error = xfs_read_buf(mp, mp->m_ddev_targp,
386 XFS_AGB_TO_DADDR(mp, agno, XFS_SB_BLOCK(mp)), 386 XFS_AGB_TO_DADDR(mp, agno, XFS_SB_BLOCK(mp)),
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index 884ee1367f46..52c9d006c0e6 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -383,11 +383,9 @@ xfs_ialloc_ag_alloc(
383 newino = XFS_OFFBNO_TO_AGINO(args.mp, args.agbno, 0); 383 newino = XFS_OFFBNO_TO_AGINO(args.mp, args.agbno, 0);
384 be32_add_cpu(&agi->agi_count, newlen); 384 be32_add_cpu(&agi->agi_count, newlen);
385 be32_add_cpu(&agi->agi_freecount, newlen); 385 be32_add_cpu(&agi->agi_freecount, newlen);
386 down_read(&args.mp->m_peraglock);
387 pag = xfs_perag_get(args.mp, agno); 386 pag = xfs_perag_get(args.mp, agno);
388 pag->pagi_freecount += newlen; 387 pag->pagi_freecount += newlen;
389 xfs_perag_put(pag); 388 xfs_perag_put(pag);
390 up_read(&args.mp->m_peraglock);
391 agi->agi_newino = cpu_to_be32(newino); 389 agi->agi_newino = cpu_to_be32(newino);
392 390
393 /* 391 /*
@@ -489,7 +487,6 @@ xfs_ialloc_ag_select(
489 */ 487 */
490 agno = pagno; 488 agno = pagno;
491 flags = XFS_ALLOC_FLAG_TRYLOCK; 489 flags = XFS_ALLOC_FLAG_TRYLOCK;
492 down_read(&mp->m_peraglock);
493 for (;;) { 490 for (;;) {
494 pag = xfs_perag_get(mp, agno); 491 pag = xfs_perag_get(mp, agno);
495 if (!pag->pagi_init) { 492 if (!pag->pagi_init) {
@@ -531,7 +528,6 @@ xfs_ialloc_ag_select(
531 goto nextag; 528 goto nextag;
532 } 529 }
533 xfs_perag_put(pag); 530 xfs_perag_put(pag);
534 up_read(&mp->m_peraglock);
535 return agbp; 531 return agbp;
536 } 532 }
537 } 533 }
@@ -544,18 +540,14 @@ nextag:
544 * No point in iterating over the rest, if we're shutting 540 * No point in iterating over the rest, if we're shutting
545 * down. 541 * down.
546 */ 542 */
547 if (XFS_FORCED_SHUTDOWN(mp)) { 543 if (XFS_FORCED_SHUTDOWN(mp))
548 up_read(&mp->m_peraglock);
549 return NULL; 544 return NULL;
550 }
551 agno++; 545 agno++;
552 if (agno >= agcount) 546 if (agno >= agcount)
553 agno = 0; 547 agno = 0;
554 if (agno == pagno) { 548 if (agno == pagno) {
555 if (flags == 0) { 549 if (flags == 0)
556 up_read(&mp->m_peraglock);
557 return NULL; 550 return NULL;
558 }
559 flags = 0; 551 flags = 0;
560 } 552 }
561 } 553 }
@@ -777,16 +769,13 @@ nextag:
777 *inop = NULLFSINO; 769 *inop = NULLFSINO;
778 return noroom ? ENOSPC : 0; 770 return noroom ? ENOSPC : 0;
779 } 771 }
780 down_read(&mp->m_peraglock);
781 pag = xfs_perag_get(mp, tagno); 772 pag = xfs_perag_get(mp, tagno);
782 if (pag->pagi_inodeok == 0) { 773 if (pag->pagi_inodeok == 0) {
783 xfs_perag_put(pag); 774 xfs_perag_put(pag);
784 up_read(&mp->m_peraglock);
785 goto nextag; 775 goto nextag;
786 } 776 }
787 error = xfs_ialloc_read_agi(mp, tp, tagno, &agbp); 777 error = xfs_ialloc_read_agi(mp, tp, tagno, &agbp);
788 xfs_perag_put(pag); 778 xfs_perag_put(pag);
789 up_read(&mp->m_peraglock);
790 if (error) 779 if (error)
791 goto nextag; 780 goto nextag;
792 agi = XFS_BUF_TO_AGI(agbp); 781 agi = XFS_BUF_TO_AGI(agbp);
@@ -1015,9 +1004,7 @@ alloc_inode:
1015 goto error0; 1004 goto error0;
1016 be32_add_cpu(&agi->agi_freecount, -1); 1005 be32_add_cpu(&agi->agi_freecount, -1);
1017 xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT); 1006 xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT);
1018 down_read(&mp->m_peraglock);
1019 pag->pagi_freecount--; 1007 pag->pagi_freecount--;
1020 up_read(&mp->m_peraglock);
1021 1008
1022 error = xfs_check_agi_freecount(cur, agi); 1009 error = xfs_check_agi_freecount(cur, agi);
1023 if (error) 1010 if (error)
@@ -1100,9 +1087,7 @@ xfs_difree(
1100 /* 1087 /*
1101 * Get the allocation group header. 1088 * Get the allocation group header.
1102 */ 1089 */
1103 down_read(&mp->m_peraglock);
1104 error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); 1090 error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
1105 up_read(&mp->m_peraglock);
1106 if (error) { 1091 if (error) {
1107 cmn_err(CE_WARN, 1092 cmn_err(CE_WARN,
1108 "xfs_difree: xfs_ialloc_read_agi() returned an error %d on %s. Returning error.", 1093 "xfs_difree: xfs_ialloc_read_agi() returned an error %d on %s. Returning error.",
@@ -1169,11 +1154,9 @@ xfs_difree(
1169 be32_add_cpu(&agi->agi_count, -ilen); 1154 be32_add_cpu(&agi->agi_count, -ilen);
1170 be32_add_cpu(&agi->agi_freecount, -(ilen - 1)); 1155 be32_add_cpu(&agi->agi_freecount, -(ilen - 1));
1171 xfs_ialloc_log_agi(tp, agbp, XFS_AGI_COUNT | XFS_AGI_FREECOUNT); 1156 xfs_ialloc_log_agi(tp, agbp, XFS_AGI_COUNT | XFS_AGI_FREECOUNT);
1172 down_read(&mp->m_peraglock);
1173 pag = xfs_perag_get(mp, agno); 1157 pag = xfs_perag_get(mp, agno);
1174 pag->pagi_freecount -= ilen - 1; 1158 pag->pagi_freecount -= ilen - 1;
1175 xfs_perag_put(pag); 1159 xfs_perag_put(pag);
1176 up_read(&mp->m_peraglock);
1177 xfs_trans_mod_sb(tp, XFS_TRANS_SB_ICOUNT, -ilen); 1160 xfs_trans_mod_sb(tp, XFS_TRANS_SB_ICOUNT, -ilen);
1178 xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -(ilen - 1)); 1161 xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -(ilen - 1));
1179 1162
@@ -1202,11 +1185,9 @@ xfs_difree(
1202 */ 1185 */
1203 be32_add_cpu(&agi->agi_freecount, 1); 1186 be32_add_cpu(&agi->agi_freecount, 1);
1204 xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT); 1187 xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT);
1205 down_read(&mp->m_peraglock);
1206 pag = xfs_perag_get(mp, agno); 1188 pag = xfs_perag_get(mp, agno);
1207 pag->pagi_freecount++; 1189 pag->pagi_freecount++;
1208 xfs_perag_put(pag); 1190 xfs_perag_put(pag);
1209 up_read(&mp->m_peraglock);
1210 xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, 1); 1191 xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, 1);
1211 } 1192 }
1212 1193
@@ -1328,9 +1309,7 @@ xfs_imap(
1328 xfs_buf_t *agbp; /* agi buffer */ 1309 xfs_buf_t *agbp; /* agi buffer */
1329 int i; /* temp state */ 1310 int i; /* temp state */
1330 1311
1331 down_read(&mp->m_peraglock);
1332 error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); 1312 error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
1333 up_read(&mp->m_peraglock);
1334 if (error) { 1313 if (error) {
1335 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: " 1314 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: "
1336 "xfs_ialloc_read_agi() returned " 1315 "xfs_ialloc_read_agi() returned "
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index 62efab2f3839..940307a6a60b 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -420,9 +420,7 @@ xfs_bulkstat(
420 while (XFS_BULKSTAT_UBLEFT(ubleft) && agno < mp->m_sb.sb_agcount) { 420 while (XFS_BULKSTAT_UBLEFT(ubleft) && agno < mp->m_sb.sb_agcount) {
421 cond_resched(); 421 cond_resched();
422 bp = NULL; 422 bp = NULL;
423 down_read(&mp->m_peraglock);
424 error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp); 423 error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp);
425 up_read(&mp->m_peraglock);
426 if (error) { 424 if (error) {
427 /* 425 /*
428 * Skip this allocation group and go to the next one. 426 * Skip this allocation group and go to the next one.
@@ -849,9 +847,7 @@ xfs_inumbers(
849 agbp = NULL; 847 agbp = NULL;
850 while (left > 0 && agno < mp->m_sb.sb_agcount) { 848 while (left > 0 && agno < mp->m_sb.sb_agcount) {
851 if (agbp == NULL) { 849 if (agbp == NULL) {
852 down_read(&mp->m_peraglock);
853 error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp); 850 error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp);
854 up_read(&mp->m_peraglock);
855 if (error) { 851 if (error) {
856 /* 852 /*
857 * If we can't read the AGI of this ag, 853 * If we can't read the AGI of this ag,
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 9055b60730d0..c04dd83cb57c 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -209,13 +209,16 @@ STATIC void
209xfs_free_perag( 209xfs_free_perag(
210 xfs_mount_t *mp) 210 xfs_mount_t *mp)
211{ 211{
212 if (mp->m_perag) { 212 xfs_agnumber_t agno;
213 int agno; 213 struct xfs_perag *pag;
214 214
215 for (agno = 0; agno < mp->m_maxagi; agno++) 215 for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
216 if (mp->m_perag[agno].pagb_list) 216 spin_lock(&mp->m_perag_lock);
217 kmem_free(mp->m_perag[agno].pagb_list); 217 pag = radix_tree_delete(&mp->m_perag_tree, agno);
218 kmem_free(mp->m_perag); 218 spin_unlock(&mp->m_perag_lock);
219 ASSERT(pag);
220 kmem_free(pag->pagb_list);
221 kmem_free(pag);
219 } 222 }
220} 223}
221 224
@@ -389,10 +392,11 @@ xfs_initialize_perag_icache(
389 } 392 }
390} 393}
391 394
392xfs_agnumber_t 395int
393xfs_initialize_perag( 396xfs_initialize_perag(
394 xfs_mount_t *mp, 397 xfs_mount_t *mp,
395 xfs_agnumber_t agcount) 398 xfs_agnumber_t agcount,
399 xfs_agnumber_t *maxagi)
396{ 400{
397 xfs_agnumber_t index, max_metadata; 401 xfs_agnumber_t index, max_metadata;
398 xfs_perag_t *pag; 402 xfs_perag_t *pag;
@@ -405,6 +409,33 @@ xfs_initialize_perag(
405 agino = XFS_OFFBNO_TO_AGINO(mp, sbp->sb_agblocks - 1, 0); 409 agino = XFS_OFFBNO_TO_AGINO(mp, sbp->sb_agblocks - 1, 0);
406 ino = XFS_AGINO_TO_INO(mp, agcount - 1, agino); 410 ino = XFS_AGINO_TO_INO(mp, agcount - 1, agino);
407 411
412 /*
413 * Walk the current per-ag tree so we don't try to initialise AGs
414 * that already exist (growfs case). Allocate and insert all the
415 * AGs we don't find ready for initialisation.
416 */
417 for (index = 0; index < agcount; index++) {
418 pag = xfs_perag_get(mp, index);
419 if (pag) {
420 xfs_perag_put(pag);
421 continue;
422 }
423 pag = kmem_zalloc(sizeof(*pag), KM_MAYFAIL);
424 if (!pag)
425 return -ENOMEM;
426 if (radix_tree_preload(GFP_NOFS))
427 return -ENOMEM;
428 spin_lock(&mp->m_perag_lock);
429 if (radix_tree_insert(&mp->m_perag_tree, index, pag)) {
430 BUG();
431 spin_unlock(&mp->m_perag_lock);
432 kmem_free(pag);
433 return -EEXIST;
434 }
435 spin_unlock(&mp->m_perag_lock);
436 radix_tree_preload_end();
437 }
438
408 /* Clear the mount flag if no inode can overflow 32 bits 439 /* Clear the mount flag if no inode can overflow 32 bits
409 * on this filesystem, or if specifically requested.. 440 * on this filesystem, or if specifically requested..
410 */ 441 */
@@ -454,7 +485,9 @@ xfs_initialize_perag(
454 xfs_perag_put(pag); 485 xfs_perag_put(pag);
455 } 486 }
456 } 487 }
457 return index; 488 if (maxagi)
489 *maxagi = index;
490 return 0;
458} 491}
459 492
460void 493void
@@ -1155,13 +1188,13 @@ xfs_mountfs(
1155 /* 1188 /*
1156 * Allocate and initialize the per-ag data. 1189 * Allocate and initialize the per-ag data.
1157 */ 1190 */
1158 init_rwsem(&mp->m_peraglock); 1191 spin_lock_init(&mp->m_perag_lock);
1159 mp->m_perag = kmem_zalloc(sbp->sb_agcount * sizeof(xfs_perag_t), 1192 INIT_RADIX_TREE(&mp->m_perag_tree, GFP_NOFS);
1160 KM_MAYFAIL); 1193 error = xfs_initialize_perag(mp, sbp->sb_agcount, &mp->m_maxagi);
1161 if (!mp->m_perag) 1194 if (error) {
1195 cmn_err(CE_WARN, "XFS: Failed per-ag init: %d", error);
1162 goto out_remove_uuid; 1196 goto out_remove_uuid;
1163 1197 }
1164 mp->m_maxagi = xfs_initialize_perag(mp, sbp->sb_agcount);
1165 1198
1166 if (!sbp->sb_logblocks) { 1199 if (!sbp->sb_logblocks) {
1167 cmn_err(CE_WARN, "XFS: no log defined"); 1200 cmn_err(CE_WARN, "XFS: no log defined");
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index f8a68a2319b5..cfa7a5d22e72 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -207,8 +207,8 @@ typedef struct xfs_mount {
207 uint m_ag_maxlevels; /* XFS_AG_MAXLEVELS */ 207 uint m_ag_maxlevels; /* XFS_AG_MAXLEVELS */
208 uint m_bm_maxlevels[2]; /* XFS_BM_MAXLEVELS */ 208 uint m_bm_maxlevels[2]; /* XFS_BM_MAXLEVELS */
209 uint m_in_maxlevels; /* max inobt btree levels. */ 209 uint m_in_maxlevels; /* max inobt btree levels. */
210 struct xfs_perag *m_perag; /* per-ag accounting info */ 210 struct radix_tree_root m_perag_tree; /* per-ag accounting info */
211 struct rw_semaphore m_peraglock; /* lock for m_perag (pointer) */ 211 spinlock_t m_perag_lock; /* lock for m_perag_tree */
212 struct mutex m_growlock; /* growfs mutex */ 212 struct mutex m_growlock; /* growfs mutex */
213 int m_fixedfsid[2]; /* unchanged for life of FS */ 213 int m_fixedfsid[2]; /* unchanged for life of FS */
214 uint m_dmevmask; /* DMI events for this FS */ 214 uint m_dmevmask; /* DMI events for this FS */
@@ -389,7 +389,12 @@ xfs_daddr_to_agbno(struct xfs_mount *mp, xfs_daddr_t d)
389static inline struct xfs_perag * 389static inline struct xfs_perag *
390xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno) 390xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno)
391{ 391{
392 return &mp->m_perag[agno]; 392 struct xfs_perag *pag;
393
394 spin_lock(&mp->m_perag_lock);
395 pag = radix_tree_lookup(&mp->m_perag_tree, agno);
396 spin_unlock(&mp->m_perag_lock);
397 return pag;
393} 398}
394 399
395static inline void 400static inline void
@@ -450,7 +455,8 @@ extern struct xfs_dmops xfs_dmcore_xfs;
450#endif /* __KERNEL__ */ 455#endif /* __KERNEL__ */
451 456
452extern void xfs_mod_sb(struct xfs_trans *, __int64_t); 457extern void xfs_mod_sb(struct xfs_trans *, __int64_t);
453extern xfs_agnumber_t xfs_initialize_perag(struct xfs_mount *, xfs_agnumber_t); 458extern int xfs_initialize_perag(struct xfs_mount *, xfs_agnumber_t,
459 xfs_agnumber_t *);
454extern void xfs_sb_from_disk(struct xfs_sb *, struct xfs_dsb *); 460extern void xfs_sb_from_disk(struct xfs_sb *, struct xfs_dsb *);
455extern void xfs_sb_to_disk(struct xfs_dsb *, struct xfs_sb *, __int64_t); 461extern void xfs_sb_to_disk(struct xfs_dsb *, struct xfs_sb *, __int64_t);
456 462