author      Dave Chinner <dchinner@redhat.com>   2012-10-04 21:06:59 -0400
committer   Ben Myers <bpm@sgi.com>              2012-10-18 18:42:48 -0400
commit      e04426b9202bccd4cfcbc70b2fa2aeca1c86d8f5 (patch)
tree        2bab7921c9327c508d2ea207c9ef781a5df61874 /fs
parent      2455881c0b52f87be539c4c7deab1afff4d8a560 (diff)
xfs: move allocation stack switch up to xfs_bmapi_allocate
Switching stacks at xfs_alloc_vextent() can cause deadlocks when we run out of worker threads on the allocation workqueue. This can occur because xfs_bmap_btalloc() can make multiple calls to xfs_alloc_vextent(), and even if xfs_alloc_vextent() fails it can return with the AGF locked in the current allocation transaction.

If we then need to make another allocation, and all the allocation worker contexts are exhausted because they are blocked waiting for the AGF lock, the holder of the AGF cannot get its xfs_alloc_vextent() work completed to release the AGF. Hence allocation effectively deadlocks.

To avoid this, move the stack switch one layer up to xfs_bmapi_allocate() so that all of the allocation attempts in a single switched-stack transaction occur in a single worker context. This avoids the problem of an allocation being blocked waiting for a worker thread whilst holding the AGF.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Mark Tinguely <tinguely@sgi.com>
Signed-off-by: Ben Myers <bpm@sgi.com>
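For reference, the stack switch being relocated is the usual "queue the work to a workqueue and wait on an on-stack completion" pattern, now done once per switched transaction in the new xfs_bmapi_allocate() wrapper in the diff below. What follows is only a minimal, self-contained sketch of that pattern: the defer_* names and the wq argument are illustrative and not part of the patch, and the PF_FSTRANS flag handling done by the real worker is omitted.

#include <linux/kernel.h>
#include <linux/workqueue.h>
#include <linux/completion.h>

struct defer_args {
	struct completion	*done;
	struct work_struct	work;
	int			result;
};

/* stand-in for the deep-stack body (the real allocation code would go here) */
static int defer_run_sync(struct defer_args *args)
{
	return 0;
}

/* runs on the workqueue, i.e. with a fresh worker stack */
static void defer_worker(struct work_struct *work)
{
	struct defer_args *args = container_of(work, struct defer_args, work);

	args->result = defer_run_sync(args);
	complete(args->done);
}

/* caller-side wrapper: hand the work off and sleep until it completes */
static int defer_run(struct defer_args *args, struct workqueue_struct *wq)
{
	DECLARE_COMPLETION_ONSTACK(done);

	args->done = &done;
	INIT_WORK_ONSTACK(&args->work, defer_worker);
	queue_work(wq, &args->work);
	wait_for_completion(&done);
	return args->result;
}

Moving this up one layer fixes the deadlock because every xfs_alloc_vextent() call made under a single switched-stack transaction now runs inside the same worker context, so the AGF holder never has to wait for a second worker to become available.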
Diffstat (limited to 'fs')
-rw-r--r--   fs/xfs/xfs_alloc.c   42
-rw-r--r--   fs/xfs/xfs_alloc.h    4
-rw-r--r--   fs/xfs/xfs_bmap.c    60
-rw-r--r--   fs/xfs/xfs_bmap.h     4
4 files changed, 54 insertions, 56 deletions
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index 43f791bcd8b1..335206a9c698 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -2208,7 +2208,7 @@ xfs_alloc_read_agf(
  * group or loop over the allocation groups to find the result.
  */
 int				/* error */
-__xfs_alloc_vextent(
+xfs_alloc_vextent(
 	xfs_alloc_arg_t	*args)	/* allocation argument structure */
 {
 	xfs_agblock_t	agsize;	/* allocation group size */
@@ -2418,46 +2418,6 @@ error0:
 	return error;
 }
 
-static void
-xfs_alloc_vextent_worker(
-	struct work_struct	*work)
-{
-	struct xfs_alloc_arg	*args = container_of(work,
-						struct xfs_alloc_arg, work);
-	unsigned long		pflags;
-
-	/* we are in a transaction context here */
-	current_set_flags_nested(&pflags, PF_FSTRANS);
-
-	args->result = __xfs_alloc_vextent(args);
-	complete(args->done);
-
-	current_restore_flags_nested(&pflags, PF_FSTRANS);
-}
-
-/*
- * Data allocation requests often come in with little stack to work on. Push
- * them off to a worker thread so there is lots of stack to use. Metadata
- * requests, OTOH, are generally from low stack usage paths, so avoid the
- * context switch overhead here.
- */
-int
-xfs_alloc_vextent(
-	struct xfs_alloc_arg	*args)
-{
-	DECLARE_COMPLETION_ONSTACK(done);
-
-	if (!args->stack_switch)
-		return __xfs_alloc_vextent(args);
-
-
-	args->done = &done;
-	INIT_WORK_ONSTACK(&args->work, xfs_alloc_vextent_worker);
-	queue_work(xfs_alloc_wq, &args->work);
-	wait_for_completion(&done);
-	return args->result;
-}
-
 /*
  * Free an extent.
  * Just break up the extent address and hand off to xfs_free_ag_extent
diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/xfs_alloc.h
index ef7d4885dc2d..feacb061bab7 100644
--- a/fs/xfs/xfs_alloc.h
+++ b/fs/xfs/xfs_alloc.h
@@ -120,10 +120,6 @@ typedef struct xfs_alloc_arg {
 	char		isfl;		/* set if is freelist blocks - !acctg */
 	char		userdata;	/* set if this is user data */
 	xfs_fsblock_t	firstblock;	/* io first block allocated */
-	struct completion *done;
-	struct work_struct work;
-	int		result;
-	char		stack_switch;
 } xfs_alloc_arg_t;
 
 /*
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 91259554df8b..83d0cf3df930 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -2441,7 +2441,6 @@ xfs_bmap_btalloc(
 	args.tp = ap->tp;
 	args.mp = mp;
 	args.fsbno = ap->blkno;
-	args.stack_switch = ap->stack_switch;
 
 	/* Trim the allocation back to the maximum an AG can fit. */
 	args.maxlen = MIN(ap->length, XFS_ALLOC_AG_MAX_USABLE(mp));
@@ -4620,12 +4619,11 @@ xfs_bmapi_delay(
 
 
 STATIC int
-xfs_bmapi_allocate(
-	struct xfs_bmalloca	*bma,
-	int			flags)
+__xfs_bmapi_allocate(
+	struct xfs_bmalloca	*bma)
 {
 	struct xfs_mount	*mp = bma->ip->i_mount;
-	int			whichfork = (flags & XFS_BMAPI_ATTRFORK) ?
+	int			whichfork = (bma->flags & XFS_BMAPI_ATTRFORK) ?
 						XFS_ATTR_FORK : XFS_DATA_FORK;
 	struct xfs_ifork	*ifp = XFS_IFORK_PTR(bma->ip, whichfork);
 	int			tmp_logflags = 0;
@@ -4658,25 +4656,25 @@ xfs_bmapi_allocate(
 	 * Indicate if this is the first user data in the file, or just any
 	 * user data.
 	 */
-	if (!(flags & XFS_BMAPI_METADATA)) {
+	if (!(bma->flags & XFS_BMAPI_METADATA)) {
 		bma->userdata = (bma->offset == 0) ?
 			XFS_ALLOC_INITIAL_USER_DATA : XFS_ALLOC_USERDATA;
 	}
 
-	bma->minlen = (flags & XFS_BMAPI_CONTIG) ? bma->length : 1;
+	bma->minlen = (bma->flags & XFS_BMAPI_CONTIG) ? bma->length : 1;
 
 	/*
 	 * Only want to do the alignment at the eof if it is userdata and
 	 * allocation length is larger than a stripe unit.
 	 */
 	if (mp->m_dalign && bma->length >= mp->m_dalign &&
-	    !(flags & XFS_BMAPI_METADATA) && whichfork == XFS_DATA_FORK) {
+	    !(bma->flags & XFS_BMAPI_METADATA) && whichfork == XFS_DATA_FORK) {
 		error = xfs_bmap_isaeof(bma, whichfork);
 		if (error)
 			return error;
 	}
 
-	if (flags & XFS_BMAPI_STACK_SWITCH)
+	if (bma->flags & XFS_BMAPI_STACK_SWITCH)
 		bma->stack_switch = 1;
 
 	error = xfs_bmap_alloc(bma);
@@ -4713,7 +4711,7 @@ xfs_bmapi_allocate(
 	 * A wasdelay extent has been initialized, so shouldn't be flagged
 	 * as unwritten.
 	 */
-	if (!bma->wasdel && (flags & XFS_BMAPI_PREALLOC) &&
+	if (!bma->wasdel && (bma->flags & XFS_BMAPI_PREALLOC) &&
 	    xfs_sb_version_hasextflgbit(&mp->m_sb))
 		bma->got.br_state = XFS_EXT_UNWRITTEN;
 
@@ -4741,6 +4739,45 @@ xfs_bmapi_allocate(
 	return 0;
 }
 
+static void
+xfs_bmapi_allocate_worker(
+	struct work_struct	*work)
+{
+	struct xfs_bmalloca	*args = container_of(work,
+					struct xfs_bmalloca, work);
+	unsigned long		pflags;
+
+	/* we are in a transaction context here */
+	current_set_flags_nested(&pflags, PF_FSTRANS);
+
+	args->result = __xfs_bmapi_allocate(args);
+	complete(args->done);
+
+	current_restore_flags_nested(&pflags, PF_FSTRANS);
+}
+
+/*
+ * Some allocation requests often come in with little stack to work on. Push
+ * them off to a worker thread so there is lots of stack to use. Otherwise just
+ * call directly to avoid the context switch overhead here.
+ */
+int
+xfs_bmapi_allocate(
+	struct xfs_bmalloca	*args)
+{
+	DECLARE_COMPLETION_ONSTACK(done);
+
+	if (!args->stack_switch)
+		return __xfs_bmapi_allocate(args);
+
+
+	args->done = &done;
+	INIT_WORK_ONSTACK(&args->work, xfs_bmapi_allocate_worker);
+	queue_work(xfs_alloc_wq, &args->work);
+	wait_for_completion(&done);
+	return args->result;
+}
+
 STATIC int
 xfs_bmapi_convert_unwritten(
 	struct xfs_bmalloca	*bma,
@@ -4926,6 +4963,7 @@ xfs_bmapi_write(
 			bma.conv = !!(flags & XFS_BMAPI_CONVERT);
 			bma.wasdel = wasdelay;
 			bma.offset = bno;
+			bma.flags = flags;
 
 			/*
 			 * There's a 32/64 bit type mismatch between the
@@ -4941,7 +4979,7 @@ xfs_bmapi_write(
 
 			ASSERT(len > 0);
 			ASSERT(bma.length > 0);
-			error = xfs_bmapi_allocate(&bma, flags);
+			error = xfs_bmapi_allocate(&bma);
 			if (error)
 				goto error0;
 			if (bma.blkno == NULLFSBLOCK)
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h
index b68c598034c1..5f469c3516eb 100644
--- a/fs/xfs/xfs_bmap.h
+++ b/fs/xfs/xfs_bmap.h
@@ -136,6 +136,10 @@ typedef struct xfs_bmalloca {
 	char			aeof;	/* allocated space at eof */
 	char			conv;	/* overwriting unwritten extents */
 	char			stack_switch;
+	int			flags;
+	struct completion	*done;
+	struct work_struct	work;
+	int			result;
 } xfs_bmalloca_t;
 
 /*