aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2014-07-18 12:21:43 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2014-07-18 12:21:43 -0400
commit847f56eb0e08fde16e721b76dcb2c70da0c730bf (patch)
tree1bfb37f319630094bfeb040b561be27a4da33a6a
parent59ca9ee42838d0f597137cf811e47eaf42fdcb69 (diff)
parent03e01349c654fbdea80d3d9b4ab599244eb55bb7 (diff)
Merge tag 'xfs-for-linus-3.16-rc5' of git://oss.sgi.com/xfs/xfs
Pull xfs fixes from Dave Chinner: "Fixes for low memory perforamnce regressions and a quota inode handling regression. These are regression fixes for issues recently introduced - the change in the stack switch location is fairly important, so I've held off sending this update until I was sure that it still addresses the stack usage problem the original solved. So while the commits in the xfs tree are recent, it has been under tested for several weeks now" * tag 'xfs-for-linus-3.16-rc5' of git://oss.sgi.com/xfs/xfs: xfs: null unused quota inodes when quota is on xfs: refine the allocation stack switch Revert "xfs: block allocation work needs to be kswapd aware"
-rw-r--r--fs/xfs/xfs_bmap.c7
-rw-r--r--fs/xfs/xfs_bmap.h4
-rw-r--r--fs/xfs/xfs_bmap_util.c53
-rw-r--r--fs/xfs/xfs_bmap_util.h4
-rw-r--r--fs/xfs/xfs_btree.c82
-rw-r--r--fs/xfs/xfs_iomap.c3
-rw-r--r--fs/xfs/xfs_sb.c25
7 files changed, 106 insertions, 72 deletions
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 96175df211b1..75c3fe5f3d9d 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -4298,8 +4298,8 @@ xfs_bmapi_delay(
4298} 4298}
4299 4299
4300 4300
4301int 4301static int
4302__xfs_bmapi_allocate( 4302xfs_bmapi_allocate(
4303 struct xfs_bmalloca *bma) 4303 struct xfs_bmalloca *bma)
4304{ 4304{
4305 struct xfs_mount *mp = bma->ip->i_mount; 4305 struct xfs_mount *mp = bma->ip->i_mount;
@@ -4578,9 +4578,6 @@ xfs_bmapi_write(
4578 bma.flist = flist; 4578 bma.flist = flist;
4579 bma.firstblock = firstblock; 4579 bma.firstblock = firstblock;
4580 4580
4581 if (flags & XFS_BMAPI_STACK_SWITCH)
4582 bma.stack_switch = 1;
4583
4584 while (bno < end && n < *nmap) { 4581 while (bno < end && n < *nmap) {
4585 inhole = eof || bma.got.br_startoff > bno; 4582 inhole = eof || bma.got.br_startoff > bno;
4586 wasdelay = !inhole && isnullstartblock(bma.got.br_startblock); 4583 wasdelay = !inhole && isnullstartblock(bma.got.br_startblock);
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h
index 38ba36e9b2f0..b879ca56a64c 100644
--- a/fs/xfs/xfs_bmap.h
+++ b/fs/xfs/xfs_bmap.h
@@ -77,7 +77,6 @@ typedef struct xfs_bmap_free
77 * from written to unwritten, otherwise convert from unwritten to written. 77 * from written to unwritten, otherwise convert from unwritten to written.
78 */ 78 */
79#define XFS_BMAPI_CONVERT 0x040 79#define XFS_BMAPI_CONVERT 0x040
80#define XFS_BMAPI_STACK_SWITCH 0x080
81 80
82#define XFS_BMAPI_FLAGS \ 81#define XFS_BMAPI_FLAGS \
83 { XFS_BMAPI_ENTIRE, "ENTIRE" }, \ 82 { XFS_BMAPI_ENTIRE, "ENTIRE" }, \
@@ -86,8 +85,7 @@ typedef struct xfs_bmap_free
86 { XFS_BMAPI_PREALLOC, "PREALLOC" }, \ 85 { XFS_BMAPI_PREALLOC, "PREALLOC" }, \
87 { XFS_BMAPI_IGSTATE, "IGSTATE" }, \ 86 { XFS_BMAPI_IGSTATE, "IGSTATE" }, \
88 { XFS_BMAPI_CONTIG, "CONTIG" }, \ 87 { XFS_BMAPI_CONTIG, "CONTIG" }, \
89 { XFS_BMAPI_CONVERT, "CONVERT" }, \ 88 { XFS_BMAPI_CONVERT, "CONVERT" }
90 { XFS_BMAPI_STACK_SWITCH, "STACK_SWITCH" }
91 89
92 90
93static inline int xfs_bmapi_aflag(int w) 91static inline int xfs_bmapi_aflag(int w)
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 703b3ec1796c..64731ef3324d 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -249,59 +249,6 @@ xfs_bmap_rtalloc(
249} 249}
250 250
251/* 251/*
252 * Stack switching interfaces for allocation
253 */
254static void
255xfs_bmapi_allocate_worker(
256 struct work_struct *work)
257{
258 struct xfs_bmalloca *args = container_of(work,
259 struct xfs_bmalloca, work);
260 unsigned long pflags;
261 unsigned long new_pflags = PF_FSTRANS;
262
263 /*
264 * we are in a transaction context here, but may also be doing work
265 * in kswapd context, and hence we may need to inherit that state
266 * temporarily to ensure that we don't block waiting for memory reclaim
267 * in any way.
268 */
269 if (args->kswapd)
270 new_pflags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD;
271
272 current_set_flags_nested(&pflags, new_pflags);
273
274 args->result = __xfs_bmapi_allocate(args);
275 complete(args->done);
276
277 current_restore_flags_nested(&pflags, new_pflags);
278}
279
280/*
281 * Some allocation requests often come in with little stack to work on. Push
282 * them off to a worker thread so there is lots of stack to use. Otherwise just
283 * call directly to avoid the context switch overhead here.
284 */
285int
286xfs_bmapi_allocate(
287 struct xfs_bmalloca *args)
288{
289 DECLARE_COMPLETION_ONSTACK(done);
290
291 if (!args->stack_switch)
292 return __xfs_bmapi_allocate(args);
293
294
295 args->done = &done;
296 args->kswapd = current_is_kswapd();
297 INIT_WORK_ONSTACK(&args->work, xfs_bmapi_allocate_worker);
298 queue_work(xfs_alloc_wq, &args->work);
299 wait_for_completion(&done);
300 destroy_work_on_stack(&args->work);
301 return args->result;
302}
303
304/*
305 * Check if the endoff is outside the last extent. If so the caller will grow 252 * Check if the endoff is outside the last extent. If so the caller will grow
306 * the allocation to a stripe unit boundary. All offsets are considered outside 253 * the allocation to a stripe unit boundary. All offsets are considered outside
307 * the end of file for an empty fork, so 1 is returned in *eof in that case. 254 * the end of file for an empty fork, so 1 is returned in *eof in that case.
diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h
index 075f72232a64..2fdb72d2c908 100644
--- a/fs/xfs/xfs_bmap_util.h
+++ b/fs/xfs/xfs_bmap_util.h
@@ -55,8 +55,6 @@ struct xfs_bmalloca {
55 bool userdata;/* set if is user data */ 55 bool userdata;/* set if is user data */
56 bool aeof; /* allocated space at eof */ 56 bool aeof; /* allocated space at eof */
57 bool conv; /* overwriting unwritten extents */ 57 bool conv; /* overwriting unwritten extents */
58 bool stack_switch;
59 bool kswapd; /* allocation in kswapd context */
60 int flags; 58 int flags;
61 struct completion *done; 59 struct completion *done;
62 struct work_struct work; 60 struct work_struct work;
@@ -66,8 +64,6 @@ struct xfs_bmalloca {
66int xfs_bmap_finish(struct xfs_trans **tp, struct xfs_bmap_free *flist, 64int xfs_bmap_finish(struct xfs_trans **tp, struct xfs_bmap_free *flist,
67 int *committed); 65 int *committed);
68int xfs_bmap_rtalloc(struct xfs_bmalloca *ap); 66int xfs_bmap_rtalloc(struct xfs_bmalloca *ap);
69int xfs_bmapi_allocate(struct xfs_bmalloca *args);
70int __xfs_bmapi_allocate(struct xfs_bmalloca *args);
71int xfs_bmap_eof(struct xfs_inode *ip, xfs_fileoff_t endoff, 67int xfs_bmap_eof(struct xfs_inode *ip, xfs_fileoff_t endoff,
72 int whichfork, int *eof); 68 int whichfork, int *eof);
73int xfs_bmap_count_blocks(struct xfs_trans *tp, struct xfs_inode *ip, 69int xfs_bmap_count_blocks(struct xfs_trans *tp, struct xfs_inode *ip,
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c
index bf810c6baf2b..cf893bc1e373 100644
--- a/fs/xfs/xfs_btree.c
+++ b/fs/xfs/xfs_btree.c
@@ -33,6 +33,7 @@
33#include "xfs_error.h" 33#include "xfs_error.h"
34#include "xfs_trace.h" 34#include "xfs_trace.h"
35#include "xfs_cksum.h" 35#include "xfs_cksum.h"
36#include "xfs_alloc.h"
36 37
37/* 38/*
38 * Cursor allocation zone. 39 * Cursor allocation zone.
@@ -2323,7 +2324,7 @@ error1:
2323 * record (to be inserted into parent). 2324 * record (to be inserted into parent).
2324 */ 2325 */
2325STATIC int /* error */ 2326STATIC int /* error */
2326xfs_btree_split( 2327__xfs_btree_split(
2327 struct xfs_btree_cur *cur, 2328 struct xfs_btree_cur *cur,
2328 int level, 2329 int level,
2329 union xfs_btree_ptr *ptrp, 2330 union xfs_btree_ptr *ptrp,
@@ -2503,6 +2504,85 @@ error0:
2503 return error; 2504 return error;
2504} 2505}
2505 2506
2507struct xfs_btree_split_args {
2508 struct xfs_btree_cur *cur;
2509 int level;
2510 union xfs_btree_ptr *ptrp;
2511 union xfs_btree_key *key;
2512 struct xfs_btree_cur **curp;
2513 int *stat; /* success/failure */
2514 int result;
2515 bool kswapd; /* allocation in kswapd context */
2516 struct completion *done;
2517 struct work_struct work;
2518};
2519
2520/*
2521 * Stack switching interfaces for allocation
2522 */
2523static void
2524xfs_btree_split_worker(
2525 struct work_struct *work)
2526{
2527 struct xfs_btree_split_args *args = container_of(work,
2528 struct xfs_btree_split_args, work);
2529 unsigned long pflags;
2530 unsigned long new_pflags = PF_FSTRANS;
2531
2532 /*
2533 * we are in a transaction context here, but may also be doing work
2534 * in kswapd context, and hence we may need to inherit that state
2535 * temporarily to ensure that we don't block waiting for memory reclaim
2536 * in any way.
2537 */
2538 if (args->kswapd)
2539 new_pflags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD;
2540
2541 current_set_flags_nested(&pflags, new_pflags);
2542
2543 args->result = __xfs_btree_split(args->cur, args->level, args->ptrp,
2544 args->key, args->curp, args->stat);
2545 complete(args->done);
2546
2547 current_restore_flags_nested(&pflags, new_pflags);
2548}
2549
2550/*
2551 * BMBT split requests often come in with little stack to work on. Push
2552 * them off to a worker thread so there is lots of stack to use. For the other
2553 * btree types, just call directly to avoid the context switch overhead here.
2554 */
2555STATIC int /* error */
2556xfs_btree_split(
2557 struct xfs_btree_cur *cur,
2558 int level,
2559 union xfs_btree_ptr *ptrp,
2560 union xfs_btree_key *key,
2561 struct xfs_btree_cur **curp,
2562 int *stat) /* success/failure */
2563{
2564 struct xfs_btree_split_args args;
2565 DECLARE_COMPLETION_ONSTACK(done);
2566
2567 if (cur->bc_btnum != XFS_BTNUM_BMAP)
2568 return __xfs_btree_split(cur, level, ptrp, key, curp, stat);
2569
2570 args.cur = cur;
2571 args.level = level;
2572 args.ptrp = ptrp;
2573 args.key = key;
2574 args.curp = curp;
2575 args.stat = stat;
2576 args.done = &done;
2577 args.kswapd = current_is_kswapd();
2578 INIT_WORK_ONSTACK(&args.work, xfs_btree_split_worker);
2579 queue_work(xfs_alloc_wq, &args.work);
2580 wait_for_completion(&done);
2581 destroy_work_on_stack(&args.work);
2582 return args.result;
2583}
2584
2585
2506/* 2586/*
2507 * Copy the old inode root contents into a real block and make the 2587 * Copy the old inode root contents into a real block and make the
2508 * broot point to it. 2588 * broot point to it.
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 6c5eb4c551e3..6d3ec2b6ee29 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -749,8 +749,7 @@ xfs_iomap_write_allocate(
749 * pointer that the caller gave to us. 749 * pointer that the caller gave to us.
750 */ 750 */
751 error = xfs_bmapi_write(tp, ip, map_start_fsb, 751 error = xfs_bmapi_write(tp, ip, map_start_fsb,
752 count_fsb, 752 count_fsb, 0,
753 XFS_BMAPI_STACK_SWITCH,
754 &first_block, 1, 753 &first_block, 1,
755 imap, &nimaps, &free_list); 754 imap, &nimaps, &free_list);
756 if (error) 755 if (error)
diff --git a/fs/xfs/xfs_sb.c b/fs/xfs/xfs_sb.c
index c3453b11f563..7703fa6770ff 100644
--- a/fs/xfs/xfs_sb.c
+++ b/fs/xfs/xfs_sb.c
@@ -483,10 +483,16 @@ xfs_sb_quota_to_disk(
483 } 483 }
484 484
485 /* 485 /*
486 * GQUOTINO and PQUOTINO cannot be used together in versions 486 * GQUOTINO and PQUOTINO cannot be used together in versions of
487 * of superblock that do not have pquotino. from->sb_flags 487 * superblock that do not have pquotino. from->sb_flags tells us which
488 * tells us which quota is active and should be copied to 488 * quota is active and should be copied to disk. If neither are active,
489 * disk. 489 * make sure we write NULLFSINO to the sb_gquotino field as a quota
490 * inode value of "0" is invalid when the XFS_SB_VERSION_QUOTA feature
491 * bit is set.
492 *
493 * Note that we don't need to handle the sb_uquotino or sb_pquotino here
494 * as they do not require any translation. Hence the main sb field loop
495 * will write them appropriately from the in-core superblock.
490 */ 496 */
491 if ((*fields & XFS_SB_GQUOTINO) && 497 if ((*fields & XFS_SB_GQUOTINO) &&
492 (from->sb_qflags & XFS_GQUOTA_ACCT)) 498 (from->sb_qflags & XFS_GQUOTA_ACCT))
@@ -494,6 +500,17 @@ xfs_sb_quota_to_disk(
494 else if ((*fields & XFS_SB_PQUOTINO) && 500 else if ((*fields & XFS_SB_PQUOTINO) &&
495 (from->sb_qflags & XFS_PQUOTA_ACCT)) 501 (from->sb_qflags & XFS_PQUOTA_ACCT))
496 to->sb_gquotino = cpu_to_be64(from->sb_pquotino); 502 to->sb_gquotino = cpu_to_be64(from->sb_pquotino);
503 else {
504 /*
505 * We can't rely on just the fields being logged to tell us
506 * that it is safe to write NULLFSINO - we should only do that
507 * if quotas are not actually enabled. Hence only write
508 * NULLFSINO if both in-core quota inodes are NULL.
509 */
510 if (from->sb_gquotino == NULLFSINO &&
511 from->sb_pquotino == NULLFSINO)
512 to->sb_gquotino = cpu_to_be64(NULLFSINO);
513 }
497 514
498 *fields &= ~(XFS_SB_PQUOTINO | XFS_SB_GQUOTINO); 515 *fields &= ~(XFS_SB_PQUOTINO | XFS_SB_GQUOTINO);
499} 516}