aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
author Dave Chinner <dchinner@redhat.com> 2013-10-29 07:11:57 -0400
committer Ben Myers <bpm@sgi.com> 2013-10-30 16:44:51 -0400
commit ad22c7a043c2cc6792820e6c5da699935933e87d (patch)
tree e129dd4d3d31de48b02feb0378c3677832d6594a /fs
parent 632b89e82bf1c04c251924b49adc689f7b346321 (diff)
xfs: prevent stack overflows from page cache allocation
Page cache allocation doesn't always go through ->begin_write and hence we don't always get the opportunity to set the allocation context to GFP_NOFS. Failing to do this means we open up the direct reclaim stack to recurse into the filesystem and consume a significant amount of stack. On RHEL6.4 kernels we are seeing ra_submit() and generic_file_splice_read() from an nfsd context recursing into the filesystem via the inode cache shrinker and evicting inodes. This is causing truncation to be run (e.g. EOF block freeing) and causing bmap btree block merges and free space btree block splits to occur. These btree manipulations are occurring with the call chain already 30 functions deep and hence there is not enough stack space to complete such operations. To avoid these specific overruns, we need to prevent the page cache allocation from recursing via direct reclaim. We can do that because the allocation functions take the allocation context from that which is stored in the mapping for the inode. We don't set that right now, so the default is GFP_HIGHUSER_MOVABLE, which is effectively a GFP_KERNEL context. We need it to be the equivalent of GFP_NOFS, so when we initialise an inode, set the mapping gfp mask appropriately. This makes the use of AOP_FLAG_NOFS redundant from other parts of the XFS IO path, so get rid of it. Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Ben Myers <bpm@sgi.com>
Diffstat (limited to 'fs')
-rw-r--r-- fs/xfs/xfs_aops.c 3
-rw-r--r-- fs/xfs/xfs_iops.c 9
2 files changed, 10 insertions, 2 deletions
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index eb310caf13b1..71c8c9d2b882 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -1572,8 +1572,7 @@ xfs_vm_write_begin(
1572 1572
1573 ASSERT(len <= PAGE_CACHE_SIZE); 1573 ASSERT(len <= PAGE_CACHE_SIZE);
1574 1574
1575 page = grab_cache_page_write_begin(mapping, index, 1575 page = grab_cache_page_write_begin(mapping, index, flags);
1576 flags | AOP_FLAG_NOFS);
1577 if (!page) 1576 if (!page)
1578 return -ENOMEM; 1577 return -ENOMEM;
1579 1578
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index c4cd6d47f523..27e0e544e963 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -1168,6 +1168,7 @@ xfs_setup_inode(
1168 struct xfs_inode *ip) 1168 struct xfs_inode *ip)
1169{ 1169{
1170 struct inode *inode = &ip->i_vnode; 1170 struct inode *inode = &ip->i_vnode;
1171 gfp_t gfp_mask;
1171 1172
1172 inode->i_ino = ip->i_ino; 1173 inode->i_ino = ip->i_ino;
1173 inode->i_state = I_NEW; 1174 inode->i_state = I_NEW;
@@ -1230,6 +1231,14 @@ xfs_setup_inode(
1230 } 1231 }
1231 1232
1232 /* 1233 /*
1234 * Ensure all page cache allocations are done from GFP_NOFS context to
1235 * prevent direct reclaim recursion back into the filesystem and blowing
1236 * stacks or deadlocking.
1237 */
1238 gfp_mask = mapping_gfp_mask(inode->i_mapping);
1239 mapping_set_gfp_mask(inode->i_mapping, (gfp_mask & ~(__GFP_FS)));
1240
1241 /*
1233 * If there is no attribute fork no ACL can exist on this inode, 1242 * If there is no attribute fork no ACL can exist on this inode,
1234 * and it can't have any file capabilities attached to it either. 1243 * and it can't have any file capabilities attached to it either.
1235 */ 1244 */