author     Ilya Dryomov <idryomov@gmail.com>  2019-08-30 14:44:24 -0400
committer  Ilya Dryomov <idryomov@gmail.com>  2019-09-16 06:06:25 -0400
commit     10c12851a022662bf6085bd4384b4ebed4c447ce (patch)
tree       609a2973a0bbbd33afc073e6a122ddec858efe3f
parent     6fd4e634835208ddb331234bfa51d75396a5c42c (diff)
libceph: avoid a __vmalloc() deadlock in ceph_kvmalloc()
The vmalloc allocator doesn't fully respect the specified gfp mask:
while the actual pages are allocated as requested, the page table pages
are always allocated with GFP_KERNEL.  ceph_kvmalloc() may be called
with GFP_NOFS and GFP_NOIO (for ceph and rbd respectively), so this may
result in a deadlock.

There is no real reason for the current PAGE_ALLOC_COSTLY_ORDER logic,
it's just something that seemed sensible at the time (ceph_kvmalloc()
predates kvmalloc()).  kvmalloc() is smarter: in an attempt to reduce
long term fragmentation, it first tries to kmalloc non-disruptively.

Switch to kvmalloc() and set the respective PF_MEMALLOC_* flag using
the scope API to avoid the deadlock.  Note that kvmalloc() needs to be
passed GFP_KERNEL to enable the fallback.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
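For context, here is a minimal sketch (not part of the patch; the function
name is illustrative) of the memory allocation scope API pattern the commit
relies on: every allocation made between memalloc_nofs_save() and
memalloc_nofs_restore() behaves as if __GFP_FS were cleared, which also
covers the GFP_KERNEL page table allocations that vmalloc makes internally.

/* Hedged sketch, not from the commit: allocating under a NOFS scope. */
#include <linux/sched/mm.h>
#include <linux/mm.h>

static void *example_nofs_kvmalloc(size_t size)
{
	unsigned int nofs_flag;
	void *p;

	/*
	 * Everything allocated inside this scope implicitly drops __GFP_FS,
	 * including the page table pages that vmalloc allocates with
	 * GFP_KERNEL behind the caller's back.
	 */
	nofs_flag = memalloc_nofs_save();
	p = kvmalloc(size, GFP_KERNEL);
	memalloc_nofs_restore(nofs_flag);

	return p;
}

The GFP_NOIO case works the same way with memalloc_noio_save() and
memalloc_noio_restore(), which is exactly what the new ceph_kvmalloc() does.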
-rw-r--r--  net/ceph/ceph_common.c | 29
1 file changed, 23 insertions(+), 6 deletions(-)
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index b412a3ccc4fc..2d568246803f 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -13,6 +13,7 @@
 #include <linux/nsproxy.h>
 #include <linux/parser.h>
 #include <linux/sched.h>
+#include <linux/sched/mm.h>
 #include <linux/seq_file.h>
 #include <linux/slab.h>
 #include <linux/statfs.h>
@@ -185,18 +186,34 @@ int ceph_compare_options(struct ceph_options *new_opt,
 }
 EXPORT_SYMBOL(ceph_compare_options);
 
+/*
+ * kvmalloc() doesn't fall back to the vmalloc allocator unless flags are
+ * compatible with (a superset of) GFP_KERNEL.  This is because while the
+ * actual pages are allocated with the specified flags, the page table pages
+ * are always allocated with GFP_KERNEL.  map_vm_area() doesn't even take
+ * flags because GFP_KERNEL is hard-coded in {p4d,pud,pmd,pte}_alloc().
+ *
+ * ceph_kvmalloc() may be called with GFP_KERNEL, GFP_NOFS or GFP_NOIO.
+ */
 void *ceph_kvmalloc(size_t size, gfp_t flags)
 {
-	if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
-		void *ptr = kmalloc(size, flags | __GFP_NOWARN);
-		if (ptr)
-			return ptr;
+	void *p;
+
+	if ((flags & (__GFP_IO | __GFP_FS)) == (__GFP_IO | __GFP_FS)) {
+		p = kvmalloc(size, flags);
+	} else if ((flags & (__GFP_IO | __GFP_FS)) == __GFP_IO) {
+		unsigned int nofs_flag = memalloc_nofs_save();
+		p = kvmalloc(size, GFP_KERNEL);
+		memalloc_nofs_restore(nofs_flag);
+	} else {
+		unsigned int noio_flag = memalloc_noio_save();
+		p = kvmalloc(size, GFP_KERNEL);
+		memalloc_noio_restore(noio_flag);
 	}
 
-	return __vmalloc(size, flags, PAGE_KERNEL);
+	return p;
 }
 
-
 static int parse_fsid(const char *str, struct ceph_fsid *fsid)
 {
 	int i = 0;
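As a hedged illustration (not part of the commit), the bit test in the new
ceph_kvmalloc() separates the three documented caller masks because
GFP_KERNEL sets both __GFP_IO and __GFP_FS, GFP_NOFS sets only __GFP_IO,
and GFP_NOIO sets neither; the helper below is made up for the example.

/* Hedged sketch, not from the commit: which branch each caller mask hits. */
#include <linux/gfp.h>

static const char *ceph_kvmalloc_branch(gfp_t flags)
{
	gfp_t io_fs = flags & (__GFP_IO | __GFP_FS);

	if (io_fs == (__GFP_IO | __GFP_FS))
		return "kvmalloc(size, flags)";             /* GFP_KERNEL */
	if (io_fs == __GFP_IO)
		return "memalloc_nofs scope + GFP_KERNEL";  /* GFP_NOFS (ceph) */
	return "memalloc_noio scope + GFP_KERNEL";          /* GFP_NOIO (rbd) */
}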