diff options
author    Ilya Dryomov <idryomov@gmail.com>    2019-08-30 14:44:24 -0400
committer Ilya Dryomov <idryomov@gmail.com>    2019-09-16 06:06:25 -0400
commit    10c12851a022662bf6085bd4384b4ebed4c447ce (patch)
tree      609a2973a0bbbd33afc073e6a122ddec858efe3f
parent    6fd4e634835208ddb331234bfa51d75396a5c42c (diff)
libceph: avoid a __vmalloc() deadlock in ceph_kvmalloc()
The vmalloc allocator doesn't fully respect the specified gfp mask:
while the actual pages are allocated as requested, the page table pages
are always allocated with GFP_KERNEL. ceph_kvmalloc() may be called
with GFP_NOFS and GFP_NOIO (for ceph and rbd respectively), so this may
result in a deadlock.
There is no real reason for the current PAGE_ALLOC_COSTLY_ORDER logic,
it's just something that seemed sensible at the time (ceph_kvmalloc()
predates kvmalloc()). kvmalloc() is smarter: in an attempt to reduce
long term fragmentation, it first tries to kmalloc non-disruptively.
Switch to kvmalloc() and set the respective PF_MEMALLOC_* flag using
the scope API to avoid the deadlock. Note that kvmalloc() needs to be
passed GFP_KERNEL to enable the fallback.
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
-rw-r--r--    net/ceph/ceph_common.c | 29
1 file changed, 23 insertions(+), 6 deletions(-)
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index b412a3ccc4fc..2d568246803f 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -13,6 +13,7 @@
 #include <linux/nsproxy.h>
 #include <linux/parser.h>
 #include <linux/sched.h>
+#include <linux/sched/mm.h>
 #include <linux/seq_file.h>
 #include <linux/slab.h>
 #include <linux/statfs.h>
@@ -185,18 +186,34 @@ int ceph_compare_options(struct ceph_options *new_opt,
 }
 EXPORT_SYMBOL(ceph_compare_options);
 
+/*
+ * kvmalloc() doesn't fall back to the vmalloc allocator unless flags are
+ * compatible with (a superset of) GFP_KERNEL.  This is because while the
+ * actual pages are allocated with the specified flags, the page table pages
+ * are always allocated with GFP_KERNEL.  map_vm_area() doesn't even take
+ * flags because GFP_KERNEL is hard-coded in {p4d,pud,pmd,pte}_alloc().
+ *
+ * ceph_kvmalloc() may be called with GFP_KERNEL, GFP_NOFS or GFP_NOIO.
+ */
 void *ceph_kvmalloc(size_t size, gfp_t flags)
 {
-	if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
-		void *ptr = kmalloc(size, flags | __GFP_NOWARN);
-		if (ptr)
-			return ptr;
+	void *p;
+
+	if ((flags & (__GFP_IO | __GFP_FS)) == (__GFP_IO | __GFP_FS)) {
+		p = kvmalloc(size, flags);
+	} else if ((flags & (__GFP_IO | __GFP_FS)) == __GFP_IO) {
+		unsigned int nofs_flag = memalloc_nofs_save();
+		p = kvmalloc(size, GFP_KERNEL);
+		memalloc_nofs_restore(nofs_flag);
+	} else {
+		unsigned int noio_flag = memalloc_noio_save();
+		p = kvmalloc(size, GFP_KERNEL);
+		memalloc_noio_restore(noio_flag);
 	}
 
-	return __vmalloc(size, flags, PAGE_KERNEL);
+	return p;
 }
 
-
 static int parse_fsid(const char *str, struct ceph_fsid *fsid)
 {
 	int i = 0;