aboutsummaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
authorMichal Hocko <mhocko@suse.com>2017-05-08 18:57:09 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2017-05-08 20:15:12 -0400
commita7c3e901a46ff54c016d040847eda598a9e3e653 (patch)
treed149d70d420ff19586daa827db47a2e26a5598fe /mm
parent60f3e00d25b44e3aa51846590d1e10f408466a83 (diff)
mm: introduce kv[mz]alloc helpers
Patch series "kvmalloc", v5. There are many open coded kmalloc with vmalloc fallback instances in the tree. Most of them are not careful enough or simply do not care about the underlying semantic of the kmalloc/page allocator which means that a) some vmalloc fallbacks are basically unreachable because the kmalloc part will keep retrying until it succeeds b) the page allocator can invoke a really disruptive steps like the OOM killer to move forward which doesn't sound appropriate when we consider that the vmalloc fallback is available. As it can be seen implementing kvmalloc requires quite an intimate knowledge of the page allocator and the memory reclaim internals which strongly suggests that a helper should be implemented in the memory subsystem proper. Most callers I could find have been converted to use the helper instead. This is patch 6. There are some more relying on __GFP_REPEAT in the networking stack which I have converted as well and Eric Dumazet was not opposed [2] to convert them as well. [1] http://lkml.kernel.org/r/20170130094940.13546-1-mhocko@kernel.org [2] http://lkml.kernel.org/r/1485273626.16328.301.camel@edumazet-glaptop3.roam.corp.google.com This patch (of 9): Using kmalloc with the vmalloc fallback for larger allocations is a common pattern in the kernel code. Yet we do not have any common helper for that and so users have invented their own helpers. Some of them are really creative when doing so. Let's just add kv[mz]alloc and make sure it is implemented properly. This implementation makes sure to not make a large memory pressure for > PAGE_SIZE requests (__GFP_NORETRY) and also to not warn about allocation failures. This also rules out the OOM killer as the vmalloc is a more appropriate fallback than a disruptive user visible action. This patch also changes some existing users and removes helpers which are specific for them. In some cases this is not possible (e.g. 
ext4_kvmalloc, libcfs_kvzalloc) because those seem to be broken and require GFP_NO{FS,IO} context which is not vmalloc compatible in general (note that the page table allocation is GFP_KERNEL). Those need to be fixed separately. While we are at it, document in __vmalloc{_node} the unsupported gfp mask because there seems to be a lot of confusion out there. kvmalloc_node will warn about GFP_KERNEL incompatible (which are not superset) flags to catch new abusers. Existing ones would have to die slowly. [sfr@canb.auug.org.au: f2fs fixup] Link: http://lkml.kernel.org/r/20170320163735.332e64b7@canb.auug.org.au Link: http://lkml.kernel.org/r/20170306103032.2540-2-mhocko@kernel.org Signed-off-by: Michal Hocko <mhocko@suse.com> Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au> Reviewed-by: Andreas Dilger <adilger@dilger.ca> [ext4 part] Acked-by: Vlastimil Babka <vbabka@suse.cz> Cc: John Hubbard <jhubbard@nvidia.com> Cc: David Miller <davem@davemloft.net> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r--mm/nommu.c5
-rw-r--r--mm/util.c45
-rw-r--r--mm/vmalloc.c9
3 files changed, 58 insertions, 1 deletion
diff --git a/mm/nommu.c b/mm/nommu.c
index 2d131b97a851..a80411d258fc 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -237,6 +237,11 @@ void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot)
237} 237}
238EXPORT_SYMBOL(__vmalloc); 238EXPORT_SYMBOL(__vmalloc);
239 239
240void *__vmalloc_node_flags(unsigned long size, int node, gfp_t flags)
241{
242 return __vmalloc(size, flags, PAGE_KERNEL);
243}
244
240void *vmalloc_user(unsigned long size) 245void *vmalloc_user(unsigned long size)
241{ 246{
242 void *ret; 247 void *ret;
diff --git a/mm/util.c b/mm/util.c
index 656dc5e37a87..10a14a0ac3c2 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -329,6 +329,51 @@ unsigned long vm_mmap(struct file *file, unsigned long addr,
329} 329}
330EXPORT_SYMBOL(vm_mmap); 330EXPORT_SYMBOL(vm_mmap);
331 331
332/**
333 * kvmalloc_node - attempt to allocate physically contiguous memory, but upon
334 * failure, fall back to non-contiguous (vmalloc) allocation.
335 * @size: size of the request.
336 * @flags: gfp mask for the allocation - must be compatible (superset) with GFP_KERNEL.
337 * @node: numa node to allocate from
338 *
339 * Uses kmalloc to get the memory but if the allocation fails then falls back
340 * to the vmalloc allocator. Use kvfree for freeing the memory.
341 *
342 * Reclaim modifiers - __GFP_NORETRY, __GFP_REPEAT and __GFP_NOFAIL are not supported
343 *
344 * Any use of gfp flags outside of GFP_KERNEL should be consulted with mm people.
345 */
346void *kvmalloc_node(size_t size, gfp_t flags, int node)
347{
348 gfp_t kmalloc_flags = flags;
349 void *ret;
350
351 /*
352 * vmalloc uses GFP_KERNEL for some internal allocations (e.g page tables)
353 * so the given set of flags has to be compatible.
354 */
355 WARN_ON_ONCE((flags & GFP_KERNEL) != GFP_KERNEL);
356
357 /*
358 * Make sure that larger requests are not too disruptive - no OOM
359 * killer and no allocation failure warnings as we have a fallback
360 */
361 if (size > PAGE_SIZE)
362 kmalloc_flags |= __GFP_NORETRY | __GFP_NOWARN;
363
364 ret = kmalloc_node(size, kmalloc_flags, node);
365
366 /*
367 * It doesn't really make sense to fallback to vmalloc for sub page
368 * requests
369 */
370 if (ret || size <= PAGE_SIZE)
371 return ret;
372
373 return __vmalloc_node_flags(size, node, flags | __GFP_HIGHMEM);
374}
375EXPORT_SYMBOL(kvmalloc_node);
376
332void kvfree(const void *addr) 377void kvfree(const void *addr)
333{ 378{
334 if (is_vmalloc_addr(addr)) 379 if (is_vmalloc_addr(addr))
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index b52aeed3f58e..33603239560e 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1786,6 +1786,13 @@ fail:
1786 * Allocate enough pages to cover @size from the page level 1786 * Allocate enough pages to cover @size from the page level
1787 * allocator with @gfp_mask flags. Map them into contiguous 1787 * allocator with @gfp_mask flags. Map them into contiguous
1788 * kernel virtual space, using a pagetable protection of @prot. 1788 * kernel virtual space, using a pagetable protection of @prot.
1789 *
1790 * Reclaim modifiers in @gfp_mask - __GFP_NORETRY, __GFP_REPEAT
1791 * and __GFP_NOFAIL are not supported
1792 *
1793 * Any use of gfp flags outside of GFP_KERNEL should be consulted
1794 * with mm people.
1795 *
1789 */ 1796 */
1790static void *__vmalloc_node(unsigned long size, unsigned long align, 1797static void *__vmalloc_node(unsigned long size, unsigned long align,
1791 gfp_t gfp_mask, pgprot_t prot, 1798 gfp_t gfp_mask, pgprot_t prot,
@@ -1802,7 +1809,7 @@ void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot)
1802} 1809}
1803EXPORT_SYMBOL(__vmalloc); 1810EXPORT_SYMBOL(__vmalloc);
1804 1811
1805static inline void *__vmalloc_node_flags(unsigned long size, 1812void *__vmalloc_node_flags(unsigned long size,
1806 int node, gfp_t flags) 1813 int node, gfp_t flags)
1807{ 1814{
1808 return __vmalloc_node(size, 1, flags, PAGE_KERNEL, 1815 return __vmalloc_node(size, 1, flags, PAGE_KERNEL,