author    Andrey Ryabinin <aryabinin@virtuozzo.com>    2016-03-17 17:18:50 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>    2016-03-17 18:09:34 -0400
commit    39a1aa8e194ab67983de3b9d0b204ccee12e689a (patch)
tree      66e70a955436cd15a170b848fdba9563bf30cb37 /mm/util.c
parent    ea606cf5d8df370e7932460dfd960b21f20e7c6d (diff)
mm: deduplicate memory overcommitment code
Currently we have two copies of the same code which implements memory
overcommitment logic. Let's move it into mm/util.c and hence avoid
duplication. No functional changes here.

Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
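The knobs declared in the first hunk of the diff below are exposed to user space under /proc/sys/vm/. As a quick way to inspect their current values, here is a minimal user-space sketch; it is not part of the patch, and the paths assume the standard procfs mount.

/*
 * Not part of the patch: print the overcommit-related sysctls that this
 * commit consolidates into mm/util.c. Paths assume /proc/sys is mounted
 * at the usual location.
 */
#include <stdio.h>

int main(void)
{
	static const char * const knobs[] = {
		"/proc/sys/vm/overcommit_memory",
		"/proc/sys/vm/overcommit_ratio",
		"/proc/sys/vm/overcommit_kbytes",
		"/proc/sys/vm/max_map_count",
		"/proc/sys/vm/user_reserve_kbytes",
		"/proc/sys/vm/admin_reserve_kbytes",
	};
	char buf[64];

	for (size_t i = 0; i < sizeof(knobs) / sizeof(knobs[0]); i++) {
		FILE *f = fopen(knobs[i], "r");

		if (!f) {
			perror(knobs[i]);
			continue;
		}
		if (fgets(buf, sizeof(buf), f))
			printf("%-40s %s", knobs[i], buf);	/* value includes '\n' */
		fclose(f);
	}
	return 0;
}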
Diffstat (limited to 'mm/util.c')
-rw-r--r--    mm/util.c    124
1 file changed, 124 insertions(+), 0 deletions(-)
diff --git a/mm/util.c b/mm/util.c
index 4fb14ca5a419..47a57e557614 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -396,6 +396,13 @@ int __page_mapcount(struct page *page)
 }
 EXPORT_SYMBOL_GPL(__page_mapcount);
 
+int sysctl_overcommit_memory __read_mostly = OVERCOMMIT_GUESS;
+int sysctl_overcommit_ratio __read_mostly = 50;
+unsigned long sysctl_overcommit_kbytes __read_mostly;
+int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT;
+unsigned long sysctl_user_reserve_kbytes __read_mostly = 1UL << 17; /* 128MB */
+unsigned long sysctl_admin_reserve_kbytes __read_mostly = 1UL << 13; /* 8MB */
+
 int overcommit_ratio_handler(struct ctl_table *table, int write,
 			     void __user *buffer, size_t *lenp,
 			     loff_t *ppos)
@@ -437,6 +444,123 @@ unsigned long vm_commit_limit(void)
 	return allowed;
 }
 
+/*
+ * Make sure vm_committed_as in one cacheline and not cacheline shared with
+ * other variables. It can be updated by several CPUs frequently.
+ */
+struct percpu_counter vm_committed_as ____cacheline_aligned_in_smp;
+
+/*
+ * The global memory commitment made in the system can be a metric
+ * that can be used to drive ballooning decisions when Linux is hosted
+ * as a guest. On Hyper-V, the host implements a policy engine for dynamically
+ * balancing memory across competing virtual machines that are hosted.
+ * Several metrics drive this policy engine including the guest reported
+ * memory commitment.
+ */
+unsigned long vm_memory_committed(void)
+{
+	return percpu_counter_read_positive(&vm_committed_as);
+}
+EXPORT_SYMBOL_GPL(vm_memory_committed);
+
+/*
+ * Check that a process has enough memory to allocate a new virtual
+ * mapping. 0 means there is enough memory for the allocation to
+ * succeed and -ENOMEM implies there is not.
+ *
+ * We currently support three overcommit policies, which are set via the
+ * vm.overcommit_memory sysctl. See Documentation/vm/overcommit-accounting
+ *
+ * Strict overcommit modes added 2002 Feb 26 by Alan Cox.
+ * Additional code 2002 Jul 20 by Robert Love.
+ *
+ * cap_sys_admin is 1 if the process has admin privileges, 0 otherwise.
+ *
+ * Note this is a helper function intended to be used by LSMs which
+ * wish to use this logic.
+ */
+int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
+{
+	long free, allowed, reserve;
+
+	VM_WARN_ONCE(percpu_counter_read(&vm_committed_as) <
+			-(s64)vm_committed_as_batch * num_online_cpus(),
+			"memory commitment underflow");
+
+	vm_acct_memory(pages);
+
+	/*
+	 * Sometimes we want to use more memory than we have
+	 */
+	if (sysctl_overcommit_memory == OVERCOMMIT_ALWAYS)
+		return 0;
+
+	if (sysctl_overcommit_memory == OVERCOMMIT_GUESS) {
+		free = global_page_state(NR_FREE_PAGES);
+		free += global_page_state(NR_FILE_PAGES);
+
+		/*
+		 * shmem pages shouldn't be counted as free in this
+		 * case, they can't be purged, only swapped out, and
+		 * that won't affect the overall amount of available
+		 * memory in the system.
+		 */
+		free -= global_page_state(NR_SHMEM);
+
+		free += get_nr_swap_pages();
+
+		/*
+		 * Any slabs which are created with the
+		 * SLAB_RECLAIM_ACCOUNT flag claim to have contents
+		 * which are reclaimable, under pressure. The dentry
+		 * cache and most inode caches should fall into this
+		 */
+		free += global_page_state(NR_SLAB_RECLAIMABLE);
+
+		/*
+		 * Leave reserved pages. The pages are not for anonymous pages.
+		 */
+		if (free <= totalreserve_pages)
+			goto error;
+		else
+			free -= totalreserve_pages;
+
+		/*
+		 * Reserve some for root
+		 */
+		if (!cap_sys_admin)
+			free -= sysctl_admin_reserve_kbytes >> (PAGE_SHIFT - 10);
+
+		if (free > pages)
+			return 0;
+
+		goto error;
+	}
+
+	allowed = vm_commit_limit();
+	/*
+	 * Reserve some for root
+	 */
+	if (!cap_sys_admin)
+		allowed -= sysctl_admin_reserve_kbytes >> (PAGE_SHIFT - 10);
+
+	/*
+	 * Don't let a single process grow so big a user can't recover
+	 */
+	if (mm) {
+		reserve = sysctl_user_reserve_kbytes >> (PAGE_SHIFT - 10);
+		allowed -= min_t(long, mm->total_vm / 32, reserve);
+	}
+
+	if (percpu_counter_read_positive(&vm_committed_as) < allowed)
+		return 0;
+error:
+	vm_unacct_memory(pages);
+
+	return -ENOMEM;
+}
+
 /**
  * get_cmdline() - copy the cmdline value to a buffer.
  * @task: the task whose cmdline value to copy.
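
As a worked example of the strict-overcommit arithmetic in __vm_enough_memory() above: the *_kbytes sysctls are converted to pages with >> (PAGE_SHIFT - 10), the admin and per-process user reserves are subtracted from the commit limit, and the request succeeds while the committed total stays below that ceiling. The stand-alone sketch below walks through that calculation with made-up input values; it is an illustration only, not the kernel code (the kernel takes these numbers from vm_commit_limit(), the sysctls and the mm being checked).

/*
 * Stand-alone illustration of the OVERCOMMIT_NEVER ceiling computed in
 * __vm_enough_memory(). All input values below are hypothetical.
 */
#include <stdio.h>

#define PAGE_SHIFT 12				/* 4 KiB pages, as on x86 */

static long kbytes_to_pages(long kbytes)
{
	return kbytes >> (PAGE_SHIFT - 10);	/* 4 kB per page -> divide by 4 */
}

int main(void)
{
	long commit_limit  = 2 * 1024 * 1024;		/* pages (8 GiB), stands in for vm_commit_limit() */
	long admin_reserve = kbytes_to_pages(8192);	/* 8 MB  -> 2048 pages  */
	long user_reserve  = kbytes_to_pages(131072);	/* 128 MB -> 32768 pages */
	long total_vm      = 500000;			/* pages already mapped by this mm */
	long committed     = 1500000;			/* stands in for vm_committed_as */
	int  cap_sys_admin = 0;

	long allowed = commit_limit;

	/* reserve some for root */
	if (!cap_sys_admin)
		allowed -= admin_reserve;

	/* don't let a single process grow so big a user can't recover */
	long single = total_vm / 32;
	allowed -= (single < user_reserve) ? single : user_reserve;

	printf("allowed = %ld pages, committed = %ld pages -> %s\n",
	       allowed, committed,
	       committed < allowed ? "allocation permitted" : "-ENOMEM");
	return 0;
}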