diff options
author | Andrew Shewmaker <agshew@gmail.com> | 2013-04-29 18:08:11 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-04-29 18:54:36 -0400 |
commit | 4eeab4f5580d11bffedc697684b91b0bca0d5009 (patch) | |
tree | 4e9a0c010d34e786df52225039a17aa38e9adf17 /mm | |
parent | c9b1d0981fcce3d9976d7b7a56e4e0503bc610dd (diff) |
mm: replace hardcoded 3% with admin_reserve_kbytes knob
Add an admin_reserve_kbytes knob to allow admins to change the hardcoded
memory reserve to something other than 3%, which may be multiple
gigabytes on large memory systems. Only about 8MB is necessary to
enable recovery in the default mode, and only a few hundred MB are
required even when overcommit is disabled.
This affects OVERCOMMIT_GUESS and OVERCOMMIT_NEVER.
admin_reserve_kbytes is initialized to min(3% of free pages, 8MB).
I arrived at 8MB by summing the RSS of sshd or login, bash, and top.
Please see first patch in this series for full background, motivation,
testing, and full changelog.
[akpm@linux-foundation.org: coding-style fixes]
[akpm@linux-foundation.org: make init_admin_reserve() static]
Signed-off-by: Andrew Shewmaker <agshew@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r-- | mm/mmap.c | 30 | ||||
-rw-r--r-- | mm/nommu.c | 30 |
2 files changed, 52 insertions, 8 deletions
@@ -85,6 +85,7 @@ int sysctl_overcommit_memory __read_mostly = OVERCOMMIT_GUESS; /* heuristic ove | |||
85 | int sysctl_overcommit_ratio __read_mostly = 50; /* default is 50% */ | 85 | int sysctl_overcommit_ratio __read_mostly = 50; /* default is 50% */ |
86 | int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT; | 86 | int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT; |
87 | unsigned long sysctl_user_reserve_kbytes __read_mostly = 1UL << 17; /* 128MB */ | 87 | unsigned long sysctl_user_reserve_kbytes __read_mostly = 1UL << 17; /* 128MB */ |
88 | unsigned long sysctl_admin_reserve_kbytes __read_mostly = 1UL << 13; /* 8MB */ | ||
88 | /* | 89 | /* |
89 | * Make sure vm_committed_as in one cacheline and not cacheline shared with | 90 | * Make sure vm_committed_as in one cacheline and not cacheline shared with |
90 | * other variables. It can be updated by several CPUs frequently. | 91 | * other variables. It can be updated by several CPUs frequently. |
@@ -164,10 +165,10 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin) | |||
164 | free -= totalreserve_pages; | 165 | free -= totalreserve_pages; |
165 | 166 | ||
166 | /* | 167 | /* |
167 | * Leave the last 3% for root | 168 | * Reserve some for root |
168 | */ | 169 | */ |
169 | if (!cap_sys_admin) | 170 | if (!cap_sys_admin) |
170 | free -= free / 32; | 171 | free -= sysctl_admin_reserve_kbytes >> (PAGE_SHIFT - 10); |
171 | 172 | ||
172 | if (free > pages) | 173 | if (free > pages) |
173 | return 0; | 174 | return 0; |
@@ -178,10 +179,10 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin) | |||
178 | allowed = (totalram_pages - hugetlb_total_pages()) | 179 | allowed = (totalram_pages - hugetlb_total_pages()) |
179 | * sysctl_overcommit_ratio / 100; | 180 | * sysctl_overcommit_ratio / 100; |
180 | /* | 181 | /* |
181 | * Leave the last 3% for root | 182 | * Reserve some for root |
182 | */ | 183 | */ |
183 | if (!cap_sys_admin) | 184 | if (!cap_sys_admin) |
184 | allowed -= allowed / 32; | 185 | allowed -= sysctl_admin_reserve_kbytes >> (PAGE_SHIFT - 10); |
185 | allowed += total_swap_pages; | 186 | allowed += total_swap_pages; |
186 | 187 | ||
187 | /* | 188 | /* |
@@ -3119,3 +3120,24 @@ static int __meminit init_user_reserve(void) | |||
3119 | return 0; | 3120 | return 0; |
3120 | } | 3121 | } |
3121 | module_init(init_user_reserve) | 3122 | module_init(init_user_reserve) |
3123 | |||
3124 | /* | ||
3125 | * Initialise sysctl_admin_reserve_kbytes. | ||
3126 | * | ||
3127 | * The purpose of sysctl_admin_reserve_kbytes is to allow the sys admin | ||
3128 | * to log in and kill a memory hogging process. | ||
3129 | * | ||
3130 | * Systems with more than 256MB will reserve 8MB, enough to recover | ||
3131 | * with sshd, bash, and top in OVERCOMMIT_GUESS. Smaller systems will | ||
3132 | * only reserve 3% of free pages by default. | ||
3133 | */ | ||
3134 | static int __meminit init_admin_reserve(void) | ||
3135 | { | ||
3136 | unsigned long free_kbytes; | ||
3137 | |||
3138 | free_kbytes = global_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10); | ||
3139 | |||
3140 | sysctl_admin_reserve_kbytes = min(free_kbytes / 32, 1UL << 13); | ||
3141 | return 0; | ||
3142 | } | ||
3143 | module_init(init_admin_reserve) | ||
diff --git a/mm/nommu.c b/mm/nommu.c index 58e4a0a5125f..fbe3e2f317eb 100644 --- a/mm/nommu.c +++ b/mm/nommu.c | |||
@@ -64,6 +64,7 @@ int sysctl_overcommit_ratio = 50; /* default is 50% */ | |||
64 | int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT; | 64 | int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT; |
65 | int sysctl_nr_trim_pages = CONFIG_NOMMU_INITIAL_TRIM_EXCESS; | 65 | int sysctl_nr_trim_pages = CONFIG_NOMMU_INITIAL_TRIM_EXCESS; |
66 | unsigned long sysctl_user_reserve_kbytes __read_mostly = 1UL << 17; /* 128MB */ | 66 | unsigned long sysctl_user_reserve_kbytes __read_mostly = 1UL << 17; /* 128MB */ |
67 | unsigned long sysctl_admin_reserve_kbytes __read_mostly = 1UL << 13; /* 8MB */ | ||
67 | int heap_stack_gap = 0; | 68 | int heap_stack_gap = 0; |
68 | 69 | ||
69 | atomic_long_t mmap_pages_allocated; | 70 | atomic_long_t mmap_pages_allocated; |
@@ -1939,10 +1940,10 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin) | |||
1939 | free -= totalreserve_pages; | 1940 | free -= totalreserve_pages; |
1940 | 1941 | ||
1941 | /* | 1942 | /* |
1942 | * Leave the last 3% for root | 1943 | * Reserve some for root |
1943 | */ | 1944 | */ |
1944 | if (!cap_sys_admin) | 1945 | if (!cap_sys_admin) |
1945 | free -= free / 32; | 1946 | free -= sysctl_admin_reserve_kbytes >> (PAGE_SHIFT - 10); |
1946 | 1947 | ||
1947 | if (free > pages) | 1948 | if (free > pages) |
1948 | return 0; | 1949 | return 0; |
@@ -1952,10 +1953,10 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin) | |||
1952 | 1953 | ||
1953 | allowed = totalram_pages * sysctl_overcommit_ratio / 100; | 1954 | allowed = totalram_pages * sysctl_overcommit_ratio / 100; |
1954 | /* | 1955 | /* |
1955 | * Leave the last 3% for root | 1956 | * Reserve some for root |
1956 | */ | 1957 | */ |
1957 | if (!cap_sys_admin) | 1958 | if (!cap_sys_admin) |
1958 | allowed -= allowed / 32; | 1959 | allowed -= sysctl_admin_reserve_kbytes >> (PAGE_SHIFT - 10); |
1959 | allowed += total_swap_pages; | 1960 | allowed += total_swap_pages; |
1960 | 1961 | ||
1961 | /* | 1962 | /* |
@@ -2147,3 +2148,24 @@ static int __meminit init_user_reserve(void) | |||
2147 | return 0; | 2148 | return 0; |
2148 | } | 2149 | } |
2149 | module_init(init_user_reserve) | 2150 | module_init(init_user_reserve) |
2151 | |||
2152 | /* | ||
2153 | * Initialise sysctl_admin_reserve_kbytes. | ||
2154 | * | ||
2155 | * The purpose of sysctl_admin_reserve_kbytes is to allow the sys admin | ||
2156 | * to log in and kill a memory hogging process. | ||
2157 | * | ||
2158 | * Systems with more than 256MB will reserve 8MB, enough to recover | ||
2159 | * with sshd, bash, and top in OVERCOMMIT_GUESS. Smaller systems will | ||
2160 | * only reserve 3% of free pages by default. | ||
2161 | */ | ||
2162 | static int __meminit init_admin_reserve(void) | ||
2163 | { | ||
2164 | unsigned long free_kbytes; | ||
2165 | |||
2166 | free_kbytes = global_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10); | ||
2167 | |||
2168 | sysctl_admin_reserve_kbytes = min(free_kbytes / 32, 1UL << 13); | ||
2169 | return 0; | ||
2170 | } | ||
2171 | module_init(init_admin_reserve) | ||