diff options
-rw-r--r-- | Documentation/sysctl/vm.txt | 20 | ||||
-rw-r--r-- | Documentation/vm/overcommit-accounting | 8 | ||||
-rw-r--r-- | include/linux/mm.h | 2 | ||||
-rw-r--r-- | kernel/sysctl.c | 7 | ||||
-rw-r--r-- | mm/mmap.c | 35 | ||||
-rw-r--r-- | mm/nommu.c | 35 |
6 files changed, 96 insertions, 11 deletions
diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt index 078701fdbd4d..f69895738357 100644 --- a/Documentation/sysctl/vm.txt +++ b/Documentation/sysctl/vm.txt | |||
@@ -53,6 +53,7 @@ Currently, these files are in /proc/sys/vm: | |||
53 | - percpu_pagelist_fraction | 53 | - percpu_pagelist_fraction |
54 | - stat_interval | 54 | - stat_interval |
55 | - swappiness | 55 | - swappiness |
56 | - user_reserve_kbytes | ||
56 | - vfs_cache_pressure | 57 | - vfs_cache_pressure |
57 | - zone_reclaim_mode | 58 | - zone_reclaim_mode |
58 | 59 | ||
@@ -542,6 +543,7 @@ memory until it actually runs out. | |||
542 | 543 | ||
543 | When this flag is 2, the kernel uses a "never overcommit" | 544 | When this flag is 2, the kernel uses a "never overcommit" |
544 | policy that attempts to prevent any overcommit of memory. | 545 | policy that attempts to prevent any overcommit of memory. |
546 | Note that user_reserve_kbytes affects this policy. | ||
545 | 547 | ||
546 | This feature can be very useful because there are a lot of | 548 | This feature can be very useful because there are a lot of |
547 | programs that malloc() huge amounts of memory "just-in-case" | 549 | programs that malloc() huge amounts of memory "just-in-case" |
@@ -645,6 +647,24 @@ The default value is 60. | |||
645 | 647 | ||
646 | ============================================================== | 648 | ============================================================== |
647 | 649 | ||
650 | - user_reserve_kbytes | ||
651 | |||
652 | When overcommit_memory is set to 2, "never overcommit" mode, reserve | ||
653 | min(3% of current process size, user_reserve_kbytes) of free memory. | ||
654 | This is intended to prevent a user from starting a single memory hogging | ||
655 | process, such that they cannot recover (kill the hog). | ||
656 | |||
657 | user_reserve_kbytes defaults to min(3% of free memory, 128MB). | ||
658 | |||
659 | If this is reduced to zero, then the user will be allowed to allocate | ||
660 | all free memory with a single process, minus admin_reserve_kbytes. | ||
661 | Any subsequent attempts to execute a command will result in | ||
662 | "fork: Cannot allocate memory". | ||
663 | |||
664 | Changing this takes effect whenever an application requests memory. | ||
665 | |||
666 | ============================================================== | ||
667 | |||
648 | vfs_cache_pressure | 668 | vfs_cache_pressure |
649 | ------------------ | 669 | ------------------ |
650 | 670 | ||
diff --git a/Documentation/vm/overcommit-accounting b/Documentation/vm/overcommit-accounting index 706d7ed9d8d2..8eaa2fc4b8fa 100644 --- a/Documentation/vm/overcommit-accounting +++ b/Documentation/vm/overcommit-accounting | |||
@@ -8,7 +8,9 @@ The Linux kernel supports the following overcommit handling modes | |||
8 | default. | 8 | default. |
9 | 9 | ||
10 | 1 - Always overcommit. Appropriate for some scientific | 10 | 1 - Always overcommit. Appropriate for some scientific |
11 | applications. | 11 | applications. Classic example is code using sparse arrays |
12 | and just relying on the virtual memory consisting almost | ||
13 | entirely of zero pages. | ||
12 | 14 | ||
13 | 2 - Don't overcommit. The total address space commit | 15 | 2 - Don't overcommit. The total address space commit |
14 | for the system is not permitted to exceed swap + a | 16 | for the system is not permitted to exceed swap + a |
@@ -18,6 +20,10 @@ The Linux kernel supports the following overcommit handling modes | |||
18 | pages but will receive errors on memory allocation as | 20 | pages but will receive errors on memory allocation as |
19 | appropriate. | 21 | appropriate. |
20 | 22 | ||
23 | Useful for applications that want to guarantee their | ||
24 | memory allocations will be available in the future | ||
25 | without having to initialize every page. | ||
26 | |||
21 | The overcommit policy is set via the sysctl `vm.overcommit_memory'. | 27 | The overcommit policy is set via the sysctl `vm.overcommit_memory'. |
22 | 28 | ||
23 | The overcommit percentage is set via `vm.overcommit_ratio'. | 29 | The overcommit percentage is set via `vm.overcommit_ratio'. |
diff --git a/include/linux/mm.h b/include/linux/mm.h index 7aa11a6736eb..43cfaabbde40 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h | |||
@@ -44,6 +44,8 @@ extern int sysctl_legacy_va_layout; | |||
44 | #include <asm/pgtable.h> | 44 | #include <asm/pgtable.h> |
45 | #include <asm/processor.h> | 45 | #include <asm/processor.h> |
46 | 46 | ||
47 | extern unsigned long sysctl_user_reserve_kbytes; | ||
48 | |||
47 | #define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n)) | 49 | #define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n)) |
48 | 50 | ||
49 | /* to align the pointer to the (next) page boundary */ | 51 | /* to align the pointer to the (next) page boundary */ |
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 3dadde52253c..6daabb72bdb5 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -1429,6 +1429,13 @@ static struct ctl_table vm_table[] = { | |||
1429 | .extra2 = &one, | 1429 | .extra2 = &one, |
1430 | }, | 1430 | }, |
1431 | #endif | 1431 | #endif |
1432 | { | ||
1433 | .procname = "user_reserve_kbytes", | ||
1434 | .data = &sysctl_user_reserve_kbytes, | ||
1435 | .maxlen = sizeof(sysctl_user_reserve_kbytes), | ||
1436 | .mode = 0644, | ||
1437 | .proc_handler = proc_doulongvec_minmax, | ||
1438 | }, | ||
1432 | { } | 1439 | { } |
1433 | }; | 1440 | }; |
1434 | 1441 | ||
@@ -84,6 +84,7 @@ EXPORT_SYMBOL(vm_get_page_prot); | |||
84 | int sysctl_overcommit_memory __read_mostly = OVERCOMMIT_GUESS; /* heuristic overcommit */ | 84 | int sysctl_overcommit_memory __read_mostly = OVERCOMMIT_GUESS; /* heuristic overcommit */ |
85 | int sysctl_overcommit_ratio __read_mostly = 50; /* default is 50% */ | 85 | int sysctl_overcommit_ratio __read_mostly = 50; /* default is 50% */ |
86 | int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT; | 86 | int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT; |
87 | unsigned long sysctl_user_reserve_kbytes __read_mostly = 1UL << 17; /* 128MB */ | ||
87 | /* | 88 | /* |
88 | * Make sure vm_committed_as in one cacheline and not cacheline shared with | 89 | * Make sure vm_committed_as in one cacheline and not cacheline shared with |
89 | * other variables. It can be updated by several CPUs frequently. | 90 | * other variables. It can be updated by several CPUs frequently. |
@@ -122,7 +123,7 @@ EXPORT_SYMBOL_GPL(vm_memory_committed); | |||
122 | */ | 123 | */ |
123 | int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin) | 124 | int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin) |
124 | { | 125 | { |
125 | unsigned long free, allowed; | 126 | unsigned long free, allowed, reserve; |
126 | 127 | ||
127 | vm_acct_memory(pages); | 128 | vm_acct_memory(pages); |
128 | 129 | ||
@@ -183,10 +184,13 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin) | |||
183 | allowed -= allowed / 32; | 184 | allowed -= allowed / 32; |
184 | allowed += total_swap_pages; | 185 | allowed += total_swap_pages; |
185 | 186 | ||
186 | /* Don't let a single process grow too big: | 187 | /* |
187 | leave 3% of the size of this process for other processes */ | 188 | * Don't let a single process grow so big a user can't recover |
188 | if (mm) | 189 | */ |
189 | allowed -= mm->total_vm / 32; | 190 | if (mm) { |
191 | reserve = sysctl_user_reserve_kbytes >> (PAGE_SHIFT - 10); | ||
192 | allowed -= min(mm->total_vm / 32, reserve); | ||
193 | } | ||
190 | 194 | ||
191 | if (percpu_counter_read_positive(&vm_committed_as) < allowed) | 195 | if (percpu_counter_read_positive(&vm_committed_as) < allowed) |
192 | return 0; | 196 | return 0; |
@@ -3094,3 +3098,24 @@ void __init mmap_init(void) | |||
3094 | ret = percpu_counter_init(&vm_committed_as, 0); | 3098 | ret = percpu_counter_init(&vm_committed_as, 0); |
3095 | VM_BUG_ON(ret); | 3099 | VM_BUG_ON(ret); |
3096 | } | 3100 | } |
3101 | |||
3102 | /* | ||
3103 | * Initialise sysctl_user_reserve_kbytes. | ||
3104 | * | ||
3105 | * This is intended to prevent a user from starting a single memory hogging | ||
3106 | * process, such that they cannot recover (kill the hog) in OVERCOMMIT_NEVER | ||
3107 | * mode. | ||
3108 | * | ||
3109 | * The default value is min(3% of free memory, 128MB) | ||
3110 | * 128MB is enough to recover with sshd/login, bash, and top/kill. | ||
3111 | */ | ||
3112 | static int __meminit init_user_reserve(void) | ||
3113 | { | ||
3114 | unsigned long free_kbytes; | ||
3115 | |||
3116 | free_kbytes = global_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10); | ||
3117 | |||
3118 | sysctl_user_reserve_kbytes = min(free_kbytes / 32, 1UL << 17); | ||
3119 | return 0; | ||
3120 | } | ||
3121 | module_init(init_user_reserve) | ||
diff --git a/mm/nommu.c b/mm/nommu.c index 2f1c75ed468e..58e4a0a5125f 100644 --- a/mm/nommu.c +++ b/mm/nommu.c | |||
@@ -63,6 +63,7 @@ int sysctl_overcommit_memory = OVERCOMMIT_GUESS; /* heuristic overcommit */ | |||
63 | int sysctl_overcommit_ratio = 50; /* default is 50% */ | 63 | int sysctl_overcommit_ratio = 50; /* default is 50% */ |
64 | int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT; | 64 | int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT; |
65 | int sysctl_nr_trim_pages = CONFIG_NOMMU_INITIAL_TRIM_EXCESS; | 65 | int sysctl_nr_trim_pages = CONFIG_NOMMU_INITIAL_TRIM_EXCESS; |
66 | unsigned long sysctl_user_reserve_kbytes __read_mostly = 1UL << 17; /* 128MB */ | ||
66 | int heap_stack_gap = 0; | 67 | int heap_stack_gap = 0; |
67 | 68 | ||
68 | atomic_long_t mmap_pages_allocated; | 69 | atomic_long_t mmap_pages_allocated; |
@@ -1897,7 +1898,7 @@ EXPORT_SYMBOL(unmap_mapping_range); | |||
1897 | */ | 1898 | */ |
1898 | int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin) | 1899 | int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin) |
1899 | { | 1900 | { |
1900 | unsigned long free, allowed; | 1901 | unsigned long free, allowed, reserve; |
1901 | 1902 | ||
1902 | vm_acct_memory(pages); | 1903 | vm_acct_memory(pages); |
1903 | 1904 | ||
@@ -1957,10 +1958,13 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin) | |||
1957 | allowed -= allowed / 32; | 1958 | allowed -= allowed / 32; |
1958 | allowed += total_swap_pages; | 1959 | allowed += total_swap_pages; |
1959 | 1960 | ||
1960 | /* Don't let a single process grow too big: | 1961 | /* |
1961 | leave 3% of the size of this process for other processes */ | 1962 | * Don't let a single process grow so big a user can't recover |
1962 | if (mm) | 1963 | */ |
1963 | allowed -= mm->total_vm / 32; | 1964 | if (mm) { |
1965 | reserve = sysctl_user_reserve_kbytes >> (PAGE_SHIFT - 10); | ||
1966 | allowed -= min(mm->total_vm / 32, reserve); | ||
1967 | } | ||
1964 | 1968 | ||
1965 | if (percpu_counter_read_positive(&vm_committed_as) < allowed) | 1969 | if (percpu_counter_read_positive(&vm_committed_as) < allowed) |
1966 | return 0; | 1970 | return 0; |
@@ -2122,3 +2126,24 @@ int nommu_shrink_inode_mappings(struct inode *inode, size_t size, | |||
2122 | up_write(&nommu_region_sem); | 2126 | up_write(&nommu_region_sem); |
2123 | return 0; | 2127 | return 0; |
2124 | } | 2128 | } |
2129 | |||
2130 | /* | ||
2131 | * Initialise sysctl_user_reserve_kbytes. | ||
2132 | * | ||
2133 | * This is intended to prevent a user from starting a single memory hogging | ||
2134 | * process, such that they cannot recover (kill the hog) in OVERCOMMIT_NEVER | ||
2135 | * mode. | ||
2136 | * | ||
2137 | * The default value is min(3% of free memory, 128MB) | ||
2138 | * 128MB is enough to recover with sshd/login, bash, and top/kill. | ||
2139 | */ | ||
2140 | static int __meminit init_user_reserve(void) | ||
2141 | { | ||
2142 | unsigned long free_kbytes; | ||
2143 | |||
2144 | free_kbytes = global_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10); | ||
2145 | |||
2146 | sysctl_user_reserve_kbytes = min(free_kbytes / 32, 1UL << 17); | ||
2147 | return 0; | ||
2148 | } | ||
2149 | module_init(init_user_reserve) | ||