Diffstat (limited to 'mm')
-rw-r--r--   mm/Kconfig       |  6
-rw-r--r--   mm/mmap.c        |  3
-rw-r--r--   mm/nommu.c       |  7
-rw-r--r--   mm/oom_kill.c    | 64
-rw-r--r--   mm/page_alloc.c  |  4
-rw-r--r--   mm/percpu.c      | 35
-rw-r--r--   mm/rmap.c        |  1
-rw-r--r--   mm/vmscan.c      |  9
8 files changed, 76 insertions(+), 53 deletions(-)
diff --git a/mm/Kconfig b/mm/Kconfig
index c948d4ca8bde..fe5f674d7a7d 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -225,9 +225,9 @@ config DEFAULT_MMAP_MIN_ADDR
           For most ia64, ppc64 and x86 users with lots of address space
           a value of 65536 is reasonable and should cause no problems.
           On arm and other archs it should not be higher than 32768.
-          Programs which use vm86 functionality would either need additional
-          permissions from either the LSM or the capabilities module or have
-          this protection disabled.
+          Programs which use vm86 functionality or have some need to map
+          this low address space will need CAP_SYS_RAWIO or disable this
+          protection by setting the value to 0.
 
           This value can be changed after boot using the
           /proc/sys/vm/mmap_min_addr tunable.
diff --git a/mm/mmap.c b/mm/mmap.c
index 34579b23ebd5..8101de490c73 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -88,9 +88,6 @@ int sysctl_overcommit_ratio = 50; /* default is 50% */
 int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT;
 struct percpu_counter vm_committed_as;
 
-/* amount of vm to protect from userspace access */
-unsigned long mmap_min_addr = CONFIG_DEFAULT_MMAP_MIN_ADDR;
-
 /*
  * Check that a process has enough memory to allocate a new virtual
  * mapping. 0 means there is enough memory for the allocation to
diff --git a/mm/nommu.c b/mm/nommu.c
index 53cab10fece4..4bde489ec431 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -69,9 +69,6 @@ int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT;
 int sysctl_nr_trim_pages = CONFIG_NOMMU_INITIAL_TRIM_EXCESS;
 int heap_stack_gap = 0;
 
-/* amount of vm to protect from userspace access */
-unsigned long mmap_min_addr = CONFIG_DEFAULT_MMAP_MIN_ADDR;
-
 atomic_long_t mmap_pages_allocated;
 
 EXPORT_SYMBOL(mem_map);
@@ -922,6 +919,10 @@ static int validate_mmap_request(struct file *file,
                 if (!file->f_op->read)
                         capabilities &= ~BDI_CAP_MAP_COPY;
 
+                /* The file shall have been opened with read permission. */
+                if (!(file->f_mode & FMODE_READ))
+                        return -EACCES;
+
                 if (flags & MAP_SHARED) {
                         /* do checks for writing, appending and locking */
                         if ((prot & PROT_WRITE) &&
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 175a67a78a99..a7b2460e922b 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -58,7 +58,6 @@ unsigned long badness(struct task_struct *p, unsigned long uptime)
         unsigned long points, cpu_time, run_time;
         struct mm_struct *mm;
         struct task_struct *child;
-        int oom_adj;
 
         task_lock(p);
         mm = p->mm;
@@ -66,11 +65,6 @@ unsigned long badness(struct task_struct *p, unsigned long uptime)
                 task_unlock(p);
                 return 0;
         }
-        oom_adj = mm->oom_adj;
-        if (oom_adj == OOM_DISABLE) {
-                task_unlock(p);
-                return 0;
-        }
 
         /*
          * The memory size of the process is the basis for the badness.
@@ -154,15 +148,15 @@ unsigned long badness(struct task_struct *p, unsigned long uptime)
                 points /= 8;
 
         /*
-         * Adjust the score by oom_adj.
+         * Adjust the score by oomkilladj.
          */
-        if (oom_adj) {
-                if (oom_adj > 0) {
+        if (p->oomkilladj) {
+                if (p->oomkilladj > 0) {
                         if (!points)
                                 points = 1;
-                        points <<= oom_adj;
+                        points <<= p->oomkilladj;
                 } else
-                        points >>= -(oom_adj);
+                        points >>= -(p->oomkilladj);
         }
 
 #ifdef DEBUG
@@ -257,8 +251,11 @@ static struct task_struct *select_bad_process(unsigned long *ppoints,
                         *ppoints = ULONG_MAX;
                 }
 
+                if (p->oomkilladj == OOM_DISABLE)
+                        continue;
+
                 points = badness(p, uptime.tv_sec);
-                if (points > *ppoints) {
+                if (points > *ppoints || !chosen) {
                         chosen = p;
                         *ppoints = points;
                 }
@@ -307,7 +304,8 @@ static void dump_tasks(const struct mem_cgroup *mem)
                 }
                 printk(KERN_INFO "[%5d] %5d %5d %8lu %8lu %3d %3d %s\n",
                        p->pid, __task_cred(p)->uid, p->tgid, mm->total_vm,
-                       get_mm_rss(mm), (int)task_cpu(p), mm->oom_adj, p->comm);
+                       get_mm_rss(mm), (int)task_cpu(p), p->oomkilladj,
+                       p->comm);
                 task_unlock(p);
         } while_each_thread(g, p);
 }
@@ -325,8 +323,11 @@ static void __oom_kill_task(struct task_struct *p, int verbose)
                 return;
         }
 
-        if (!p->mm)
+        if (!p->mm) {
+                WARN_ON(1);
+                printk(KERN_WARNING "tried to kill an mm-less task!\n");
                 return;
+        }
 
         if (verbose)
                 printk(KERN_ERR "Killed process %d (%s)\n",
@@ -348,13 +349,28 @@ static int oom_kill_task(struct task_struct *p)
         struct mm_struct *mm;
         struct task_struct *g, *q;
 
-        task_lock(p);
         mm = p->mm;
-        if (!mm || mm->oom_adj == OOM_DISABLE) {
-                task_unlock(p);
+
+        /* WARNING: mm may not be dereferenced since we did not obtain its
+         * value from get_task_mm(p). This is OK since all we need to do is
+         * compare mm to q->mm below.
+         *
+         * Furthermore, even if mm contains a non-NULL value, p->mm may
+         * change to NULL at any time since we do not hold task_lock(p).
+         * However, this is of no concern to us.
+         */
+
+        if (mm == NULL)
                 return 1;
-        }
-        task_unlock(p);
+
+        /*
+         * Don't kill the process if any threads are set to OOM_DISABLE
+         */
+        do_each_thread(g, q) {
+                if (q->mm == mm && q->oomkilladj == OOM_DISABLE)
+                        return 1;
+        } while_each_thread(g, q);
+
         __oom_kill_task(p, 1);
 
         /*
@@ -377,11 +393,10 @@ static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
         struct task_struct *c;
 
         if (printk_ratelimit()) {
-                task_lock(current);
                 printk(KERN_WARNING "%s invoked oom-killer: "
-                        "gfp_mask=0x%x, order=%d, oom_adj=%d\n",
-                        current->comm, gfp_mask, order,
-                        current->mm ? current->mm->oom_adj : OOM_DISABLE);
+                        "gfp_mask=0x%x, order=%d, oomkilladj=%d\n",
+                        current->comm, gfp_mask, order, current->oomkilladj);
+                task_lock(current);
                 cpuset_print_task_mems_allowed(current);
                 task_unlock(current);
                 dump_stack();
@@ -394,9 +409,8 @@ static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
         /*
          * If the task is already exiting, don't alarm the sysadmin or kill
          * its children or threads, just set TIF_MEMDIE so it can die quickly
-         * if its mm is still attached.
          */
-        if (p->mm && (p->flags & PF_EXITING)) {
+        if (p->flags & PF_EXITING) {
                 __oom_kill_task(p, 0);
                 return 0;
         }
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index d052abbe3063..5cc986eb9f6f 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2544,7 +2544,6 @@ static void build_zonelists(pg_data_t *pgdat)
         prev_node = local_node;
         nodes_clear(used_mask);
 
-        memset(node_load, 0, sizeof(node_load));
         memset(node_order, 0, sizeof(node_order));
         j = 0;
 
@@ -2653,6 +2652,9 @@ static int __build_all_zonelists(void *dummy)
 {
         int nid;
 
+#ifdef CONFIG_NUMA
+        memset(node_load, 0, sizeof(node_load));
+#endif
         for_each_online_node(nid) {
                 pg_data_t *pgdat = NODE_DATA(nid);
 
diff --git a/mm/percpu.c b/mm/percpu.c
index b70f2acd8853..5fe37842e0ea 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -8,12 +8,12 @@
  *
  * This is percpu allocator which can handle both static and dynamic
  * areas. Percpu areas are allocated in chunks in vmalloc area. Each
- * chunk is consisted of num_possible_cpus() units and the first chunk
- * is used for static percpu variables in the kernel image (special
- * boot time alloc/init handling necessary as these areas need to be
- * brought up before allocation services are running). Unit grows as
- * necessary and all units grow or shrink in unison. When a chunk is
- * filled up, another chunk is allocated. ie. in vmalloc area
+ * chunk is consisted of nr_cpu_ids units and the first chunk is used
+ * for static percpu variables in the kernel image (special boot time
+ * alloc/init handling necessary as these areas need to be brought up
+ * before allocation services are running). Unit grows as necessary
+ * and all units grow or shrink in unison. When a chunk is filled up,
+ * another chunk is allocated. ie. in vmalloc area
  *
  * c0   c1   c2
  * -------------------  -------------------  ------------
@@ -558,7 +558,7 @@ static void pcpu_free_area(struct pcpu_chunk *chunk, int freeme)
 static void pcpu_unmap(struct pcpu_chunk *chunk, int page_start, int page_end,
                        bool flush_tlb)
 {
-        unsigned int last = num_possible_cpus() - 1;
+        unsigned int last = nr_cpu_ids - 1;
         unsigned int cpu;
 
         /* unmap must not be done on immutable chunk */
@@ -643,7 +643,7 @@ static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, int off, int size,
  */
 static int pcpu_map(struct pcpu_chunk *chunk, int page_start, int page_end)
 {
-        unsigned int last = num_possible_cpus() - 1;
+        unsigned int last = nr_cpu_ids - 1;
         unsigned int cpu;
         int err;
 
@@ -749,7 +749,7 @@ static struct pcpu_chunk *alloc_pcpu_chunk(void)
         chunk->map[chunk->map_used++] = pcpu_unit_size;
         chunk->page = chunk->page_ar;
 
-        chunk->vm = get_vm_area(pcpu_chunk_size, GFP_KERNEL);
+        chunk->vm = get_vm_area(pcpu_chunk_size, VM_ALLOC);
         if (!chunk->vm) {
                 free_pcpu_chunk(chunk);
                 return NULL;
@@ -1067,9 +1067,9 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn,
                                    PFN_UP(size_sum));
 
         pcpu_unit_size = pcpu_unit_pages << PAGE_SHIFT;
-        pcpu_chunk_size = num_possible_cpus() * pcpu_unit_size;
+        pcpu_chunk_size = nr_cpu_ids * pcpu_unit_size;
         pcpu_chunk_struct_size = sizeof(struct pcpu_chunk)
-                + num_possible_cpus() * pcpu_unit_pages * sizeof(struct page *);
+                + nr_cpu_ids * pcpu_unit_pages * sizeof(struct page *);
 
         if (dyn_size < 0)
                 dyn_size = pcpu_unit_size - static_size - reserved_size;
@@ -1248,7 +1248,7 @@ ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size,
         } else
                 pcpue_unit_size = max_t(size_t, pcpue_size, PCPU_MIN_UNIT_SIZE);
 
-        chunk_size = pcpue_unit_size * num_possible_cpus();
+        chunk_size = pcpue_unit_size * nr_cpu_ids;
 
         pcpue_ptr = __alloc_bootmem_nopanic(chunk_size, PAGE_SIZE,
                                             __pa(MAX_DMA_ADDRESS));
@@ -1259,12 +1259,15 @@ ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size,
         }
 
         /* return the leftover and copy */
-        for_each_possible_cpu(cpu) {
+        for (cpu = 0; cpu < nr_cpu_ids; cpu++) {
                 void *ptr = pcpue_ptr + cpu * pcpue_unit_size;
 
-                free_bootmem(__pa(ptr + pcpue_size),
-                             pcpue_unit_size - pcpue_size);
-                memcpy(ptr, __per_cpu_load, static_size);
+                if (cpu_possible(cpu)) {
+                        free_bootmem(__pa(ptr + pcpue_size),
+                                     pcpue_unit_size - pcpue_size);
+                        memcpy(ptr, __per_cpu_load, static_size);
+                } else
+                        free_bootmem(__pa(ptr), pcpue_unit_size);
         }
 
         /* we're ready, commit */
diff --git a/mm/rmap.c b/mm/rmap.c
index 836c6c63e1f2..0895b5c7cbff 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -358,6 +358,7 @@ static int page_referenced_one(struct page *page,
          */
         if (vma->vm_flags & VM_LOCKED) {
                 *mapcount = 1;  /* break early from loop */
+                *vm_flags |= VM_LOCKED;
                 goto out_unmap;
         }
 
diff --git a/mm/vmscan.c b/mm/vmscan.c
index dea7abd31098..94e86dd6954c 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -630,9 +630,14 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 
                 referenced = page_referenced(page, 1,
                                                 sc->mem_cgroup, &vm_flags);
-                /* In active use or really unfreeable? Activate it. */
+                /*
+                 * In active use or really unfreeable? Activate it.
+                 * If page which have PG_mlocked lost isoltation race,
+                 * try_to_unmap moves it to unevictable list
+                 */
                 if (sc->order <= PAGE_ALLOC_COSTLY_ORDER &&
-                                        referenced && page_mapping_inuse(page))
+                        referenced && page_mapping_inuse(page)
+                                                && !(vm_flags & VM_LOCKED))
                         goto activate_locked;
 
                 /*