Diffstat (limited to 'mm')
-rw-r--r--  mm/Kconfig      |  6
-rw-r--r--  mm/mempolicy.c  | 84
-rw-r--r--  mm/mempool.c    |  4
-rw-r--r--  mm/mmap.c       |  3
-rw-r--r--  mm/nommu.c      |  7
-rw-r--r--  mm/oom_kill.c   | 64
-rw-r--r--  mm/page_alloc.c |  4
-rw-r--r--  mm/percpu.c     | 35
-rw-r--r--  mm/rmap.c       |  1
-rw-r--r--  mm/vmscan.c     |  9
10 files changed, 136 insertions(+), 81 deletions(-)
diff --git a/mm/Kconfig b/mm/Kconfig
index c948d4ca8bde..fe5f674d7a7d 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -225,9 +225,9 @@ config DEFAULT_MMAP_MIN_ADDR
 	  For most ia64, ppc64 and x86 users with lots of address space
 	  a value of 65536 is reasonable and should cause no problems.
 	  On arm and other archs it should not be higher than 32768.
-	  Programs which use vm86 functionality would either need additional
-	  permissions from either the LSM or the capabilities module or have
-	  this protection disabled.
+	  Programs which use vm86 functionality or have some need to map
+	  this low address space will need CAP_SYS_RAWIO or disable this
+	  protection by setting the value to 0.
 
 	  This value can be changed after boot using the
 	  /proc/sys/vm/mmap_min_addr tunable.
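The /proc knob mentioned in the help text can be exercised directly from userspace. A minimal sketch (assumes root; 65536 is just the ia64/ppc64/x86 suggestion quoted above):

	#include <stdio.h>

	/* Raise the protected low address range to 64 KiB at runtime;
	 * equivalent to writing /proc/sys/vm/mmap_min_addr by hand. */
	int main(void)
	{
		FILE *f = fopen("/proc/sys/vm/mmap_min_addr", "w");

		if (!f) {
			perror("fopen");
			return 1;
		}
		fprintf(f, "%d\n", 65536);
		return fclose(f) ? 1 : 0;
	}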
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index e08e2c4da63a..7dd9d9f80694 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -191,25 +191,27 @@ static int mpol_new_bind(struct mempolicy *pol, const nodemask_t *nodes)
  * Must be called holding task's alloc_lock to protect task's mems_allowed
  * and mempolicy.  May also be called holding the mmap_semaphore for write.
  */
-static int mpol_set_nodemask(struct mempolicy *pol, const nodemask_t *nodes)
+static int mpol_set_nodemask(struct mempolicy *pol,
+		     const nodemask_t *nodes, struct nodemask_scratch *nsc)
 {
-	nodemask_t cpuset_context_nmask;
 	int ret;
 
 	/* if mode is MPOL_DEFAULT, pol is NULL. This is right. */
 	if (pol == NULL)
 		return 0;
+	/* Check N_HIGH_MEMORY */
+	nodes_and(nsc->mask1,
+		  cpuset_current_mems_allowed, node_states[N_HIGH_MEMORY]);
 
 	VM_BUG_ON(!nodes);
 	if (pol->mode == MPOL_PREFERRED && nodes_empty(*nodes))
 		nodes = NULL;	/* explicit local allocation */
 	else {
 		if (pol->flags & MPOL_F_RELATIVE_NODES)
-			mpol_relative_nodemask(&cpuset_context_nmask, nodes,
-					       &cpuset_current_mems_allowed);
+			mpol_relative_nodemask(&nsc->mask2, nodes, &nsc->mask1);
 		else
-			nodes_and(cpuset_context_nmask, *nodes,
-				  cpuset_current_mems_allowed);
+			nodes_and(nsc->mask2, *nodes, nsc->mask1);
+
 		if (mpol_store_user_nodemask(pol))
 			pol->w.user_nodemask = *nodes;
 		else
@@ -217,8 +219,10 @@ static int mpol_set_nodemask(struct mempolicy *pol, const nodemask_t *nodes)
 					cpuset_current_mems_allowed;
 	}
 
-	ret = mpol_ops[pol->mode].create(pol,
-			nodes ? &cpuset_context_nmask : NULL);
+	if (nodes)
+		ret = mpol_ops[pol->mode].create(pol, &nsc->mask2);
+	else
+		ret = mpol_ops[pol->mode].create(pol, NULL);
 	return ret;
 }
 
@@ -620,12 +624,17 @@ static long do_set_mempolicy(unsigned short mode, unsigned short flags,
 {
 	struct mempolicy *new, *old;
 	struct mm_struct *mm = current->mm;
+	NODEMASK_SCRATCH(scratch);
 	int ret;
 
-	new = mpol_new(mode, flags, nodes);
-	if (IS_ERR(new))
-		return PTR_ERR(new);
+	if (!scratch)
+		return -ENOMEM;
 
+	new = mpol_new(mode, flags, nodes);
+	if (IS_ERR(new)) {
+		ret = PTR_ERR(new);
+		goto out;
+	}
 	/*
 	 * prevent changing our mempolicy while show_numa_maps()
 	 * is using it.
@@ -635,13 +644,13 @@ static long do_set_mempolicy(unsigned short mode, unsigned short flags,
 	if (mm)
 		down_write(&mm->mmap_sem);
 	task_lock(current);
-	ret = mpol_set_nodemask(new, nodes);
+	ret = mpol_set_nodemask(new, nodes, scratch);
 	if (ret) {
 		task_unlock(current);
 		if (mm)
 			up_write(&mm->mmap_sem);
 		mpol_put(new);
-		return ret;
+		goto out;
 	}
 	old = current->mempolicy;
 	current->mempolicy = new;
@@ -654,7 +663,10 @@ static long do_set_mempolicy(unsigned short mode, unsigned short flags,
 		up_write(&mm->mmap_sem);
 
 	mpol_put(old);
-	return 0;
+	ret = 0;
+out:
+	NODEMASK_SCRATCH_FREE(scratch);
+	return ret;
 }
 
 /*
@@ -1014,12 +1026,20 @@ static long do_mbind(unsigned long start, unsigned long len,
 		if (err)
 			return err;
 	}
-	down_write(&mm->mmap_sem);
-	task_lock(current);
-	err = mpol_set_nodemask(new, nmask);
-	task_unlock(current);
+	{
+		NODEMASK_SCRATCH(scratch);
+		if (scratch) {
+			down_write(&mm->mmap_sem);
+			task_lock(current);
+			err = mpol_set_nodemask(new, nmask, scratch);
+			task_unlock(current);
+			if (err)
+				up_write(&mm->mmap_sem);
+		} else
+			err = -ENOMEM;
+		NODEMASK_SCRATCH_FREE(scratch);
+	}
 	if (err) {
-		up_write(&mm->mmap_sem);
 		mpol_put(new);
 		return err;
 	}
@@ -1891,6 +1911,7 @@ restart:
  * Install non-NULL @mpol in inode's shared policy rb-tree.
  * On entry, the current task has a reference on a non-NULL @mpol.
  * This must be released on exit.
+ * This is called at get_inode() time, so GFP_KERNEL can be used.
  */
 void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol)
 {
@@ -1902,19 +1923,24 @@ void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol)
 	if (mpol) {
 		struct vm_area_struct pvma;
 		struct mempolicy *new;
+		NODEMASK_SCRATCH(scratch);
 
+		if (!scratch)
+			return;
 		/* contextualize the tmpfs mount point mempolicy */
 		new = mpol_new(mpol->mode, mpol->flags, &mpol->w.user_nodemask);
 		if (IS_ERR(new)) {
 			mpol_put(mpol);	/* drop our ref on sb mpol */
+			NODEMASK_SCRATCH_FREE(scratch);
 			return;		/* no valid nodemask intersection */
 		}
 
 		task_lock(current);
-		ret = mpol_set_nodemask(new, &mpol->w.user_nodemask);
+		ret = mpol_set_nodemask(new, &mpol->w.user_nodemask, scratch);
 		task_unlock(current);
 		mpol_put(mpol);	/* drop our ref on sb mpol */
 		if (ret) {
+			NODEMASK_SCRATCH_FREE(scratch);
 			mpol_put(new);
 			return;
 		}
@@ -1924,6 +1950,7 @@ void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol)
 		pvma.vm_end = TASK_SIZE;	/* policy covers entire file */
 		mpol_set_shared_policy(sp, &pvma, new); /* adds ref */
 		mpol_put(new);			/* drop initial ref */
+		NODEMASK_SCRATCH_FREE(scratch);
 	}
 }
 
@@ -2140,13 +2167,18 @@ int mpol_parse_str(char *str, struct mempolicy **mpol, int no_context)
 			err = 1;
 		else {
 			int ret;
-
-			task_lock(current);
-			ret = mpol_set_nodemask(new, &nodes);
-			task_unlock(current);
-			if (ret)
+			NODEMASK_SCRATCH(scratch);
+			if (scratch) {
+				task_lock(current);
+				ret = mpol_set_nodemask(new, &nodes, scratch);
+				task_unlock(current);
+			} else
+				ret = -ENOMEM;
+			NODEMASK_SCRATCH_FREE(scratch);
+			if (ret) {
 				err = 1;
-			else if (no_context) {
+				mpol_put(new);
+			} else if (no_context) {
 				/* save for contextualization */
 				new->w.user_nodemask = nodes;
 			}
diff --git a/mm/mempool.c b/mm/mempool.c
index a46eb1b4bb66..32e75d400503 100644
--- a/mm/mempool.c
+++ b/mm/mempool.c
@@ -303,14 +303,14 @@ EXPORT_SYMBOL(mempool_free_slab);
  */
 void *mempool_kmalloc(gfp_t gfp_mask, void *pool_data)
 {
-	size_t size = (size_t)(long)pool_data;
+	size_t size = (size_t)pool_data;
 	return kmalloc(size, gfp_mask);
 }
 EXPORT_SYMBOL(mempool_kmalloc);
 
 void *mempool_kzalloc(gfp_t gfp_mask, void *pool_data)
 {
-	size_t size = (size_t) pool_data;
+	size_t size = (size_t)pool_data;
 	return kzalloc(size, gfp_mask);
 }
 EXPORT_SYMBOL(mempool_kzalloc);
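In these helpers pool_data carries the element size smuggled through the void * argument, hence the bare (size_t) cast. A minimal usage sketch (the pool of 16 elements of 256 bytes each is arbitrary):

	/* Create a mempool whose elements are 256-byte kmalloc() buffers;
	 * the size rides in through pool_data, matching the cast above. */
	mempool_t *pool = mempool_create(16, mempool_kmalloc, mempool_kfree,
					 (void *)(size_t)256);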
diff --git a/mm/mmap.c b/mm/mmap.c
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -88,9 +88,6 @@ int sysctl_overcommit_ratio = 50;	/* default is 50% */
 int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT;
 struct percpu_counter vm_committed_as;
 
-/* amount of vm to protect from userspace access */
-unsigned long mmap_min_addr = CONFIG_DEFAULT_MMAP_MIN_ADDR;
-
 /*
  * Check that a process has enough memory to allocate a new virtual
  * mapping. 0 means there is enough memory for the allocation to
diff --git a/mm/nommu.c b/mm/nommu.c
index 53cab10fece4..4bde489ec431 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -69,9 +69,6 @@ int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT;
 int sysctl_nr_trim_pages = CONFIG_NOMMU_INITIAL_TRIM_EXCESS;
 int heap_stack_gap = 0;
 
-/* amount of vm to protect from userspace access */
-unsigned long mmap_min_addr = CONFIG_DEFAULT_MMAP_MIN_ADDR;
-
 atomic_long_t mmap_pages_allocated;
 
 EXPORT_SYMBOL(mem_map);
@@ -922,6 +919,10 @@ static int validate_mmap_request(struct file *file,
 	if (!file->f_op->read)
 		capabilities &= ~BDI_CAP_MAP_COPY;
 
+	/* The file shall have been opened with read permission. */
+	if (!(file->f_mode & FMODE_READ))
+		return -EACCES;
+
 	if (flags & MAP_SHARED) {
 		/* do checks for writing, appending and locking */
 		if ((prot & PROT_WRITE) &&
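With the new FMODE_READ check, a mapping request on a descriptor opened write-only fails up front instead of deeper in the paging code. An illustrative userspace view (hypothetical file name):

	#include <fcntl.h>
	#include <stdio.h>
	#include <sys/mman.h>

	int main(void)
	{
		int fd = open("data.bin", O_WRONLY);	/* hypothetical file */
		void *p = mmap(NULL, 4096, PROT_READ, MAP_PRIVATE, fd, 0);

		if (p == MAP_FAILED)
			perror("mmap");	/* expect: Permission denied */
		return 0;
	}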
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 175a67a78a99..a7b2460e922b 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -58,7 +58,6 @@ unsigned long badness(struct task_struct *p, unsigned long uptime)
 	unsigned long points, cpu_time, run_time;
 	struct mm_struct *mm;
 	struct task_struct *child;
-	int oom_adj;
 
 	task_lock(p);
 	mm = p->mm;
@@ -66,11 +65,6 @@ unsigned long badness(struct task_struct *p, unsigned long uptime)
 		task_unlock(p);
 		return 0;
 	}
-	oom_adj = mm->oom_adj;
-	if (oom_adj == OOM_DISABLE) {
-		task_unlock(p);
-		return 0;
-	}
 
 	/*
 	 * The memory size of the process is the basis for the badness.
@@ -154,15 +148,15 @@ unsigned long badness(struct task_struct *p, unsigned long uptime)
 		points /= 8;
 
 	/*
-	 * Adjust the score by oom_adj.
+	 * Adjust the score by oomkilladj.
 	 */
-	if (oom_adj) {
-		if (oom_adj > 0) {
+	if (p->oomkilladj) {
+		if (p->oomkilladj > 0) {
 			if (!points)
 				points = 1;
-			points <<= oom_adj;
+			points <<= p->oomkilladj;
 		} else
-			points >>= -(oom_adj);
+			points >>= -(p->oomkilladj);
 	}
 
 #ifdef DEBUG
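Because oomkilladj feeds a bit shift rather than a multiplication, each step doubles or halves the badness score. A worked example with illustrative numbers:

	unsigned long points = 1000;

	points <<= 4;	/* oomkilladj == 4: 1000 << 4 == 16000, 16x more likely */
	/* whereas oomkilladj == -4 gives 1000 >> 4 == 62, 16x less likely */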
@@ -257,8 +251,11 @@ static struct task_struct *select_bad_process(unsigned long *ppoints,
 			*ppoints = ULONG_MAX;
 		}
 
+		if (p->oomkilladj == OOM_DISABLE)
+			continue;
+
 		points = badness(p, uptime.tv_sec);
-		if (points > *ppoints) {
+		if (points > *ppoints || !chosen) {
 			chosen = p;
 			*ppoints = points;
 		}
@@ -307,7 +304,8 @@ static void dump_tasks(const struct mem_cgroup *mem)
 		}
 		printk(KERN_INFO "[%5d] %5d %5d %8lu %8lu %3d %3d %s\n",
 		       p->pid, __task_cred(p)->uid, p->tgid, mm->total_vm,
-		       get_mm_rss(mm), (int)task_cpu(p), mm->oom_adj, p->comm);
+		       get_mm_rss(mm), (int)task_cpu(p), p->oomkilladj,
+		       p->comm);
 		task_unlock(p);
 	} while_each_thread(g, p);
 }
@@ -325,8 +323,11 @@ static void __oom_kill_task(struct task_struct *p, int verbose)
 		return;
 	}
 
-	if (!p->mm)
+	if (!p->mm) {
+		WARN_ON(1);
+		printk(KERN_WARNING "tried to kill an mm-less task!\n");
 		return;
+	}
 
 	if (verbose)
 		printk(KERN_ERR "Killed process %d (%s)\n",
@@ -348,13 +349,28 @@ static int oom_kill_task(struct task_struct *p)
 	struct mm_struct *mm;
 	struct task_struct *g, *q;
 
-	task_lock(p);
 	mm = p->mm;
-	if (!mm || mm->oom_adj == OOM_DISABLE) {
-		task_unlock(p);
+
+	/* WARNING: mm may not be dereferenced since we did not obtain its
+	 * value from get_task_mm(p). This is OK since all we need to do is
+	 * compare mm to q->mm below.
+	 *
+	 * Furthermore, even if mm contains a non-NULL value, p->mm may
+	 * change to NULL at any time since we do not hold task_lock(p).
+	 * However, this is of no concern to us.
+	 */
+
+	if (mm == NULL)
 		return 1;
-	}
-	task_unlock(p);
+
+	/*
+	 * Don't kill the process if any threads are set to OOM_DISABLE
+	 */
+	do_each_thread(g, q) {
+		if (q->mm == mm && q->oomkilladj == OOM_DISABLE)
+			return 1;
+	} while_each_thread(g, q);
+
 	__oom_kill_task(p, 1);
 
 	/*
@@ -377,11 +393,10 @@ static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
 	struct task_struct *c;
 
 	if (printk_ratelimit()) {
-		task_lock(current);
 		printk(KERN_WARNING "%s invoked oom-killer: "
-			"gfp_mask=0x%x, order=%d, oom_adj=%d\n",
-			current->comm, gfp_mask, order,
-			current->mm ? current->mm->oom_adj : OOM_DISABLE);
+			"gfp_mask=0x%x, order=%d, oomkilladj=%d\n",
+			current->comm, gfp_mask, order, current->oomkilladj);
+		task_lock(current);
 		cpuset_print_task_mems_allowed(current);
 		task_unlock(current);
 		dump_stack();
@@ -394,9 +409,8 @@ static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
 	/*
 	 * If the task is already exiting, don't alarm the sysadmin or kill
 	 * its children or threads, just set TIF_MEMDIE so it can die quickly
-	 * if its mm is still attached.
 	 */
-	if (p->mm && (p->flags & PF_EXITING)) {
+	if (p->flags & PF_EXITING) {
 		__oom_kill_task(p, 0);
 		return 0;
 	}
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index d052abbe3063..5cc986eb9f6f 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2544,7 +2544,6 @@ static void build_zonelists(pg_data_t *pgdat)
 	prev_node = local_node;
 	nodes_clear(used_mask);
 
-	memset(node_load, 0, sizeof(node_load));
 	memset(node_order, 0, sizeof(node_order));
 	j = 0;
 
@@ -2653,6 +2652,9 @@ static int __build_all_zonelists(void *dummy)
 {
 	int nid;
 
+#ifdef CONFIG_NUMA
+	memset(node_load, 0, sizeof(node_load));
+#endif
 	for_each_online_node(nid) {
 		pg_data_t *pgdat = NODE_DATA(nid);
 
diff --git a/mm/percpu.c b/mm/percpu.c
index b70f2acd8853..5fe37842e0ea 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -8,12 +8,12 @@
  *
  * This is percpu allocator which can handle both static and dynamic
  * areas.  Percpu areas are allocated in chunks in vmalloc area.  Each
- * chunk is consisted of num_possible_cpus() units and the first chunk
- * is used for static percpu variables in the kernel image (special
- * boot time alloc/init handling necessary as these areas need to be
- * brought up before allocation services are running).  Unit grows as
- * necessary and all units grow or shrink in unison.  When a chunk is
- * filled up, another chunk is allocated.  ie. in vmalloc area
+ * chunk consists of nr_cpu_ids units and the first chunk is used
+ * for static percpu variables in the kernel image (special boot time
+ * alloc/init handling necessary as these areas need to be brought up
+ * before allocation services are running).  Unit grows as necessary
+ * and all units grow or shrink in unison.  When a chunk is filled up,
+ * another chunk is allocated.  ie. in vmalloc area
  *
  * c0 c1 c2
  * ------------------- ------------------- ------------
@@ -558,7 +558,7 @@ static void pcpu_free_area(struct pcpu_chunk *chunk, int freeme)
 static void pcpu_unmap(struct pcpu_chunk *chunk, int page_start, int page_end,
 		       bool flush_tlb)
 {
-	unsigned int last = num_possible_cpus() - 1;
+	unsigned int last = nr_cpu_ids - 1;
 	unsigned int cpu;
 
 	/* unmap must not be done on immutable chunk */
@@ -643,7 +643,7 @@ static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, int off, int size,
  */
 static int pcpu_map(struct pcpu_chunk *chunk, int page_start, int page_end)
 {
-	unsigned int last = num_possible_cpus() - 1;
+	unsigned int last = nr_cpu_ids - 1;
 	unsigned int cpu;
 	int err;
 
@@ -749,7 +749,7 @@ static struct pcpu_chunk *alloc_pcpu_chunk(void)
 	chunk->map[chunk->map_used++] = pcpu_unit_size;
 	chunk->page = chunk->page_ar;
 
-	chunk->vm = get_vm_area(pcpu_chunk_size, GFP_KERNEL);
+	chunk->vm = get_vm_area(pcpu_chunk_size, VM_ALLOC);
 	if (!chunk->vm) {
 		free_pcpu_chunk(chunk);
 		return NULL;
@@ -1067,9 +1067,9 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn,
 				PFN_UP(size_sum));
 
 	pcpu_unit_size = pcpu_unit_pages << PAGE_SHIFT;
-	pcpu_chunk_size = num_possible_cpus() * pcpu_unit_size;
+	pcpu_chunk_size = nr_cpu_ids * pcpu_unit_size;
 	pcpu_chunk_struct_size = sizeof(struct pcpu_chunk)
-		+ num_possible_cpus() * pcpu_unit_pages * sizeof(struct page *);
+		+ nr_cpu_ids * pcpu_unit_pages * sizeof(struct page *);
 
 	if (dyn_size < 0)
 		dyn_size = pcpu_unit_size - static_size - reserved_size;
@@ -1248,7 +1248,7 @@ ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size,
 	} else
 		pcpue_unit_size = max_t(size_t, pcpue_size, PCPU_MIN_UNIT_SIZE);
 
-	chunk_size = pcpue_unit_size * num_possible_cpus();
+	chunk_size = pcpue_unit_size * nr_cpu_ids;
 
 	pcpue_ptr = __alloc_bootmem_nopanic(chunk_size, PAGE_SIZE,
 					    __pa(MAX_DMA_ADDRESS));
@@ -1259,12 +1259,15 @@ ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size,
 	}
 
 	/* return the leftover and copy */
-	for_each_possible_cpu(cpu) {
+	for (cpu = 0; cpu < nr_cpu_ids; cpu++) {
 		void *ptr = pcpue_ptr + cpu * pcpue_unit_size;
 
-		free_bootmem(__pa(ptr + pcpue_size),
-			     pcpue_unit_size - pcpue_size);
-		memcpy(ptr, __per_cpu_load, static_size);
+		if (cpu_possible(cpu)) {
+			free_bootmem(__pa(ptr + pcpue_size),
+				     pcpue_unit_size - pcpue_size);
+			memcpy(ptr, __per_cpu_load, static_size);
+		} else
+			free_bootmem(__pa(ptr), pcpue_unit_size);
 	}
 
 	/* we're ready, commit */
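nr_cpu_ids is the highest possible CPU number plus one, while num_possible_cpus() counts the bits set in the possible mask; the two differ on machines with a sparse possible map, which is why chunk sizing above must use nr_cpu_ids and the units covering the holes are handed back to bootmem. A sketch of the distinction (assuming a box where only CPUs 0 and 2 are possible):

	/* Sparse possible map: CPUs 0 and 2 possible, CPU 1 a hole.
	 * Arrays indexed by cpu number then need 3 slots, not 2. */
	BUG_ON(nr_cpu_ids != 3);		/* highest possible cpu + 1 */
	BUG_ON(num_possible_cpus() != 2);	/* weight of the possible mask */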
diff --git a/mm/rmap.c b/mm/rmap.c
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -358,6 +358,7 @@ static int page_referenced_one(struct page *page,
 	 */
 	if (vma->vm_flags & VM_LOCKED) {
 		*mapcount = 1;	/* break early from loop */
+		*vm_flags |= VM_LOCKED;
 		goto out_unmap;
 	}
 
diff --git a/mm/vmscan.c b/mm/vmscan.c
index dea7abd31098..94e86dd6954c 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -630,9 +630,14 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 
 		referenced = page_referenced(page, 1,
 						sc->mem_cgroup, &vm_flags);
-		/* In active use or really unfreeable?  Activate it. */
+		/*
+		 * In active use or really unfreeable?  Activate it.
+		 * If a PG_mlocked page lost the isolation race,
+		 * try_to_unmap() moves it to the unevictable list.
+		 */
 		if (sc->order <= PAGE_ALLOC_COSTLY_ORDER &&
-					referenced && page_mapping_inuse(page))
+			referenced && page_mapping_inuse(page)
+			&& !(vm_flags & VM_LOCKED))
 			goto activate_locked;
 
 		/*