Diffstat (limited to 'mm')
-rw-r--r--   mm/Kconfig        6
-rw-r--r--   mm/mempolicy.c   84
-rw-r--r--   mm/mempool.c      4
-rw-r--r--   mm/mmap.c         3
-rw-r--r--   mm/nommu.c        7
-rw-r--r--   mm/oom_kill.c    64
-rw-r--r--   mm/page_alloc.c   4
-rw-r--r--   mm/percpu.c      35
-rw-r--r--   mm/rmap.c         1
-rw-r--r--   mm/vmscan.c       9
10 files changed, 136 insertions, 81 deletions
diff --git a/mm/Kconfig b/mm/Kconfig
index c948d4ca8bde..fe5f674d7a7d 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -225,9 +225,9 @@ config DEFAULT_MMAP_MIN_ADDR
 	  For most ia64, ppc64 and x86 users with lots of address space
 	  a value of 65536 is reasonable and should cause no problems.
 	  On arm and other archs it should not be higher than 32768.
-	  Programs which use vm86 functionality would either need additional
-	  permissions from either the LSM or the capabilities module or have
-	  this protection disabled.
+	  Programs which use vm86 functionality or have some need to map
+	  this low address space will need CAP_SYS_RAWIO or disable this
+	  protection by setting the value to 0.
 
 	  This value can be changed after boot using the
 	  /proc/sys/vm/mmap_min_addr tunable.
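
Note: the help text above describes the policy only in prose. As a rough C illustration of the gate it implies (this is not the actual LSM/capability code, the helper name is invented, and the real check lives in the security layer), a mapping request below mmap_min_addr is refused unless the caller holds CAP_SYS_RAWIO, and setting the sysctl to 0 removes the floor entirely:

#include <linux/capability.h>
#include <linux/mm.h>

/* Hypothetical sketch only: mappings below mmap_min_addr need
 * CAP_SYS_RAWIO; a sysctl value of 0 makes the floor vanish. */
static int example_low_mmap_allowed(unsigned long addr)
{
	if (addr >= mmap_min_addr)	/* request is above the floor */
		return 0;
	if (capable(CAP_SYS_RAWIO))	/* privileged callers may map low */
		return 0;
	return -EPERM;
}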
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index e08e2c4da63a..7dd9d9f80694 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -191,25 +191,27 @@ static int mpol_new_bind(struct mempolicy *pol, const nodemask_t *nodes)
  * Must be called holding task's alloc_lock to protect task's mems_allowed
  * and mempolicy.  May also be called holding the mmap_semaphore for write.
  */
-static int mpol_set_nodemask(struct mempolicy *pol, const nodemask_t *nodes)
+static int mpol_set_nodemask(struct mempolicy *pol,
+		const nodemask_t *nodes, struct nodemask_scratch *nsc)
 {
-	nodemask_t cpuset_context_nmask;
 	int ret;
 
 	/* if mode is MPOL_DEFAULT, pol is NULL. This is right. */
 	if (pol == NULL)
 		return 0;
+	/* Check N_HIGH_MEMORY */
+	nodes_and(nsc->mask1,
+		  cpuset_current_mems_allowed, node_states[N_HIGH_MEMORY]);
 
 	VM_BUG_ON(!nodes);
 	if (pol->mode == MPOL_PREFERRED && nodes_empty(*nodes))
 		nodes = NULL;	/* explicit local allocation */
 	else {
 		if (pol->flags & MPOL_F_RELATIVE_NODES)
-			mpol_relative_nodemask(&cpuset_context_nmask, nodes,
-					       &cpuset_current_mems_allowed);
+			mpol_relative_nodemask(&nsc->mask2, nodes, &nsc->mask1);
 		else
-			nodes_and(cpuset_context_nmask, *nodes,
-				  cpuset_current_mems_allowed);
+			nodes_and(nsc->mask2, *nodes, nsc->mask1);
+
 		if (mpol_store_user_nodemask(pol))
 			pol->w.user_nodemask = *nodes;
 		else
@@ -217,8 +219,10 @@ static int mpol_set_nodemask(struct mempolicy *pol, const nodemask_t *nodes)
 						cpuset_current_mems_allowed;
 	}
 
-	ret = mpol_ops[pol->mode].create(pol,
-				nodes ? &cpuset_context_nmask : NULL);
+	if (nodes)
+		ret = mpol_ops[pol->mode].create(pol, &nsc->mask2);
+	else
+		ret = mpol_ops[pol->mode].create(pol, NULL);
 	return ret;
 }
 
@@ -620,12 +624,17 @@ static long do_set_mempolicy(unsigned short mode, unsigned short flags,
 {
 	struct mempolicy *new, *old;
 	struct mm_struct *mm = current->mm;
+	NODEMASK_SCRATCH(scratch);
 	int ret;
 
-	new = mpol_new(mode, flags, nodes);
-	if (IS_ERR(new))
-		return PTR_ERR(new);
+	if (!scratch)
+		return -ENOMEM;
 
+	new = mpol_new(mode, flags, nodes);
+	if (IS_ERR(new)) {
+		ret = PTR_ERR(new);
+		goto out;
+	}
 	/*
 	 * prevent changing our mempolicy while show_numa_maps()
 	 * is using it.
@@ -635,13 +644,13 @@ static long do_set_mempolicy(unsigned short mode, unsigned short flags,
 	if (mm)
 		down_write(&mm->mmap_sem);
 	task_lock(current);
-	ret = mpol_set_nodemask(new, nodes);
+	ret = mpol_set_nodemask(new, nodes, scratch);
 	if (ret) {
 		task_unlock(current);
 		if (mm)
 			up_write(&mm->mmap_sem);
 		mpol_put(new);
-		return ret;
+		goto out;
 	}
 	old = current->mempolicy;
 	current->mempolicy = new;
@@ -654,7 +663,10 @@ static long do_set_mempolicy(unsigned short mode, unsigned short flags,
 		up_write(&mm->mmap_sem);
 
 	mpol_put(old);
-	return 0;
+	ret = 0;
+out:
+	NODEMASK_SCRATCH_FREE(scratch);
+	return ret;
 }
 
 /*
@@ -1014,12 +1026,20 @@ static long do_mbind(unsigned long start, unsigned long len,
 		if (err)
 			return err;
 	}
-	down_write(&mm->mmap_sem);
-	task_lock(current);
-	err = mpol_set_nodemask(new, nmask);
-	task_unlock(current);
+	{
+		NODEMASK_SCRATCH(scratch);
+		if (scratch) {
+			down_write(&mm->mmap_sem);
+			task_lock(current);
+			err = mpol_set_nodemask(new, nmask, scratch);
+			task_unlock(current);
+			if (err)
+				up_write(&mm->mmap_sem);
+		} else
+			err = -ENOMEM;
+		NODEMASK_SCRATCH_FREE(scratch);
+	}
 	if (err) {
-		up_write(&mm->mmap_sem);
 		mpol_put(new);
 		return err;
 	}
@@ -1891,6 +1911,7 @@ restart:
  * Install non-NULL @mpol in inode's shared policy rb-tree.
  * On entry, the current task has a reference on a non-NULL @mpol.
  * This must be released on exit.
+ * This is called at get_inode() calls and we can use GFP_KERNEL.
  */
 void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol)
 {
@@ -1902,19 +1923,24 @@ void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol)
 	if (mpol) {
 		struct vm_area_struct pvma;
 		struct mempolicy *new;
+		NODEMASK_SCRATCH(scratch);
 
+		if (!scratch)
+			return;
 		/* contextualize the tmpfs mount point mempolicy */
 		new = mpol_new(mpol->mode, mpol->flags, &mpol->w.user_nodemask);
 		if (IS_ERR(new)) {
 			mpol_put(mpol);	/* drop our ref on sb mpol */
+			NODEMASK_SCRATCH_FREE(scratch);
 			return;		/* no valid nodemask intersection */
 		}
 
 		task_lock(current);
-		ret = mpol_set_nodemask(new, &mpol->w.user_nodemask);
+		ret = mpol_set_nodemask(new, &mpol->w.user_nodemask, scratch);
 		task_unlock(current);
 		mpol_put(mpol);	/* drop our ref on sb mpol */
 		if (ret) {
+			NODEMASK_SCRATCH_FREE(scratch);
 			mpol_put(new);
 			return;
 		}
@@ -1924,6 +1950,7 @@ void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol)
 		pvma.vm_end = TASK_SIZE;	/* policy covers entire file */
 		mpol_set_shared_policy(sp, &pvma, new); /* adds ref */
 		mpol_put(new);			/* drop initial ref */
+		NODEMASK_SCRATCH_FREE(scratch);
 	}
 }
 
@@ -2140,13 +2167,18 @@ int mpol_parse_str(char *str, struct mempolicy **mpol, int no_context)
 		err = 1;
 	else {
 		int ret;
-
-		task_lock(current);
-		ret = mpol_set_nodemask(new, &nodes);
-		task_unlock(current);
-		if (ret)
+		NODEMASK_SCRATCH(scratch);
+		if (scratch) {
+			task_lock(current);
+			ret = mpol_set_nodemask(new, &nodes, scratch);
+			task_unlock(current);
+		} else
+			ret = -ENOMEM;
+		NODEMASK_SCRATCH_FREE(scratch);
+		if (ret) {
 			err = 1;
-		else if (no_context) {
+			mpol_put(new);
+		} else if (no_context) {
 			/* save for contextualization */
 			new->w.user_nodemask = nodes;
 		}
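
Note: the common pattern in the mempolicy hunks above is that mpol_set_nodemask() now takes a caller-supplied struct nodemask_scratch, and NODEMASK_SCRATCH() may be kmalloc-backed (hence NULL on failure) when nodemasks are large. A condensed sketch of that calling convention; the wrapper function is invented, while the real call sites are do_set_mempolicy(), do_mbind(), mpol_shared_policy_init() and mpol_parse_str():

/* Sketch of the NODEMASK_SCRATCH calling convention introduced above.
 * The check-for-NULL / free-on-every-path discipline is what the hunks
 * add to each real caller. */
static int example_apply_nodemask(struct mempolicy *pol, const nodemask_t *nodes)
{
	NODEMASK_SCRATCH(scratch);	/* provides scratch->mask1 and ->mask2 */
	int err;

	if (!scratch)
		return -ENOMEM;

	task_lock(current);
	err = mpol_set_nodemask(pol, nodes, scratch);
	task_unlock(current);

	NODEMASK_SCRATCH_FREE(scratch);
	return err;
}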
diff --git a/mm/mempool.c b/mm/mempool.c
index a46eb1b4bb66..32e75d400503 100644
--- a/mm/mempool.c
+++ b/mm/mempool.c
@@ -303,14 +303,14 @@ EXPORT_SYMBOL(mempool_free_slab);
  */
 void *mempool_kmalloc(gfp_t gfp_mask, void *pool_data)
 {
-	size_t size = (size_t)(long)pool_data;
+	size_t size = (size_t)pool_data;
 	return kmalloc(size, gfp_mask);
 }
 EXPORT_SYMBOL(mempool_kmalloc);
 
 void *mempool_kzalloc(gfp_t gfp_mask, void *pool_data)
 {
-	size_t size = (size_t) pool_data;
+	size_t size = (size_t)pool_data;
 	return kzalloc(size, gfp_mask);
 }
 EXPORT_SYMBOL(mempool_kzalloc);
diff --git a/mm/mmap.c b/mm/mmap.c
index 34579b23ebd5..8101de490c73 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -88,9 +88,6 @@ int sysctl_overcommit_ratio = 50; /* default is 50% */
 int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT;
 struct percpu_counter vm_committed_as;
 
-/* amount of vm to protect from userspace access */
-unsigned long mmap_min_addr = CONFIG_DEFAULT_MMAP_MIN_ADDR;
-
 /*
  * Check that a process has enough memory to allocate a new virtual
  * mapping. 0 means there is enough memory for the allocation to
diff --git a/mm/nommu.c b/mm/nommu.c
index 53cab10fece4..4bde489ec431 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -69,9 +69,6 @@ int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT;
 int sysctl_nr_trim_pages = CONFIG_NOMMU_INITIAL_TRIM_EXCESS;
 int heap_stack_gap = 0;
 
-/* amount of vm to protect from userspace access */
-unsigned long mmap_min_addr = CONFIG_DEFAULT_MMAP_MIN_ADDR;
-
 atomic_long_t mmap_pages_allocated;
 
 EXPORT_SYMBOL(mem_map);
@@ -922,6 +919,10 @@ static int validate_mmap_request(struct file *file,
 		if (!file->f_op->read)
 			capabilities &= ~BDI_CAP_MAP_COPY;
 
+		/* The file shall have been opened with read permission. */
+		if (!(file->f_mode & FMODE_READ))
+			return -EACCES;
+
 		if (flags & MAP_SHARED) {
 			/* do checks for writing, appending and locking */
 			if ((prot & PROT_WRITE) &&
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 175a67a78a99..a7b2460e922b 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -58,7 +58,6 @@ unsigned long badness(struct task_struct *p, unsigned long uptime)
 	unsigned long points, cpu_time, run_time;
 	struct mm_struct *mm;
 	struct task_struct *child;
-	int oom_adj;
 
 	task_lock(p);
 	mm = p->mm;
@@ -66,11 +65,6 @@ unsigned long badness(struct task_struct *p, unsigned long uptime)
 		task_unlock(p);
 		return 0;
 	}
-	oom_adj = mm->oom_adj;
-	if (oom_adj == OOM_DISABLE) {
-		task_unlock(p);
-		return 0;
-	}
 
 	/*
 	 * The memory size of the process is the basis for the badness.
@@ -154,15 +148,15 @@ unsigned long badness(struct task_struct *p, unsigned long uptime)
 	points /= 8;
 
 	/*
-	 * Adjust the score by oom_adj.
+	 * Adjust the score by oomkilladj.
 	 */
-	if (oom_adj) {
-		if (oom_adj > 0) {
+	if (p->oomkilladj) {
+		if (p->oomkilladj > 0) {
 			if (!points)
 				points = 1;
-			points <<= oom_adj;
+			points <<= p->oomkilladj;
 		} else
-			points >>= -(oom_adj);
+			points >>= -(p->oomkilladj);
 	}
 
 #ifdef DEBUG
@@ -257,8 +251,11 @@ static struct task_struct *select_bad_process(unsigned long *ppoints,
 			*ppoints = ULONG_MAX;
 		}
 
+		if (p->oomkilladj == OOM_DISABLE)
+			continue;
+
 		points = badness(p, uptime.tv_sec);
-		if (points > *ppoints) {
+		if (points > *ppoints || !chosen) {
 			chosen = p;
 			*ppoints = points;
 		}
@@ -307,7 +304,8 @@ static void dump_tasks(const struct mem_cgroup *mem)
 		}
 		printk(KERN_INFO "[%5d] %5d %5d %8lu %8lu %3d %3d %s\n",
 		       p->pid, __task_cred(p)->uid, p->tgid, mm->total_vm,
-		       get_mm_rss(mm), (int)task_cpu(p), mm->oom_adj, p->comm);
+		       get_mm_rss(mm), (int)task_cpu(p), p->oomkilladj,
+		       p->comm);
 		task_unlock(p);
 	} while_each_thread(g, p);
 }
@@ -325,8 +323,11 @@ static void __oom_kill_task(struct task_struct *p, int verbose)
 		return;
 	}
 
-	if (!p->mm)
+	if (!p->mm) {
+		WARN_ON(1);
+		printk(KERN_WARNING "tried to kill an mm-less task!\n");
 		return;
+	}
 
 	if (verbose)
 		printk(KERN_ERR "Killed process %d (%s)\n",
@@ -348,13 +349,28 @@ static int oom_kill_task(struct task_struct *p)
 	struct mm_struct *mm;
 	struct task_struct *g, *q;
 
-	task_lock(p);
 	mm = p->mm;
-	if (!mm || mm->oom_adj == OOM_DISABLE) {
-		task_unlock(p);
+
+	/* WARNING: mm may not be dereferenced since we did not obtain its
+	 * value from get_task_mm(p).  This is OK since all we need to do is
+	 * compare mm to q->mm below.
+	 *
+	 * Furthermore, even if mm contains a non-NULL value, p->mm may
+	 * change to NULL at any time since we do not hold task_lock(p).
+	 * However, this is of no concern to us.
+	 */
+
+	if (mm == NULL)
 		return 1;
-	}
-	task_unlock(p);
+
+	/*
+	 * Don't kill the process if any threads are set to OOM_DISABLE
+	 */
+	do_each_thread(g, q) {
+		if (q->mm == mm && q->oomkilladj == OOM_DISABLE)
+			return 1;
+	} while_each_thread(g, q);
+
 	__oom_kill_task(p, 1);
 
 	/*
@@ -377,11 +393,10 @@ static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
 	struct task_struct *c;
 
 	if (printk_ratelimit()) {
-		task_lock(current);
 		printk(KERN_WARNING "%s invoked oom-killer: "
-			"gfp_mask=0x%x, order=%d, oom_adj=%d\n",
-			current->comm, gfp_mask, order,
-			current->mm ? current->mm->oom_adj : OOM_DISABLE);
+			"gfp_mask=0x%x, order=%d, oomkilladj=%d\n",
+			current->comm, gfp_mask, order, current->oomkilladj);
+		task_lock(current);
 		cpuset_print_task_mems_allowed(current);
 		task_unlock(current);
 		dump_stack();
@@ -394,9 +409,8 @@ static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
 	/*
 	 * If the task is already exiting, don't alarm the sysadmin or kill
 	 * its children or threads, just set TIF_MEMDIE so it can die quickly
-	 * if its mm is still attached.
 	 */
-	if (p->mm && (p->flags & PF_EXITING)) {
+	if (p->flags & PF_EXITING) {
 		__oom_kill_task(p, 0);
 		return 0;
 	}
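
Note: with oom_adj moved back from mm_struct to the per-task oomkilladj, the badness adjustment restored above is a plain bit shift: each positive unit doubles the score, each negative unit halves it. A standalone sketch of that arithmetic (the helper name is invented; the logic mirrors the badness() hunk above):

/* Sketch of the restored oomkilladj scaling used in badness(). */
static unsigned long example_scale_badness(unsigned long points, int oomkilladj)
{
	if (!oomkilladj)
		return points;
	if (oomkilladj > 0) {
		if (!points)
			points = 1;	/* make sure a positive boost is visible */
		return points << oomkilladj;
	}
	return points >> -oomkilladj;
}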
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index d052abbe3063..5cc986eb9f6f 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2544,7 +2544,6 @@ static void build_zonelists(pg_data_t *pgdat)
 	prev_node = local_node;
 	nodes_clear(used_mask);
 
-	memset(node_load, 0, sizeof(node_load));
 	memset(node_order, 0, sizeof(node_order));
 	j = 0;
 
@@ -2653,6 +2652,9 @@ static int __build_all_zonelists(void *dummy)
 {
 	int nid;
 
+#ifdef CONFIG_NUMA
+	memset(node_load, 0, sizeof(node_load));
+#endif
 	for_each_online_node(nid) {
 		pg_data_t *pgdat = NODE_DATA(nid);
 
diff --git a/mm/percpu.c b/mm/percpu.c
index b70f2acd8853..5fe37842e0ea 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -8,12 +8,12 @@
  *
  * This is percpu allocator which can handle both static and dynamic
  * areas.  Percpu areas are allocated in chunks in vmalloc area.  Each
- * chunk is consisted of num_possible_cpus() units and the first chunk
- * is used for static percpu variables in the kernel image (special
- * boot time alloc/init handling necessary as these areas need to be
- * brought up before allocation services are running).  Unit grows as
- * necessary and all units grow or shrink in unison.  When a chunk is
- * filled up, another chunk is allocated.  ie. in vmalloc area
+ * chunk is consisted of nr_cpu_ids units and the first chunk is used
+ * for static percpu variables in the kernel image (special boot time
+ * alloc/init handling necessary as these areas need to be brought up
+ * before allocation services are running).  Unit grows as necessary
+ * and all units grow or shrink in unison.  When a chunk is filled up,
+ * another chunk is allocated.  ie. in vmalloc area
  *
  * c0                           c1                         c2
  * -------------------          -------------------        ------------
@@ -558,7 +558,7 @@ static void pcpu_free_area(struct pcpu_chunk *chunk, int freeme)
 static void pcpu_unmap(struct pcpu_chunk *chunk, int page_start, int page_end,
 		       bool flush_tlb)
 {
-	unsigned int last = num_possible_cpus() - 1;
+	unsigned int last = nr_cpu_ids - 1;
 	unsigned int cpu;
 
 	/* unmap must not be done on immutable chunk */
@@ -643,7 +643,7 @@ static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, int off, int size,
  */
 static int pcpu_map(struct pcpu_chunk *chunk, int page_start, int page_end)
 {
-	unsigned int last = num_possible_cpus() - 1;
+	unsigned int last = nr_cpu_ids - 1;
 	unsigned int cpu;
 	int err;
 
@@ -749,7 +749,7 @@ static struct pcpu_chunk *alloc_pcpu_chunk(void)
 	chunk->map[chunk->map_used++] = pcpu_unit_size;
 	chunk->page = chunk->page_ar;
 
-	chunk->vm = get_vm_area(pcpu_chunk_size, GFP_KERNEL);
+	chunk->vm = get_vm_area(pcpu_chunk_size, VM_ALLOC);
 	if (!chunk->vm) {
 		free_pcpu_chunk(chunk);
 		return NULL;
@@ -1067,9 +1067,9 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn,
 			       PFN_UP(size_sum));
 
 	pcpu_unit_size = pcpu_unit_pages << PAGE_SHIFT;
-	pcpu_chunk_size = num_possible_cpus() * pcpu_unit_size;
+	pcpu_chunk_size = nr_cpu_ids * pcpu_unit_size;
 	pcpu_chunk_struct_size = sizeof(struct pcpu_chunk)
-		+ num_possible_cpus() * pcpu_unit_pages * sizeof(struct page *);
+		+ nr_cpu_ids * pcpu_unit_pages * sizeof(struct page *);
 
 	if (dyn_size < 0)
 		dyn_size = pcpu_unit_size - static_size - reserved_size;
@@ -1248,7 +1248,7 @@ ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size,
 	} else
 		pcpue_unit_size = max_t(size_t, pcpue_size, PCPU_MIN_UNIT_SIZE);
 
-	chunk_size = pcpue_unit_size * num_possible_cpus();
+	chunk_size = pcpue_unit_size * nr_cpu_ids;
 
 	pcpue_ptr = __alloc_bootmem_nopanic(chunk_size, PAGE_SIZE,
 					    __pa(MAX_DMA_ADDRESS));
@@ -1259,12 +1259,15 @@ ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size,
 	}
 
 	/* return the leftover and copy */
-	for_each_possible_cpu(cpu) {
+	for (cpu = 0; cpu < nr_cpu_ids; cpu++) {
 		void *ptr = pcpue_ptr + cpu * pcpue_unit_size;
 
-		free_bootmem(__pa(ptr + pcpue_size),
-			     pcpue_unit_size - pcpue_size);
-		memcpy(ptr, __per_cpu_load, static_size);
+		if (cpu_possible(cpu)) {
+			free_bootmem(__pa(ptr + pcpue_size),
+				     pcpue_unit_size - pcpue_size);
+			memcpy(ptr, __per_cpu_load, static_size);
+		} else
+			free_bootmem(__pa(ptr), pcpue_unit_size);
 	}
 
 	/* we're ready, commit */
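
Note: switching from num_possible_cpus() to nr_cpu_ids matters when the possible-CPU map has holes, because a percpu unit is indexed by CPU number; the chunk must span every id up to the highest possible CPU, and the slots belonging to impossible CPUs are simply handed back to bootmem, as the loop above now does. A small userspace-style toy with invented values, showing only the sizing arithmetic:

/* Toy illustration (values invented): with possible CPUs {0, 1, 3},
 * num_possible_cpus() == 3 but nr_cpu_ids == 4, so sizing the chunk by
 * the former leaves no unit slot for CPU 3. */
#include <stdio.h>

int main(void)
{
	const int possible[] = { 1, 1, 0, 1 };	/* CPU 2 is a hole */
	const int nr_cpu_ids = 4;
	const long unit_size = 64 * 1024;	/* made-up per-unit size */
	int cpu, num_possible = 0;

	for (cpu = 0; cpu < nr_cpu_ids; cpu++)
		num_possible += possible[cpu];

	printf("num_possible_cpus() sizing: %ld bytes\n",
	       num_possible * unit_size);	/* too small */
	printf("nr_cpu_ids sizing:          %ld bytes\n",
	       nr_cpu_ids * unit_size);		/* covers CPU 3's slot */
	return 0;
}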
diff --git a/mm/rmap.c b/mm/rmap.c
index 836c6c63e1f2..0895b5c7cbff 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -358,6 +358,7 @@ static int page_referenced_one(struct page *page,
 	 */
 	if (vma->vm_flags & VM_LOCKED) {
 		*mapcount = 1;	/* break early from loop */
+		*vm_flags |= VM_LOCKED;
 		goto out_unmap;
 	}
 
diff --git a/mm/vmscan.c b/mm/vmscan.c
index dea7abd31098..94e86dd6954c 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -630,9 +630,14 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 
 		referenced = page_referenced(page, 1,
 						sc->mem_cgroup, &vm_flags);
-		/* In active use or really unfreeable?  Activate it. */
+		/*
+		 * In active use or really unfreeable?  Activate it.
+		 * If page which have PG_mlocked lost isoltation race,
+		 * try_to_unmap moves it to unevictable list
+		 */
 		if (sc->order <= PAGE_ALLOC_COSTLY_ORDER &&
-					referenced && page_mapping_inuse(page))
+					referenced && page_mapping_inuse(page)
+					&& !(vm_flags & VM_LOCKED))
 			goto activate_locked;
 
 		/*
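
Note: the rmap and vmscan hunks work as a pair: page_referenced_one() now reports VM_LOCKED through *vm_flags, and shrink_page_list() uses it so an mlocked page that lost the isolation race is not promoted to the active list but falls through to try_to_unmap(), which moves it to the unevictable list. A condensed, non-verbatim sketch of that reclaim decision (the helper is hypothetical; the condition mirrors the vmscan hunk above):

/* Condensed sketch of the decision after the two hunks above. */
static bool example_should_activate(struct page *page, struct scan_control *sc)
{
	unsigned long vm_flags;
	int referenced = page_referenced(page, 1, sc->mem_cgroup, &vm_flags);

	/* keep mlocked pages off the active list so try_to_unmap() can
	 * park them on the unevictable list instead */
	return sc->order <= PAGE_ALLOC_COSTLY_ORDER &&
	       referenced && page_mapping_inuse(page) &&
	       !(vm_flags & VM_LOCKED);
}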