Diffstat (limited to 'mm')
-rw-r--r--  mm/Kconfig                 4
-rw-r--r--  mm/compaction.c           20
-rw-r--r--  mm/fremap.c               28
-rw-r--r--  mm/mempolicy.c            92
-rw-r--r--  mm/migrate.c              43
-rw-r--r--  mm/mmu_context.c           3
-rw-r--r--  mm/percpu.c              208
-rw-r--r--  mm/process_vm_access.c    26
-rw-r--r--  mm/rmap.c                 15
9 files changed, 225 insertions, 214 deletions
diff --git a/mm/Kconfig b/mm/Kconfig
index 2d9f1504d75e..2888024e0b0a 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -575,5 +575,5 @@ config PGTABLE_MAPPING
 	  then you should select this. This causes zsmalloc to use page table
 	  mapping rather than copying for object mapping.
 
-	  You can check speed with zsmalloc benchmark[1].
-	  [1] https://github.com/spartacus06/zsmalloc
+	  You can check speed with zsmalloc benchmark:
+	  https://github.com/spartacus06/zsmapbench
diff --git a/mm/compaction.c b/mm/compaction.c
index b48c5259ea33..918577595ea8 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -251,7 +251,6 @@ static unsigned long isolate_freepages_block(struct compact_control *cc,
 {
 	int nr_scanned = 0, total_isolated = 0;
 	struct page *cursor, *valid_page = NULL;
-	unsigned long nr_strict_required = end_pfn - blockpfn;
 	unsigned long flags;
 	bool locked = false;
 
@@ -264,11 +263,12 @@ static unsigned long isolate_freepages_block(struct compact_control *cc,
 
 		nr_scanned++;
 		if (!pfn_valid_within(blockpfn))
-			continue;
+			goto isolate_fail;
+
 		if (!valid_page)
 			valid_page = page;
 		if (!PageBuddy(page))
-			continue;
+			goto isolate_fail;
 
 		/*
 		 * The zone lock must be held to isolate freepages.
@@ -289,12 +289,10 @@ static unsigned long isolate_freepages_block(struct compact_control *cc,
 
 		/* Recheck this is a buddy page under lock */
 		if (!PageBuddy(page))
-			continue;
+			goto isolate_fail;
 
 		/* Found a free page, break it into order-0 pages */
 		isolated = split_free_page(page);
-		if (!isolated && strict)
-			break;
 		total_isolated += isolated;
 		for (i = 0; i < isolated; i++) {
 			list_add(&page->lru, freelist);
@@ -305,7 +303,15 @@ static unsigned long isolate_freepages_block(struct compact_control *cc,
 		if (isolated) {
 			blockpfn += isolated - 1;
 			cursor += isolated - 1;
+			continue;
 		}
+
+isolate_fail:
+		if (strict)
+			break;
+		else
+			continue;
+
 	}
 
 	trace_mm_compaction_isolate_freepages(nr_scanned, total_isolated);
@@ -315,7 +321,7 @@ static unsigned long isolate_freepages_block(struct compact_control *cc,
 	 * pages requested were isolated. If there were any failures, 0 is
 	 * returned and CMA will fail.
 	 */
-	if (strict && nr_strict_required > total_isolated)
+	if (strict && blockpfn < end_pfn)
 		total_isolated = 0;
 
 	if (locked)
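
A standalone sketch (not kernel code; names such as page_is_free and isolate_block are invented) of the control flow the mm/compaction.c hunks above introduce: every failed pfn funnels through a single isolate_fail label, and in strict (CMA) mode success is now judged by whether the scan reached end_pfn rather than by the removed nr_strict_required count.

/* Minimal userspace illustration of the isolate_fail / strict-mode change. */
#include <stdbool.h>
#include <stdio.h>

/* pretend "free" status of 8 consecutive pages in the block */
static const bool page_is_free[8] = { 1, 1, 1, 0, 1, 1, 1, 1 };

static unsigned long isolate_block(unsigned long blockpfn,
				   unsigned long end_pfn, bool strict)
{
	unsigned long total_isolated = 0;

	for (; blockpfn < end_pfn; blockpfn++) {
		if (!page_is_free[blockpfn])	/* stands in for !PageBuddy() etc. */
			goto isolate_fail;

		total_isolated++;		/* "split" and take the page */
		continue;

isolate_fail:
		if (strict)
			break;			/* CMA needs the whole range */
		else
			continue;		/* compaction just skips the page */
	}

	/* strict mode: any early break leaves blockpfn short of end_pfn */
	if (strict && blockpfn < end_pfn)
		total_isolated = 0;
	return total_isolated;
}

int main(void)
{
	printf("non-strict: %lu pages\n", isolate_block(0, 8, false));	/* 7 */
	printf("strict:     %lu pages\n", isolate_block(0, 8, true));	/* 0 */
	return 0;
}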
diff --git a/mm/fremap.c b/mm/fremap.c
index bbc4d660221a..34feba60a17e 100644
--- a/mm/fremap.c
+++ b/mm/fremap.c
@@ -23,28 +23,44 @@
 
 #include "internal.h"
 
+static int mm_counter(struct page *page)
+{
+	return PageAnon(page) ? MM_ANONPAGES : MM_FILEPAGES;
+}
+
 static void zap_pte(struct mm_struct *mm, struct vm_area_struct *vma,
 			unsigned long addr, pte_t *ptep)
 {
 	pte_t pte = *ptep;
+	struct page *page;
+	swp_entry_t entry;
 
 	if (pte_present(pte)) {
-		struct page *page;
-
 		flush_cache_page(vma, addr, pte_pfn(pte));
 		pte = ptep_clear_flush(vma, addr, ptep);
 		page = vm_normal_page(vma, addr, pte);
 		if (page) {
 			if (pte_dirty(pte))
 				set_page_dirty(page);
+			update_hiwater_rss(mm);
+			dec_mm_counter(mm, mm_counter(page));
 			page_remove_rmap(page);
 			page_cache_release(page);
+		}
+	} else {	/* zap_pte() is not called when pte_none() */
+		if (!pte_file(pte)) {
 			update_hiwater_rss(mm);
-			dec_mm_counter(mm, MM_FILEPAGES);
+			entry = pte_to_swp_entry(pte);
+			if (non_swap_entry(entry)) {
+				if (is_migration_entry(entry)) {
+					page = migration_entry_to_page(entry);
+					dec_mm_counter(mm, mm_counter(page));
+				}
+			} else {
+				free_swap_and_cache(entry);
+				dec_mm_counter(mm, MM_SWAPENTS);
+			}
 		}
-	} else {
-		if (!pte_file(pte))
-			free_swap_and_cache(pte_to_swp_entry(pte));
 		pte_clear_not_present_full(mm, addr, ptep, 0);
 	}
 }
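
A toy userspace sketch (not kernel code; the enums and counter_for() below are invented) of the accounting rule the new zap_pte() above follows: a present pte is charged against the page's own type via mm_counter(), a plain swap entry against MM_SWAPENTS, and a migration entry against the type of the page it points at.

#include <stdio.h>

enum counter { ANONPAGES, FILEPAGES, SWAPENTS };
enum pte_kind { PTE_PRESENT_ANON, PTE_PRESENT_FILE, PTE_SWAP, PTE_MIGRATION_ANON };

/* which rss counter a zapped pte of this kind is charged against */
static enum counter counter_for(enum pte_kind k)
{
	switch (k) {
	case PTE_PRESENT_ANON:
	case PTE_MIGRATION_ANON:	/* migration entry: charge the target page's type */
		return ANONPAGES;
	case PTE_PRESENT_FILE:
		return FILEPAGES;
	case PTE_SWAP:
		return SWAPENTS;
	}
	return FILEPAGES;
}

int main(void)
{
	static const char *name[] = { "MM_ANONPAGES", "MM_FILEPAGES", "MM_SWAPENTS" };
	enum pte_kind kinds[] = {
		PTE_PRESENT_ANON, PTE_PRESENT_FILE, PTE_SWAP, PTE_MIGRATION_ANON
	};

	for (unsigned i = 0; i < sizeof(kinds) / sizeof(kinds[0]); i++)
		printf("pte kind %u -> dec %s\n", i, name[counter_for(kinds[i])]);
	return 0;
}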
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index ae3c8f3595d4..4755c8576942 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1556,10 +1556,10 @@ SYSCALL_DEFINE5(get_mempolicy, int __user *, policy,
 
 #ifdef CONFIG_COMPAT
 
-asmlinkage long compat_sys_get_mempolicy(int __user *policy,
-				     compat_ulong_t __user *nmask,
-				     compat_ulong_t maxnode,
-				     compat_ulong_t addr, compat_ulong_t flags)
+COMPAT_SYSCALL_DEFINE5(get_mempolicy, int __user *, policy,
+		       compat_ulong_t __user *, nmask,
+		       compat_ulong_t, maxnode,
+		       compat_ulong_t, addr, compat_ulong_t, flags)
 {
 	long err;
 	unsigned long __user *nm = NULL;
@@ -1586,8 +1586,8 @@ asmlinkage long compat_sys_get_mempolicy(int __user *policy,
 	return err;
 }
 
-asmlinkage long compat_sys_set_mempolicy(int mode, compat_ulong_t __user *nmask,
-				     compat_ulong_t maxnode)
+COMPAT_SYSCALL_DEFINE3(set_mempolicy, int, mode, compat_ulong_t __user *, nmask,
+		       compat_ulong_t, maxnode)
 {
 	long err = 0;
 	unsigned long __user *nm = NULL;
@@ -1609,9 +1609,9 @@ asmlinkage long compat_sys_set_mempolicy(int mode, compat_ulong_t __user *nmask,
 	return sys_set_mempolicy(mode, nm, nr_bits+1);
 }
 
-asmlinkage long compat_sys_mbind(compat_ulong_t start, compat_ulong_t len,
-			     compat_ulong_t mode, compat_ulong_t __user *nmask,
-			     compat_ulong_t maxnode, compat_ulong_t flags)
+COMPAT_SYSCALL_DEFINE6(mbind, compat_ulong_t, start, compat_ulong_t, len,
+		       compat_ulong_t, mode, compat_ulong_t __user *, nmask,
+		       compat_ulong_t, maxnode, compat_ulong_t, flags)
 {
 	long err = 0;
 	unsigned long __user *nm = NULL;
@@ -2301,35 +2301,6 @@ static void sp_free(struct sp_node *n)
 	kmem_cache_free(sn_cache, n);
 }
 
-#ifdef CONFIG_NUMA_BALANCING
-static bool numa_migrate_deferred(struct task_struct *p, int last_cpupid)
-{
-	/* Never defer a private fault */
-	if (cpupid_match_pid(p, last_cpupid))
-		return false;
-
-	if (p->numa_migrate_deferred) {
-		p->numa_migrate_deferred--;
-		return true;
-	}
-	return false;
-}
-
-static inline void defer_numa_migrate(struct task_struct *p)
-{
-	p->numa_migrate_deferred = sysctl_numa_balancing_migrate_deferred;
-}
-#else
-static inline bool numa_migrate_deferred(struct task_struct *p, int last_cpupid)
-{
-	return false;
-}
-
-static inline void defer_numa_migrate(struct task_struct *p)
-{
-}
-#endif /* CONFIG_NUMA_BALANCING */
-
 /**
  * mpol_misplaced - check whether current page node is valid in policy
  *
@@ -2403,52 +2374,9 @@ int mpol_misplaced(struct page *page, struct vm_area_struct *vma, unsigned long
 
 	/* Migrate the page towards the node whose CPU is referencing it */
 	if (pol->flags & MPOL_F_MORON) {
-		int last_cpupid;
-		int this_cpupid;
-
 		polnid = thisnid;
-		this_cpupid = cpu_pid_to_cpupid(thiscpu, current->pid);
-
-		/*
-		 * Multi-stage node selection is used in conjunction
-		 * with a periodic migration fault to build a temporal
-		 * task<->page relation. By using a two-stage filter we
-		 * remove short/unlikely relations.
-		 *
-		 * Using P(p) ~ n_p / n_t as per frequentist
-		 * probability, we can equate a task's usage of a
-		 * particular page (n_p) per total usage of this
-		 * page (n_t) (in a given time-span) to a probability.
-		 *
-		 * Our periodic faults will sample this probability and
-		 * getting the same result twice in a row, given these
-		 * samples are fully independent, is then given by
-		 * P(n)^2, provided our sample period is sufficiently
-		 * short compared to the usage pattern.
-		 *
-		 * This quadric squishes small probabilities, making
-		 * it less likely we act on an unlikely task<->page
-		 * relation.
-		 */
-		last_cpupid = page_cpupid_xchg_last(page, this_cpupid);
-		if (!cpupid_pid_unset(last_cpupid) && cpupid_to_nid(last_cpupid) != thisnid) {
 
-			/* See sysctl_numa_balancing_migrate_deferred comment */
-			if (!cpupid_match_pid(current, last_cpupid))
-				defer_numa_migrate(current);
-
+		if (!should_numa_migrate_memory(current, page, curnid, thiscpu))
 			goto out;
-		}
-
-		/*
-		 * The quadratic filter above reduces extraneous migration
-		 * of shared pages somewhat. This code reduces it even more,
-		 * reducing the overhead of page migrations of shared pages.
-		 * This makes workloads with shared pages rely more on
-		 * "move task near its memory", and less on "move memory
-		 * towards its task", which is exactly what we want.
-		 */
-		if (numa_migrate_deferred(current, last_cpupid))
-			goto out;
 	}
 
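
A minimal userspace sketch of the two-stage filter described in the comment block removed above, whose logic now sits behind should_numa_migrate_memory(): each NUMA hinting fault exchanges the page's "last accessor" id for the current one, and the page is only migrated once two samples in a row agree. The struct and function names below are invented, and the real code exchanges a cpupid rather than a bare node id.

#include <stdbool.h>
#include <stdio.h>

struct toy_page {
	int last_nid;	/* stands in for the cpupid stored in the page flags */
};

/* returns true if a hinting fault from this_nid should migrate the page */
static bool two_stage_filter(struct toy_page *page, int this_nid)
{
	int last_nid = page->last_nid;

	page->last_nid = this_nid;	/* xchg, like page_cpupid_xchg_last() */
	return last_nid == this_nid;	/* need the same answer twice in a row */
}

int main(void)
{
	struct toy_page page = { .last_nid = -1 };
	int faults[] = { 0, 1, 1, 0, 1, 1 };	/* node of each hinting fault */

	for (unsigned i = 0; i < sizeof(faults) / sizeof(faults[0]); i++)
		printf("fault from node %d -> %s\n", faults[i],
		       two_stage_filter(&page, faults[i]) ? "migrate" : "defer");
	return 0;
}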
diff --git a/mm/migrate.c b/mm/migrate.c
index 482a33d89134..bed48809e5d0 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -178,6 +178,37 @@ out:
 }
 
 /*
+ * Congratulations to trinity for discovering this bug.
+ * mm/fremap.c's remap_file_pages() accepts any range within a single vma to
+ * convert that vma to VM_NONLINEAR; and generic_file_remap_pages() will then
+ * replace the specified range by file ptes throughout (maybe populated after).
+ * If page migration finds a page within that range, while it's still located
+ * by vma_interval_tree rather than lost to i_mmap_nonlinear list, no problem:
+ * zap_pte() clears the temporary migration entry before mmap_sem is dropped.
+ * But if the migrating page is in a part of the vma outside the range to be
+ * remapped, then it will not be cleared, and remove_migration_ptes() needs to
+ * deal with it. Fortunately, this part of the vma is of course still linear,
+ * so we just need to use linear location on the nonlinear list.
+ */
+static int remove_linear_migration_ptes_from_nonlinear(struct page *page,
+		struct address_space *mapping, void *arg)
+{
+	struct vm_area_struct *vma;
+	/* hugetlbfs does not support remap_pages, so no huge pgoff worries */
+	pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
+	unsigned long addr;
+
+	list_for_each_entry(vma,
+		&mapping->i_mmap_nonlinear, shared.nonlinear) {
+
+		addr = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
+		if (addr >= vma->vm_start && addr < vma->vm_end)
+			remove_migration_pte(page, vma, addr, arg);
+	}
+	return SWAP_AGAIN;
+}
+
+/*
  * Get rid of all migration entries and replace them by
  * references to the indicated page.
  */
@@ -186,6 +217,7 @@ static void remove_migration_ptes(struct page *old, struct page *new)
 	struct rmap_walk_control rwc = {
 		.rmap_one = remove_migration_pte,
 		.arg = old,
+		.file_nonlinear = remove_linear_migration_ptes_from_nonlinear,
 	};
 
 	rmap_walk(new, &rwc);
@@ -1158,7 +1190,7 @@ static struct page *new_page_node(struct page *p, unsigned long private,
 					pm->node);
 	else
 		return alloc_pages_exact_node(pm->node,
-				GFP_HIGHUSER_MOVABLE | GFP_THISNODE, 0);
+				GFP_HIGHUSER_MOVABLE | __GFP_THISNODE, 0);
 }
 
 /*
@@ -1544,9 +1576,9 @@ static struct page *alloc_misplaced_dst_page(struct page *page,
 	struct page *newpage;
 
 	newpage = alloc_pages_exact_node(nid,
-					 (GFP_HIGHUSER_MOVABLE | GFP_THISNODE |
-					  __GFP_NOMEMALLOC | __GFP_NORETRY |
-					  __GFP_NOWARN) &
+					 (GFP_HIGHUSER_MOVABLE |
+					  __GFP_THISNODE | __GFP_NOMEMALLOC |
+					  __GFP_NORETRY | __GFP_NOWARN) &
 					 ~GFP_IOFS, 0);
 
 	return newpage;
@@ -1747,7 +1779,8 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 		goto out_dropref;
 
 	new_page = alloc_pages_node(node,
-		(GFP_TRANSHUGE | GFP_THISNODE) & ~__GFP_WAIT, HPAGE_PMD_ORDER);
+		(GFP_TRANSHUGE | __GFP_THISNODE) & ~__GFP_WAIT,
+		HPAGE_PMD_ORDER);
 	if (!new_page)
 		goto out_fail;
 
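
A userspace sketch of the address calculation used by remove_linear_migration_ptes_from_nonlinear() above: for the still-linear part of a VM_NONLINEAR vma, the page's file offset determines exactly one virtual address, which is then range-checked against the vma. The struct and the numbers below are made up for illustration.

#include <stdio.h>

#define PAGE_SHIFT	12UL

struct toy_vma {
	unsigned long vm_start, vm_end;	/* virtual address range */
	unsigned long vm_pgoff;		/* file page offset of vm_start */
};

int main(void)
{
	struct toy_vma vma = {
		.vm_start = 0x700000000000UL,
		.vm_end   = 0x700000010000UL,	/* 16 pages */
		.vm_pgoff = 100,		/* vma starts at file page 100 */
	};
	unsigned long pgoff = 105;		/* page->index of the migrating page */
	unsigned long addr;

	/* same formula as in the hunk above */
	addr = vma.vm_start + ((pgoff - vma.vm_pgoff) << PAGE_SHIFT);
	if (addr >= vma.vm_start && addr < vma.vm_end)
		printf("file page %lu maps to %#lx in this vma\n", pgoff, addr);
	else
		printf("file page %lu falls outside this vma\n", pgoff);
	return 0;
}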
diff --git a/mm/mmu_context.c b/mm/mmu_context.c
index 8a8cd0265e52..f802c2d216a7 100644
--- a/mm/mmu_context.c
+++ b/mm/mmu_context.c
@@ -31,6 +31,9 @@ void use_mm(struct mm_struct *mm)
 	tsk->mm = mm;
 	switch_mm(active_mm, mm, tsk);
 	task_unlock(tsk);
+#ifdef finish_arch_post_lock_switch
+	finish_arch_post_lock_switch();
+#endif
 
 	if (active_mm != mm)
 		mmdrop(active_mm);
diff --git a/mm/percpu.c b/mm/percpu.c
index 036cfe07050f..63e24fb4387b 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -102,10 +102,11 @@ struct pcpu_chunk {
 	int			free_size;	/* free bytes in the chunk */
 	int			contig_hint;	/* max contiguous size hint */
 	void			*base_addr;	/* base address of this chunk */
-	int			map_used;	/* # of map entries used */
+	int			map_used;	/* # of map entries used before the sentry */
 	int			map_alloc;	/* # of map entries allocated */
 	int			*map;		/* allocation map */
 	void			*data;		/* chunk data */
+	int			first_free;	/* no free below this */
 	bool			immutable;	/* no [de]population allowed */
 	unsigned long		populated[];	/* populated bitmap */
 };
@@ -356,11 +357,11 @@ static int pcpu_need_to_extend(struct pcpu_chunk *chunk)
 {
 	int new_alloc;
 
-	if (chunk->map_alloc >= chunk->map_used + 2)
+	if (chunk->map_alloc >= chunk->map_used + 3)
 		return 0;
 
 	new_alloc = PCPU_DFL_MAP_ALLOC;
-	while (new_alloc < chunk->map_used + 2)
+	while (new_alloc < chunk->map_used + 3)
 		new_alloc *= 2;
 
 	return new_alloc;
@@ -418,48 +419,6 @@ out_unlock:
 }
 
 /**
- * pcpu_split_block - split a map block
- * @chunk: chunk of interest
- * @i: index of map block to split
- * @head: head size in bytes (can be 0)
- * @tail: tail size in bytes (can be 0)
- *
- * Split the @i'th map block into two or three blocks. If @head is
- * non-zero, @head bytes block is inserted before block @i moving it
- * to @i+1 and reducing its size by @head bytes.
- *
- * If @tail is non-zero, the target block, which can be @i or @i+1
- * depending on @head, is reduced by @tail bytes and @tail byte block
- * is inserted after the target block.
- *
- * @chunk->map must have enough free slots to accommodate the split.
- *
- * CONTEXT:
- * pcpu_lock.
- */
-static void pcpu_split_block(struct pcpu_chunk *chunk, int i,
-			     int head, int tail)
-{
-	int nr_extra = !!head + !!tail;
-
-	BUG_ON(chunk->map_alloc < chunk->map_used + nr_extra);
-
-	/* insert new subblocks */
-	memmove(&chunk->map[i + nr_extra], &chunk->map[i],
-		sizeof(chunk->map[0]) * (chunk->map_used - i));
-	chunk->map_used += nr_extra;
-
-	if (head) {
-		chunk->map[i + 1] = chunk->map[i] - head;
-		chunk->map[i++] = head;
-	}
-	if (tail) {
-		chunk->map[i++] -= tail;
-		chunk->map[i] = tail;
-	}
-}
-
-/**
  * pcpu_alloc_area - allocate area from a pcpu_chunk
  * @chunk: chunk of interest
  * @size: wanted size in bytes
@@ -483,19 +442,27 @@ static int pcpu_alloc_area(struct pcpu_chunk *chunk, int size, int align)
 	int oslot = pcpu_chunk_slot(chunk);
 	int max_contig = 0;
 	int i, off;
+	bool seen_free = false;
+	int *p;
 
-	for (i = 0, off = 0; i < chunk->map_used; off += abs(chunk->map[i++])) {
-		bool is_last = i + 1 == chunk->map_used;
+	for (i = chunk->first_free, p = chunk->map + i; i < chunk->map_used; i++, p++) {
 		int head, tail;
+		int this_size;
+
+		off = *p;
+		if (off & 1)
+			continue;
 
 		/* extra for alignment requirement */
 		head = ALIGN(off, align) - off;
-		BUG_ON(i == 0 && head != 0);
 
-		if (chunk->map[i] < 0)
-			continue;
-		if (chunk->map[i] < head + size) {
-			max_contig = max(chunk->map[i], max_contig);
+		this_size = (p[1] & ~1) - off;
+		if (this_size < head + size) {
+			if (!seen_free) {
+				chunk->first_free = i;
+				seen_free = true;
+			}
+			max_contig = max(this_size, max_contig);
 			continue;
 		}
 
@@ -505,44 +472,59 @@ static int pcpu_alloc_area(struct pcpu_chunk *chunk, int size, int align)
 		 * than sizeof(int), which is very small but isn't too
 		 * uncommon for percpu allocations.
 		 */
-		if (head && (head < sizeof(int) || chunk->map[i - 1] > 0)) {
-			if (chunk->map[i - 1] > 0)
-				chunk->map[i - 1] += head;
-			else {
-				chunk->map[i - 1] -= head;
+		if (head && (head < sizeof(int) || !(p[-1] & 1))) {
+			*p = off += head;
+			if (p[-1] & 1)
 				chunk->free_size -= head;
-			}
-			chunk->map[i] -= head;
-			off += head;
+			else
+				max_contig = max(*p - p[-1], max_contig);
+			this_size -= head;
 			head = 0;
 		}
 
 		/* if tail is small, just keep it around */
-		tail = chunk->map[i] - head - size;
-		if (tail < sizeof(int))
+		tail = this_size - head - size;
+		if (tail < sizeof(int)) {
 			tail = 0;
+			size = this_size - head;
+		}
 
 		/* split if warranted */
 		if (head || tail) {
-			pcpu_split_block(chunk, i, head, tail);
+			int nr_extra = !!head + !!tail;
+
+			/* insert new subblocks */
+			memmove(p + nr_extra + 1, p + 1,
+				sizeof(chunk->map[0]) * (chunk->map_used - i));
+			chunk->map_used += nr_extra;
+
 			if (head) {
-				i++;
-				off += head;
-				max_contig = max(chunk->map[i - 1], max_contig);
+				if (!seen_free) {
+					chunk->first_free = i;
+					seen_free = true;
+				}
+				*++p = off += head;
+				++i;
+				max_contig = max(head, max_contig);
+			}
+			if (tail) {
+				p[1] = off + size;
+				max_contig = max(tail, max_contig);
 			}
-			if (tail)
-				max_contig = max(chunk->map[i + 1], max_contig);
 		}
 
+		if (!seen_free)
+			chunk->first_free = i + 1;
+
 		/* update hint and mark allocated */
-		if (is_last)
+		if (i + 1 == chunk->map_used)
 			chunk->contig_hint = max_contig; /* fully scanned */
 		else
 			chunk->contig_hint = max(chunk->contig_hint,
 						 max_contig);
 
-		chunk->free_size -= chunk->map[i];
-		chunk->map[i] = -chunk->map[i];
+		chunk->free_size -= size;
+		*p |= 1;
 
 		pcpu_chunk_relocate(chunk, oslot);
 		return off;
@@ -570,34 +552,50 @@ static int pcpu_alloc_area(struct pcpu_chunk *chunk, int size, int align)
 static void pcpu_free_area(struct pcpu_chunk *chunk, int freeme)
 {
 	int oslot = pcpu_chunk_slot(chunk);
-	int i, off;
-
-	for (i = 0, off = 0; i < chunk->map_used; off += abs(chunk->map[i++]))
-		if (off == freeme)
-			break;
+	int off = 0;
+	unsigned i, j;
+	int to_free = 0;
+	int *p;
+
+	freeme |= 1;	/* we are searching for <given offset, in use> pair */
+
+	i = 0;
+	j = chunk->map_used;
+	while (i != j) {
+		unsigned k = (i + j) / 2;
+		off = chunk->map[k];
+		if (off < freeme)
+			i = k + 1;
+		else if (off > freeme)
+			j = k;
+		else
+			i = j = k;
+	}
 	BUG_ON(off != freeme);
-	BUG_ON(chunk->map[i] > 0);
 
-	chunk->map[i] = -chunk->map[i];
-	chunk->free_size += chunk->map[i];
+	if (i < chunk->first_free)
+		chunk->first_free = i;
 
+	p = chunk->map + i;
+	*p = off &= ~1;
+	chunk->free_size += (p[1] & ~1) - off;
+
+	/* merge with next? */
+	if (!(p[1] & 1))
+		to_free++;
 	/* merge with previous? */
-	if (i > 0 && chunk->map[i - 1] >= 0) {
-		chunk->map[i - 1] += chunk->map[i];
-		chunk->map_used--;
-		memmove(&chunk->map[i], &chunk->map[i + 1],
-			(chunk->map_used - i) * sizeof(chunk->map[0]));
+	if (i > 0 && !(p[-1] & 1)) {
+		to_free++;
 		i--;
+		p--;
 	}
-	/* merge with next? */
-	if (i + 1 < chunk->map_used && chunk->map[i + 1] >= 0) {
-		chunk->map[i] += chunk->map[i + 1];
-		chunk->map_used--;
-		memmove(&chunk->map[i + 1], &chunk->map[i + 2],
-			(chunk->map_used - (i + 1)) * sizeof(chunk->map[0]));
+	if (to_free) {
+		chunk->map_used -= to_free;
+		memmove(p + 1, p + 1 + to_free,
+			(chunk->map_used - i) * sizeof(chunk->map[0]));
 	}
 
-	chunk->contig_hint = max(chunk->map[i], chunk->contig_hint);
+	chunk->contig_hint = max(chunk->map[i + 1] - chunk->map[i] - 1, chunk->contig_hint);
 	pcpu_chunk_relocate(chunk, oslot);
 }
 
@@ -617,7 +615,9 @@ static struct pcpu_chunk *pcpu_alloc_chunk(void)
 	}
 
 	chunk->map_alloc = PCPU_DFL_MAP_ALLOC;
-	chunk->map[chunk->map_used++] = pcpu_unit_size;
+	chunk->map[0] = 0;
+	chunk->map[1] = pcpu_unit_size | 1;
+	chunk->map_used = 1;
 
 	INIT_LIST_HEAD(&chunk->list);
 	chunk->free_size = pcpu_unit_size;
@@ -713,6 +713,16 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved)
 	unsigned long flags;
 	void __percpu *ptr;
 
+	/*
+	 * We want the lowest bit of offset available for in-use/free
+	 * indicator, so force >= 16bit alignment and make size even.
+	 */
+	if (unlikely(align < 2))
+		align = 2;
+
+	if (unlikely(size & 1))
+		size++;
+
 	if (unlikely(!size || size > PCPU_MIN_UNIT_SIZE || align > PAGE_SIZE)) {
 		WARN(true, "illegal size (%zu) or align (%zu) for "
 		     "percpu allocation\n", size, align);
@@ -1343,9 +1353,13 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
 	}
 	schunk->contig_hint = schunk->free_size;
 
-	schunk->map[schunk->map_used++] = -ai->static_size;
+	schunk->map[0] = 1;
+	schunk->map[1] = ai->static_size;
+	schunk->map_used = 1;
 	if (schunk->free_size)
-		schunk->map[schunk->map_used++] = schunk->free_size;
+		schunk->map[++schunk->map_used] = 1 | (ai->static_size + schunk->free_size);
+	else
+		schunk->map[1] |= 1;
 
 	/* init dynamic chunk if necessary */
 	if (dyn_size) {
@@ -1358,8 +1372,10 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
 		bitmap_fill(dchunk->populated, pcpu_unit_pages);
 
 		dchunk->contig_hint = dchunk->free_size = dyn_size;
-		dchunk->map[dchunk->map_used++] = -pcpu_reserved_chunk_limit;
-		dchunk->map[dchunk->map_used++] = dchunk->free_size;
+		dchunk->map[0] = 1;
+		dchunk->map[1] = pcpu_reserved_chunk_limit;
+		dchunk->map[2] = (pcpu_reserved_chunk_limit + dchunk->free_size) | 1;
+		dchunk->map_used = 2;
 	}
 
 	/* link the first chunk in */
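
A userspace sketch of the allocation-map encoding the mm/percpu.c changes above switch to: map[] now holds area start offsets with the low bit as the in-use flag plus a trailing sentinel entry at the unit size, instead of signed lengths. The sample map below is invented; because the offsets stay sorted, pcpu_free_area() can binary-search for the area to free, as the hunk above shows.

#include <stdio.h>

int main(void)
{
	/* three areas: [0,128) in use, [128,512) free, [512,4096) in use */
	int map[] = { 0 | 1, 128, 512 | 1, 4096 | 1 /* sentinel, always busy */ };
	int map_used = 3;	/* entries before the sentinel */
	int i;

	for (i = 0; i < map_used; i++) {
		int off  = map[i] & ~1;		/* strip the in-use bit */
		int next = map[i + 1] & ~1;	/* next start bounds this area */

		printf("area at %4d, size %4d, %s\n",
		       off, next - off, (map[i] & 1) ? "in use" : "free");
	}
	return 0;
}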
diff --git a/mm/process_vm_access.c b/mm/process_vm_access.c
index fd26d0433509..3c5cf68566ec 100644
--- a/mm/process_vm_access.c
+++ b/mm/process_vm_access.c
@@ -456,25 +456,23 @@ free_iovecs:
 	return rc;
 }
 
-asmlinkage ssize_t
-compat_sys_process_vm_readv(compat_pid_t pid,
-			    const struct compat_iovec __user *lvec,
-			    unsigned long liovcnt,
-			    const struct compat_iovec __user *rvec,
-			    unsigned long riovcnt,
-			    unsigned long flags)
+COMPAT_SYSCALL_DEFINE6(process_vm_readv, compat_pid_t, pid,
+		       const struct compat_iovec __user *, lvec,
+		       compat_ulong_t, liovcnt,
+		       const struct compat_iovec __user *, rvec,
+		       compat_ulong_t, riovcnt,
+		       compat_ulong_t, flags)
 {
 	return compat_process_vm_rw(pid, lvec, liovcnt, rvec,
 				    riovcnt, flags, 0);
 }
 
-asmlinkage ssize_t
-compat_sys_process_vm_writev(compat_pid_t pid,
-			     const struct compat_iovec __user *lvec,
-			     unsigned long liovcnt,
-			     const struct compat_iovec __user *rvec,
-			     unsigned long riovcnt,
-			     unsigned long flags)
+COMPAT_SYSCALL_DEFINE6(process_vm_writev, compat_pid_t, pid,
+		       const struct compat_iovec __user *, lvec,
+		       compat_ulong_t, liovcnt,
+		       const struct compat_iovec __user *, rvec,
+		       compat_ulong_t, riovcnt,
+		       compat_ulong_t, flags)
 {
 	return compat_process_vm_rw(pid, lvec, liovcnt, rvec,
 				    riovcnt, flags, 1);
diff --git a/mm/rmap.c b/mm/rmap.c
index d9d42316a99a..11cf322f8133 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1165,6 +1165,16 @@ int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 		}
 		set_pte_at(mm, address, pte,
 			   swp_entry_to_pte(make_hwpoison_entry(page)));
+	} else if (pte_unused(pteval)) {
+		/*
+		 * The guest indicated that the page content is of no
+		 * interest anymore. Simply discard the pte, vmscan
+		 * will take care of the rest.
+		 */
+		if (PageAnon(page))
+			dec_mm_counter(mm, MM_ANONPAGES);
+		else
+			dec_mm_counter(mm, MM_FILEPAGES);
 	} else if (PageAnon(page)) {
 		swp_entry_t entry = { .val = page_private(page) };
 		pte_t swp_pte;
@@ -1360,8 +1370,9 @@ static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount,
 }
 
 static int try_to_unmap_nonlinear(struct page *page,
-		struct address_space *mapping, struct vm_area_struct *vma)
+		struct address_space *mapping, void *arg)
 {
+	struct vm_area_struct *vma;
 	int ret = SWAP_AGAIN;
 	unsigned long cursor;
 	unsigned long max_nl_cursor = 0;
@@ -1663,7 +1674,7 @@ static int rmap_walk_file(struct page *page, struct rmap_walk_control *rwc)
 	if (list_empty(&mapping->i_mmap_nonlinear))
 		goto done;
 
-	ret = rwc->file_nonlinear(page, mapping, vma);
+	ret = rwc->file_nonlinear(page, mapping, rwc->arg);
 
 done:
 	mutex_unlock(&mapping->i_mmap_mutex);
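
A simplified userspace sketch of the callback plumbing the rmap.c hunks above adjust: a walk-control structure bundles the per-pte callback, an opaque arg, and an optional nonlinear handler, and the walker now hands that same opaque arg (rather than a vma) to file_nonlinear. The types and walker below are stand-ins, not the kernel's rmap API.

#include <stdio.h>

struct toy_walk_control {
	int (*rmap_one)(const char *where, void *arg);
	int (*file_nonlinear)(void *arg);
	void *arg;
};

static int print_one(const char *where, void *arg)
{
	printf("rmap_one at %s, arg=%s\n", where, (const char *)arg);
	return 0;
}

static int handle_nonlinear(void *arg)
{
	printf("file_nonlinear, arg=%s\n", (const char *)arg);
	return 0;
}

static void toy_rmap_walk(struct toy_walk_control *rwc)
{
	rwc->rmap_one("linear vma", rwc->arg);
	if (rwc->file_nonlinear)
		rwc->file_nonlinear(rwc->arg);	/* gets rwc->arg, not a vma */
}

int main(void)
{
	struct toy_walk_control rwc = {
		.rmap_one = print_one,
		.file_nonlinear = handle_nonlinear,
		.arg = "old page",
	};

	toy_rmap_walk(&rwc);
	return 0;
}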