Diffstat (limited to 'mm')
-rw-r--r--  mm/Kconfig             |   4
-rw-r--r--  mm/compaction.c        |  20
-rw-r--r--  mm/fremap.c            |  28
-rw-r--r--  mm/mempolicy.c         |  92
-rw-r--r--  mm/migrate.c           |  43
-rw-r--r--  mm/mmu_context.c       |   3
-rw-r--r--  mm/percpu.c            | 208
-rw-r--r--  mm/process_vm_access.c |  26
-rw-r--r--  mm/rmap.c              |  15
9 files changed, 225 insertions, 214 deletions
diff --git a/mm/Kconfig b/mm/Kconfig
index 2d9f1504d75e..2888024e0b0a 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -575,5 +575,5 @@ config PGTABLE_MAPPING
575 | then you should select this. This causes zsmalloc to use page table | 575 | then you should select this. This causes zsmalloc to use page table |
576 | mapping rather than copying for object mapping. | 576 | mapping rather than copying for object mapping. |
577 | 577 | ||
578 | You can check speed with zsmalloc benchmark[1]. | 578 | You can check speed with zsmalloc benchmark: |
579 | [1] https://github.com/spartacus06/zsmalloc | 579 | https://github.com/spartacus06/zsmapbench |
diff --git a/mm/compaction.c b/mm/compaction.c
index b48c5259ea33..918577595ea8 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -251,7 +251,6 @@ static unsigned long isolate_freepages_block(struct compact_control *cc,
251 | { | 251 | { |
252 | int nr_scanned = 0, total_isolated = 0; | 252 | int nr_scanned = 0, total_isolated = 0; |
253 | struct page *cursor, *valid_page = NULL; | 253 | struct page *cursor, *valid_page = NULL; |
254 | unsigned long nr_strict_required = end_pfn - blockpfn; | ||
255 | unsigned long flags; | 254 | unsigned long flags; |
256 | bool locked = false; | 255 | bool locked = false; |
257 | 256 | ||
@@ -264,11 +263,12 @@ static unsigned long isolate_freepages_block(struct compact_control *cc,
264 | 263 | ||
265 | nr_scanned++; | 264 | nr_scanned++; |
266 | if (!pfn_valid_within(blockpfn)) | 265 | if (!pfn_valid_within(blockpfn)) |
267 | continue; | 266 | goto isolate_fail; |
267 | |||
268 | if (!valid_page) | 268 | if (!valid_page) |
269 | valid_page = page; | 269 | valid_page = page; |
270 | if (!PageBuddy(page)) | 270 | if (!PageBuddy(page)) |
271 | continue; | 271 | goto isolate_fail; |
272 | 272 | ||
273 | /* | 273 | /* |
274 | * The zone lock must be held to isolate freepages. | 274 | * The zone lock must be held to isolate freepages. |
@@ -289,12 +289,10 @@ static unsigned long isolate_freepages_block(struct compact_control *cc,
289 | 289 | ||
290 | /* Recheck this is a buddy page under lock */ | 290 | /* Recheck this is a buddy page under lock */ |
291 | if (!PageBuddy(page)) | 291 | if (!PageBuddy(page)) |
292 | continue; | 292 | goto isolate_fail; |
293 | 293 | ||
294 | /* Found a free page, break it into order-0 pages */ | 294 | /* Found a free page, break it into order-0 pages */ |
295 | isolated = split_free_page(page); | 295 | isolated = split_free_page(page); |
296 | if (!isolated && strict) | ||
297 | break; | ||
298 | total_isolated += isolated; | 296 | total_isolated += isolated; |
299 | for (i = 0; i < isolated; i++) { | 297 | for (i = 0; i < isolated; i++) { |
300 | list_add(&page->lru, freelist); | 298 | list_add(&page->lru, freelist); |
@@ -305,7 +303,15 @@ static unsigned long isolate_freepages_block(struct compact_control *cc,
305 | if (isolated) { | 303 | if (isolated) { |
306 | blockpfn += isolated - 1; | 304 | blockpfn += isolated - 1; |
307 | cursor += isolated - 1; | 305 | cursor += isolated - 1; |
306 | continue; | ||
308 | } | 307 | } |
308 | |||
309 | isolate_fail: | ||
310 | if (strict) | ||
311 | break; | ||
312 | else | ||
313 | continue; | ||
314 | |||
309 | } | 315 | } |
310 | 316 | ||
311 | trace_mm_compaction_isolate_freepages(nr_scanned, total_isolated); | 317 | trace_mm_compaction_isolate_freepages(nr_scanned, total_isolated); |
@@ -315,7 +321,7 @@ static unsigned long isolate_freepages_block(struct compact_control *cc,
315 | * pages requested were isolated. If there were any failures, 0 is | 321 | * pages requested were isolated. If there were any failures, 0 is |
316 | * returned and CMA will fail. | 322 | * returned and CMA will fail. |
317 | */ | 323 | */ |
318 | if (strict && nr_strict_required > total_isolated) | 324 | if (strict && blockpfn < end_pfn) |
319 | total_isolated = 0; | 325 | total_isolated = 0; |
320 | 326 | ||
321 | if (locked) | 327 | if (locked) |
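
Taken together, the compaction hunks route every per-page failure through a single isolate_fail label: non-strict callers (regular compaction) just move on to the next page, while strict callers (CMA) abort the block, and completeness is then judged by whether blockpfn reached end_pfn instead of by the old nr_strict_required counter. A minimal sketch of that control-flow pattern, with a made-up page_usable() predicate standing in for the real pfn_valid_within()/PageBuddy() checks (an illustration, not the kernel function):

#include <stdbool.h>

/* toy stand-in for the real "is this a free buddy page?" checks */
static bool page_usable(unsigned long pfn)
{
	return (pfn & 1) == 0;
}

static unsigned long scan_block(unsigned long pfn, unsigned long end_pfn,
				bool strict)
{
	unsigned long isolated = 0;

	for (; pfn < end_pfn; pfn++) {
		if (!page_usable(pfn))
			goto isolate_fail;

		isolated++;
		continue;

isolate_fail:
		/* strict (CMA) callers give up; compaction just skips the page */
		if (strict)
			break;
	}

	/* an early break leaves pfn short of end_pfn: a partial result is useless */
	if (strict && pfn < end_pfn)
		isolated = 0;

	return isolated;
}
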
diff --git a/mm/fremap.c b/mm/fremap.c
index bbc4d660221a..34feba60a17e 100644
--- a/mm/fremap.c
+++ b/mm/fremap.c
@@ -23,28 +23,44 @@
23 | 23 | ||
24 | #include "internal.h" | 24 | #include "internal.h" |
25 | 25 | ||
26 | static int mm_counter(struct page *page) | ||
27 | { | ||
28 | return PageAnon(page) ? MM_ANONPAGES : MM_FILEPAGES; | ||
29 | } | ||
30 | |||
26 | static void zap_pte(struct mm_struct *mm, struct vm_area_struct *vma, | 31 | static void zap_pte(struct mm_struct *mm, struct vm_area_struct *vma, |
27 | unsigned long addr, pte_t *ptep) | 32 | unsigned long addr, pte_t *ptep) |
28 | { | 33 | { |
29 | pte_t pte = *ptep; | 34 | pte_t pte = *ptep; |
35 | struct page *page; | ||
36 | swp_entry_t entry; | ||
30 | 37 | ||
31 | if (pte_present(pte)) { | 38 | if (pte_present(pte)) { |
32 | struct page *page; | ||
33 | |||
34 | flush_cache_page(vma, addr, pte_pfn(pte)); | 39 | flush_cache_page(vma, addr, pte_pfn(pte)); |
35 | pte = ptep_clear_flush(vma, addr, ptep); | 40 | pte = ptep_clear_flush(vma, addr, ptep); |
36 | page = vm_normal_page(vma, addr, pte); | 41 | page = vm_normal_page(vma, addr, pte); |
37 | if (page) { | 42 | if (page) { |
38 | if (pte_dirty(pte)) | 43 | if (pte_dirty(pte)) |
39 | set_page_dirty(page); | 44 | set_page_dirty(page); |
45 | update_hiwater_rss(mm); | ||
46 | dec_mm_counter(mm, mm_counter(page)); | ||
40 | page_remove_rmap(page); | 47 | page_remove_rmap(page); |
41 | page_cache_release(page); | 48 | page_cache_release(page); |
49 | } | ||
50 | } else { /* zap_pte() is not called when pte_none() */ | ||
51 | if (!pte_file(pte)) { | ||
42 | update_hiwater_rss(mm); | 52 | update_hiwater_rss(mm); |
43 | dec_mm_counter(mm, MM_FILEPAGES); | 53 | entry = pte_to_swp_entry(pte); |
54 | if (non_swap_entry(entry)) { | ||
55 | if (is_migration_entry(entry)) { | ||
56 | page = migration_entry_to_page(entry); | ||
57 | dec_mm_counter(mm, mm_counter(page)); | ||
58 | } | ||
59 | } else { | ||
60 | free_swap_and_cache(entry); | ||
61 | dec_mm_counter(mm, MM_SWAPENTS); | ||
62 | } | ||
44 | } | 63 | } |
45 | } else { | ||
46 | if (!pte_file(pte)) | ||
47 | free_swap_and_cache(pte_to_swp_entry(pte)); | ||
48 | pte_clear_not_present_full(mm, addr, ptep, 0); | 64 | pte_clear_not_present_full(mm, addr, ptep, 0); |
49 | } | 65 | } |
50 | } | 66 | } |
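
The old zap_pte() decremented MM_FILEPAGES for every present page and touched no counter at all for swap or migration entries, so the rss counters could drift whenever remap_file_pages() zapped such a pte. The rewrite charges whichever counter matches the entry being cleared; restated as one helper (an editor's sketch of the logic, not code from the patch — mm_counter() is the small helper added at the top of the file):

static void account_zapped_pte(struct mm_struct *mm, pte_t pte,
			       struct page *page)
{
	if (pte_present(pte)) {
		if (page)			/* normal mapped page */
			dec_mm_counter(mm, mm_counter(page));
	} else if (!pte_file(pte)) {
		swp_entry_t entry = pte_to_swp_entry(pte);

		if (non_swap_entry(entry)) {
			if (is_migration_entry(entry))
				/* charge the page the migration entry points at */
				dec_mm_counter(mm, mm_counter(migration_entry_to_page(entry)));
		} else {
			dec_mm_counter(mm, MM_SWAPENTS);
		}
	}
	/* file ptes (VM_NONLINEAR placeholders) carry no rss charge */
}
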
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index ae3c8f3595d4..4755c8576942 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1556,10 +1556,10 @@ SYSCALL_DEFINE5(get_mempolicy, int __user *, policy,
1556 | 1556 | ||
1557 | #ifdef CONFIG_COMPAT | 1557 | #ifdef CONFIG_COMPAT |
1558 | 1558 | ||
1559 | asmlinkage long compat_sys_get_mempolicy(int __user *policy, | 1559 | COMPAT_SYSCALL_DEFINE5(get_mempolicy, int __user *, policy, |
1560 | compat_ulong_t __user *nmask, | 1560 | compat_ulong_t __user *, nmask, |
1561 | compat_ulong_t maxnode, | 1561 | compat_ulong_t, maxnode, |
1562 | compat_ulong_t addr, compat_ulong_t flags) | 1562 | compat_ulong_t, addr, compat_ulong_t, flags) |
1563 | { | 1563 | { |
1564 | long err; | 1564 | long err; |
1565 | unsigned long __user *nm = NULL; | 1565 | unsigned long __user *nm = NULL; |
@@ -1586,8 +1586,8 @@ asmlinkage long compat_sys_get_mempolicy(int __user *policy,
1586 | return err; | 1586 | return err; |
1587 | } | 1587 | } |
1588 | 1588 | ||
1589 | asmlinkage long compat_sys_set_mempolicy(int mode, compat_ulong_t __user *nmask, | 1589 | COMPAT_SYSCALL_DEFINE3(set_mempolicy, int, mode, compat_ulong_t __user *, nmask, |
1590 | compat_ulong_t maxnode) | 1590 | compat_ulong_t, maxnode) |
1591 | { | 1591 | { |
1592 | long err = 0; | 1592 | long err = 0; |
1593 | unsigned long __user *nm = NULL; | 1593 | unsigned long __user *nm = NULL; |
@@ -1609,9 +1609,9 @@ asmlinkage long compat_sys_set_mempolicy(int mode, compat_ulong_t __user *nmask,
1609 | return sys_set_mempolicy(mode, nm, nr_bits+1); | 1609 | return sys_set_mempolicy(mode, nm, nr_bits+1); |
1610 | } | 1610 | } |
1611 | 1611 | ||
1612 | asmlinkage long compat_sys_mbind(compat_ulong_t start, compat_ulong_t len, | 1612 | COMPAT_SYSCALL_DEFINE6(mbind, compat_ulong_t, start, compat_ulong_t, len, |
1613 | compat_ulong_t mode, compat_ulong_t __user *nmask, | 1613 | compat_ulong_t, mode, compat_ulong_t __user *, nmask, |
1614 | compat_ulong_t maxnode, compat_ulong_t flags) | 1614 | compat_ulong_t, maxnode, compat_ulong_t, flags) |
1615 | { | 1615 | { |
1616 | long err = 0; | 1616 | long err = 0; |
1617 | unsigned long __user *nm = NULL; | 1617 | unsigned long __user *nm = NULL; |
@@ -2301,35 +2301,6 @@ static void sp_free(struct sp_node *n)
2301 | kmem_cache_free(sn_cache, n); | 2301 | kmem_cache_free(sn_cache, n); |
2302 | } | 2302 | } |
2303 | 2303 | ||
2304 | #ifdef CONFIG_NUMA_BALANCING | ||
2305 | static bool numa_migrate_deferred(struct task_struct *p, int last_cpupid) | ||
2306 | { | ||
2307 | /* Never defer a private fault */ | ||
2308 | if (cpupid_match_pid(p, last_cpupid)) | ||
2309 | return false; | ||
2310 | |||
2311 | if (p->numa_migrate_deferred) { | ||
2312 | p->numa_migrate_deferred--; | ||
2313 | return true; | ||
2314 | } | ||
2315 | return false; | ||
2316 | } | ||
2317 | |||
2318 | static inline void defer_numa_migrate(struct task_struct *p) | ||
2319 | { | ||
2320 | p->numa_migrate_deferred = sysctl_numa_balancing_migrate_deferred; | ||
2321 | } | ||
2322 | #else | ||
2323 | static inline bool numa_migrate_deferred(struct task_struct *p, int last_cpupid) | ||
2324 | { | ||
2325 | return false; | ||
2326 | } | ||
2327 | |||
2328 | static inline void defer_numa_migrate(struct task_struct *p) | ||
2329 | { | ||
2330 | } | ||
2331 | #endif /* CONFIG_NUMA_BALANCING */ | ||
2332 | |||
2333 | /** | 2304 | /** |
2334 | * mpol_misplaced - check whether current page node is valid in policy | 2305 | * mpol_misplaced - check whether current page node is valid in policy |
2335 | * | 2306 | * |
@@ -2403,52 +2374,9 @@ int mpol_misplaced(struct page *page, struct vm_area_struct *vma, unsigned long
2403 | 2374 | ||
2404 | /* Migrate the page towards the node whose CPU is referencing it */ | 2375 | /* Migrate the page towards the node whose CPU is referencing it */ |
2405 | if (pol->flags & MPOL_F_MORON) { | 2376 | if (pol->flags & MPOL_F_MORON) { |
2406 | int last_cpupid; | ||
2407 | int this_cpupid; | ||
2408 | |||
2409 | polnid = thisnid; | 2377 | polnid = thisnid; |
2410 | this_cpupid = cpu_pid_to_cpupid(thiscpu, current->pid); | ||
2411 | |||
2412 | /* | ||
2413 | * Multi-stage node selection is used in conjunction | ||
2414 | * with a periodic migration fault to build a temporal | ||
2415 | * task<->page relation. By using a two-stage filter we | ||
2416 | * remove short/unlikely relations. | ||
2417 | * | ||
2418 | * Using P(p) ~ n_p / n_t as per frequentist | ||
2419 | * probability, we can equate a task's usage of a | ||
2420 | * particular page (n_p) per total usage of this | ||
2421 | * page (n_t) (in a given time-span) to a probability. | ||
2422 | * | ||
2423 | * Our periodic faults will sample this probability and | ||
2424 | * getting the same result twice in a row, given these | ||
2425 | * samples are fully independent, is then given by | ||
2426 | * P(n)^2, provided our sample period is sufficiently | ||
2427 | * short compared to the usage pattern. | ||
2428 | * | ||
2429 | * This quadric squishes small probabilities, making | ||
2430 | * it less likely we act on an unlikely task<->page | ||
2431 | * relation. | ||
2432 | */ | ||
2433 | last_cpupid = page_cpupid_xchg_last(page, this_cpupid); | ||
2434 | if (!cpupid_pid_unset(last_cpupid) && cpupid_to_nid(last_cpupid) != thisnid) { | ||
2435 | 2378 | ||
2436 | /* See sysctl_numa_balancing_migrate_deferred comment */ | 2379 | if (!should_numa_migrate_memory(current, page, curnid, thiscpu)) |
2437 | if (!cpupid_match_pid(current, last_cpupid)) | ||
2438 | defer_numa_migrate(current); | ||
2439 | |||
2440 | goto out; | ||
2441 | } | ||
2442 | |||
2443 | /* | ||
2444 | * The quadratic filter above reduces extraneous migration | ||
2445 | * of shared pages somewhat. This code reduces it even more, | ||
2446 | * reducing the overhead of page migrations of shared pages. | ||
2447 | * This makes workloads with shared pages rely more on | ||
2448 | * "move task near its memory", and less on "move memory | ||
2449 | * towards its task", which is exactly what we want. | ||
2450 | */ | ||
2451 | if (numa_migrate_deferred(current, last_cpupid)) | ||
2452 | goto out; | 2380 | goto out; |
2453 | } | 2381 | } |
2454 | 2382 | ||
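
The MPOL_F_MORON branch no longer open-codes the two-stage cpupid filter or the numa_migrate_deferred helpers; that decision is delegated to should_numa_migrate_memory(current, page, curnid, thiscpu). The removed comment's argument, restated with numbers: if a task accounts for roughly 30% of the NUMA faults on a shared page, a single-sample check would pull the page towards it about 30% of the time, whereas requiring the same cpupid on two consecutive faults cuts that to about 0.3 * 0.3 = 9%, so weak task<->page relations rarely trigger a migration.
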
diff --git a/mm/migrate.c b/mm/migrate.c
index 482a33d89134..bed48809e5d0 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -178,6 +178,37 @@ out:
178 | } | 178 | } |
179 | 179 | ||
180 | /* | 180 | /* |
181 | * Congratulations to trinity for discovering this bug. | ||
182 | * mm/fremap.c's remap_file_pages() accepts any range within a single vma to | ||
183 | * convert that vma to VM_NONLINEAR; and generic_file_remap_pages() will then | ||
184 | * replace the specified range by file ptes throughout (maybe populated after). | ||
185 | * If page migration finds a page within that range, while it's still located | ||
186 | * by vma_interval_tree rather than lost to i_mmap_nonlinear list, no problem: | ||
187 | * zap_pte() clears the temporary migration entry before mmap_sem is dropped. | ||
188 | * But if the migrating page is in a part of the vma outside the range to be | ||
189 | * remapped, then it will not be cleared, and remove_migration_ptes() needs to | ||
190 | * deal with it. Fortunately, this part of the vma is of course still linear, | ||
191 | * so we just need to use linear location on the nonlinear list. | ||
192 | */ | ||
193 | static int remove_linear_migration_ptes_from_nonlinear(struct page *page, | ||
194 | struct address_space *mapping, void *arg) | ||
195 | { | ||
196 | struct vm_area_struct *vma; | ||
197 | /* hugetlbfs does not support remap_pages, so no huge pgoff worries */ | ||
198 | pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); | ||
199 | unsigned long addr; | ||
200 | |||
201 | list_for_each_entry(vma, | ||
202 | &mapping->i_mmap_nonlinear, shared.nonlinear) { | ||
203 | |||
204 | addr = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT); | ||
205 | if (addr >= vma->vm_start && addr < vma->vm_end) | ||
206 | remove_migration_pte(page, vma, addr, arg); | ||
207 | } | ||
208 | return SWAP_AGAIN; | ||
209 | } | ||
210 | |||
211 | /* | ||
181 | * Get rid of all migration entries and replace them by | 212 | * Get rid of all migration entries and replace them by |
182 | * references to the indicated page. | 213 | * references to the indicated page. |
183 | */ | 214 | */ |
@@ -186,6 +217,7 @@ static void remove_migration_ptes(struct page *old, struct page *new)
186 | struct rmap_walk_control rwc = { | 217 | struct rmap_walk_control rwc = { |
187 | .rmap_one = remove_migration_pte, | 218 | .rmap_one = remove_migration_pte, |
188 | .arg = old, | 219 | .arg = old, |
220 | .file_nonlinear = remove_linear_migration_ptes_from_nonlinear, | ||
189 | }; | 221 | }; |
190 | 222 | ||
191 | rmap_walk(new, &rwc); | 223 | rmap_walk(new, &rwc); |
@@ -1158,7 +1190,7 @@ static struct page *new_page_node(struct page *p, unsigned long private,
1158 | pm->node); | 1190 | pm->node); |
1159 | else | 1191 | else |
1160 | return alloc_pages_exact_node(pm->node, | 1192 | return alloc_pages_exact_node(pm->node, |
1161 | GFP_HIGHUSER_MOVABLE | GFP_THISNODE, 0); | 1193 | GFP_HIGHUSER_MOVABLE | __GFP_THISNODE, 0); |
1162 | } | 1194 | } |
1163 | 1195 | ||
1164 | /* | 1196 | /* |
@@ -1544,9 +1576,9 @@ static struct page *alloc_misplaced_dst_page(struct page *page,
1544 | struct page *newpage; | 1576 | struct page *newpage; |
1545 | 1577 | ||
1546 | newpage = alloc_pages_exact_node(nid, | 1578 | newpage = alloc_pages_exact_node(nid, |
1547 | (GFP_HIGHUSER_MOVABLE | GFP_THISNODE | | 1579 | (GFP_HIGHUSER_MOVABLE | |
1548 | __GFP_NOMEMALLOC | __GFP_NORETRY | | 1580 | __GFP_THISNODE | __GFP_NOMEMALLOC | |
1549 | __GFP_NOWARN) & | 1581 | __GFP_NORETRY | __GFP_NOWARN) & |
1550 | ~GFP_IOFS, 0); | 1582 | ~GFP_IOFS, 0); |
1551 | 1583 | ||
1552 | return newpage; | 1584 | return newpage; |
@@ -1747,7 +1779,8 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
1747 | goto out_dropref; | 1779 | goto out_dropref; |
1748 | 1780 | ||
1749 | new_page = alloc_pages_node(node, | 1781 | new_page = alloc_pages_node(node, |
1750 | (GFP_TRANSHUGE | GFP_THISNODE) & ~__GFP_WAIT, HPAGE_PMD_ORDER); | 1782 | (GFP_TRANSHUGE | __GFP_THISNODE) & ~__GFP_WAIT, |
1783 | HPAGE_PMD_ORDER); | ||
1751 | if (!new_page) | 1784 | if (!new_page) |
1752 | goto out_fail; | 1785 | goto out_fail; |
1753 | 1786 | ||
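
remove_linear_migration_ptes_from_nonlinear() recovers the default linear address of the migrated page in each VM_NONLINEAR vma from its file offset and hands it to remove_migration_pte(). With made-up numbers: vm_start = 0x700000000000, vm_pgoff = 0x100 and page->index = 0x108 give addr = vm_start + (8 << PAGE_SHIFT) = 0x700000008000 for 4 KiB pages; the range check simply discards pages whose linear address would fall outside the vma. The remaining migrate.c hunks swap the composite GFP_THISNODE for a bare __GFP_THISNODE, since GFP_THISNODE also implies __GFP_NORETRY and __GFP_NOWARN, which these allocation sites want to control explicitly rather than inherit.
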
diff --git a/mm/mmu_context.c b/mm/mmu_context.c
index 8a8cd0265e52..f802c2d216a7 100644
--- a/mm/mmu_context.c
+++ b/mm/mmu_context.c
@@ -31,6 +31,9 @@ void use_mm(struct mm_struct *mm)
31 | tsk->mm = mm; | 31 | tsk->mm = mm; |
32 | switch_mm(active_mm, mm, tsk); | 32 | switch_mm(active_mm, mm, tsk); |
33 | task_unlock(tsk); | 33 | task_unlock(tsk); |
34 | #ifdef finish_arch_post_lock_switch | ||
35 | finish_arch_post_lock_switch(); | ||
36 | #endif | ||
34 | 37 | ||
35 | if (active_mm != mm) | 38 | if (active_mm != mm) |
36 | mmdrop(active_mm); | 39 | mmdrop(active_mm); |
diff --git a/mm/percpu.c b/mm/percpu.c
index 036cfe07050f..63e24fb4387b 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -102,10 +102,11 @@ struct pcpu_chunk {
102 | int free_size; /* free bytes in the chunk */ | 102 | int free_size; /* free bytes in the chunk */ |
103 | int contig_hint; /* max contiguous size hint */ | 103 | int contig_hint; /* max contiguous size hint */ |
104 | void *base_addr; /* base address of this chunk */ | 104 | void *base_addr; /* base address of this chunk */ |
105 | int map_used; /* # of map entries used */ | 105 | int map_used; /* # of map entries used before the sentry */ |
106 | int map_alloc; /* # of map entries allocated */ | 106 | int map_alloc; /* # of map entries allocated */ |
107 | int *map; /* allocation map */ | 107 | int *map; /* allocation map */ |
108 | void *data; /* chunk data */ | 108 | void *data; /* chunk data */ |
109 | int first_free; /* no free below this */ | ||
109 | bool immutable; /* no [de]population allowed */ | 110 | bool immutable; /* no [de]population allowed */ |
110 | unsigned long populated[]; /* populated bitmap */ | 111 | unsigned long populated[]; /* populated bitmap */ |
111 | }; | 112 | }; |
@@ -356,11 +357,11 @@ static int pcpu_need_to_extend(struct pcpu_chunk *chunk)
356 | { | 357 | { |
357 | int new_alloc; | 358 | int new_alloc; |
358 | 359 | ||
359 | if (chunk->map_alloc >= chunk->map_used + 2) | 360 | if (chunk->map_alloc >= chunk->map_used + 3) |
360 | return 0; | 361 | return 0; |
361 | 362 | ||
362 | new_alloc = PCPU_DFL_MAP_ALLOC; | 363 | new_alloc = PCPU_DFL_MAP_ALLOC; |
363 | while (new_alloc < chunk->map_used + 2) | 364 | while (new_alloc < chunk->map_used + 3) |
364 | new_alloc *= 2; | 365 | new_alloc *= 2; |
365 | 366 | ||
366 | return new_alloc; | 367 | return new_alloc; |
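
The safety margin grows from map_used + 2 to map_used + 3 because map_used now counts only the entries in front of the closing sentry: the array already holds map_used + 1 entries, and a worst-case allocation can still split one free area into head, allocation and tail, adding two more entries — hence three spare slots (this is the editor's reading of the new invariant, not wording from the patch).
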
@@ -418,48 +419,6 @@ out_unlock:
418 | } | 419 | } |
419 | 420 | ||
420 | /** | 421 | /** |
421 | * pcpu_split_block - split a map block | ||
422 | * @chunk: chunk of interest | ||
423 | * @i: index of map block to split | ||
424 | * @head: head size in bytes (can be 0) | ||
425 | * @tail: tail size in bytes (can be 0) | ||
426 | * | ||
427 | * Split the @i'th map block into two or three blocks. If @head is | ||
428 | * non-zero, @head bytes block is inserted before block @i moving it | ||
429 | * to @i+1 and reducing its size by @head bytes. | ||
430 | * | ||
431 | * If @tail is non-zero, the target block, which can be @i or @i+1 | ||
432 | * depending on @head, is reduced by @tail bytes and @tail byte block | ||
433 | * is inserted after the target block. | ||
434 | * | ||
435 | * @chunk->map must have enough free slots to accommodate the split. | ||
436 | * | ||
437 | * CONTEXT: | ||
438 | * pcpu_lock. | ||
439 | */ | ||
440 | static void pcpu_split_block(struct pcpu_chunk *chunk, int i, | ||
441 | int head, int tail) | ||
442 | { | ||
443 | int nr_extra = !!head + !!tail; | ||
444 | |||
445 | BUG_ON(chunk->map_alloc < chunk->map_used + nr_extra); | ||
446 | |||
447 | /* insert new subblocks */ | ||
448 | memmove(&chunk->map[i + nr_extra], &chunk->map[i], | ||
449 | sizeof(chunk->map[0]) * (chunk->map_used - i)); | ||
450 | chunk->map_used += nr_extra; | ||
451 | |||
452 | if (head) { | ||
453 | chunk->map[i + 1] = chunk->map[i] - head; | ||
454 | chunk->map[i++] = head; | ||
455 | } | ||
456 | if (tail) { | ||
457 | chunk->map[i++] -= tail; | ||
458 | chunk->map[i] = tail; | ||
459 | } | ||
460 | } | ||
461 | |||
462 | /** | ||
463 | * pcpu_alloc_area - allocate area from a pcpu_chunk | 422 | * pcpu_alloc_area - allocate area from a pcpu_chunk |
464 | * @chunk: chunk of interest | 423 | * @chunk: chunk of interest |
465 | * @size: wanted size in bytes | 424 | * @size: wanted size in bytes |
@@ -483,19 +442,27 @@ static int pcpu_alloc_area(struct pcpu_chunk *chunk, int size, int align)
483 | int oslot = pcpu_chunk_slot(chunk); | 442 | int oslot = pcpu_chunk_slot(chunk); |
484 | int max_contig = 0; | 443 | int max_contig = 0; |
485 | int i, off; | 444 | int i, off; |
445 | bool seen_free = false; | ||
446 | int *p; | ||
486 | 447 | ||
487 | for (i = 0, off = 0; i < chunk->map_used; off += abs(chunk->map[i++])) { | 448 | for (i = chunk->first_free, p = chunk->map + i; i < chunk->map_used; i++, p++) { |
488 | bool is_last = i + 1 == chunk->map_used; | ||
489 | int head, tail; | 449 | int head, tail; |
450 | int this_size; | ||
451 | |||
452 | off = *p; | ||
453 | if (off & 1) | ||
454 | continue; | ||
490 | 455 | ||
491 | /* extra for alignment requirement */ | 456 | /* extra for alignment requirement */ |
492 | head = ALIGN(off, align) - off; | 457 | head = ALIGN(off, align) - off; |
493 | BUG_ON(i == 0 && head != 0); | ||
494 | 458 | ||
495 | if (chunk->map[i] < 0) | 459 | this_size = (p[1] & ~1) - off; |
496 | continue; | 460 | if (this_size < head + size) { |
497 | if (chunk->map[i] < head + size) { | 461 | if (!seen_free) { |
498 | max_contig = max(chunk->map[i], max_contig); | 462 | chunk->first_free = i; |
463 | seen_free = true; | ||
464 | } | ||
465 | max_contig = max(this_size, max_contig); | ||
499 | continue; | 466 | continue; |
500 | } | 467 | } |
501 | 468 | ||
@@ -505,44 +472,59 @@ static int pcpu_alloc_area(struct pcpu_chunk *chunk, int size, int align)
505 | * than sizeof(int), which is very small but isn't too | 472 | * than sizeof(int), which is very small but isn't too |
506 | * uncommon for percpu allocations. | 473 | * uncommon for percpu allocations. |
507 | */ | 474 | */ |
508 | if (head && (head < sizeof(int) || chunk->map[i - 1] > 0)) { | 475 | if (head && (head < sizeof(int) || !(p[-1] & 1))) { |
509 | if (chunk->map[i - 1] > 0) | 476 | *p = off += head; |
510 | chunk->map[i - 1] += head; | 477 | if (p[-1] & 1) |
511 | else { | ||
512 | chunk->map[i - 1] -= head; | ||
513 | chunk->free_size -= head; | 478 | chunk->free_size -= head; |
514 | } | 479 | else |
515 | chunk->map[i] -= head; | 480 | max_contig = max(*p - p[-1], max_contig); |
516 | off += head; | 481 | this_size -= head; |
517 | head = 0; | 482 | head = 0; |
518 | } | 483 | } |
519 | 484 | ||
520 | /* if tail is small, just keep it around */ | 485 | /* if tail is small, just keep it around */ |
521 | tail = chunk->map[i] - head - size; | 486 | tail = this_size - head - size; |
522 | if (tail < sizeof(int)) | 487 | if (tail < sizeof(int)) { |
523 | tail = 0; | 488 | tail = 0; |
489 | size = this_size - head; | ||
490 | } | ||
524 | 491 | ||
525 | /* split if warranted */ | 492 | /* split if warranted */ |
526 | if (head || tail) { | 493 | if (head || tail) { |
527 | pcpu_split_block(chunk, i, head, tail); | 494 | int nr_extra = !!head + !!tail; |
495 | |||
496 | /* insert new subblocks */ | ||
497 | memmove(p + nr_extra + 1, p + 1, | ||
498 | sizeof(chunk->map[0]) * (chunk->map_used - i)); | ||
499 | chunk->map_used += nr_extra; | ||
500 | |||
528 | if (head) { | 501 | if (head) { |
529 | i++; | 502 | if (!seen_free) { |
530 | off += head; | 503 | chunk->first_free = i; |
531 | max_contig = max(chunk->map[i - 1], max_contig); | 504 | seen_free = true; |
505 | } | ||
506 | *++p = off += head; | ||
507 | ++i; | ||
508 | max_contig = max(head, max_contig); | ||
509 | } | ||
510 | if (tail) { | ||
511 | p[1] = off + size; | ||
512 | max_contig = max(tail, max_contig); | ||
532 | } | 513 | } |
533 | if (tail) | ||
534 | max_contig = max(chunk->map[i + 1], max_contig); | ||
535 | } | 514 | } |
536 | 515 | ||
516 | if (!seen_free) | ||
517 | chunk->first_free = i + 1; | ||
518 | |||
537 | /* update hint and mark allocated */ | 519 | /* update hint and mark allocated */ |
538 | if (is_last) | 520 | if (i + 1 == chunk->map_used) |
539 | chunk->contig_hint = max_contig; /* fully scanned */ | 521 | chunk->contig_hint = max_contig; /* fully scanned */ |
540 | else | 522 | else |
541 | chunk->contig_hint = max(chunk->contig_hint, | 523 | chunk->contig_hint = max(chunk->contig_hint, |
542 | max_contig); | 524 | max_contig); |
543 | 525 | ||
544 | chunk->free_size -= chunk->map[i]; | 526 | chunk->free_size -= size; |
545 | chunk->map[i] = -chunk->map[i]; | 527 | *p |= 1; |
546 | 528 | ||
547 | pcpu_chunk_relocate(chunk, oslot); | 529 | pcpu_chunk_relocate(chunk, oslot); |
548 | return off; | 530 | return off; |
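
The core of the percpu rewrite is the new ->map[] format: instead of signed lengths (negative meaning allocated), each entry now stores the byte offset at which an area starts, bit 0 doubles as the in-use flag, and a final sentry entry records the unit size. A toy userspace model of the encoding (an editor's illustration, not kernel code) shows where expressions such as (p[1] & ~1) - off come from:

#include <stdio.h>

static void dump_map(const int *map, int map_used)
{
	int i;

	for (i = 0; i < map_used; i++) {
		int off  = map[i] & ~1;
		int size = (map[i + 1] & ~1) - off;	/* distance to next offset */

		printf("area %d: off=%d size=%d %s\n",
		       i, off, size, (map[i] & 1) ? "in use" : "free");
	}
}

int main(void)
{
	/* a 4096-byte unit with [0, 256) allocated and [256, 4096) free */
	int map[] = { 0 | 1, 256, 4096 | 1 };	/* last entry is the sentry */

	dump_map(map, 2);	/* map_used counts entries before the sentry */
	return 0;
}

With this layout an area's size is just the gap to the next offset, allocation marks an area by setting bit 0 of its entry (*p |= 1), and chunk->first_free lets pcpu_alloc_area() skip the leading run of in-use areas.
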
@@ -570,34 +552,50 @@ static int pcpu_alloc_area(struct pcpu_chunk *chunk, int size, int align)
570 | static void pcpu_free_area(struct pcpu_chunk *chunk, int freeme) | 552 | static void pcpu_free_area(struct pcpu_chunk *chunk, int freeme) |
571 | { | 553 | { |
572 | int oslot = pcpu_chunk_slot(chunk); | 554 | int oslot = pcpu_chunk_slot(chunk); |
573 | int i, off; | 555 | int off = 0; |
574 | 556 | unsigned i, j; | |
575 | for (i = 0, off = 0; i < chunk->map_used; off += abs(chunk->map[i++])) | 557 | int to_free = 0; |
576 | if (off == freeme) | 558 | int *p; |
577 | break; | 559 | |
560 | freeme |= 1; /* we are searching for <given offset, in use> pair */ | ||
561 | |||
562 | i = 0; | ||
563 | j = chunk->map_used; | ||
564 | while (i != j) { | ||
565 | unsigned k = (i + j) / 2; | ||
566 | off = chunk->map[k]; | ||
567 | if (off < freeme) | ||
568 | i = k + 1; | ||
569 | else if (off > freeme) | ||
570 | j = k; | ||
571 | else | ||
572 | i = j = k; | ||
573 | } | ||
578 | BUG_ON(off != freeme); | 574 | BUG_ON(off != freeme); |
579 | BUG_ON(chunk->map[i] > 0); | ||
580 | 575 | ||
581 | chunk->map[i] = -chunk->map[i]; | 576 | if (i < chunk->first_free) |
582 | chunk->free_size += chunk->map[i]; | 577 | chunk->first_free = i; |
583 | 578 | ||
579 | p = chunk->map + i; | ||
580 | *p = off &= ~1; | ||
581 | chunk->free_size += (p[1] & ~1) - off; | ||
582 | |||
583 | /* merge with next? */ | ||
584 | if (!(p[1] & 1)) | ||
585 | to_free++; | ||
584 | /* merge with previous? */ | 586 | /* merge with previous? */ |
585 | if (i > 0 && chunk->map[i - 1] >= 0) { | 587 | if (i > 0 && !(p[-1] & 1)) { |
586 | chunk->map[i - 1] += chunk->map[i]; | 588 | to_free++; |
587 | chunk->map_used--; | ||
588 | memmove(&chunk->map[i], &chunk->map[i + 1], | ||
589 | (chunk->map_used - i) * sizeof(chunk->map[0])); | ||
590 | i--; | 589 | i--; |
590 | p--; | ||
591 | } | 591 | } |
592 | /* merge with next? */ | 592 | if (to_free) { |
593 | if (i + 1 < chunk->map_used && chunk->map[i + 1] >= 0) { | 593 | chunk->map_used -= to_free; |
594 | chunk->map[i] += chunk->map[i + 1]; | 594 | memmove(p + 1, p + 1 + to_free, |
595 | chunk->map_used--; | 595 | (chunk->map_used - i) * sizeof(chunk->map[0])); |
596 | memmove(&chunk->map[i + 1], &chunk->map[i + 2], | ||
597 | (chunk->map_used - (i + 1)) * sizeof(chunk->map[0])); | ||
598 | } | 596 | } |
599 | 597 | ||
600 | chunk->contig_hint = max(chunk->map[i], chunk->contig_hint); | 598 | chunk->contig_hint = max(chunk->map[i + 1] - chunk->map[i] - 1, chunk->contig_hint); |
601 | pcpu_chunk_relocate(chunk, oslot); | 599 | pcpu_chunk_relocate(chunk, oslot); |
602 | } | 600 | } |
603 | 601 | ||
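
Because the offsets in ->map[] are strictly increasing, pcpu_free_area() can now binary-search for the area instead of walking the map, and freeme |= 1 makes the probe match the <offset, in-use> pair exactly. Continuing the toy example above: freeing the area at offset 0 searches for 1, clears the bit of the first entry, then merges with the free neighbour at 256 by dropping that entry, leaving map = { 0, 4096 | 1 } and map_used = 1.
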
@@ -617,7 +615,9 @@ static struct pcpu_chunk *pcpu_alloc_chunk(void)
617 | } | 615 | } |
618 | 616 | ||
619 | chunk->map_alloc = PCPU_DFL_MAP_ALLOC; | 617 | chunk->map_alloc = PCPU_DFL_MAP_ALLOC; |
620 | chunk->map[chunk->map_used++] = pcpu_unit_size; | 618 | chunk->map[0] = 0; |
619 | chunk->map[1] = pcpu_unit_size | 1; | ||
620 | chunk->map_used = 1; | ||
621 | 621 | ||
622 | INIT_LIST_HEAD(&chunk->list); | 622 | INIT_LIST_HEAD(&chunk->list); |
623 | chunk->free_size = pcpu_unit_size; | 623 | chunk->free_size = pcpu_unit_size; |
@@ -713,6 +713,16 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved)
713 | unsigned long flags; | 713 | unsigned long flags; |
714 | void __percpu *ptr; | 714 | void __percpu *ptr; |
715 | 715 | ||
716 | /* | ||
717 | * We want the lowest bit of offset available for in-use/free | ||
718 | * indicator, so force >= 16bit alignment and make size even. | ||
719 | */ | ||
720 | if (unlikely(align < 2)) | ||
721 | align = 2; | ||
722 | |||
723 | if (unlikely(size & 1)) | ||
724 | size++; | ||
725 | |||
716 | if (unlikely(!size || size > PCPU_MIN_UNIT_SIZE || align > PAGE_SIZE)) { | 726 | if (unlikely(!size || size > PCPU_MIN_UNIT_SIZE || align > PAGE_SIZE)) { |
717 | WARN(true, "illegal size (%zu) or align (%zu) for " | 727 | WARN(true, "illegal size (%zu) or align (%zu) for " |
718 | "percpu allocation\n", size, align); | 728 | "percpu allocation\n", size, align); |
@@ -1343,9 +1353,13 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
1343 | } | 1353 | } |
1344 | schunk->contig_hint = schunk->free_size; | 1354 | schunk->contig_hint = schunk->free_size; |
1345 | 1355 | ||
1346 | schunk->map[schunk->map_used++] = -ai->static_size; | 1356 | schunk->map[0] = 1; |
1357 | schunk->map[1] = ai->static_size; | ||
1358 | schunk->map_used = 1; | ||
1347 | if (schunk->free_size) | 1359 | if (schunk->free_size) |
1348 | schunk->map[schunk->map_used++] = schunk->free_size; | 1360 | schunk->map[++schunk->map_used] = 1 | (ai->static_size + schunk->free_size); |
1361 | else | ||
1362 | schunk->map[1] |= 1; | ||
1349 | 1363 | ||
1350 | /* init dynamic chunk if necessary */ | 1364 | /* init dynamic chunk if necessary */ |
1351 | if (dyn_size) { | 1365 | if (dyn_size) { |
@@ -1358,8 +1372,10 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
1358 | bitmap_fill(dchunk->populated, pcpu_unit_pages); | 1372 | bitmap_fill(dchunk->populated, pcpu_unit_pages); |
1359 | 1373 | ||
1360 | dchunk->contig_hint = dchunk->free_size = dyn_size; | 1374 | dchunk->contig_hint = dchunk->free_size = dyn_size; |
1361 | dchunk->map[dchunk->map_used++] = -pcpu_reserved_chunk_limit; | 1375 | dchunk->map[0] = 1; |
1362 | dchunk->map[dchunk->map_used++] = dchunk->free_size; | 1376 | dchunk->map[1] = pcpu_reserved_chunk_limit; |
1377 | dchunk->map[2] = (pcpu_reserved_chunk_limit + dchunk->free_size) | 1; | ||
1378 | dchunk->map_used = 2; | ||
1363 | } | 1379 | } |
1364 | 1380 | ||
1365 | /* link the first chunk in */ | 1381 | /* link the first chunk in */ |
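
For the first-chunk setup the same encoding is written out by hand. With made-up sizes — static_size = 0x2000, reserved_size = 0x1000 (so the reserved chunk limit sits at 0x3000) and dyn_size = 0x1000 — the static/reserved chunk ends up with map = { 1, 0x2000, 0x3001 } (static area in use, reserved area free, sentry at 0x3000) and the dynamic chunk with map = { 1, 0x3000, 0x4001 } (everything below the reserved limit shown as in use, the dynamic area free), both with map_used = 2.
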
diff --git a/mm/process_vm_access.c b/mm/process_vm_access.c
index fd26d0433509..3c5cf68566ec 100644
--- a/mm/process_vm_access.c
+++ b/mm/process_vm_access.c
@@ -456,25 +456,23 @@ free_iovecs:
456 | return rc; | 456 | return rc; |
457 | } | 457 | } |
458 | 458 | ||
459 | asmlinkage ssize_t | 459 | COMPAT_SYSCALL_DEFINE6(process_vm_readv, compat_pid_t, pid, |
460 | compat_sys_process_vm_readv(compat_pid_t pid, | 460 | const struct compat_iovec __user *, lvec, |
461 | const struct compat_iovec __user *lvec, | 461 | compat_ulong_t, liovcnt, |
462 | unsigned long liovcnt, | 462 | const struct compat_iovec __user *, rvec, |
463 | const struct compat_iovec __user *rvec, | 463 | compat_ulong_t, riovcnt, |
464 | unsigned long riovcnt, | 464 | compat_ulong_t, flags) |
465 | unsigned long flags) | ||
466 | { | 465 | { |
467 | return compat_process_vm_rw(pid, lvec, liovcnt, rvec, | 466 | return compat_process_vm_rw(pid, lvec, liovcnt, rvec, |
468 | riovcnt, flags, 0); | 467 | riovcnt, flags, 0); |
469 | } | 468 | } |
470 | 469 | ||
471 | asmlinkage ssize_t | 470 | COMPAT_SYSCALL_DEFINE6(process_vm_writev, compat_pid_t, pid, |
472 | compat_sys_process_vm_writev(compat_pid_t pid, | 471 | const struct compat_iovec __user *, lvec, |
473 | const struct compat_iovec __user *lvec, | 472 | compat_ulong_t, liovcnt, |
474 | unsigned long liovcnt, | 473 | const struct compat_iovec __user *, rvec, |
475 | const struct compat_iovec __user *rvec, | 474 | compat_ulong_t, riovcnt, |
476 | unsigned long riovcnt, | 475 | compat_ulong_t, flags) |
477 | unsigned long flags) | ||
478 | { | 476 | { |
479 | return compat_process_vm_rw(pid, lvec, liovcnt, rvec, | 477 | return compat_process_vm_rw(pid, lvec, liovcnt, rvec, |
480 | riovcnt, flags, 1); | 478 | riovcnt, flags, 1); |
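
Here, as in mempolicy.c above, the hand-rolled asmlinkage compat entry points become COMPAT_SYSCALL_DEFINEn() definitions. Beyond generating the prototype, the macro routes the call through the common compat wrapper machinery, which zero- or sign-extends 32-bit arguments according to the architecture's rules — something an open-coded asmlinkage definition cannot guarantee on all 64-bit targets (the motivation as the editor understands it; the patch itself only shows the mechanical conversion).
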
diff --git a/mm/rmap.c b/mm/rmap.c
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1165,6 +1165,16 @@ int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
1165 | } | 1165 | } |
1166 | set_pte_at(mm, address, pte, | 1166 | set_pte_at(mm, address, pte, |
1167 | swp_entry_to_pte(make_hwpoison_entry(page))); | 1167 | swp_entry_to_pte(make_hwpoison_entry(page))); |
1168 | } else if (pte_unused(pteval)) { | ||
1169 | /* | ||
1170 | * The guest indicated that the page content is of no | ||
1171 | * interest anymore. Simply discard the pte, vmscan | ||
1172 | * will take care of the rest. | ||
1173 | */ | ||
1174 | if (PageAnon(page)) | ||
1175 | dec_mm_counter(mm, MM_ANONPAGES); | ||
1176 | else | ||
1177 | dec_mm_counter(mm, MM_FILEPAGES); | ||
1168 | } else if (PageAnon(page)) { | 1178 | } else if (PageAnon(page)) { |
1169 | swp_entry_t entry = { .val = page_private(page) }; | 1179 | swp_entry_t entry = { .val = page_private(page) }; |
1170 | pte_t swp_pte; | 1180 | pte_t swp_pte; |
@@ -1360,8 +1370,9 @@ static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount,
1360 | } | 1370 | } |
1361 | 1371 | ||
1362 | static int try_to_unmap_nonlinear(struct page *page, | 1372 | static int try_to_unmap_nonlinear(struct page *page, |
1363 | struct address_space *mapping, struct vm_area_struct *vma) | 1373 | struct address_space *mapping, void *arg) |
1364 | { | 1374 | { |
1375 | struct vm_area_struct *vma; | ||
1365 | int ret = SWAP_AGAIN; | 1376 | int ret = SWAP_AGAIN; |
1366 | unsigned long cursor; | 1377 | unsigned long cursor; |
1367 | unsigned long max_nl_cursor = 0; | 1378 | unsigned long max_nl_cursor = 0; |
@@ -1663,7 +1674,7 @@ static int rmap_walk_file(struct page *page, struct rmap_walk_control *rwc)
1663 | if (list_empty(&mapping->i_mmap_nonlinear)) | 1674 | if (list_empty(&mapping->i_mmap_nonlinear)) |
1664 | goto done; | 1675 | goto done; |
1665 | 1676 | ||
1666 | ret = rwc->file_nonlinear(page, mapping, vma); | 1677 | ret = rwc->file_nonlinear(page, mapping, rwc->arg); |
1667 | 1678 | ||
1668 | done: | 1679 | done: |
1669 | mutex_unlock(&mapping->i_mmap_mutex); | 1680 | mutex_unlock(&mapping->i_mmap_mutex); |
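
The last two rmap.c hunks change the third parameter of the file_nonlinear callback from the walk's vma (meaningless for the nonlinear list) to rwc->arg, so try_to_unmap_nonlinear() and the new remove_linear_migration_ptes_from_nonlinear() in migrate.c can share one signature; try_to_unmap_nonlinear() now declares its own vma cursor for the list walk instead.
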