Diffstat (limited to 'mm')
-rw-r--r--  mm/filemap.c         |  4
-rw-r--r--  mm/huge_memory.c     | 20
-rw-r--r--  mm/ksm.c             |  2
-rw-r--r--  mm/memcontrol.c      | 20
-rw-r--r--  mm/memory-failure.c  |  8
-rw-r--r--  mm/memory.c          | 15
-rw-r--r--  mm/mprotect.c        | 25
-rw-r--r--  mm/page-writeback.c  |  5
-rw-r--r--  mm/page_alloc.c      | 30
-rw-r--r--  mm/slub.c            | 38
-rw-r--r--  mm/swap.c            |  4
-rw-r--r--  mm/swap_state.c      | 63
-rw-r--r--  mm/swapfile.c        | 11
-rw-r--r--  mm/vmpressure.c      |  1
-rw-r--r--  mm/vmstat.c          |  4
15 files changed, 169 insertions(+), 81 deletions(-)
diff --git a/mm/filemap.c b/mm/filemap.c
index d56d3c145b9f..7a13f6ac5421 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2553,8 +2553,8 @@ ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 	if (ret > 0) {
 		ssize_t err;
 
-		err = generic_write_sync(file, pos, ret);
-		if (err < 0 && ret > 0)
+		err = generic_write_sync(file, iocb->ki_pos - ret, ret);
+		if (err < 0)
 			ret = err;
 	}
 	return ret;
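
Why the new arguments are right: generic_file_aio_write() advances iocb->ki_pos as it writes, so by the time generic_write_sync() runs, a pre-write `pos` no longer names the range that was just written (most visibly for O_APPEND, where data lands at end-of-file rather than at `pos`). A minimal userspace sketch of the arithmetic — the struct and numbers are illustrative, not kernel code:

    #include <stdio.h>

    /* Toy model: after writing `ret` bytes, ki_pos has moved past the
     * written range, so the range to sync starts ret bytes back. */
    struct kiocb_model { long long ki_pos; };

    static void sync_range(long long pos, long long count)
    {
        printf("syncing bytes [%lld, %lld)\n", pos, pos + count);
    }

    int main(void)
    {
        struct kiocb_model iocb = { .ki_pos = 4096 };
        long long ret = 512;                  /* bytes just written */

        iocb.ki_pos += ret;                   /* the write advanced ki_pos */
        sync_range(iocb.ki_pos - ret, ret);   /* [4096, 4608) */
        return 0;
    }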
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 82166bf974e1..1546655a2d78 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1166,8 +1166,10 @@ alloc:
 	} else {
 		ret = do_huge_pmd_wp_page_fallback(mm, vma, address,
 				pmd, orig_pmd, page, haddr);
-		if (ret & VM_FAULT_OOM)
+		if (ret & VM_FAULT_OOM) {
 			split_huge_page(page);
+			ret |= VM_FAULT_FALLBACK;
+		}
 		put_page(page);
 	}
 	count_vm_event(THP_FAULT_FALLBACK);
@@ -1179,9 +1181,10 @@ alloc:
 		if (page) {
 			split_huge_page(page);
 			put_page(page);
-		}
+		} else
+			split_huge_page_pmd(vma, address, pmd);
+		ret |= VM_FAULT_FALLBACK;
 		count_vm_event(THP_FAULT_FALLBACK);
-		ret |= VM_FAULT_OOM;
 		goto out;
 	}
 
@@ -1545,6 +1548,7 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 		entry = pmd_mknonnuma(entry);
 		entry = pmd_modify(entry, newprot);
 		ret = HPAGE_PMD_NR;
+		set_pmd_at(mm, addr, pmd, entry);
 		BUG_ON(pmd_write(entry));
 	} else {
 		struct page *page = pmd_page(*pmd);
@@ -1557,16 +1561,10 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 			 */
 			if (!is_huge_zero_page(page) &&
 					!pmd_numa(*pmd)) {
-				entry = *pmd;
-				entry = pmd_mknuma(entry);
+				pmdp_set_numa(mm, addr, pmd);
 				ret = HPAGE_PMD_NR;
 			}
 		}
-
-		/* Set PMD if cleared earlier */
-		if (ret == HPAGE_PMD_NR)
-			set_pmd_at(mm, addr, pmd, entry);
-
 		spin_unlock(ptl);
 	}
 
@@ -1963,7 +1961,7 @@ out:
 	return ret;
 }
 
-#define VM_NO_THP (VM_SPECIAL|VM_MIXEDMAP|VM_HUGETLB|VM_SHARED|VM_MAYSHARE)
+#define VM_NO_THP (VM_SPECIAL | VM_HUGETLB | VM_SHARED | VM_MAYSHARE)
 
 int hugepage_madvise(struct vm_area_struct *vma,
 		     unsigned long *vm_flags, int advice)
diff --git a/mm/ksm.c b/mm/ksm.c
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -444,7 +444,7 @@ static void break_cow(struct rmap_item *rmap_item)
 static struct page *page_trans_compound_anon(struct page *page)
 {
 	if (PageTransCompound(page)) {
-		struct page *head = compound_trans_head(page);
+		struct page *head = compound_head(page);
 		/*
 		 * head may actually be splitted and freed from under
 		 * us but it's ok here.
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 53385cd4e6f0..5b6b0039f725 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1127,8 +1127,8 @@ skip_node:
 	 * skipping css reference should be safe.
 	 */
 	if (next_css) {
-		if ((next_css->flags & CSS_ONLINE) &&
-				(next_css == &root->css || css_tryget(next_css)))
+		if ((next_css == &root->css) ||
+		    ((next_css->flags & CSS_ONLINE) && css_tryget(next_css)))
 			return mem_cgroup_from_css(next_css);
 
 		prev_css = next_css;
@@ -1687,7 +1687,7 @@ void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p)
 	 * protects memcg_name and makes sure that parallel ooms do not
 	 * interleave
 	 */
-	static DEFINE_SPINLOCK(oom_info_lock);
+	static DEFINE_MUTEX(oom_info_lock);
 	struct cgroup *task_cgrp;
 	struct cgroup *mem_cgrp;
 	static char memcg_name[PATH_MAX];
@@ -1698,7 +1698,7 @@ void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p)
 	if (!p)
 		return;
 
-	spin_lock(&oom_info_lock);
+	mutex_lock(&oom_info_lock);
 	rcu_read_lock();
 
 	mem_cgrp = memcg->css.cgroup;
@@ -1767,7 +1767,7 @@ done:
 
 		pr_cont("\n");
 	}
-	spin_unlock(&oom_info_lock);
+	mutex_unlock(&oom_info_lock);
 }
 
 /*
@@ -6595,6 +6595,7 @@ static void mem_cgroup_css_offline(struct cgroup_subsys_state *css)
 {
 	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
 	struct mem_cgroup_event *event, *tmp;
+	struct cgroup_subsys_state *iter;
 
 	/*
 	 * Unregister events and notify userspace.
@@ -6611,7 +6612,14 @@ static void mem_cgroup_css_offline(struct cgroup_subsys_state *css)
 	kmem_cgroup_css_offline(memcg);
 
 	mem_cgroup_invalidate_reclaim_iterators(memcg);
-	mem_cgroup_reparent_charges(memcg);
+
+	/*
+	 * This requires that offlining is serialized. Right now that is
+	 * guaranteed because css_killed_work_fn() holds the cgroup_mutex.
+	 */
+	css_for_each_descendant_post(iter, css)
+		mem_cgroup_reparent_charges(mem_cgroup_from_css(iter));
+
 	mem_cgroup_destroy_all_caches(memcg);
 	vmpressure_cleanup(&memcg->vmpressure);
 }
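
The switch from a single mem_cgroup_reparent_charges(memcg) call to a css_for_each_descendant_post() walk means every descendant is drained before its own parent is processed, so charges cascade upward one level at a time. A toy post-order walk showing that ordering — the node layout and reparent step are invented for the sketch, not memcg API:

    #include <stdio.h>

    /* Toy cgroup tree: post-order visits children before parents, so a
     * node's charges already include its children's when pushed up. */
    struct node {
        const char *name;
        long charges;
        struct node *parent;
        struct node *child, *sibling;
    };

    static void reparent_charges(struct node *n)
    {
        if (n->parent) {
            n->parent->charges += n->charges;
            n->charges = 0;
        }
        printf("reparented %s\n", n->name);
    }

    static void walk_post_order(struct node *n)
    {
        for (struct node *c = n->child; c; c = c->sibling)
            walk_post_order(c);
        reparent_charges(n);
    }

    int main(void)
    {
        struct node root = { "root", 0, NULL, NULL, NULL };
        struct node a = { "a", 10, &root, NULL, NULL };
        struct node b = { "a/b", 5, &a, NULL, NULL };
        root.child = &a;
        a.child = &b;

        walk_post_order(&root);                   /* a/b, then a, then root */
        printf("root holds %ld\n", root.charges); /* 15 */
        return 0;
    }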
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 4f08a2d61487..90002ea43638 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -945,8 +945,10 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn,
 	 * to it. Similarly, page lock is shifted.
 	 */
 	if (hpage != p) {
-		put_page(hpage);
-		get_page(p);
+		if (!(flags & MF_COUNT_INCREASED)) {
+			put_page(hpage);
+			get_page(p);
+		}
 		lock_page(p);
 		unlock_page(hpage);
 		*hpagep = p;
@@ -1649,7 +1651,7 @@ int soft_offline_page(struct page *page, int flags)
 {
 	int ret;
 	unsigned long pfn = page_to_pfn(page);
-	struct page *hpage = compound_trans_head(page);
+	struct page *hpage = compound_head(page);
 
 	if (PageHWPoison(page)) {
 		pr_info("soft offline: %#lx page already poisoned\n", pfn);
diff --git a/mm/memory.c b/mm/memory.c
index be6a0c0d4ae0..22dfa617bddb 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3348,6 +3348,7 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 		if (ret & VM_FAULT_LOCKED)
 			unlock_page(vmf.page);
 		ret = VM_FAULT_HWPOISON;
+		page_cache_release(vmf.page);
 		goto uncharge_out;
 	}
 
@@ -3703,7 +3704,6 @@ static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	if (unlikely(is_vm_hugetlb_page(vma)))
 		return hugetlb_fault(mm, vma, address, flags);
 
-retry:
 	pgd = pgd_offset(mm, address);
 	pud = pud_alloc(mm, pgd, address);
 	if (!pud)
@@ -3741,20 +3741,13 @@ retry:
 			if (dirty && !pmd_write(orig_pmd)) {
 				ret = do_huge_pmd_wp_page(mm, vma, address, pmd,
 							  orig_pmd);
-				/*
-				 * If COW results in an oom, the huge pmd will
-				 * have been split, so retry the fault on the
-				 * pte for a smaller charge.
-				 */
-				if (unlikely(ret & VM_FAULT_OOM))
-					goto retry;
-				return ret;
+				if (!(ret & VM_FAULT_FALLBACK))
+					return ret;
 			} else {
 				huge_pmd_set_accessed(mm, vma, address, pmd,
 						      orig_pmd, dirty);
+				return 0;
 			}
 		}
 	}
 
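
The retry label disappears because the contract changed: instead of do_huge_pmd_wp_page() returning VM_FAULT_OOM after splitting the huge page and having the caller loop, it now reports VM_FAULT_FALLBACK and __handle_mm_fault() simply falls through to the normal pte path, exactly once. A hedged sketch of that control-flow shape, with all names invented for illustration:

    #include <stdio.h>

    #define FAULT_OK       0x0
    #define FAULT_FALLBACK 0x1   /* huge path gave up; use the small path */

    /* Pretend huge-page handler: succeeds or asks for fallback. */
    static int huge_fault(int can_do_huge)
    {
        return can_do_huge ? FAULT_OK : FAULT_FALLBACK;
    }

    static int pte_fault(void)
    {
        return FAULT_OK;
    }

    static int handle_fault(int can_do_huge)
    {
        int ret = huge_fault(can_do_huge);

        /* No retry loop: a FALLBACK result just drops through to the
         * pte-sized path. */
        if (!(ret & FAULT_FALLBACK))
            return ret;
        return pte_fault();
    }

    int main(void)
    {
        printf("%d %d\n", handle_fault(1), handle_fault(0));   /* 0 0 */
        return 0;
    }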
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 7332c1785744..769a67a15803 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -58,36 +58,27 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 			if (pte_numa(ptent))
 				ptent = pte_mknonnuma(ptent);
 			ptent = pte_modify(ptent, newprot);
+			/*
+			 * Avoid taking write faults for pages we
+			 * know to be dirty.
+			 */
+			if (dirty_accountable && pte_dirty(ptent))
+				ptent = pte_mkwrite(ptent);
+			ptep_modify_prot_commit(mm, addr, pte, ptent);
 			updated = true;
 		} else {
 			struct page *page;
 
-			ptent = *pte;
 			page = vm_normal_page(vma, addr, oldpte);
 			if (page && !PageKsm(page)) {
 				if (!pte_numa(oldpte)) {
-					ptent = pte_mknuma(ptent);
-					set_pte_at(mm, addr, pte, ptent);
+					ptep_set_numa(mm, addr, pte);
 					updated = true;
 				}
 			}
 		}
-
-		/*
-		 * Avoid taking write faults for pages we know to be
-		 * dirty.
-		 */
-		if (dirty_accountable && pte_dirty(ptent)) {
-			ptent = pte_mkwrite(ptent);
-			updated = true;
-		}
-
 		if (updated)
 			pages++;
-
-		/* Only !prot_numa always clears the pte */
-		if (!prot_numa)
-			ptep_modify_prot_commit(mm, addr, pte, ptent);
 	} else if (IS_ENABLED(CONFIG_MIGRATION) && !pte_file(oldpte)) {
 		swp_entry_t entry = pte_to_swp_entry(oldpte);
 
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 2d30e2cfe804..7106cb1aca8e 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2173,11 +2173,12 @@ int __set_page_dirty_nobuffers(struct page *page)
 	if (!TestSetPageDirty(page)) {
 		struct address_space *mapping = page_mapping(page);
 		struct address_space *mapping2;
+		unsigned long flags;
 
 		if (!mapping)
 			return 1;
 
-		spin_lock_irq(&mapping->tree_lock);
+		spin_lock_irqsave(&mapping->tree_lock, flags);
 		mapping2 = page_mapping(page);
 		if (mapping2) {	/* Race with truncate? */
 			BUG_ON(mapping2 != mapping);
@@ -2186,7 +2187,7 @@ int __set_page_dirty_nobuffers(struct page *page)
 			radix_tree_tag_set(&mapping->page_tree,
 				page_index(page), PAGECACHE_TAG_DIRTY);
 		}
-		spin_unlock_irq(&mapping->tree_lock);
+		spin_unlock_irqrestore(&mapping->tree_lock, flags);
 		if (mapping->host) {
 			/* !PageAnon && !swapper_space */
 			__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
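
The switch to the irqsave variants matters when this function can be reached with interrupts already disabled: spin_unlock_irq() unconditionally re-enables interrupts, while the irqsave/irqrestore pair puts back whatever state the caller had. A userspace toy model of just that state-handling difference (the real primitives live in <linux/spinlock.h>; everything here is illustrative):

    #include <stdbool.h>
    #include <stdio.h>

    /* Track only the "interrupts enabled" state, ignoring the lock
     * itself, to show why _irq vs _irqsave matters for a caller that
     * enters with interrupts already off. */
    static bool irqs_on;

    static void lock_irq(void)                { irqs_on = false; }
    static void unlock_irq(void)              { irqs_on = true; }  /* unconditional! */
    static void lock_irqsave(bool *flags)     { *flags = irqs_on; irqs_on = false; }
    static void unlock_irqrestore(bool flags) { irqs_on = flags; }

    int main(void)
    {
        bool flags;

        irqs_on = false;                /* caller already disabled IRQs */
        lock_irq();
        unlock_irq();
        printf("_irq pair:    IRQs %s\n", irqs_on ? "on (wrong)" : "off");

        irqs_on = false;                /* same starting state */
        lock_irqsave(&flags);
        unlock_irqrestore(flags);
        printf("irqsave pair: IRQs %s\n", irqs_on ? "on" : "off (preserved)");
        return 0;
    }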
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index e3758a09a009..3bac76ae4b30 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -369,9 +369,11 @@ void prep_compound_page(struct page *page, unsigned long order)
 	__SetPageHead(page);
 	for (i = 1; i < nr_pages; i++) {
 		struct page *p = page + i;
-		__SetPageTail(p);
 		set_page_count(p, 0);
 		p->first_page = page;
+		/* Make sure p->first_page is always valid for PageTail() */
+		smp_wmb();
+		__SetPageTail(p);
 	}
 }
 
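
The reordering in prep_compound_page() is the classic publication pattern: initialize the payload (p->first_page) first, then a write barrier, then set the flag readers test (PageTail), so a reader that observes the tail flag also observes a valid first_page. A C11 sketch of the same ordering discipline, with release/acquire standing in for smp_wmb() and the flag test — illustrative only, not kernel code:

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdio.h>

    static int first_page;          /* payload, written before publication */
    static atomic_int page_tail;    /* published flag */

    static void *writer(void *arg)
    {
        first_page = 42;            /* payload first */
        atomic_store_explicit(&page_tail, 1, memory_order_release);
        return NULL;
    }

    static void *reader(void *arg)
    {
        /* Seeing the flag guarantees seeing the payload. */
        if (atomic_load_explicit(&page_tail, memory_order_acquire))
            printf("first_page = %d\n", first_page);   /* always 42 */
        return NULL;
    }

    int main(void)
    {
        pthread_t w, r;
        pthread_create(&w, NULL, writer, NULL);
        pthread_create(&r, NULL, reader, NULL);
        pthread_join(w, NULL);
        pthread_join(r, NULL);
        return 0;
    }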
@@ -1236,6 +1238,15 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
 	}
 	local_irq_restore(flags);
 }
+static bool gfp_thisnode_allocation(gfp_t gfp_mask)
+{
+	return (gfp_mask & GFP_THISNODE) == GFP_THISNODE;
+}
+#else
+static bool gfp_thisnode_allocation(gfp_t gfp_mask)
+{
+	return false;
+}
 #endif
 
 /*
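
One subtlety the helper captures: GFP_THISNODE is not a single bit. In kernels of this vintage it expands to __GFP_THISNODE | __GFP_NORETRY | __GFP_NOWARN, so the test must be "all of these bits set" — a mask-and-compare, not a plain nonzero AND. A standalone sketch (flag values copied from 3.x-era gfp.h, but treat them as illustrative):

    #include <stdbool.h>
    #include <stdio.h>

    /* Flag values as in 3.x-era <linux/gfp.h>; illustrative copy. */
    #define __GFP_NOWARN   0x200u
    #define __GFP_NORETRY  0x1000u
    #define __GFP_THISNODE 0x40000u
    #define GFP_THISNODE   (__GFP_THISNODE | __GFP_NORETRY | __GFP_NOWARN)

    static bool gfp_thisnode_allocation(unsigned int gfp_mask)
    {
        /* All three bits must be present, hence compare, not just AND. */
        return (gfp_mask & GFP_THISNODE) == GFP_THISNODE;
    }

    int main(void)
    {
        /* __GFP_NOWARN alone shares a bit with GFP_THISNODE but must
         * not be treated as a THISNODE allocation. */
        printf("%d\n", gfp_thisnode_allocation(__GFP_NOWARN));   /* 0 */
        printf("%d\n", gfp_thisnode_allocation(GFP_THISNODE));   /* 1 */
        return 0;
    }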
@@ -1572,7 +1583,13 @@ again:
 					  get_pageblock_migratetype(page));
 	}
 
-	__mod_zone_page_state(zone, NR_ALLOC_BATCH, -(1 << order));
+	/*
+	 * NOTE: GFP_THISNODE allocations do not partake in the kswapd
+	 * aging protocol, so they can't be fair.
+	 */
+	if (!gfp_thisnode_allocation(gfp_flags))
+		__mod_zone_page_state(zone, NR_ALLOC_BATCH, -(1 << order));
+
 	__count_zone_vm_events(PGALLOC, zone, 1 << order);
 	zone_statistics(preferred_zone, zone, gfp_flags);
 	local_irq_restore(flags);
@@ -1944,8 +1961,12 @@ zonelist_scan:
 		 * ultimately fall back to remote zones that do not
 		 * partake in the fairness round-robin cycle of this
 		 * zonelist.
+		 *
+		 * NOTE: GFP_THISNODE allocations do not partake in
+		 * the kswapd aging protocol, so they can't be fair.
 		 */
-		if (alloc_flags & ALLOC_WMARK_LOW) {
+		if ((alloc_flags & ALLOC_WMARK_LOW) &&
+		    !gfp_thisnode_allocation(gfp_mask)) {
 			if (zone_page_state(zone, NR_ALLOC_BATCH) <= 0)
 				continue;
 			if (!zone_local(preferred_zone, zone))
@@ -2501,8 +2522,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 	 * allowed per node queues are empty and that nodes are
 	 * over allocated.
 	 */
-	if (IS_ENABLED(CONFIG_NUMA) &&
-	    (gfp_mask & GFP_THISNODE) == GFP_THISNODE)
+	if (gfp_thisnode_allocation(gfp_mask))
 		goto nopage;
 
 restart:
diff --git a/mm/slub.c b/mm/slub.c
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1004,21 +1004,19 @@ static inline void slab_free_hook(struct kmem_cache *s, void *x)
 static void add_full(struct kmem_cache *s,
 	struct kmem_cache_node *n, struct page *page)
 {
-	lockdep_assert_held(&n->list_lock);
-
 	if (!(s->flags & SLAB_STORE_USER))
 		return;
 
+	lockdep_assert_held(&n->list_lock);
 	list_add(&page->lru, &n->full);
 }
 
 static void remove_full(struct kmem_cache *s, struct kmem_cache_node *n, struct page *page)
 {
-	lockdep_assert_held(&n->list_lock);
-
 	if (!(s->flags & SLAB_STORE_USER))
 		return;
 
+	lockdep_assert_held(&n->list_lock);
 	list_del(&page->lru);
 }
 
@@ -1520,11 +1518,9 @@ static void discard_slab(struct kmem_cache *s, struct page *page)
 /*
  * Management of partially allocated slabs.
  */
-static inline void add_partial(struct kmem_cache_node *n,
-				struct page *page, int tail)
+static inline void
+__add_partial(struct kmem_cache_node *n, struct page *page, int tail)
 {
-	lockdep_assert_held(&n->list_lock);
-
 	n->nr_partial++;
 	if (tail == DEACTIVATE_TO_TAIL)
 		list_add_tail(&page->lru, &n->partial);
@@ -1532,15 +1528,27 @@ static inline void add_partial(struct kmem_cache_node *n,
 		list_add(&page->lru, &n->partial);
 }
 
-static inline void remove_partial(struct kmem_cache_node *n,
-					struct page *page)
+static inline void add_partial(struct kmem_cache_node *n,
+				struct page *page, int tail)
 {
 	lockdep_assert_held(&n->list_lock);
+	__add_partial(n, page, tail);
+}
 
+static inline void
+__remove_partial(struct kmem_cache_node *n, struct page *page)
+{
 	list_del(&page->lru);
 	n->nr_partial--;
 }
 
+static inline void remove_partial(struct kmem_cache_node *n,
+					struct page *page)
+{
+	lockdep_assert_held(&n->list_lock);
+	__remove_partial(n, page);
+}
+
 /*
  * Remove slab from the partial list, freeze it and
  * return the pointer to the freelist.
@@ -2906,12 +2914,10 @@ static void early_kmem_cache_node_alloc(int node)
 	inc_slabs_node(kmem_cache_node, node, page->objects);
 
 	/*
-	 * the lock is for lockdep's sake, not for any actual
-	 * race protection
+	 * No locks need to be taken here as it has just been
+	 * initialized and there is no concurrent access.
 	 */
-	spin_lock(&n->list_lock);
-	add_partial(n, page, DEACTIVATE_TO_HEAD);
-	spin_unlock(&n->list_lock);
+	__add_partial(n, page, DEACTIVATE_TO_HEAD);
 }
 
 static void free_kmem_cache_nodes(struct kmem_cache *s)
@@ -3197,7 +3203,7 @@ static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
 
 	list_for_each_entry_safe(page, h, &n->partial, lru) {
 		if (!page->inuse) {
-			remove_partial(n, page);
+			__remove_partial(n, page);
 			discard_slab(s, page);
 		} else {
 			list_slab_objects(s, page,
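
The slub changes are one instance of a common refactoring: keep a `__foo()` variant that does the work without locking checks, and a `foo()` wrapper that asserts the lock is held. Callers that provably have no concurrency (early boot, teardown of a dead cache) use the `__` variant directly instead of taking a lock purely to satisfy lockdep. A minimal pthread sketch of the same split — the names and the assert stand-in are invented for illustration:

    #include <assert.h>
    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
    static int list_lock_held;       /* stand-in for lockdep state */
    static int nr_partial;

    static void __add_partial(void)
    {
        nr_partial++;                /* the actual list manipulation */
    }

    static void add_partial(void)
    {
        assert(list_lock_held);      /* stand-in for lockdep_assert_held() */
        __add_partial();
    }

    int main(void)
    {
        /* Boot-time path: object not yet visible to anyone, no lock. */
        __add_partial();

        /* Normal path: take the lock, then use the asserting wrapper. */
        pthread_mutex_lock(&list_lock);
        list_lock_held = 1;
        add_partial();
        list_lock_held = 0;
        pthread_mutex_unlock(&list_lock);

        printf("nr_partial = %d\n", nr_partial);   /* 2 */
        return 0;
    }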
diff --git a/mm/swap.c b/mm/swap.c
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -98,7 +98,7 @@ static void put_compound_page(struct page *page)
 	}
 
 	/* __split_huge_page_refcount can run under us */
-	page_head = compound_trans_head(page);
+	page_head = compound_head(page);
 
 	/*
 	 * THP can not break up slab pages so avoid taking
@@ -253,7 +253,7 @@ bool __get_page_tail(struct page *page)
 	 */
 	unsigned long flags;
 	bool got;
-	struct page *page_head = compound_trans_head(page);
+	struct page *page_head = compound_head(page);
 
 	/* Ref to put_compound_page() comment. */
 	if (!__compound_tail_refcounted(page_head)) {
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 98e85e9c2b2d..e76ace30d436 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -63,6 +63,8 @@ unsigned long total_swapcache_pages(void)
 	return ret;
 }
 
+static atomic_t swapin_readahead_hits = ATOMIC_INIT(4);
+
 void show_swap_cache_info(void)
 {
 	printk("%lu pages in swap cache\n", total_swapcache_pages());
@@ -286,8 +288,11 @@ struct page * lookup_swap_cache(swp_entry_t entry)
 
 	page = find_get_page(swap_address_space(entry), entry.val);
 
-	if (page)
+	if (page) {
 		INC_CACHE_INFO(find_success);
+		if (TestClearPageReadahead(page))
+			atomic_inc(&swapin_readahead_hits);
+	}
 
 	INC_CACHE_INFO(find_total);
 	return page;
@@ -389,6 +394,50 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
 	return found_page;
 }
 
+static unsigned long swapin_nr_pages(unsigned long offset)
+{
+	static unsigned long prev_offset;
+	unsigned int pages, max_pages, last_ra;
+	static atomic_t last_readahead_pages;
+
+	max_pages = 1 << ACCESS_ONCE(page_cluster);
+	if (max_pages <= 1)
+		return 1;
+
+	/*
+	 * This heuristic has been found to work well on both sequential and
+	 * random loads, swapping to hard disk or to SSD: please don't ask
+	 * what the "+ 2" means, it just happens to work well, that's all.
+	 */
+	pages = atomic_xchg(&swapin_readahead_hits, 0) + 2;
+	if (pages == 2) {
+		/*
+		 * We can have no readahead hits to judge by: but must not get
+		 * stuck here forever, so check for an adjacent offset instead
+		 * (and don't even bother to check whether swap type is same).
+		 */
+		if (offset != prev_offset + 1 && offset != prev_offset - 1)
+			pages = 1;
+		prev_offset = offset;
+	} else {
+		unsigned int roundup = 4;
+		while (roundup < pages)
+			roundup <<= 1;
+		pages = roundup;
+	}
+
+	if (pages > max_pages)
+		pages = max_pages;
+
+	/* Don't shrink readahead too fast */
+	last_ra = atomic_read(&last_readahead_pages) / 2;
+	if (pages < last_ra)
+		pages = last_ra;
+	atomic_set(&last_readahead_pages, pages);
+
+	return pages;
+}
+
 /**
  * swapin_readahead - swap in pages in hope we need them soon
  * @entry: swap entry of this memory
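
To see what the heuristic above actually produces, here is a userspace rerun of swapin_nr_pages() with the kernel's static/atomic state reduced to plain statics and the readahead-hit count passed in explicitly; the traced outputs assume page_cluster = 3 (an 8-page cap). Illustrative only:

    #include <stdio.h>

    static unsigned long prev_offset;
    static unsigned int last_ra;

    static unsigned int nr_pages(unsigned long offset, unsigned int hits,
                                 unsigned int page_cluster)
    {
        unsigned int pages, max_pages = 1u << page_cluster;

        if (max_pages <= 1)
            return 1;

        pages = hits + 2;
        if (pages == 2) {
            /* No hits: read ahead only if the access looks sequential. */
            if (offset != prev_offset + 1 && offset != prev_offset - 1)
                pages = 1;
            prev_offset = offset;
        } else {
            unsigned int roundup = 4;
            while (roundup < pages)
                roundup <<= 1;
            pages = roundup;
        }
        if (pages > max_pages)
            pages = max_pages;
        if (pages < last_ra / 2)        /* don't shrink too fast */
            pages = last_ra / 2;
        last_ra = pages;
        return pages;
    }

    int main(void)
    {
        printf("%u\n", nr_pages(100, 0, 3)); /* random miss      -> 1 */
        printf("%u\n", nr_pages(101, 0, 3)); /* adjacent offset  -> 2 */
        printf("%u\n", nr_pages(102, 5, 3)); /* 5 hits: 7, round -> 8 */
        printf("%u\n", nr_pages(200, 0, 3)); /* damped shrink    -> 4 */
        return 0;
    }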
@@ -412,11 +461,16 @@ struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
 			struct vm_area_struct *vma, unsigned long addr)
 {
 	struct page *page;
-	unsigned long offset = swp_offset(entry);
+	unsigned long entry_offset = swp_offset(entry);
+	unsigned long offset = entry_offset;
 	unsigned long start_offset, end_offset;
-	unsigned long mask = (1UL << page_cluster) - 1;
+	unsigned long mask;
 	struct blk_plug plug;
 
+	mask = swapin_nr_pages(offset) - 1;
+	if (!mask)
+		goto skip;
+
 	/* Read a page_cluster sized and aligned cluster around offset. */
 	start_offset = offset & ~mask;
 	end_offset = offset | mask;
@@ -430,10 +484,13 @@ struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
 						gfp_mask, vma, addr);
 		if (!page)
 			continue;
+		if (offset != entry_offset)
+			SetPageReadahead(page);
 		page_cache_release(page);
 	}
 	blk_finish_plug(&plug);
 
 	lru_add_drain();	/* Push any new pages onto the LRU now */
+skip:
 	return read_swap_cache_async(entry, gfp_mask, vma, addr);
 }
diff --git a/mm/swapfile.c b/mm/swapfile.c
index c6c13b050a58..4a7f7e6992b6 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1923,7 +1923,6 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
 	p->swap_map = NULL;
 	cluster_info = p->cluster_info;
 	p->cluster_info = NULL;
-	p->flags = 0;
 	frontswap_map = frontswap_map_get(p);
 	spin_unlock(&p->lock);
 	spin_unlock(&swap_lock);
@@ -1949,6 +1948,16 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
 		mutex_unlock(&inode->i_mutex);
 	}
 	filp_close(swap_file, NULL);
+
+	/*
+	 * Clear the SWP_USED flag after all resources are freed so that swapon
+	 * can reuse this swap_info in alloc_swap_info() safely. It is ok to
+	 * not hold p->lock after we cleared its SWP_WRITEOK.
+	 */
+	spin_lock(&swap_lock);
+	p->flags = 0;
+	spin_unlock(&swap_lock);
+
 	err = 0;
 	atomic_inc(&proc_poll_event);
 	wake_up_interruptible(&proc_poll_wait);
diff --git a/mm/vmpressure.c b/mm/vmpressure.c
index 196970a4541f..d4042e75f7c7 100644
--- a/mm/vmpressure.c
+++ b/mm/vmpressure.c
@@ -19,6 +19,7 @@
 #include <linux/mm.h>
 #include <linux/vmstat.h>
 #include <linux/eventfd.h>
+#include <linux/slab.h>
 #include <linux/swap.h>
 #include <linux/printk.h>
 #include <linux/vmpressure.h>
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 72496140ac08..def5dd2fbe61 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -851,12 +851,14 @@ const char * const vmstat_text[] = {
 	"thp_zero_page_alloc",
 	"thp_zero_page_alloc_failed",
 #endif
+#ifdef CONFIG_DEBUG_TLBFLUSH
 #ifdef CONFIG_SMP
 	"nr_tlb_remote_flush",
 	"nr_tlb_remote_flush_received",
-#endif
+#endif /* CONFIG_SMP */
 	"nr_tlb_local_flush_all",
 	"nr_tlb_local_flush_one",
+#endif /* CONFIG_DEBUG_TLBFLUSH */
 
 #endif /* CONFIG_VM_EVENTS_COUNTERS */
 };