Diffstat (limited to 'mm')
-rw-r--r--  mm/gup.c         |  10
-rw-r--r--  mm/hugetlb.c     |  23
-rw-r--r--  mm/memblock.c    |   2
-rw-r--r--  mm/page_alloc.c  |  28
-rw-r--r--  mm/shmem.c       |   6
-rw-r--r--  mm/swapfile.c    |   6
-rw-r--r--  mm/vmscan.c      |  22
-rw-r--r--  mm/vmstat.c      |   7
-rw-r--r--  mm/z3fold.c      | 101
9 files changed, 127 insertions, 78 deletions
@@ -385,11 +385,17 @@ static struct page *follow_p4d_mask(struct vm_area_struct *vma,
  * @vma: vm_area_struct mapping @address
  * @address: virtual address to look up
  * @flags: flags modifying lookup behaviour
- * @page_mask: on output, *page_mask is set according to the size of the page
+ * @ctx: contains dev_pagemap for %ZONE_DEVICE memory pinning and a
+ *       pointer to output page_mask
  *
  * @flags can have FOLL_ flags set, defined in <linux/mm.h>
  *
- * Returns the mapped (struct page *), %NULL if no mapping exists, or
+ * When getting pages from ZONE_DEVICE memory, the @ctx->pgmap caches
+ * the device's dev_pagemap metadata to avoid repeating expensive lookups.
+ *
+ * On output, the @ctx->page_mask is set according to the size of the page.
+ *
+ * Return: the mapped (struct page *), %NULL if no mapping exists, or
  * an error pointer if there is a mapping to something not represented
  * by a page descriptor (see also vm_normal_page()).
  */
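The gup.c hunk above replaces the bare *page_mask output with a @ctx argument that both caches the dev_pagemap lookup and carries the output mask. A minimal userspace sketch of that pattern; the struct and helper names below are illustrative stand-ins, not the kernel API:

/*
 * Sketch only: lookup_ctx, pagemap and expensive_lookup() are invented
 * names used to illustrate bundling a cache and an output into one context.
 */
#include <stdio.h>

struct pagemap { int id; };             /* stands in for struct dev_pagemap */

struct lookup_ctx {
    struct pagemap *pgmap;              /* cached across repeated lookups */
    unsigned int page_mask;             /* output: size of the mapped page */
};

static struct pagemap *expensive_lookup(void)
{
    static struct pagemap dummy = { 42 };

    puts("expensive lookup performed");
    return &dummy;
}

static void follow_one(unsigned long addr, struct lookup_ctx *ctx)
{
    if (!ctx->pgmap)                    /* reuse the cached result if present */
        ctx->pgmap = expensive_lookup();
    ctx->page_mask = 0;                 /* pretend a base page was found */
    printf("addr %#lx resolved via pagemap %d\n", addr, ctx->pgmap->id);
}

int main(void)
{
    struct lookup_ctx ctx = { 0 };
    unsigned long a;

    for (a = 1; a <= 3; a++)
        follow_one(a << 12, &ctx);      /* "expensive lookup" prints only once */
    return 0;
}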
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index c007fb5fb8d5..7f2a28ab46d5 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3233,7 +3233,7 @@ static int is_hugetlb_entry_hwpoisoned(pte_t pte)
 int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
                             struct vm_area_struct *vma)
 {
-    pte_t *src_pte, *dst_pte, entry;
+    pte_t *src_pte, *dst_pte, entry, dst_entry;
     struct page *ptepage;
     unsigned long addr;
     int cow;
@@ -3261,15 +3261,30 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
             break;
         }
 
-        /* If the pagetables are shared don't copy or take references */
-        if (dst_pte == src_pte)
+        /*
+         * If the pagetables are shared don't copy or take references.
+         * dst_pte == src_pte is the common case of src/dest sharing.
+         *
+         * However, src could have 'unshared' and dst shares with
+         * another vma. If dst_pte !none, this implies sharing.
+         * Check here before taking page table lock, and once again
+         * after taking the lock below.
+         */
+        dst_entry = huge_ptep_get(dst_pte);
+        if ((dst_pte == src_pte) || !huge_pte_none(dst_entry))
             continue;
 
         dst_ptl = huge_pte_lock(h, dst, dst_pte);
         src_ptl = huge_pte_lockptr(h, src, src_pte);
         spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
         entry = huge_ptep_get(src_pte);
-        if (huge_pte_none(entry)) { /* skip none entry */
+        dst_entry = huge_ptep_get(dst_pte);
+        if (huge_pte_none(entry) || !huge_pte_none(dst_entry)) {
+            /*
+             * Skip if src entry none. Also, skip in the
+             * unlikely case dst entry !none as this implies
+             * sharing with another vma.
+             */
             ;
         } else if (unlikely(is_hugetlb_entry_migration(entry) ||
                             is_hugetlb_entry_hwpoisoned(entry))) {
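The hugetlb hunk above checks dst_pte both before and after taking the page table lock: the unlocked check skips shared page tables cheaply, and the locked re-check closes the race window. A standalone pthread sketch of that same double-check pattern; names here are illustrative, not hugetlb code:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static bool already_populated;          /* stands in for "dst_pte is not none" */

static void copy_entry(int id)
{
    /* unlocked read is only an optimization; a stale value is harmless
     * because the authoritative check is repeated under the lock */
    if (already_populated)
        return;

    pthread_mutex_lock(&lock);
    if (already_populated) {            /* re-check: another thread won the race */
        pthread_mutex_unlock(&lock);
        return;
    }
    printf("thread %d does the copy\n", id);
    already_populated = true;
    pthread_mutex_unlock(&lock);
}

static void *worker(void *arg)
{
    copy_entry((int)(long)arg);
    return NULL;
}

int main(void)
{
    pthread_t t[4];
    long i;

    for (i = 0; i < 4; i++)
        pthread_create(&t[i], NULL, worker, (void *)i);
    for (i = 0; i < 4; i++)
        pthread_join(t[i], NULL);
    return 0;                           /* "does the copy" prints exactly once */
}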
diff --git a/mm/memblock.c b/mm/memblock.c
index 7df468c8ebc8..9a2d5ae81ae1 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1179,7 +1179,7 @@ void __init_memblock __next_mem_range_rev(u64 *idx, int nid,
 
 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
 /*
- * Common iterator interface used to define for_each_mem_range().
+ * Common iterator interface used to define for_each_mem_pfn_range().
  */
 void __init_memblock __next_mem_pfn_range(int *idx, int nid,
                                           unsigned long *out_start_pfn,
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index a919ba5cb3c8..6847177dc4a1 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4061,17 +4061,6 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
     int reserve_flags;
 
     /*
-     * In the slowpath, we sanity check order to avoid ever trying to
-     * reclaim >= MAX_ORDER areas which will never succeed. Callers may
-     * be using allocators in order of preference for an area that is
-     * too large.
-     */
-    if (order >= MAX_ORDER) {
-        WARN_ON_ONCE(!(gfp_mask & __GFP_NOWARN));
-        return NULL;
-    }
-
-    /*
      * We also sanity check to catch abuse of atomic reserves being used by
      * callers that are not in atomic context.
      */
@@ -4364,6 +4353,15 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, int preferred_nid,
     gfp_t alloc_mask; /* The gfp_t that was actually used for allocation */
     struct alloc_context ac = { };
 
+    /*
+     * There are several places where we assume that the order value is sane
+     * so bail out early if the request is out of bound.
+     */
+    if (unlikely(order >= MAX_ORDER)) {
+        WARN_ON_ONCE(!(gfp_mask & __GFP_NOWARN));
+        return NULL;
+    }
+
     gfp_mask &= gfp_allowed_mask;
     alloc_mask = gfp_mask;
     if (!prepare_alloc_pages(gfp_mask, order, preferred_nid, nodemask, &ac, &alloc_mask, &alloc_flags))
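Together, the two page_alloc hunks above move the order sanity check from the slow path into __alloc_pages_nodemask(), so every allocation path rejects impossible requests up front. A quick standalone illustration of the arithmetic behind the MAX_ORDER bound, assuming the common 4 KiB page size and a MAX_ORDER of 11 (typical x86-64 defaults):

#include <stdio.h>

#define MAX_ORDER 11
#define PAGE_SIZE 4096UL

int main(void)
{
    /* valid orders are 0 .. MAX_ORDER-1; the largest block is 2^(MAX_ORDER-1) pages */
    unsigned long largest = PAGE_SIZE << (MAX_ORDER - 1);
    unsigned int order;

    printf("largest order-%d block: %lu KiB\n", MAX_ORDER - 1, largest / 1024);

    for (order = 9; order <= 12; order++)
        printf("order %u: %s\n", order,
               order >= MAX_ORDER ? "rejected early" : "eligible");
    return 0;
}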
@@ -7789,6 +7787,14 @@ bool has_unmovable_pages(struct zone *zone, struct page *page, int count,
             goto unmovable;
 
         /*
+         * If the zone is movable and we have ruled out all reserved
+         * pages then it should be reasonably safe to assume the rest
+         * is movable.
+         */
+        if (zone_idx(zone) == ZONE_MOVABLE)
+            continue;
+
+        /*
          * Hugepages are not in LRU lists, but they're movable.
          * We need not scan over tail pages bacause we don't
          * handle each tail page individually in migration.
diff --git a/mm/shmem.c b/mm/shmem.c
index ea26d7a0342d..0e10b06fc7d6 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -756,7 +756,7 @@ void shmem_unlock_mapping(struct address_space *mapping)
             break;
         index = indices[pvec.nr - 1] + 1;
         pagevec_remove_exceptionals(&pvec);
-        check_move_unevictable_pages(pvec.pages, pvec.nr);
+        check_move_unevictable_pages(&pvec);
         pagevec_release(&pvec);
         cond_resched();
     }
@@ -2563,9 +2563,7 @@ static loff_t shmem_file_llseek(struct file *file, loff_t offset, int whence)
     inode_lock(inode);
     /* We're holding i_mutex so we can access i_size directly */
 
-    if (offset < 0)
-        offset = -EINVAL;
-    else if (offset >= inode->i_size)
+    if (offset < 0 || offset >= inode->i_size)
         offset = -ENXIO;
     else {
         start = offset >> PAGE_SHIFT;
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 644f746e167a..8688ae65ef58 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -2813,7 +2813,7 @@ static struct swap_info_struct *alloc_swap_info(void)
     unsigned int type;
     int i;
 
-    p = kzalloc(sizeof(*p), GFP_KERNEL);
+    p = kvzalloc(sizeof(*p), GFP_KERNEL);
     if (!p)
         return ERR_PTR(-ENOMEM);
 
@@ -2824,7 +2824,7 @@ static struct swap_info_struct *alloc_swap_info(void)
     }
     if (type >= MAX_SWAPFILES) {
         spin_unlock(&swap_lock);
-        kfree(p);
+        kvfree(p);
         return ERR_PTR(-EPERM);
     }
     if (type >= nr_swapfiles) {
@@ -2838,7 +2838,7 @@ static struct swap_info_struct *alloc_swap_info(void)
         smp_wmb();
         nr_swapfiles++;
     } else {
-        kfree(p);
+        kvfree(p);
         p = swap_info[type];
         /*
          * Do not memset this entry: a racing procfs swap_next()
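The swapfile hunks switch struct swap_info_struct allocation from kzalloc/kfree to kvzalloc/kvfree, which fall back to a vmalloc-style allocation when a physically contiguous one is not available and free with whichever method was used. A rough userspace analogue of that try-then-fall-back shape; the vbuf helpers are illustrative only:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>

struct vbuf {
    void *ptr;
    size_t size;
    int mapped;                         /* 1 if backed by mmap, 0 if calloc */
};

static int vbuf_zalloc(struct vbuf *b, size_t size)
{
    b->size = size;
    b->mapped = 0;
    b->ptr = calloc(1, size);           /* "kmalloc" attempt */
    if (b->ptr)
        return 0;
    /* "vmalloc" fallback; MAP_ANONYMOUS memory is already zeroed */
    b->ptr = mmap(NULL, size, PROT_READ | PROT_WRITE,
                  MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (b->ptr == MAP_FAILED) {
        b->ptr = NULL;
        return -1;
    }
    b->mapped = 1;
    return 0;
}

static void vbuf_free(struct vbuf *b)
{
    if (!b->ptr)
        return;
    if (b->mapped)                      /* free with the matching helper */
        munmap(b->ptr, b->size);
    else
        free(b->ptr);
}

int main(void)
{
    struct vbuf b;

    if (vbuf_zalloc(&b, 1 << 20) == 0) {
        memset(b.ptr, 0xab, b.size);
        puts("allocation ok");
        vbuf_free(&b);
    }
    return 0;
}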
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 62ac0c488624..24ab1f7394ab 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -46,6 +46,7 @@
 #include <linux/delayacct.h>
 #include <linux/sysctl.h>
 #include <linux/oom.h>
+#include <linux/pagevec.h>
 #include <linux/prefetch.h>
 #include <linux/printk.h>
 #include <linux/dax.h>
@@ -4182,17 +4183,16 @@ int page_evictable(struct page *page)
     return ret;
 }
 
-#ifdef CONFIG_SHMEM
 /**
- * check_move_unevictable_pages - check pages for evictability and move to appropriate zone lru list
- * @pages: array of pages to check
- * @nr_pages: number of pages to check
+ * check_move_unevictable_pages - check pages for evictability and move to
+ * appropriate zone lru list
+ * @pvec: pagevec with lru pages to check
  *
- * Checks pages for evictability and moves them to the appropriate lru list.
- *
- * This function is only used for SysV IPC SHM_UNLOCK.
+ * Checks pages for evictability, if an evictable page is in the unevictable
+ * lru list, moves it to the appropriate evictable lru list. This function
+ * should be only used for lru pages.
  */
-void check_move_unevictable_pages(struct page **pages, int nr_pages)
+void check_move_unevictable_pages(struct pagevec *pvec)
 {
     struct lruvec *lruvec;
     struct pglist_data *pgdat = NULL;
@@ -4200,8 +4200,8 @@ void check_move_unevictable_pages(struct page **pages, int nr_pages)
     int pgrescued = 0;
     int i;
 
-    for (i = 0; i < nr_pages; i++) {
-        struct page *page = pages[i];
+    for (i = 0; i < pvec->nr; i++) {
+        struct page *page = pvec->pages[i];
         struct pglist_data *pagepgdat = page_pgdat(page);
 
         pgscanned++;
@@ -4233,4 +4233,4 @@ void check_move_unevictable_pages(struct page **pages, int nr_pages)
         spin_unlock_irq(&pgdat->lru_lock);
     }
 }
-#endif /* CONFIG_SHMEM */
+EXPORT_SYMBOL_GPL(check_move_unevictable_pages);
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 6038ce593ce3..9c624595e904 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1827,12 +1827,13 @@ static bool need_update(int cpu)
 
         /*
          * The fast way of checking if there are any vmstat diffs.
-         * This works because the diffs are byte sized items.
          */
-        if (memchr_inv(p->vm_stat_diff, 0, NR_VM_ZONE_STAT_ITEMS))
+        if (memchr_inv(p->vm_stat_diff, 0, NR_VM_ZONE_STAT_ITEMS *
+                       sizeof(p->vm_stat_diff[0])))
             return true;
 #ifdef CONFIG_NUMA
-        if (memchr_inv(p->vm_numa_stat_diff, 0, NR_VM_NUMA_STAT_ITEMS))
+        if (memchr_inv(p->vm_numa_stat_diff, 0, NR_VM_NUMA_STAT_ITEMS *
+                       sizeof(p->vm_numa_stat_diff[0])))
             return true;
 #endif
     }
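The vmstat hunk matters because memchr_inv() takes its length in bytes, so scanning an array of multi-byte counters with only the element count inspects just a fraction of it. A standalone reproduction of the pitfall, using a local memchr_inv-style helper (the kernel version returns a pointer; this one returns a flag):

#include <stdio.h>
#include <string.h>

/* return nonzero if any byte in the first len bytes differs from c */
static int memchr_inv(const void *p, int c, size_t len)
{
    const unsigned char *s = p;
    size_t i;

    for (i = 0; i < len; i++)
        if (s[i] != (unsigned char)c)
            return 1;
    return 0;
}

#define NR_ITEMS 8

int main(void)
{
    short diff[NR_ITEMS] = { 0 };       /* 2-byte counters, like the NUMA diffs */

    diff[NR_ITEMS - 1] = 1;             /* pending update in the last element */

    printf("len = NR_ITEMS:          %s\n",
           memchr_inv(diff, 0, NR_ITEMS) ? "diff found" : "missed!");
    printf("len = NR_ITEMS * sizeof: %s\n",
           memchr_inv(diff, 0, NR_ITEMS * sizeof(diff[0])) ? "diff found" : "missed!");
    return 0;
}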
diff --git a/mm/z3fold.c b/mm/z3fold.c
index 4b366d181f35..aee9b0b8d907 100644
--- a/mm/z3fold.c
+++ b/mm/z3fold.c
@@ -99,6 +99,7 @@ struct z3fold_header {
 #define NCHUNKS        ((PAGE_SIZE - ZHDR_SIZE_ALIGNED) >> CHUNK_SHIFT)
 
 #define BUDDY_MASK     (0x3)
+#define BUDDY_SHIFT    2
 
 /**
  * struct z3fold_pool - stores metadata for each z3fold pool
@@ -145,7 +146,7 @@ enum z3fold_page_flags {
     MIDDLE_CHUNK_MAPPED,
     NEEDS_COMPACTING,
     PAGE_STALE,
-    UNDER_RECLAIM
+    PAGE_CLAIMED, /* by either reclaim or free */
 };
 
 /*****************
@@ -174,7 +175,7 @@ static struct z3fold_header *init_z3fold_page(struct page *page,
     clear_bit(MIDDLE_CHUNK_MAPPED, &page->private);
     clear_bit(NEEDS_COMPACTING, &page->private);
     clear_bit(PAGE_STALE, &page->private);
-    clear_bit(UNDER_RECLAIM, &page->private);
+    clear_bit(PAGE_CLAIMED, &page->private);
 
     spin_lock_init(&zhdr->page_lock);
     kref_init(&zhdr->refcount);
@@ -223,8 +224,11 @@ static unsigned long encode_handle(struct z3fold_header *zhdr, enum buddy bud)
     unsigned long handle;
 
     handle = (unsigned long)zhdr;
-    if (bud != HEADLESS)
-        handle += (bud + zhdr->first_num) & BUDDY_MASK;
+    if (bud != HEADLESS) {
+        handle |= (bud + zhdr->first_num) & BUDDY_MASK;
+        if (bud == LAST)
+            handle |= (zhdr->last_chunks << BUDDY_SHIFT);
+    }
     return handle;
 }
 
@@ -234,6 +238,12 @@ static struct z3fold_header *handle_to_z3fold_header(unsigned long handle)
     return (struct z3fold_header *)(handle & PAGE_MASK);
 }
 
+/* only for LAST bud, returns zero otherwise */
+static unsigned short handle_to_chunks(unsigned long handle)
+{
+    return (handle & ~PAGE_MASK) >> BUDDY_SHIFT;
+}
+
 /*
  * (handle & BUDDY_MASK) < zhdr->first_num is possible in encode_handle
  * but that doesn't matter. because the masking will result in the
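The two z3fold hunks above pack extra state into the handle: the header address is page aligned, so bits 0-1 carry the buddy id (BUDDY_MASK) and, for a LAST buddy, the chunk count sits above BUDDY_SHIFT. A standalone sketch of that encoding; it is simplified and ignores the first_num rotation the real encode_handle() applies:

#include <assert.h>
#include <stdio.h>

#define PAGE_MASK   (~0xfffUL)          /* 4 KiB pages assumed */
#define BUDDY_MASK  0x3UL
#define BUDDY_SHIFT 2

static unsigned long encode(unsigned long hdr, unsigned long bud,
                             unsigned long last_chunks)
{
    unsigned long handle = hdr;

    handle |= bud & BUDDY_MASK;         /* buddy id in the low two bits */
    if (bud == 3 /* LAST */)
        handle |= last_chunks << BUDDY_SHIFT;
    return handle;
}

static unsigned long handle_to_chunks(unsigned long handle)
{
    return (handle & ~PAGE_MASK) >> BUDDY_SHIFT;
}

int main(void)
{
    unsigned long hdr = 0x7f1234566000UL;   /* page-aligned header address */
    unsigned long h = encode(hdr, 3, 37);   /* LAST buddy, 37 chunks */

    assert((h & PAGE_MASK) == hdr);         /* header still recoverable */
    assert((h & BUDDY_MASK) == 3);          /* buddy id in the low bits */
    assert(handle_to_chunks(h) == 37);      /* chunk count above the shift */
    printf("handle %#lx -> chunks %lu\n", h, handle_to_chunks(h));
    return 0;
}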
@@ -720,37 +730,39 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
     page = virt_to_page(zhdr);
 
     if (test_bit(PAGE_HEADLESS, &page->private)) {
-        /* HEADLESS page stored */
-        bud = HEADLESS;
-    } else {
-        z3fold_page_lock(zhdr);
-        bud = handle_to_buddy(handle);
-
-        switch (bud) {
-        case FIRST:
-            zhdr->first_chunks = 0;
-            break;
-        case MIDDLE:
-            zhdr->middle_chunks = 0;
-            zhdr->start_middle = 0;
-            break;
-        case LAST:
-            zhdr->last_chunks = 0;
-            break;
-        default:
-            pr_err("%s: unknown bud %d\n", __func__, bud);
-            WARN_ON(1);
-            z3fold_page_unlock(zhdr);
-            return;
+        /* if a headless page is under reclaim, just leave.
+         * NB: we use test_and_set_bit for a reason: if the bit
+         * has not been set before, we release this page
+         * immediately so we don't care about its value any more.
+         */
+        if (!test_and_set_bit(PAGE_CLAIMED, &page->private)) {
+            spin_lock(&pool->lock);
+            list_del(&page->lru);
+            spin_unlock(&pool->lock);
+            free_z3fold_page(page);
+            atomic64_dec(&pool->pages_nr);
         }
+        return;
     }
 
-    if (bud == HEADLESS) {
-        spin_lock(&pool->lock);
-        list_del(&page->lru);
-        spin_unlock(&pool->lock);
-        free_z3fold_page(page);
-        atomic64_dec(&pool->pages_nr);
+    /* Non-headless case */
+    z3fold_page_lock(zhdr);
+    bud = handle_to_buddy(handle);
+
+    switch (bud) {
+    case FIRST:
+        zhdr->first_chunks = 0;
+        break;
+    case MIDDLE:
+        zhdr->middle_chunks = 0;
+        break;
+    case LAST:
+        zhdr->last_chunks = 0;
+        break;
+    default:
+        pr_err("%s: unknown bud %d\n", __func__, bud);
+        WARN_ON(1);
+        z3fold_page_unlock(zhdr);
         return;
     }
 
@@ -758,7 +770,7 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
         atomic64_dec(&pool->pages_nr);
         return;
     }
-    if (test_bit(UNDER_RECLAIM, &page->private)) {
+    if (test_bit(PAGE_CLAIMED, &page->private)) {
         z3fold_page_unlock(zhdr);
         return;
     }
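The PAGE_CLAIMED handling above leans on test_and_set_bit() being an atomic read-modify-write: whichever of free and reclaim sets the bit first owns the page, and the other side backs off. A minimal C11-atomics sketch of that claim pattern; names are illustrative, not the z3fold API:

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static atomic_flag claimed = ATOMIC_FLAG_INIT;

static void *party(void *arg)
{
    const char *who = arg;

    /* test_and_set returns the previous value: false means we claimed first */
    if (!atomic_flag_test_and_set(&claimed))
        printf("%s claimed the page and releases it\n", who);
    else
        printf("%s lost the race and backs off\n", who);
    return NULL;
}

int main(void)
{
    pthread_t a, b;

    pthread_create(&a, NULL, party, "free");
    pthread_create(&b, NULL, party, "reclaim");
    pthread_join(a, NULL);
    pthread_join(b, NULL);
    return 0;                           /* exactly one thread prints "claimed" */
}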
@@ -836,20 +848,30 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
         }
         list_for_each_prev(pos, &pool->lru) {
             page = list_entry(pos, struct page, lru);
+
+            /* this bit could have been set by free, in which case
+             * we pass over to the next page in the pool.
+             */
+            if (test_and_set_bit(PAGE_CLAIMED, &page->private))
+                continue;
+
+            zhdr = page_address(page);
             if (test_bit(PAGE_HEADLESS, &page->private))
-                /* candidate found */
                 break;
 
-            zhdr = page_address(page);
-            if (!z3fold_page_trylock(zhdr))
+            if (!z3fold_page_trylock(zhdr)) {
+                zhdr = NULL;
                 continue; /* can't evict at this point */
+            }
             kref_get(&zhdr->refcount);
             list_del_init(&zhdr->buddy);
             zhdr->cpu = -1;
-            set_bit(UNDER_RECLAIM, &page->private);
             break;
         }
 
+        if (!zhdr)
+            break;
+
         list_del_init(&page->lru);
         spin_unlock(&pool->lock);
 
@@ -898,6 +920,7 @@ next:
         if (test_bit(PAGE_HEADLESS, &page->private)) {
             if (ret == 0) {
                 free_z3fold_page(page);
+                atomic64_dec(&pool->pages_nr);
                 return 0;
             }
             spin_lock(&pool->lock);
@@ -905,7 +928,7 @@ next:
             spin_unlock(&pool->lock);
         } else {
             z3fold_page_lock(zhdr);
-            clear_bit(UNDER_RECLAIM, &page->private);
+            clear_bit(PAGE_CLAIMED, &page->private);
             if (kref_put(&zhdr->refcount,
                          release_z3fold_page_locked)) {
                 atomic64_dec(&pool->pages_nr);
@@ -964,7 +987,7 @@ static void *z3fold_map(struct z3fold_pool *pool, unsigned long handle)
         set_bit(MIDDLE_CHUNK_MAPPED, &page->private);
         break;
     case LAST:
-        addr += PAGE_SIZE - (zhdr->last_chunks << CHUNK_SHIFT);
+        addr += PAGE_SIZE - (handle_to_chunks(handle) << CHUNK_SHIFT);
         break;
     default:
         pr_err("unknown buddy id %d\n", buddy);