| field | value | date / path |
|---|---|---|
| author | Minchan Kim <minchan@kernel.org> | 2018-04-05 19:23:39 -0400 |
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-04-06 00:36:25 -0400 |
| commit | eaf649ebc3acfbb235ce31cebd06e4876d05758e | |
| tree | 68225a29f76c5f4d78c628c405af7ba37ad75f31 | /mm/swap_state.c |
| parent | e830c63a621e20894a663351b968706bd0efbbd0 | |
mm: swap: clean up swap readahead
Looking at the recent swap readahead changes, I am unhappy with the current
code structure, which diverges into two separate swap readahead algorithms
inside do_swap_page. This patch cleans that up.

The main motivation is that the fault handler does not need to be aware of
the readahead algorithms; it should simply call swapin_readahead.

As a first step, this patch does a partial cleanup (kept separate to make
review easier); the next patch completes the goal.
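To make the goal concrete, the structure the series is heading toward looks roughly like the sketch below. This is illustrative only: the signature is simplified, and swap_vma_readahead()/swap_cluster_readahead() stand in for the two existing paths that the follow-up patch unifies behind a single entry point.

```c
/*
 * Sketch of the intended structure, not actual kernel source.  The fault
 * handler calls one entry point; whether VMA-based or cluster-based
 * readahead is used is decided inside swapin_readahead() itself.
 */
struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
                              struct vm_fault *vmf)
{
        return swap_use_vma_readahead() ?
                swap_vma_readahead(entry, gfp_mask, vmf) :
                swap_cluster_readahead(entry, gfp_mask, vmf);
}
```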
[minchan@kernel.org: do not check readahead flag with THP anon]
Link: http://lkml.kernel.org/r/874lm83zho.fsf@yhuang-dev.intel.com
Link: http://lkml.kernel.org/r/20180227232611.169883-1-minchan@kernel.org
Link: http://lkml.kernel.org/r/1509520520-32367-2-git-send-email-minchan@kernel.org
Link: http://lkml.kernel.org/r/20180220085249.151400-2-minchan@kernel.org
Signed-off-by: Minchan Kim <minchan@kernel.org>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Huang Ying <ying.huang@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/swap_state.c')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | mm/swap_state.c | 96 |

1 file changed, 53 insertions(+), 43 deletions(-)
```diff
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 39ae7cfad90f..db5da2baafb1 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -332,32 +332,43 @@ struct page *lookup_swap_cache(swp_entry_t entry, struct vm_area_struct *vma,
                               unsigned long addr)
 {
         struct page *page;
-        unsigned long ra_info;
-        int win, hits, readahead;
 
         page = find_get_page(swap_address_space(entry), swp_offset(entry));
 
         INC_CACHE_INFO(find_total);
         if (page) {
+                bool vma_ra = swap_use_vma_readahead();
+                bool readahead;
+
                 INC_CACHE_INFO(find_success);
+                /*
+                 * At the moment, we don't support PG_readahead for anon THP
+                 * so let's bail out rather than confusing the readahead stat.
+                 */
                 if (unlikely(PageTransCompound(page)))
                         return page;
+
                 readahead = TestClearPageReadahead(page);
-                if (vma) {
-                        ra_info = GET_SWAP_RA_VAL(vma);
-                        win = SWAP_RA_WIN(ra_info);
-                        hits = SWAP_RA_HITS(ra_info);
+                if (vma && vma_ra) {
+                        unsigned long ra_val;
+                        int win, hits;
+
+                        ra_val = GET_SWAP_RA_VAL(vma);
+                        win = SWAP_RA_WIN(ra_val);
+                        hits = SWAP_RA_HITS(ra_val);
                         if (readahead)
                                 hits = min_t(int, hits + 1, SWAP_RA_HITS_MAX);
                         atomic_long_set(&vma->swap_readahead_info,
                                         SWAP_RA_VAL(addr, win, hits));
                 }
+
                 if (readahead) {
                         count_vm_event(SWAP_RA_HIT);
-                        if (!vma)
+                        if (!vma || !vma_ra)
                                 atomic_inc(&swapin_readahead_hits);
                 }
         }
+
         return page;
 }
 
@@ -586,8 +597,7 @@ struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
                         continue;
                 if (page_allocated) {
                         swap_readpage(page, false);
-                        if (offset != entry_offset &&
-                            likely(!PageTransCompound(page))) {
+                        if (offset != entry_offset) {
                                 SetPageReadahead(page);
                                 count_vm_event(SWAP_RA);
                         }
```
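The lookup_swap_cache() hunk above reads and updates the per-VMA readahead state through GET_SWAP_RA_VAL(), SWAP_RA_WIN(), SWAP_RA_HITS() and SWAP_RA_VAL(): the last faulting address, the current window and the hit count are packed into the single atomic_long_t vma->swap_readahead_info so they can be updated without a lock. The standalone sketch below illustrates that packing idea only; the RA_* names, bit widths and main() are invented for the example, and the kernel derives its real masks from PAGE_MASK rather than these constants.

```c
#include <stdio.h>

/* Illustrative packing of (addr, win, hits) into one unsigned long. */
#define RA_HITS_BITS	6
#define RA_WIN_BITS	6
#define RA_HITS_MASK	((1UL << RA_HITS_BITS) - 1)
#define RA_WIN_MASK	(((1UL << RA_WIN_BITS) - 1) << RA_HITS_BITS)
#define RA_ADDR_MASK	(~(RA_WIN_MASK | RA_HITS_MASK))

#define RA_VAL(addr, win, hits)						\
	(((addr) & RA_ADDR_MASK) |					\
	 (((unsigned long)(win) << RA_HITS_BITS) & RA_WIN_MASK) |	\
	 ((unsigned long)(hits) & RA_HITS_MASK))
#define RA_ADDR(v)	((v) & RA_ADDR_MASK)
#define RA_WIN(v)	(((v) & RA_WIN_MASK) >> RA_HITS_BITS)
#define RA_HITS(v)	((v) & RA_HITS_MASK)

int main(void)
{
	/* One word carries all three readahead statistics. */
	unsigned long v = RA_VAL(0x7f1234560000UL, 8, 3);

	printf("addr=%#lx win=%lu hits=%lu\n",
	       RA_ADDR(v), RA_WIN(v), RA_HITS(v));
	return 0;
}
```

The remaining hunks rename swap_readahead_detect() to swap_ra_info(), make it static, and move it (and the PTE mapping it needs) inside do_swap_page_readahead():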
```diff
@@ -649,16 +659,15 @@ static inline void swap_ra_clamp_pfn(struct vm_area_struct *vma,
                       PFN_DOWN((faddr & PMD_MASK) + PMD_SIZE));
 }
 
-struct page *swap_readahead_detect(struct vm_fault *vmf,
-                                   struct vma_swap_readahead *swap_ra)
+static void swap_ra_info(struct vm_fault *vmf,
+                        struct vma_swap_readahead *ra_info)
 {
         struct vm_area_struct *vma = vmf->vma;
-        unsigned long swap_ra_info;
-        struct page *page;
+        unsigned long ra_val;
         swp_entry_t entry;
         unsigned long faddr, pfn, fpfn;
         unsigned long start, end;
-        pte_t *pte;
+        pte_t *pte, *orig_pte;
         unsigned int max_win, hits, prev_win, win, left;
 #ifndef CONFIG_64BIT
         pte_t *tpte;
@@ -667,30 +676,32 @@ struct page *swap_readahead_detect(struct vm_fault *vmf,
         max_win = 1 << min_t(unsigned int, READ_ONCE(page_cluster),
                              SWAP_RA_ORDER_CEILING);
         if (max_win == 1) {
-                swap_ra->win = 1;
-                return NULL;
+                ra_info->win = 1;
+                return;
         }
 
         faddr = vmf->address;
-        entry = pte_to_swp_entry(vmf->orig_pte);
-        if ((unlikely(non_swap_entry(entry))))
-                return NULL;
-        page = lookup_swap_cache(entry, vma, faddr);
-        if (page)
-                return page;
+        orig_pte = pte = pte_offset_map(vmf->pmd, faddr);
+        entry = pte_to_swp_entry(*pte);
+        if ((unlikely(non_swap_entry(entry)))) {
+                pte_unmap(orig_pte);
+                return;
+        }
 
         fpfn = PFN_DOWN(faddr);
-        swap_ra_info = GET_SWAP_RA_VAL(vma);
-        pfn = PFN_DOWN(SWAP_RA_ADDR(swap_ra_info));
-        prev_win = SWAP_RA_WIN(swap_ra_info);
-        hits = SWAP_RA_HITS(swap_ra_info);
-        swap_ra->win = win = __swapin_nr_pages(pfn, fpfn, hits,
+        ra_val = GET_SWAP_RA_VAL(vma);
+        pfn = PFN_DOWN(SWAP_RA_ADDR(ra_val));
+        prev_win = SWAP_RA_WIN(ra_val);
+        hits = SWAP_RA_HITS(ra_val);
+        ra_info->win = win = __swapin_nr_pages(pfn, fpfn, hits,
                                                max_win, prev_win);
         atomic_long_set(&vma->swap_readahead_info,
                         SWAP_RA_VAL(faddr, win, 0));
 
-        if (win == 1)
-                return NULL;
+        if (win == 1) {
+                pte_unmap(orig_pte);
+                return;
+        }
 
         /* Copy the PTEs because the page table may be unmapped */
         if (fpfn == pfn + 1)
@@ -703,23 +714,21 @@ struct page *swap_readahead_detect(struct vm_fault *vmf,
                 swap_ra_clamp_pfn(vma, faddr, fpfn - left, fpfn + win - left,
                                   &start, &end);
         }
-        swap_ra->nr_pte = end - start;
-        swap_ra->offset = fpfn - start;
-        pte = vmf->pte - swap_ra->offset;
+        ra_info->nr_pte = end - start;
+        ra_info->offset = fpfn - start;
+        pte -= ra_info->offset;
 #ifdef CONFIG_64BIT
-        swap_ra->ptes = pte;
+        ra_info->ptes = pte;
 #else
-        tpte = swap_ra->ptes;
+        tpte = ra_info->ptes;
         for (pfn = start; pfn != end; pfn++)
                 *tpte++ = *pte++;
 #endif
-
-        return NULL;
+        pte_unmap(orig_pte);
 }
 
 struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask,
-                                    struct vm_fault *vmf,
-                                    struct vma_swap_readahead *swap_ra)
+                                    struct vm_fault *vmf)
 {
         struct blk_plug plug;
         struct vm_area_struct *vma = vmf->vma;
@@ -728,12 +737,14 @@ struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask,
         swp_entry_t entry;
         unsigned int i;
         bool page_allocated;
+        struct vma_swap_readahead ra_info = {0,};
 
-        if (swap_ra->win == 1)
+        swap_ra_info(vmf, &ra_info);
+        if (ra_info.win == 1)
                 goto skip;
 
         blk_start_plug(&plug);
-        for (i = 0, pte = swap_ra->ptes; i < swap_ra->nr_pte;
+        for (i = 0, pte = ra_info.ptes; i < ra_info.nr_pte;
              i++, pte++) {
                 pentry = *pte;
                 if (pte_none(pentry))
@@ -749,8 +760,7 @@ struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask,
                         continue;
                 if (page_allocated) {
                         swap_readpage(page, false);
-                        if (i != swap_ra->offset &&
-                            likely(!PageTransCompound(page))) {
+                        if (i != ra_info.offset) {
                                 SetPageReadahead(page);
                                 count_vm_event(SWAP_RA);
                         }
@@ -761,7 +771,7 @@ struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask,
         lru_add_drain();
 skip:
         return read_swap_cache_async(fentry, gfp_mask, vma, vmf->address,
-                                     swap_ra->win == 1);
+                                     ra_info.win == 1);
 }
 
 #ifdef CONFIG_SYSFS
```
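One detail worth noting in the swap_ra_info() hunks: because the helper now maps the page table itself with pte_offset_map() instead of reusing vmf->pte, every exit path must drop that mapping with pte_unmap(orig_pte), and the PTEs of interest are copied out while the mapping is held. The userspace stand-in below only illustrates that map / copy-window / unmap-on-every-path discipline; table_map(), table_unmap() and copy_window() are invented for this example and are not kernel APIs.

```c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define TABLE_ENTRIES 512

/* Stand-in for pte_offset_map(): hand back a temporarily usable table. */
static unsigned long *table_map(void)
{
	unsigned long *t = malloc(TABLE_ENTRIES * sizeof(*t));

	if (t)
		for (int i = 0; i < TABLE_ENTRIES; i++)
			t[i] = i;	/* fake entry contents */
	return t;
}

/* Stand-in for pte_unmap(): release the temporary mapping. */
static void table_unmap(unsigned long *t)
{
	free(t);
}

/* Copy @nr entries starting at @start; never return with the table mapped. */
static int copy_window(unsigned long *dst, int start, int nr)
{
	unsigned long *orig = table_map();	/* like orig_pte = pte_offset_map() */

	if (!orig)
		return -1;
	if (start < 0 || start + nr > TABLE_ENTRIES) {
		table_unmap(orig);		/* early exit still unmaps */
		return -1;
	}
	memcpy(dst, orig + start, nr * sizeof(*dst));
	table_unmap(orig);			/* normal exit unmaps too */
	return 0;
}

int main(void)
{
	unsigned long win[8];

	if (copy_window(win, 100, 8) == 0)
		printf("first copied entry: %lu\n", win[0]);
	return 0;
}
```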
