author     Minchan Kim <minchan@kernel.org>	2018-04-05 19:23:39 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>	2018-04-06 00:36:25 -0400
commit     eaf649ebc3acfbb235ce31cebd06e4876d05758e (patch)
tree       68225a29f76c5f4d78c628c405af7ba37ad75f31 /mm/swap_state.c
parent     e830c63a621e20894a663351b968706bd0efbbd0 (diff)
mm: swap: clean up swap readahead
Looking at the recent changes to swap readahead, I am unhappy with the current code structure, which makes do_swap_page diverge into two separate swap readahead algorithms. This patch cleans it up.
The main motivation is that the fault handler should not need to be aware of the readahead algorithms; it should just call swapin_readahead.
As a first step, this patch does a partial cleanup rather than the full job (split out this way to make review easier); the next patch will complete the goal.
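To illustrate that end goal, here is a minimal sketch of what the fault-handler side should reduce to once the series is complete; the exact call-site shape and the use of the current four-argument swapin_readahead() signature are illustrative assumptions, not something this patch introduces:

	/*
	 * Hypothetical sketch: do_swap_page() just asks for the page.
	 * The choice between cluster-based and VMA-based readahead
	 * stays hidden behind swapin_readahead().
	 */
	page = lookup_swap_cache(entry, vma, vmf->address);
	if (!page)
		page = swapin_readahead(entry, GFP_HIGHUSER_MOVABLE,
					vma, vmf->address);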
[minchan@kernel.org: do not check readahead flag with THP anon]
Link: http://lkml.kernel.org/r/874lm83zho.fsf@yhuang-dev.intel.com
Link: http://lkml.kernel.org/r/20180227232611.169883-1-minchan@kernel.org
Link: http://lkml.kernel.org/r/1509520520-32367-2-git-send-email-minchan@kernel.org
Link: http://lkml.kernel.org/r/20180220085249.151400-2-minchan@kernel.org
Signed-off-by: Minchan Kim <minchan@kernel.org>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Huang Ying <ying.huang@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/swap_state.c')
-rw-r--r--	mm/swap_state.c	96
1 file changed, 53 insertions(+), 43 deletions(-)
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 39ae7cfad90f..db5da2baafb1 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -332,32 +332,43 @@ struct page *lookup_swap_cache(swp_entry_t entry, struct vm_area_struct *vma,
 			       unsigned long addr)
 {
 	struct page *page;
-	unsigned long ra_info;
-	int win, hits, readahead;
 
 	page = find_get_page(swap_address_space(entry), swp_offset(entry));
 
 	INC_CACHE_INFO(find_total);
 	if (page) {
+		bool vma_ra = swap_use_vma_readahead();
+		bool readahead;
+
 		INC_CACHE_INFO(find_success);
+		/*
+		 * At the moment, we don't support PG_readahead for anon THP
+		 * so let's bail out rather than confusing the readahead stat.
+		 */
 		if (unlikely(PageTransCompound(page)))
 			return page;
+
 		readahead = TestClearPageReadahead(page);
-		if (vma) {
-			ra_info = GET_SWAP_RA_VAL(vma);
-			win = SWAP_RA_WIN(ra_info);
-			hits = SWAP_RA_HITS(ra_info);
+		if (vma && vma_ra) {
+			unsigned long ra_val;
+			int win, hits;
+
+			ra_val = GET_SWAP_RA_VAL(vma);
+			win = SWAP_RA_WIN(ra_val);
+			hits = SWAP_RA_HITS(ra_val);
 			if (readahead)
 				hits = min_t(int, hits + 1, SWAP_RA_HITS_MAX);
 			atomic_long_set(&vma->swap_readahead_info,
 					SWAP_RA_VAL(addr, win, hits));
 		}
+
 		if (readahead) {
 			count_vm_event(SWAP_RA_HIT);
-			if (!vma)
+			if (!vma || !vma_ra)
 				atomic_inc(&swapin_readahead_hits);
 		}
 	}
+
 	return page;
 }
 
@@ -586,8 +597,7 @@ struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
 			continue;
 		if (page_allocated) {
 			swap_readpage(page, false);
-			if (offset != entry_offset &&
-			    likely(!PageTransCompound(page))) {
+			if (offset != entry_offset) {
 				SetPageReadahead(page);
 				count_vm_event(SWAP_RA);
 			}
@@ -649,16 +659,15 @@ static inline void swap_ra_clamp_pfn(struct vm_area_struct *vma,
 		      PFN_DOWN((faddr & PMD_MASK) + PMD_SIZE));
 }
 
-struct page *swap_readahead_detect(struct vm_fault *vmf,
-				   struct vma_swap_readahead *swap_ra)
+static void swap_ra_info(struct vm_fault *vmf,
+			struct vma_swap_readahead *ra_info)
 {
 	struct vm_area_struct *vma = vmf->vma;
-	unsigned long swap_ra_info;
-	struct page *page;
+	unsigned long ra_val;
 	swp_entry_t entry;
 	unsigned long faddr, pfn, fpfn;
 	unsigned long start, end;
-	pte_t *pte;
+	pte_t *pte, *orig_pte;
 	unsigned int max_win, hits, prev_win, win, left;
 #ifndef CONFIG_64BIT
 	pte_t *tpte;
@@ -667,30 +676,32 @@ struct page *swap_readahead_detect(struct vm_fault *vmf,
 	max_win = 1 << min_t(unsigned int, READ_ONCE(page_cluster),
 			     SWAP_RA_ORDER_CEILING);
 	if (max_win == 1) {
-		swap_ra->win = 1;
-		return NULL;
+		ra_info->win = 1;
+		return;
 	}
 
 	faddr = vmf->address;
-	entry = pte_to_swp_entry(vmf->orig_pte);
-	if ((unlikely(non_swap_entry(entry))))
-		return NULL;
-	page = lookup_swap_cache(entry, vma, faddr);
-	if (page)
-		return page;
+	orig_pte = pte = pte_offset_map(vmf->pmd, faddr);
+	entry = pte_to_swp_entry(*pte);
+	if ((unlikely(non_swap_entry(entry)))) {
+		pte_unmap(orig_pte);
+		return;
+	}
 
 	fpfn = PFN_DOWN(faddr);
-	swap_ra_info = GET_SWAP_RA_VAL(vma);
-	pfn = PFN_DOWN(SWAP_RA_ADDR(swap_ra_info));
-	prev_win = SWAP_RA_WIN(swap_ra_info);
-	hits = SWAP_RA_HITS(swap_ra_info);
-	swap_ra->win = win = __swapin_nr_pages(pfn, fpfn, hits,
-					       max_win, prev_win);
+	ra_val = GET_SWAP_RA_VAL(vma);
+	pfn = PFN_DOWN(SWAP_RA_ADDR(ra_val));
+	prev_win = SWAP_RA_WIN(ra_val);
+	hits = SWAP_RA_HITS(ra_val);
+	ra_info->win = win = __swapin_nr_pages(pfn, fpfn, hits,
+					       max_win, prev_win);
 	atomic_long_set(&vma->swap_readahead_info,
 			SWAP_RA_VAL(faddr, win, 0));
 
-	if (win == 1)
-		return NULL;
+	if (win == 1) {
+		pte_unmap(orig_pte);
+		return;
+	}
 
 	/* Copy the PTEs because the page table may be unmapped */
 	if (fpfn == pfn + 1)
@@ -703,23 +714,21 @@ struct page *swap_readahead_detect(struct vm_fault *vmf,
 		swap_ra_clamp_pfn(vma, faddr, fpfn - left, fpfn + win - left,
 				  &start, &end);
 	}
-	swap_ra->nr_pte = end - start;
-	swap_ra->offset = fpfn - start;
-	pte = vmf->pte - swap_ra->offset;
+	ra_info->nr_pte = end - start;
+	ra_info->offset = fpfn - start;
+	pte -= ra_info->offset;
 #ifdef CONFIG_64BIT
-	swap_ra->ptes = pte;
+	ra_info->ptes = pte;
 #else
-	tpte = swap_ra->ptes;
+	tpte = ra_info->ptes;
 	for (pfn = start; pfn != end; pfn++)
 		*tpte++ = *pte++;
 #endif
-
-	return NULL;
+	pte_unmap(orig_pte);
 }
 
 struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask,
-				    struct vm_fault *vmf,
-				    struct vma_swap_readahead *swap_ra)
+				    struct vm_fault *vmf)
 {
 	struct blk_plug plug;
 	struct vm_area_struct *vma = vmf->vma;
@@ -728,12 +737,14 @@ struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask,
 	swp_entry_t entry;
 	unsigned int i;
 	bool page_allocated;
+	struct vma_swap_readahead ra_info = {0,};
 
-	if (swap_ra->win == 1)
+	swap_ra_info(vmf, &ra_info);
+	if (ra_info.win == 1)
 		goto skip;
 
 	blk_start_plug(&plug);
-	for (i = 0, pte = swap_ra->ptes; i < swap_ra->nr_pte;
+	for (i = 0, pte = ra_info.ptes; i < ra_info.nr_pte;
 	     i++, pte++) {
 		pentry = *pte;
 		if (pte_none(pentry))
@@ -749,8 +760,7 @@ struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask,
 			continue;
 		if (page_allocated) {
 			swap_readpage(page, false);
-			if (i != swap_ra->offset &&
-			    likely(!PageTransCompound(page))) {
+			if (i != ra_info.offset) {
 				SetPageReadahead(page);
 				count_vm_event(SWAP_RA);
 			}
@@ -761,7 +771,7 @@ struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask,
 		lru_add_drain();
 skip:
 	return read_swap_cache_async(fentry, gfp_mask, vma, vmf->address,
-				     swap_ra->win == 1);
+				     ra_info.win == 1);
 }
 
 #ifdef CONFIG_SYSFS