path: root/mm/swap_state.c
author    Minchan Kim <minchan@kernel.org>  2018-04-05 19:23:39 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>  2018-04-06 00:36:25 -0400
commit    eaf649ebc3acfbb235ce31cebd06e4876d05758e (patch)
tree      68225a29f76c5f4d78c628c405af7ba37ad75f31 /mm/swap_state.c
parent    e830c63a621e20894a663351b968706bd0efbbd0 (diff)
mm: swap: clean up swap readahead
Looking at the recent swap readahead changes, I am unhappy with the current code structure, which diverges into two swap readahead algorithms inside do_swap_page().  This patch cleans that up.

The main motivation is that the fault handler does not need to be aware of the readahead algorithms; it should just call swapin_readahead().

As a first step, this patch does only part of the cleanup (kept small to make review easier); the next patch will complete the goal.

[minchan@kernel.org: do not check readahead flag with THP anon]
  Link: http://lkml.kernel.org/r/874lm83zho.fsf@yhuang-dev.intel.com
  Link: http://lkml.kernel.org/r/20180227232611.169883-1-minchan@kernel.org
Link: http://lkml.kernel.org/r/1509520520-32367-2-git-send-email-minchan@kernel.org
Link: http://lkml.kernel.org/r/20180220085249.151400-2-minchan@kernel.org
Signed-off-by: Minchan Kim <minchan@kernel.org>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Huang Ying <ying.huang@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
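For orientation, the sketch below shows roughly the end state the message is aiming for once the follow-up patch lands: do_swap_page() calls only swapin_readahead(), and the choice between the two readahead algorithms stays inside mm/swap_state.c. This is an illustrative sketch, not code added by this patch; swap_use_vma_readahead() appears in the diff below, while the helper names swap_vma_readahead() and swap_cluster_readahead() are assumptions based on the direction of the series.

/*
 * Illustrative sketch only (not part of this patch): the fault handler
 * no longer picks a readahead algorithm itself; swapin_readahead()
 * dispatches to the VMA-based or cluster-based implementation.
 * The two helper names are assumed, not taken from this commit.
 */
struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
			      struct vm_fault *vmf)
{
	return swap_use_vma_readahead() ?
		swap_vma_readahead(entry, gfp_mask, vmf) :
		swap_cluster_readahead(entry, gfp_mask, vmf);
}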
Diffstat (limited to 'mm/swap_state.c')
-rw-r--r--  mm/swap_state.c | 96
1 file changed, 53 insertions(+), 43 deletions(-)
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 39ae7cfad90f..db5da2baafb1 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -332,32 +332,43 @@ struct page *lookup_swap_cache(swp_entry_t entry, struct vm_area_struct *vma,
 			       unsigned long addr)
 {
 	struct page *page;
-	unsigned long ra_info;
-	int win, hits, readahead;
 
 	page = find_get_page(swap_address_space(entry), swp_offset(entry));
 
 	INC_CACHE_INFO(find_total);
 	if (page) {
+		bool vma_ra = swap_use_vma_readahead();
+		bool readahead;
+
 		INC_CACHE_INFO(find_success);
+		/*
+		 * At the moment, we don't support PG_readahead for anon THP
+		 * so let's bail out rather than confusing the readahead stat.
+		 */
 		if (unlikely(PageTransCompound(page)))
 			return page;
+
 		readahead = TestClearPageReadahead(page);
-		if (vma) {
-			ra_info = GET_SWAP_RA_VAL(vma);
-			win = SWAP_RA_WIN(ra_info);
-			hits = SWAP_RA_HITS(ra_info);
+		if (vma && vma_ra) {
+			unsigned long ra_val;
+			int win, hits;
+
+			ra_val = GET_SWAP_RA_VAL(vma);
+			win = SWAP_RA_WIN(ra_val);
+			hits = SWAP_RA_HITS(ra_val);
 			if (readahead)
 				hits = min_t(int, hits + 1, SWAP_RA_HITS_MAX);
 			atomic_long_set(&vma->swap_readahead_info,
 					SWAP_RA_VAL(addr, win, hits));
 		}
+
 		if (readahead) {
 			count_vm_event(SWAP_RA_HIT);
-			if (!vma)
+			if (!vma || !vma_ra)
 				atomic_inc(&swapin_readahead_hits);
 		}
 	}
+
 	return page;
 }
 
@@ -586,8 +597,7 @@ struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
 			continue;
 		if (page_allocated) {
 			swap_readpage(page, false);
-			if (offset != entry_offset &&
-			    likely(!PageTransCompound(page))) {
+			if (offset != entry_offset) {
 				SetPageReadahead(page);
 				count_vm_event(SWAP_RA);
 			}
@@ -649,16 +659,15 @@ static inline void swap_ra_clamp_pfn(struct vm_area_struct *vma,
 		      PFN_DOWN((faddr & PMD_MASK) + PMD_SIZE));
 }
 
-struct page *swap_readahead_detect(struct vm_fault *vmf,
-				   struct vma_swap_readahead *swap_ra)
+static void swap_ra_info(struct vm_fault *vmf,
+			struct vma_swap_readahead *ra_info)
 {
 	struct vm_area_struct *vma = vmf->vma;
-	unsigned long swap_ra_info;
-	struct page *page;
+	unsigned long ra_val;
 	swp_entry_t entry;
 	unsigned long faddr, pfn, fpfn;
 	unsigned long start, end;
-	pte_t *pte;
+	pte_t *pte, *orig_pte;
 	unsigned int max_win, hits, prev_win, win, left;
 #ifndef CONFIG_64BIT
 	pte_t *tpte;
@@ -667,30 +676,32 @@ struct page *swap_readahead_detect(struct vm_fault *vmf,
 	max_win = 1 << min_t(unsigned int, READ_ONCE(page_cluster),
 			     SWAP_RA_ORDER_CEILING);
 	if (max_win == 1) {
-		swap_ra->win = 1;
-		return NULL;
+		ra_info->win = 1;
+		return;
 	}
 
 	faddr = vmf->address;
-	entry = pte_to_swp_entry(vmf->orig_pte);
-	if ((unlikely(non_swap_entry(entry))))
-		return NULL;
-	page = lookup_swap_cache(entry, vma, faddr);
-	if (page)
-		return page;
+	orig_pte = pte = pte_offset_map(vmf->pmd, faddr);
+	entry = pte_to_swp_entry(*pte);
+	if ((unlikely(non_swap_entry(entry)))) {
+		pte_unmap(orig_pte);
+		return;
+	}
 
 	fpfn = PFN_DOWN(faddr);
-	swap_ra_info = GET_SWAP_RA_VAL(vma);
-	pfn = PFN_DOWN(SWAP_RA_ADDR(swap_ra_info));
-	prev_win = SWAP_RA_WIN(swap_ra_info);
-	hits = SWAP_RA_HITS(swap_ra_info);
-	swap_ra->win = win = __swapin_nr_pages(pfn, fpfn, hits,
+	ra_val = GET_SWAP_RA_VAL(vma);
+	pfn = PFN_DOWN(SWAP_RA_ADDR(ra_val));
+	prev_win = SWAP_RA_WIN(ra_val);
+	hits = SWAP_RA_HITS(ra_val);
+	ra_info->win = win = __swapin_nr_pages(pfn, fpfn, hits,
 					       max_win, prev_win);
 	atomic_long_set(&vma->swap_readahead_info,
 			SWAP_RA_VAL(faddr, win, 0));
 
-	if (win == 1)
-		return NULL;
+	if (win == 1) {
+		pte_unmap(orig_pte);
+		return;
+	}
 
 	/* Copy the PTEs because the page table may be unmapped */
 	if (fpfn == pfn + 1)
@@ -703,23 +714,21 @@ struct page *swap_readahead_detect(struct vm_fault *vmf,
 		swap_ra_clamp_pfn(vma, faddr, fpfn - left, fpfn + win - left,
 				  &start, &end);
 	}
-	swap_ra->nr_pte = end - start;
-	swap_ra->offset = fpfn - start;
-	pte = vmf->pte - swap_ra->offset;
+	ra_info->nr_pte = end - start;
+	ra_info->offset = fpfn - start;
+	pte -= ra_info->offset;
 #ifdef CONFIG_64BIT
-	swap_ra->ptes = pte;
+	ra_info->ptes = pte;
 #else
-	tpte = swap_ra->ptes;
+	tpte = ra_info->ptes;
 	for (pfn = start; pfn != end; pfn++)
 		*tpte++ = *pte++;
 #endif
-
-	return NULL;
+	pte_unmap(orig_pte);
 }
 
 struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask,
-				    struct vm_fault *vmf,
-				    struct vma_swap_readahead *swap_ra)
+				    struct vm_fault *vmf)
 {
 	struct blk_plug plug;
 	struct vm_area_struct *vma = vmf->vma;
@@ -728,12 +737,14 @@ struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask,
 	swp_entry_t entry;
 	unsigned int i;
 	bool page_allocated;
+	struct vma_swap_readahead ra_info = {0,};
 
-	if (swap_ra->win == 1)
+	swap_ra_info(vmf, &ra_info);
+	if (ra_info.win == 1)
 		goto skip;
 
 	blk_start_plug(&plug);
-	for (i = 0, pte = swap_ra->ptes; i < swap_ra->nr_pte;
+	for (i = 0, pte = ra_info.ptes; i < ra_info.nr_pte;
 	     i++, pte++) {
 		pentry = *pte;
 		if (pte_none(pentry))
@@ -749,8 +760,7 @@ struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask,
 			continue;
 		if (page_allocated) {
 			swap_readpage(page, false);
-			if (i != swap_ra->offset &&
-			    likely(!PageTransCompound(page))) {
+			if (i != ra_info.offset) {
 				SetPageReadahead(page);
 				count_vm_event(SWAP_RA);
 			}
@@ -761,7 +771,7 @@ struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask,
 	lru_add_drain();
 skip:
 	return read_swap_cache_async(fentry, gfp_mask, vma, vmf->address,
-				     swap_ra->win == 1);
+				     ra_info.win == 1);
 }
 
 #ifdef CONFIG_SYSFS