diff options
-rw-r--r-- | include/linux/swap.h | 17 | ||||
-rw-r--r-- | mm/memory.c | 26 | ||||
-rw-r--r-- | mm/swap_state.c | 96 |
3 files changed, 62 insertions, 77 deletions
diff --git a/include/linux/swap.h b/include/linux/swap.h index a1a3f4ed94ce..fa92177d863e 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h | |||
@@ -424,12 +424,8 @@ extern struct page *__read_swap_cache_async(swp_entry_t, gfp_t, | |||
424 | bool *new_page_allocated); | 424 | bool *new_page_allocated); |
425 | extern struct page *swapin_readahead(swp_entry_t, gfp_t, | 425 | extern struct page *swapin_readahead(swp_entry_t, gfp_t, |
426 | struct vm_area_struct *vma, unsigned long addr); | 426 | struct vm_area_struct *vma, unsigned long addr); |
427 | |||
428 | extern struct page *swap_readahead_detect(struct vm_fault *vmf, | ||
429 | struct vma_swap_readahead *swap_ra); | ||
430 | extern struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask, | 427 | extern struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask, |
431 | struct vm_fault *vmf, | 428 | struct vm_fault *vmf); |
432 | struct vma_swap_readahead *swap_ra); | ||
433 | 429 | ||
434 | /* linux/mm/swapfile.c */ | 430 | /* linux/mm/swapfile.c */ |
435 | extern atomic_long_t nr_swap_pages; | 431 | extern atomic_long_t nr_swap_pages; |
@@ -548,15 +544,8 @@ static inline bool swap_use_vma_readahead(void) | |||
548 | return false; | 544 | return false; |
549 | } | 545 | } |
550 | 546 | ||
551 | static inline struct page *swap_readahead_detect( | 547 | static inline struct page *do_swap_page_readahead(swp_entry_t fentry, |
552 | struct vm_fault *vmf, struct vma_swap_readahead *swap_ra) | 548 | gfp_t gfp_mask, struct vm_fault *vmf) |
553 | { | ||
554 | return NULL; | ||
555 | } | ||
556 | |||
557 | static inline struct page *do_swap_page_readahead( | ||
558 | swp_entry_t fentry, gfp_t gfp_mask, | ||
559 | struct vm_fault *vmf, struct vma_swap_readahead *swap_ra) | ||
560 | { | 549 | { |
561 | return NULL; | 550 | return NULL; |
562 | } | 551 | } |
diff --git a/mm/memory.c b/mm/memory.c index aed37325d94e..bc1ccff79538 100644 --- a/mm/memory.c +++ b/mm/memory.c | |||
@@ -2883,26 +2883,16 @@ EXPORT_SYMBOL(unmap_mapping_range); | |||
2883 | int do_swap_page(struct vm_fault *vmf) | 2883 | int do_swap_page(struct vm_fault *vmf) |
2884 | { | 2884 | { |
2885 | struct vm_area_struct *vma = vmf->vma; | 2885 | struct vm_area_struct *vma = vmf->vma; |
2886 | struct page *page = NULL, *swapcache = NULL; | 2886 | struct page *page = NULL, *swapcache; |
2887 | struct mem_cgroup *memcg; | 2887 | struct mem_cgroup *memcg; |
2888 | struct vma_swap_readahead swap_ra; | ||
2889 | swp_entry_t entry; | 2888 | swp_entry_t entry; |
2890 | pte_t pte; | 2889 | pte_t pte; |
2891 | int locked; | 2890 | int locked; |
2892 | int exclusive = 0; | 2891 | int exclusive = 0; |
2893 | int ret = 0; | 2892 | int ret = 0; |
2894 | bool vma_readahead = swap_use_vma_readahead(); | ||
2895 | 2893 | ||
2896 | if (vma_readahead) { | 2894 | if (!pte_unmap_same(vma->vm_mm, vmf->pmd, vmf->pte, vmf->orig_pte)) |
2897 | page = swap_readahead_detect(vmf, &swap_ra); | ||
2898 | swapcache = page; | ||
2899 | } | ||
2900 | |||
2901 | if (!pte_unmap_same(vma->vm_mm, vmf->pmd, vmf->pte, vmf->orig_pte)) { | ||
2902 | if (page) | ||
2903 | put_page(page); | ||
2904 | goto out; | 2895 | goto out; |
2905 | } | ||
2906 | 2896 | ||
2907 | entry = pte_to_swp_entry(vmf->orig_pte); | 2897 | entry = pte_to_swp_entry(vmf->orig_pte); |
2908 | if (unlikely(non_swap_entry(entry))) { | 2898 | if (unlikely(non_swap_entry(entry))) { |
@@ -2928,11 +2918,8 @@ int do_swap_page(struct vm_fault *vmf) | |||
2928 | 2918 | ||
2929 | 2919 | ||
2930 | delayacct_set_flag(DELAYACCT_PF_SWAPIN); | 2920 | delayacct_set_flag(DELAYACCT_PF_SWAPIN); |
2931 | if (!page) { | 2921 | page = lookup_swap_cache(entry, vma, vmf->address); |
2932 | page = lookup_swap_cache(entry, vma_readahead ? vma : NULL, | 2922 | swapcache = page; |
2933 | vmf->address); | ||
2934 | swapcache = page; | ||
2935 | } | ||
2936 | 2923 | ||
2937 | if (!page) { | 2924 | if (!page) { |
2938 | struct swap_info_struct *si = swp_swap_info(entry); | 2925 | struct swap_info_struct *si = swp_swap_info(entry); |
@@ -2949,9 +2936,9 @@ int do_swap_page(struct vm_fault *vmf) | |||
2949 | swap_readpage(page, true); | 2936 | swap_readpage(page, true); |
2950 | } | 2937 | } |
2951 | } else { | 2938 | } else { |
2952 | if (vma_readahead) | 2939 | if (swap_use_vma_readahead()) |
2953 | page = do_swap_page_readahead(entry, | 2940 | page = do_swap_page_readahead(entry, |
2954 | GFP_HIGHUSER_MOVABLE, vmf, &swap_ra); | 2941 | GFP_HIGHUSER_MOVABLE, vmf); |
2955 | else | 2942 | else |
2956 | page = swapin_readahead(entry, | 2943 | page = swapin_readahead(entry, |
2957 | GFP_HIGHUSER_MOVABLE, vma, vmf->address); | 2944 | GFP_HIGHUSER_MOVABLE, vma, vmf->address); |
@@ -2982,7 +2969,6 @@ int do_swap_page(struct vm_fault *vmf) | |||
2982 | */ | 2969 | */ |
2983 | ret = VM_FAULT_HWPOISON; | 2970 | ret = VM_FAULT_HWPOISON; |
2984 | delayacct_clear_flag(DELAYACCT_PF_SWAPIN); | 2971 | delayacct_clear_flag(DELAYACCT_PF_SWAPIN); |
2985 | swapcache = page; | ||
2986 | goto out_release; | 2972 | goto out_release; |
2987 | } | 2973 | } |
2988 | 2974 | ||
diff --git a/mm/swap_state.c b/mm/swap_state.c index 39ae7cfad90f..db5da2baafb1 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c | |||
@@ -332,32 +332,43 @@ struct page *lookup_swap_cache(swp_entry_t entry, struct vm_area_struct *vma, | |||
332 | unsigned long addr) | 332 | unsigned long addr) |
333 | { | 333 | { |
334 | struct page *page; | 334 | struct page *page; |
335 | unsigned long ra_info; | ||
336 | int win, hits, readahead; | ||
337 | 335 | ||
338 | page = find_get_page(swap_address_space(entry), swp_offset(entry)); | 336 | page = find_get_page(swap_address_space(entry), swp_offset(entry)); |
339 | 337 | ||
340 | INC_CACHE_INFO(find_total); | 338 | INC_CACHE_INFO(find_total); |
341 | if (page) { | 339 | if (page) { |
340 | bool vma_ra = swap_use_vma_readahead(); | ||
341 | bool readahead; | ||
342 | |||
342 | INC_CACHE_INFO(find_success); | 343 | INC_CACHE_INFO(find_success); |
344 | /* | ||
345 | * At the moment, we don't support PG_readahead for anon THP | ||
346 | * so let's bail out rather than confusing the readahead stat. | ||
347 | */ | ||
343 | if (unlikely(PageTransCompound(page))) | 348 | if (unlikely(PageTransCompound(page))) |
344 | return page; | 349 | return page; |
350 | |||
345 | readahead = TestClearPageReadahead(page); | 351 | readahead = TestClearPageReadahead(page); |
346 | if (vma) { | 352 | if (vma && vma_ra) { |
347 | ra_info = GET_SWAP_RA_VAL(vma); | 353 | unsigned long ra_val; |
348 | win = SWAP_RA_WIN(ra_info); | 354 | int win, hits; |
349 | hits = SWAP_RA_HITS(ra_info); | 355 | |
356 | ra_val = GET_SWAP_RA_VAL(vma); | ||
357 | win = SWAP_RA_WIN(ra_val); | ||
358 | hits = SWAP_RA_HITS(ra_val); | ||
350 | if (readahead) | 359 | if (readahead) |
351 | hits = min_t(int, hits + 1, SWAP_RA_HITS_MAX); | 360 | hits = min_t(int, hits + 1, SWAP_RA_HITS_MAX); |
352 | atomic_long_set(&vma->swap_readahead_info, | 361 | atomic_long_set(&vma->swap_readahead_info, |
353 | SWAP_RA_VAL(addr, win, hits)); | 362 | SWAP_RA_VAL(addr, win, hits)); |
354 | } | 363 | } |
364 | |||
355 | if (readahead) { | 365 | if (readahead) { |
356 | count_vm_event(SWAP_RA_HIT); | 366 | count_vm_event(SWAP_RA_HIT); |
357 | if (!vma) | 367 | if (!vma || !vma_ra) |
358 | atomic_inc(&swapin_readahead_hits); | 368 | atomic_inc(&swapin_readahead_hits); |
359 | } | 369 | } |
360 | } | 370 | } |
371 | |||
361 | return page; | 372 | return page; |
362 | } | 373 | } |
363 | 374 | ||
@@ -586,8 +597,7 @@ struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask, | |||
586 | continue; | 597 | continue; |
587 | if (page_allocated) { | 598 | if (page_allocated) { |
588 | swap_readpage(page, false); | 599 | swap_readpage(page, false); |
589 | if (offset != entry_offset && | 600 | if (offset != entry_offset) { |
590 | likely(!PageTransCompound(page))) { | ||
591 | SetPageReadahead(page); | 601 | SetPageReadahead(page); |
592 | count_vm_event(SWAP_RA); | 602 | count_vm_event(SWAP_RA); |
593 | } | 603 | } |
@@ -649,16 +659,15 @@ static inline void swap_ra_clamp_pfn(struct vm_area_struct *vma, | |||
649 | PFN_DOWN((faddr & PMD_MASK) + PMD_SIZE)); | 659 | PFN_DOWN((faddr & PMD_MASK) + PMD_SIZE)); |
650 | } | 660 | } |
651 | 661 | ||
652 | struct page *swap_readahead_detect(struct vm_fault *vmf, | 662 | static void swap_ra_info(struct vm_fault *vmf, |
653 | struct vma_swap_readahead *swap_ra) | 663 | struct vma_swap_readahead *ra_info) |
654 | { | 664 | { |
655 | struct vm_area_struct *vma = vmf->vma; | 665 | struct vm_area_struct *vma = vmf->vma; |
656 | unsigned long swap_ra_info; | 666 | unsigned long ra_val; |
657 | struct page *page; | ||
658 | swp_entry_t entry; | 667 | swp_entry_t entry; |
659 | unsigned long faddr, pfn, fpfn; | 668 | unsigned long faddr, pfn, fpfn; |
660 | unsigned long start, end; | 669 | unsigned long start, end; |
661 | pte_t *pte; | 670 | pte_t *pte, *orig_pte; |
662 | unsigned int max_win, hits, prev_win, win, left; | 671 | unsigned int max_win, hits, prev_win, win, left; |
663 | #ifndef CONFIG_64BIT | 672 | #ifndef CONFIG_64BIT |
664 | pte_t *tpte; | 673 | pte_t *tpte; |
@@ -667,30 +676,32 @@ struct page *swap_readahead_detect(struct vm_fault *vmf, | |||
667 | max_win = 1 << min_t(unsigned int, READ_ONCE(page_cluster), | 676 | max_win = 1 << min_t(unsigned int, READ_ONCE(page_cluster), |
668 | SWAP_RA_ORDER_CEILING); | 677 | SWAP_RA_ORDER_CEILING); |
669 | if (max_win == 1) { | 678 | if (max_win == 1) { |
670 | swap_ra->win = 1; | 679 | ra_info->win = 1; |
671 | return NULL; | 680 | return; |
672 | } | 681 | } |
673 | 682 | ||
674 | faddr = vmf->address; | 683 | faddr = vmf->address; |
675 | entry = pte_to_swp_entry(vmf->orig_pte); | 684 | orig_pte = pte = pte_offset_map(vmf->pmd, faddr); |
676 | if ((unlikely(non_swap_entry(entry)))) | 685 | entry = pte_to_swp_entry(*pte); |
677 | return NULL; | 686 | if ((unlikely(non_swap_entry(entry)))) { |
678 | page = lookup_swap_cache(entry, vma, faddr); | 687 | pte_unmap(orig_pte); |
679 | if (page) | 688 | return; |
680 | return page; | 689 | } |
681 | 690 | ||
682 | fpfn = PFN_DOWN(faddr); | 691 | fpfn = PFN_DOWN(faddr); |
683 | swap_ra_info = GET_SWAP_RA_VAL(vma); | 692 | ra_val = GET_SWAP_RA_VAL(vma); |
684 | pfn = PFN_DOWN(SWAP_RA_ADDR(swap_ra_info)); | 693 | pfn = PFN_DOWN(SWAP_RA_ADDR(ra_val)); |
685 | prev_win = SWAP_RA_WIN(swap_ra_info); | 694 | prev_win = SWAP_RA_WIN(ra_val); |
686 | hits = SWAP_RA_HITS(swap_ra_info); | 695 | hits = SWAP_RA_HITS(ra_val); |
687 | swap_ra->win = win = __swapin_nr_pages(pfn, fpfn, hits, | 696 | ra_info->win = win = __swapin_nr_pages(pfn, fpfn, hits, |
688 | max_win, prev_win); | 697 | max_win, prev_win); |
689 | atomic_long_set(&vma->swap_readahead_info, | 698 | atomic_long_set(&vma->swap_readahead_info, |
690 | SWAP_RA_VAL(faddr, win, 0)); | 699 | SWAP_RA_VAL(faddr, win, 0)); |
691 | 700 | ||
692 | if (win == 1) | 701 | if (win == 1) { |
693 | return NULL; | 702 | pte_unmap(orig_pte); |
703 | return; | ||
704 | } | ||
694 | 705 | ||
695 | /* Copy the PTEs because the page table may be unmapped */ | 706 | /* Copy the PTEs because the page table may be unmapped */ |
696 | if (fpfn == pfn + 1) | 707 | if (fpfn == pfn + 1) |
@@ -703,23 +714,21 @@ struct page *swap_readahead_detect(struct vm_fault *vmf, | |||
703 | swap_ra_clamp_pfn(vma, faddr, fpfn - left, fpfn + win - left, | 714 | swap_ra_clamp_pfn(vma, faddr, fpfn - left, fpfn + win - left, |
704 | &start, &end); | 715 | &start, &end); |
705 | } | 716 | } |
706 | swap_ra->nr_pte = end - start; | 717 | ra_info->nr_pte = end - start; |
707 | swap_ra->offset = fpfn - start; | 718 | ra_info->offset = fpfn - start; |
708 | pte = vmf->pte - swap_ra->offset; | 719 | pte -= ra_info->offset; |
709 | #ifdef CONFIG_64BIT | 720 | #ifdef CONFIG_64BIT |
710 | swap_ra->ptes = pte; | 721 | ra_info->ptes = pte; |
711 | #else | 722 | #else |
712 | tpte = swap_ra->ptes; | 723 | tpte = ra_info->ptes; |
713 | for (pfn = start; pfn != end; pfn++) | 724 | for (pfn = start; pfn != end; pfn++) |
714 | *tpte++ = *pte++; | 725 | *tpte++ = *pte++; |
715 | #endif | 726 | #endif |
716 | 727 | pte_unmap(orig_pte); | |
717 | return NULL; | ||
718 | } | 728 | } |
719 | 729 | ||
720 | struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask, | 730 | struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask, |
721 | struct vm_fault *vmf, | 731 | struct vm_fault *vmf) |
722 | struct vma_swap_readahead *swap_ra) | ||
723 | { | 732 | { |
724 | struct blk_plug plug; | 733 | struct blk_plug plug; |
725 | struct vm_area_struct *vma = vmf->vma; | 734 | struct vm_area_struct *vma = vmf->vma; |
@@ -728,12 +737,14 @@ struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask, | |||
728 | swp_entry_t entry; | 737 | swp_entry_t entry; |
729 | unsigned int i; | 738 | unsigned int i; |
730 | bool page_allocated; | 739 | bool page_allocated; |
740 | struct vma_swap_readahead ra_info = {0,}; | ||
731 | 741 | ||
732 | if (swap_ra->win == 1) | 742 | swap_ra_info(vmf, &ra_info); |
743 | if (ra_info.win == 1) | ||
733 | goto skip; | 744 | goto skip; |
734 | 745 | ||
735 | blk_start_plug(&plug); | 746 | blk_start_plug(&plug); |
736 | for (i = 0, pte = swap_ra->ptes; i < swap_ra->nr_pte; | 747 | for (i = 0, pte = ra_info.ptes; i < ra_info.nr_pte; |
737 | i++, pte++) { | 748 | i++, pte++) { |
738 | pentry = *pte; | 749 | pentry = *pte; |
739 | if (pte_none(pentry)) | 750 | if (pte_none(pentry)) |
@@ -749,8 +760,7 @@ struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask, | |||
749 | continue; | 760 | continue; |
750 | if (page_allocated) { | 761 | if (page_allocated) { |
751 | swap_readpage(page, false); | 762 | swap_readpage(page, false); |
752 | if (i != swap_ra->offset && | 763 | if (i != ra_info.offset) { |
753 | likely(!PageTransCompound(page))) { | ||
754 | SetPageReadahead(page); | 764 | SetPageReadahead(page); |
755 | count_vm_event(SWAP_RA); | 765 | count_vm_event(SWAP_RA); |
756 | } | 766 | } |
@@ -761,7 +771,7 @@ struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask, | |||
761 | lru_add_drain(); | 771 | lru_add_drain(); |
762 | skip: | 772 | skip: |
763 | return read_swap_cache_async(fentry, gfp_mask, vma, vmf->address, | 773 | return read_swap_cache_async(fentry, gfp_mask, vma, vmf->address, |
764 | swap_ra->win == 1); | 774 | ra_info.win == 1); |
765 | } | 775 | } |
766 | 776 | ||
767 | #ifdef CONFIG_SYSFS | 777 | #ifdef CONFIG_SYSFS |