aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--include/linux/swap.h17
-rw-r--r--mm/memory.c26
-rw-r--r--mm/swap_state.c96
3 files changed, 62 insertions, 77 deletions
diff --git a/include/linux/swap.h b/include/linux/swap.h
index a1a3f4ed94ce..fa92177d863e 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -424,12 +424,8 @@ extern struct page *__read_swap_cache_async(swp_entry_t, gfp_t,
424 bool *new_page_allocated); 424 bool *new_page_allocated);
425extern struct page *swapin_readahead(swp_entry_t, gfp_t, 425extern struct page *swapin_readahead(swp_entry_t, gfp_t,
426 struct vm_area_struct *vma, unsigned long addr); 426 struct vm_area_struct *vma, unsigned long addr);
427
428extern struct page *swap_readahead_detect(struct vm_fault *vmf,
429 struct vma_swap_readahead *swap_ra);
430extern struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask, 427extern struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask,
431 struct vm_fault *vmf, 428 struct vm_fault *vmf);
432 struct vma_swap_readahead *swap_ra);
433 429
434/* linux/mm/swapfile.c */ 430/* linux/mm/swapfile.c */
435extern atomic_long_t nr_swap_pages; 431extern atomic_long_t nr_swap_pages;
@@ -548,15 +544,8 @@ static inline bool swap_use_vma_readahead(void)
548 return false; 544 return false;
549} 545}
550 546
551static inline struct page *swap_readahead_detect( 547static inline struct page *do_swap_page_readahead(swp_entry_t fentry,
552 struct vm_fault *vmf, struct vma_swap_readahead *swap_ra) 548 gfp_t gfp_mask, struct vm_fault *vmf)
553{
554 return NULL;
555}
556
557static inline struct page *do_swap_page_readahead(
558 swp_entry_t fentry, gfp_t gfp_mask,
559 struct vm_fault *vmf, struct vma_swap_readahead *swap_ra)
560{ 549{
561 return NULL; 550 return NULL;
562} 551}
diff --git a/mm/memory.c b/mm/memory.c
index aed37325d94e..bc1ccff79538 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2883,26 +2883,16 @@ EXPORT_SYMBOL(unmap_mapping_range);
2883int do_swap_page(struct vm_fault *vmf) 2883int do_swap_page(struct vm_fault *vmf)
2884{ 2884{
2885 struct vm_area_struct *vma = vmf->vma; 2885 struct vm_area_struct *vma = vmf->vma;
2886 struct page *page = NULL, *swapcache = NULL; 2886 struct page *page = NULL, *swapcache;
2887 struct mem_cgroup *memcg; 2887 struct mem_cgroup *memcg;
2888 struct vma_swap_readahead swap_ra;
2889 swp_entry_t entry; 2888 swp_entry_t entry;
2890 pte_t pte; 2889 pte_t pte;
2891 int locked; 2890 int locked;
2892 int exclusive = 0; 2891 int exclusive = 0;
2893 int ret = 0; 2892 int ret = 0;
2894 bool vma_readahead = swap_use_vma_readahead();
2895 2893
2896 if (vma_readahead) { 2894 if (!pte_unmap_same(vma->vm_mm, vmf->pmd, vmf->pte, vmf->orig_pte))
2897 page = swap_readahead_detect(vmf, &swap_ra);
2898 swapcache = page;
2899 }
2900
2901 if (!pte_unmap_same(vma->vm_mm, vmf->pmd, vmf->pte, vmf->orig_pte)) {
2902 if (page)
2903 put_page(page);
2904 goto out; 2895 goto out;
2905 }
2906 2896
2907 entry = pte_to_swp_entry(vmf->orig_pte); 2897 entry = pte_to_swp_entry(vmf->orig_pte);
2908 if (unlikely(non_swap_entry(entry))) { 2898 if (unlikely(non_swap_entry(entry))) {
@@ -2928,11 +2918,8 @@ int do_swap_page(struct vm_fault *vmf)
2928 2918
2929 2919
2930 delayacct_set_flag(DELAYACCT_PF_SWAPIN); 2920 delayacct_set_flag(DELAYACCT_PF_SWAPIN);
2931 if (!page) { 2921 page = lookup_swap_cache(entry, vma, vmf->address);
2932 page = lookup_swap_cache(entry, vma_readahead ? vma : NULL, 2922 swapcache = page;
2933 vmf->address);
2934 swapcache = page;
2935 }
2936 2923
2937 if (!page) { 2924 if (!page) {
2938 struct swap_info_struct *si = swp_swap_info(entry); 2925 struct swap_info_struct *si = swp_swap_info(entry);
@@ -2949,9 +2936,9 @@ int do_swap_page(struct vm_fault *vmf)
2949 swap_readpage(page, true); 2936 swap_readpage(page, true);
2950 } 2937 }
2951 } else { 2938 } else {
2952 if (vma_readahead) 2939 if (swap_use_vma_readahead())
2953 page = do_swap_page_readahead(entry, 2940 page = do_swap_page_readahead(entry,
2954 GFP_HIGHUSER_MOVABLE, vmf, &swap_ra); 2941 GFP_HIGHUSER_MOVABLE, vmf);
2955 else 2942 else
2956 page = swapin_readahead(entry, 2943 page = swapin_readahead(entry,
2957 GFP_HIGHUSER_MOVABLE, vma, vmf->address); 2944 GFP_HIGHUSER_MOVABLE, vma, vmf->address);
@@ -2982,7 +2969,6 @@ int do_swap_page(struct vm_fault *vmf)
2982 */ 2969 */
2983 ret = VM_FAULT_HWPOISON; 2970 ret = VM_FAULT_HWPOISON;
2984 delayacct_clear_flag(DELAYACCT_PF_SWAPIN); 2971 delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
2985 swapcache = page;
2986 goto out_release; 2972 goto out_release;
2987 } 2973 }
2988 2974
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 39ae7cfad90f..db5da2baafb1 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -332,32 +332,43 @@ struct page *lookup_swap_cache(swp_entry_t entry, struct vm_area_struct *vma,
332 unsigned long addr) 332 unsigned long addr)
333{ 333{
334 struct page *page; 334 struct page *page;
335 unsigned long ra_info;
336 int win, hits, readahead;
337 335
338 page = find_get_page(swap_address_space(entry), swp_offset(entry)); 336 page = find_get_page(swap_address_space(entry), swp_offset(entry));
339 337
340 INC_CACHE_INFO(find_total); 338 INC_CACHE_INFO(find_total);
341 if (page) { 339 if (page) {
340 bool vma_ra = swap_use_vma_readahead();
341 bool readahead;
342
342 INC_CACHE_INFO(find_success); 343 INC_CACHE_INFO(find_success);
344 /*
345 * At the moment, we don't support PG_readahead for anon THP
346 * so let's bail out rather than confusing the readahead stat.
347 */
343 if (unlikely(PageTransCompound(page))) 348 if (unlikely(PageTransCompound(page)))
344 return page; 349 return page;
350
345 readahead = TestClearPageReadahead(page); 351 readahead = TestClearPageReadahead(page);
346 if (vma) { 352 if (vma && vma_ra) {
347 ra_info = GET_SWAP_RA_VAL(vma); 353 unsigned long ra_val;
348 win = SWAP_RA_WIN(ra_info); 354 int win, hits;
349 hits = SWAP_RA_HITS(ra_info); 355
356 ra_val = GET_SWAP_RA_VAL(vma);
357 win = SWAP_RA_WIN(ra_val);
358 hits = SWAP_RA_HITS(ra_val);
350 if (readahead) 359 if (readahead)
351 hits = min_t(int, hits + 1, SWAP_RA_HITS_MAX); 360 hits = min_t(int, hits + 1, SWAP_RA_HITS_MAX);
352 atomic_long_set(&vma->swap_readahead_info, 361 atomic_long_set(&vma->swap_readahead_info,
353 SWAP_RA_VAL(addr, win, hits)); 362 SWAP_RA_VAL(addr, win, hits));
354 } 363 }
364
355 if (readahead) { 365 if (readahead) {
356 count_vm_event(SWAP_RA_HIT); 366 count_vm_event(SWAP_RA_HIT);
357 if (!vma) 367 if (!vma || !vma_ra)
358 atomic_inc(&swapin_readahead_hits); 368 atomic_inc(&swapin_readahead_hits);
359 } 369 }
360 } 370 }
371
361 return page; 372 return page;
362} 373}
363 374
@@ -586,8 +597,7 @@ struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
586 continue; 597 continue;
587 if (page_allocated) { 598 if (page_allocated) {
588 swap_readpage(page, false); 599 swap_readpage(page, false);
589 if (offset != entry_offset && 600 if (offset != entry_offset) {
590 likely(!PageTransCompound(page))) {
591 SetPageReadahead(page); 601 SetPageReadahead(page);
592 count_vm_event(SWAP_RA); 602 count_vm_event(SWAP_RA);
593 } 603 }
@@ -649,16 +659,15 @@ static inline void swap_ra_clamp_pfn(struct vm_area_struct *vma,
649 PFN_DOWN((faddr & PMD_MASK) + PMD_SIZE)); 659 PFN_DOWN((faddr & PMD_MASK) + PMD_SIZE));
650} 660}
651 661
652struct page *swap_readahead_detect(struct vm_fault *vmf, 662static void swap_ra_info(struct vm_fault *vmf,
653 struct vma_swap_readahead *swap_ra) 663 struct vma_swap_readahead *ra_info)
654{ 664{
655 struct vm_area_struct *vma = vmf->vma; 665 struct vm_area_struct *vma = vmf->vma;
656 unsigned long swap_ra_info; 666 unsigned long ra_val;
657 struct page *page;
658 swp_entry_t entry; 667 swp_entry_t entry;
659 unsigned long faddr, pfn, fpfn; 668 unsigned long faddr, pfn, fpfn;
660 unsigned long start, end; 669 unsigned long start, end;
661 pte_t *pte; 670 pte_t *pte, *orig_pte;
662 unsigned int max_win, hits, prev_win, win, left; 671 unsigned int max_win, hits, prev_win, win, left;
663#ifndef CONFIG_64BIT 672#ifndef CONFIG_64BIT
664 pte_t *tpte; 673 pte_t *tpte;
@@ -667,30 +676,32 @@ struct page *swap_readahead_detect(struct vm_fault *vmf,
667 max_win = 1 << min_t(unsigned int, READ_ONCE(page_cluster), 676 max_win = 1 << min_t(unsigned int, READ_ONCE(page_cluster),
668 SWAP_RA_ORDER_CEILING); 677 SWAP_RA_ORDER_CEILING);
669 if (max_win == 1) { 678 if (max_win == 1) {
670 swap_ra->win = 1; 679 ra_info->win = 1;
671 return NULL; 680 return;
672 } 681 }
673 682
674 faddr = vmf->address; 683 faddr = vmf->address;
675 entry = pte_to_swp_entry(vmf->orig_pte); 684 orig_pte = pte = pte_offset_map(vmf->pmd, faddr);
676 if ((unlikely(non_swap_entry(entry)))) 685 entry = pte_to_swp_entry(*pte);
677 return NULL; 686 if ((unlikely(non_swap_entry(entry)))) {
678 page = lookup_swap_cache(entry, vma, faddr); 687 pte_unmap(orig_pte);
679 if (page) 688 return;
680 return page; 689 }
681 690
682 fpfn = PFN_DOWN(faddr); 691 fpfn = PFN_DOWN(faddr);
683 swap_ra_info = GET_SWAP_RA_VAL(vma); 692 ra_val = GET_SWAP_RA_VAL(vma);
684 pfn = PFN_DOWN(SWAP_RA_ADDR(swap_ra_info)); 693 pfn = PFN_DOWN(SWAP_RA_ADDR(ra_val));
685 prev_win = SWAP_RA_WIN(swap_ra_info); 694 prev_win = SWAP_RA_WIN(ra_val);
686 hits = SWAP_RA_HITS(swap_ra_info); 695 hits = SWAP_RA_HITS(ra_val);
687 swap_ra->win = win = __swapin_nr_pages(pfn, fpfn, hits, 696 ra_info->win = win = __swapin_nr_pages(pfn, fpfn, hits,
688 max_win, prev_win); 697 max_win, prev_win);
689 atomic_long_set(&vma->swap_readahead_info, 698 atomic_long_set(&vma->swap_readahead_info,
690 SWAP_RA_VAL(faddr, win, 0)); 699 SWAP_RA_VAL(faddr, win, 0));
691 700
692 if (win == 1) 701 if (win == 1) {
693 return NULL; 702 pte_unmap(orig_pte);
703 return;
704 }
694 705
695 /* Copy the PTEs because the page table may be unmapped */ 706 /* Copy the PTEs because the page table may be unmapped */
696 if (fpfn == pfn + 1) 707 if (fpfn == pfn + 1)
@@ -703,23 +714,21 @@ struct page *swap_readahead_detect(struct vm_fault *vmf,
703 swap_ra_clamp_pfn(vma, faddr, fpfn - left, fpfn + win - left, 714 swap_ra_clamp_pfn(vma, faddr, fpfn - left, fpfn + win - left,
704 &start, &end); 715 &start, &end);
705 } 716 }
706 swap_ra->nr_pte = end - start; 717 ra_info->nr_pte = end - start;
707 swap_ra->offset = fpfn - start; 718 ra_info->offset = fpfn - start;
708 pte = vmf->pte - swap_ra->offset; 719 pte -= ra_info->offset;
709#ifdef CONFIG_64BIT 720#ifdef CONFIG_64BIT
710 swap_ra->ptes = pte; 721 ra_info->ptes = pte;
711#else 722#else
712 tpte = swap_ra->ptes; 723 tpte = ra_info->ptes;
713 for (pfn = start; pfn != end; pfn++) 724 for (pfn = start; pfn != end; pfn++)
714 *tpte++ = *pte++; 725 *tpte++ = *pte++;
715#endif 726#endif
716 727 pte_unmap(orig_pte);
717 return NULL;
718} 728}
719 729
720struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask, 730struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask,
721 struct vm_fault *vmf, 731 struct vm_fault *vmf)
722 struct vma_swap_readahead *swap_ra)
723{ 732{
724 struct blk_plug plug; 733 struct blk_plug plug;
725 struct vm_area_struct *vma = vmf->vma; 734 struct vm_area_struct *vma = vmf->vma;
@@ -728,12 +737,14 @@ struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask,
728 swp_entry_t entry; 737 swp_entry_t entry;
729 unsigned int i; 738 unsigned int i;
730 bool page_allocated; 739 bool page_allocated;
740 struct vma_swap_readahead ra_info = {0,};
731 741
732 if (swap_ra->win == 1) 742 swap_ra_info(vmf, &ra_info);
743 if (ra_info.win == 1)
733 goto skip; 744 goto skip;
734 745
735 blk_start_plug(&plug); 746 blk_start_plug(&plug);
736 for (i = 0, pte = swap_ra->ptes; i < swap_ra->nr_pte; 747 for (i = 0, pte = ra_info.ptes; i < ra_info.nr_pte;
737 i++, pte++) { 748 i++, pte++) {
738 pentry = *pte; 749 pentry = *pte;
739 if (pte_none(pentry)) 750 if (pte_none(pentry))
@@ -749,8 +760,7 @@ struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask,
749 continue; 760 continue;
750 if (page_allocated) { 761 if (page_allocated) {
751 swap_readpage(page, false); 762 swap_readpage(page, false);
752 if (i != swap_ra->offset && 763 if (i != ra_info.offset) {
753 likely(!PageTransCompound(page))) {
754 SetPageReadahead(page); 764 SetPageReadahead(page);
755 count_vm_event(SWAP_RA); 765 count_vm_event(SWAP_RA);
756 } 766 }
@@ -761,7 +771,7 @@ struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask,
761 lru_add_drain(); 771 lru_add_drain();
762skip: 772skip:
763 return read_swap_cache_async(fentry, gfp_mask, vma, vmf->address, 773 return read_swap_cache_async(fentry, gfp_mask, vma, vmf->address,
764 swap_ra->win == 1); 774 ra_info.win == 1);
765} 775}
766 776
767#ifdef CONFIG_SYSFS 777#ifdef CONFIG_SYSFS