author     Tim Chen <tim.c.chen@linux.intel.com>           2017-02-22 18:45:33 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>  2017-02-22 19:41:30 -0500
commit     36005bae205da3eef0016a5c96a34f10a68afa1e
tree       b3201463a223eb7ec4a0bcf960696db21914566d  /mm/swapfile.c
parent     e8c26ab60598558ec3a626e7925b06e7417d7710
mm/swap: allocate swap slots in batches
Currently, swap slots are allocated one page at a time, causing
contention on the swap_info lock that protects the swap partition on
every page being swapped.

This patch adds new functions get_swap_pages and scan_swap_map_slots to
request multiple swap slots at once. This reduces contention on the
swap_info lock. scan_swap_map_slots can also operate more efficiently,
as swap slots often occur in clusters close to each other on a swap
device and it is quicker to allocate them together.
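As a rough illustration of the intended usage (not part of this patch:
the local variables and the use_entry() consumer below are hypothetical),
a caller of the new batched interface might look like this:

	swp_entry_t entries[SWAP_BATCH];
	int i, got;

	/* ask for up to SWAP_BATCH slots in one pass over the device */
	got = get_swap_pages(SWAP_BATCH, entries);

	/*
	 * Fewer slots than requested may come back; get_swap_pages()
	 * itself returns the unused reservation to nr_swap_pages.
	 */
	for (i = 0; i < got; i++)
		use_entry(entries[i]);	/* hypothetical consumer */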
Link: http://lkml.kernel.org/r/9fec2845544371f62c3763d43510045e33d286a6.1484082593.git.tim.c.chen@linux.intel.com
Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
Signed-off-by: "Huang, Ying" <ying.huang@intel.com>
Cc: Aaron Lu <aaron.lu@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: Huang Ying <ying.huang@intel.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Shaohua Li <shli@kernel.org>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/swapfile.c')
 mm/swapfile.c | 136 ++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 111 insertions(+), 25 deletions(-)
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 7e888de35c41..e73b5441055b 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -496,7 +496,7 @@ scan_swap_map_ssd_cluster_conflict(struct swap_info_struct *si,
  * Try to get a swap entry from current cpu's swap entry pool (a cluster). This
  * might involve allocating a new cluster for current CPU too.
  */
-static void scan_swap_map_try_ssd_cluster(struct swap_info_struct *si,
+static bool scan_swap_map_try_ssd_cluster(struct swap_info_struct *si,
 	unsigned long *offset, unsigned long *scan_base)
 {
 	struct percpu_cluster *cluster;
@@ -520,7 +520,7 @@ new_cluster:
 			*scan_base = *offset = si->cluster_next;
 			goto new_cluster;
 		} else
-			return;
+			return false;
 	}
 
 	found_free = false;
@@ -552,16 +552,22 @@ new_cluster:
 	cluster->next = tmp + 1;
 	*offset = tmp;
 	*scan_base = tmp;
+	return found_free;
 }
 
-static unsigned long scan_swap_map(struct swap_info_struct *si,
-				   unsigned char usage)
+static int scan_swap_map_slots(struct swap_info_struct *si,
+			       unsigned char usage, int nr,
+			       swp_entry_t slots[])
 {
 	struct swap_cluster_info *ci;
 	unsigned long offset;
 	unsigned long scan_base;
 	unsigned long last_in_cluster = 0;
 	int latency_ration = LATENCY_LIMIT;
+	int n_ret = 0;
+
+	if (nr > SWAP_BATCH)
+		nr = SWAP_BATCH;
 
 	/*
 	 * We try to cluster swap pages by allocating them sequentially
@@ -579,8 +585,10 @@ static unsigned long scan_swap_map(struct swap_info_struct *si,
 
 	/* SSD algorithm */
 	if (si->cluster_info) {
-		scan_swap_map_try_ssd_cluster(si, &offset, &scan_base);
-		goto checks;
+		if (scan_swap_map_try_ssd_cluster(si, &offset, &scan_base))
+			goto checks;
+		else
+			goto scan;
 	}
 
 	if (unlikely(!si->cluster_nr--)) {
@@ -624,8 +632,14 @@ static unsigned long scan_swap_map(struct swap_info_struct *si,
 
 checks:
 	if (si->cluster_info) {
-		while (scan_swap_map_ssd_cluster_conflict(si, offset))
-			scan_swap_map_try_ssd_cluster(si, &offset, &scan_base);
+		while (scan_swap_map_ssd_cluster_conflict(si, offset)) {
+			/* take a break if we already got some slots */
+			if (n_ret)
+				goto done;
+			if (!scan_swap_map_try_ssd_cluster(si, &offset,
+							   &scan_base))
+				goto scan;
+		}
 	}
 	if (!(si->flags & SWP_WRITEOK))
 		goto no_page;
@@ -650,7 +664,10 @@ checks:
 
 	if (si->swap_map[offset]) {
 		unlock_cluster(ci);
-		goto scan;
+		if (!n_ret)
+			goto scan;
+		else
+			goto done;
 	}
 
 	if (offset == si->lowest_bit)
@@ -669,9 +686,43 @@ checks:
 	inc_cluster_info_page(si, si->cluster_info, offset);
 	unlock_cluster(ci);
 	si->cluster_next = offset + 1;
-	si->flags -= SWP_SCANNING;
+	slots[n_ret++] = swp_entry(si->type, offset);
+
+	/* got enough slots or reach max slots? */
+	if ((n_ret == nr) || (offset >= si->highest_bit))
+		goto done;
+
+	/* search for next available slot */
+
+	/* time to take a break? */
+	if (unlikely(--latency_ration < 0)) {
+		if (n_ret)
+			goto done;
+		spin_unlock(&si->lock);
+		cond_resched();
+		spin_lock(&si->lock);
+		latency_ration = LATENCY_LIMIT;
+	}
 
-	return offset;
+	/* try to get more slots in cluster */
+	if (si->cluster_info) {
+		if (scan_swap_map_try_ssd_cluster(si, &offset, &scan_base))
+			goto checks;
+		else
+			goto done;
+	}
+	/* non-ssd case */
+	++offset;
+
+	/* non-ssd case, still more slots in cluster? */
+	if (si->cluster_nr && !si->swap_map[offset]) {
+		--si->cluster_nr;
+		goto checks;
+	}
+
+done:
+	si->flags -= SWP_SCANNING;
+	return n_ret;
 
 scan:
 	spin_unlock(&si->lock);
@@ -709,17 +760,41 @@ scan:
 
 no_page:
 	si->flags -= SWP_SCANNING;
-	return 0;
+	return n_ret;
 }
 
-swp_entry_t get_swap_page(void)
+static unsigned long scan_swap_map(struct swap_info_struct *si,
+				   unsigned char usage)
+{
+	swp_entry_t entry;
+	int n_ret;
+
+	n_ret = scan_swap_map_slots(si, usage, 1, &entry);
+
+	if (n_ret)
+		return swp_offset(entry);
+	else
+		return 0;
+
+}
+
+int get_swap_pages(int n_goal, swp_entry_t swp_entries[])
 {
 	struct swap_info_struct *si, *next;
-	pgoff_t offset;
+	long avail_pgs;
+	int n_ret = 0;
 
-	if (atomic_long_read(&nr_swap_pages) <= 0)
+	avail_pgs = atomic_long_read(&nr_swap_pages);
+	if (avail_pgs <= 0)
 		goto noswap;
-	atomic_long_dec(&nr_swap_pages);
+
+	if (n_goal > SWAP_BATCH)
+		n_goal = SWAP_BATCH;
+
+	if (n_goal > avail_pgs)
+		n_goal = avail_pgs;
+
+	atomic_long_sub(n_goal, &nr_swap_pages);
 
 	spin_lock(&swap_avail_lock);
 
@@ -745,14 +820,14 @@ start_over:
 			spin_unlock(&si->lock);
 			goto nextsi;
 		}
-
-		/* This is called for allocating swap entry for cache */
-		offset = scan_swap_map(si, SWAP_HAS_CACHE);
+		n_ret = scan_swap_map_slots(si, SWAP_HAS_CACHE,
+					    n_goal, swp_entries);
 		spin_unlock(&si->lock);
-		if (offset)
-			return swp_entry(si->type, offset);
+		if (n_ret)
+			goto check_out;
 		pr_debug("scan_swap_map of si %d failed to find offset\n",
 			si->type);
+
 		spin_lock(&swap_avail_lock);
 nextsi:
 		/*
@@ -763,7 +838,8 @@ nextsi:
 		 * up between us dropping swap_avail_lock and taking si->lock.
 		 * Since we dropped the swap_avail_lock, the swap_avail_head
 		 * list may have been modified; so if next is still in the
-		 * swap_avail_head list then try it, otherwise start over.
+		 * swap_avail_head list then try it, otherwise start over
+		 * if we have not gotten any slots.
 		 */
 		if (plist_node_empty(&next->avail_list))
 			goto start_over;
@@ -771,9 +847,19 @@ nextsi:
 
 	spin_unlock(&swap_avail_lock);
 
-	atomic_long_inc(&nr_swap_pages);
+check_out:
+	if (n_ret < n_goal)
+		atomic_long_add((long) (n_goal-n_ret), &nr_swap_pages);
 noswap:
-	return (swp_entry_t) {0};
+	return n_ret;
+}
+
+swp_entry_t get_swap_page(void)
+{
+	swp_entry_t entry;
+
+	get_swap_pages(1, &entry);
+	return entry;
 }
 
 /* The only caller of this function is now suspend routine */