author	Tim Chen <tim.c.chen@linux.intel.com>	2017-02-22 18:45:33 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2017-02-22 19:41:30 -0500
commit	36005bae205da3eef0016a5c96a34f10a68afa1e (patch)
tree	b3201463a223eb7ec4a0bcf960696db21914566d /mm/swapfile.c
parent	e8c26ab60598558ec3a626e7925b06e7417d7710 (diff)
mm/swap: allocate swap slots in batches
Currently, the swap slots are allocated one page at a time, causing
contention on the swap_info lock protecting the swap partition on every
page being swapped.

This patch adds new functions get_swap_pages and scan_swap_map_slots to
request multiple swap slots at once.  This will reduce the lock
contention on the swap_info lock.  Also, scan_swap_map_slots can operate
more efficiently, as swap slots often occur in clusters close to each
other on a swap device and it is quicker to allocate them together.

Link: http://lkml.kernel.org/r/9fec2845544371f62c3763d43510045e33d286a6.1484082593.git.tim.c.chen@linux.intel.com
Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
Signed-off-by: "Huang, Ying" <ying.huang@intel.com>
Cc: Aaron Lu <aaron.lu@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: Huang Ying <ying.huang@intel.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Shaohua Li <shli@kernel.org>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
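[Editor's note] A rough sketch of how a caller could consume the batched API: refill a small buffer with one get_swap_pages() call, then hand out entries one at a time, so the swap_info lock is taken once per batch instead of once per page. This is not code from the patch; the buffer size, the static variables and the get_cached_swap_slot() helper are hypothetical, and locking/per-cpu concerns are ignored for brevity. Only get_swap_pages() and swp_entry_t come from the diff below.

/*
 * Illustrative sketch only -- not part of this patch.  SLOT_BATCH,
 * the statics and get_cached_swap_slot() are hypothetical names.
 */
#define SLOT_BATCH	64

static swp_entry_t slot_buf[SLOT_BATCH];
static int slot_nr, slot_cur;

static swp_entry_t get_cached_swap_slot(void)
{
	if (slot_cur == slot_nr) {
		/* Refill: one swap_info lock round-trip covers many pages. */
		slot_nr = get_swap_pages(SLOT_BATCH, slot_buf);
		slot_cur = 0;
		if (!slot_nr)
			return (swp_entry_t){0};	/* no swap space left */
	}
	return slot_buf[slot_cur++];
}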
Diffstat (limited to 'mm/swapfile.c')
-rw-r--r--	mm/swapfile.c	136
1 file changed, 111 insertions(+), 25 deletions(-)
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 7e888de35c41..e73b5441055b 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -496,7 +496,7 @@ scan_swap_map_ssd_cluster_conflict(struct swap_info_struct *si,
  * Try to get a swap entry from current cpu's swap entry pool (a cluster). This
  * might involve allocating a new cluster for current CPU too.
  */
-static void scan_swap_map_try_ssd_cluster(struct swap_info_struct *si,
+static bool scan_swap_map_try_ssd_cluster(struct swap_info_struct *si,
 	unsigned long *offset, unsigned long *scan_base)
 {
 	struct percpu_cluster *cluster;
@@ -520,7 +520,7 @@ new_cluster:
 			*scan_base = *offset = si->cluster_next;
 			goto new_cluster;
 		} else
-			return;
+			return false;
 	}
 
 	found_free = false;
@@ -552,16 +552,22 @@ new_cluster:
 	cluster->next = tmp + 1;
 	*offset = tmp;
 	*scan_base = tmp;
+	return found_free;
 }
 
-static unsigned long scan_swap_map(struct swap_info_struct *si,
-				   unsigned char usage)
+static int scan_swap_map_slots(struct swap_info_struct *si,
+			       unsigned char usage, int nr,
+			       swp_entry_t slots[])
 {
 	struct swap_cluster_info *ci;
 	unsigned long offset;
 	unsigned long scan_base;
 	unsigned long last_in_cluster = 0;
 	int latency_ration = LATENCY_LIMIT;
+	int n_ret = 0;
+
+	if (nr > SWAP_BATCH)
+		nr = SWAP_BATCH;
 
 	/*
 	 * We try to cluster swap pages by allocating them sequentially
@@ -579,8 +585,10 @@ static unsigned long scan_swap_map(struct swap_info_struct *si,
 
 	/* SSD algorithm */
 	if (si->cluster_info) {
-		scan_swap_map_try_ssd_cluster(si, &offset, &scan_base);
-		goto checks;
+		if (scan_swap_map_try_ssd_cluster(si, &offset, &scan_base))
+			goto checks;
+		else
+			goto scan;
 	}
 
 	if (unlikely(!si->cluster_nr--)) {
@@ -624,8 +632,14 @@ static unsigned long scan_swap_map(struct swap_info_struct *si,
 
 checks:
 	if (si->cluster_info) {
-		while (scan_swap_map_ssd_cluster_conflict(si, offset))
-			scan_swap_map_try_ssd_cluster(si, &offset, &scan_base);
+		while (scan_swap_map_ssd_cluster_conflict(si, offset)) {
+		/* take a break if we already got some slots */
+			if (n_ret)
+				goto done;
+			if (!scan_swap_map_try_ssd_cluster(si, &offset,
+							&scan_base))
+				goto scan;
+		}
 	}
 	if (!(si->flags & SWP_WRITEOK))
 		goto no_page;
@@ -650,7 +664,10 @@ checks:
 
 	if (si->swap_map[offset]) {
 		unlock_cluster(ci);
-		goto scan;
+		if (!n_ret)
+			goto scan;
+		else
+			goto done;
 	}
 
 	if (offset == si->lowest_bit)
@@ -669,9 +686,43 @@ checks:
 	inc_cluster_info_page(si, si->cluster_info, offset);
 	unlock_cluster(ci);
 	si->cluster_next = offset + 1;
-	si->flags -= SWP_SCANNING;
+	slots[n_ret++] = swp_entry(si->type, offset);
+
+	/* got enough slots or reach max slots? */
+	if ((n_ret == nr) || (offset >= si->highest_bit))
+		goto done;
+
+	/* search for next available slot */
+
+	/* time to take a break? */
+	if (unlikely(--latency_ration < 0)) {
+		if (n_ret)
+			goto done;
+		spin_unlock(&si->lock);
+		cond_resched();
+		spin_lock(&si->lock);
+		latency_ration = LATENCY_LIMIT;
+	}
 
-	return offset;
+	/* try to get more slots in cluster */
+	if (si->cluster_info) {
+		if (scan_swap_map_try_ssd_cluster(si, &offset, &scan_base))
+			goto checks;
+		else
+			goto done;
+	}
+	/* non-ssd case */
+	++offset;
+
+	/* non-ssd case, still more slots in cluster? */
+	if (si->cluster_nr && !si->swap_map[offset]) {
+		--si->cluster_nr;
+		goto checks;
+	}
+
+done:
+	si->flags -= SWP_SCANNING;
+	return n_ret;
 
 scan:
 	spin_unlock(&si->lock);
@@ -709,17 +760,41 @@ scan:
 
 no_page:
 	si->flags -= SWP_SCANNING;
-	return 0;
+	return n_ret;
 }
 
-swp_entry_t get_swap_page(void)
+static unsigned long scan_swap_map(struct swap_info_struct *si,
+				   unsigned char usage)
+{
+	swp_entry_t entry;
+	int n_ret;
+
+	n_ret = scan_swap_map_slots(si, usage, 1, &entry);
+
+	if (n_ret)
+		return swp_offset(entry);
+	else
+		return 0;
+
+}
+
+int get_swap_pages(int n_goal, swp_entry_t swp_entries[])
 {
 	struct swap_info_struct *si, *next;
-	pgoff_t offset;
+	long avail_pgs;
+	int n_ret = 0;
 
-	if (atomic_long_read(&nr_swap_pages) <= 0)
+	avail_pgs = atomic_long_read(&nr_swap_pages);
+	if (avail_pgs <= 0)
 		goto noswap;
-	atomic_long_dec(&nr_swap_pages);
+
+	if (n_goal > SWAP_BATCH)
+		n_goal = SWAP_BATCH;
+
+	if (n_goal > avail_pgs)
+		n_goal = avail_pgs;
+
+	atomic_long_sub(n_goal, &nr_swap_pages);
 
 	spin_lock(&swap_avail_lock);
 
@@ -745,14 +820,14 @@ start_over:
 			spin_unlock(&si->lock);
 			goto nextsi;
 		}
-
-		/* This is called for allocating swap entry for cache */
-		offset = scan_swap_map(si, SWAP_HAS_CACHE);
+		n_ret = scan_swap_map_slots(si, SWAP_HAS_CACHE,
+					    n_goal, swp_entries);
 		spin_unlock(&si->lock);
-		if (offset)
-			return swp_entry(si->type, offset);
+		if (n_ret)
+			goto check_out;
 		pr_debug("scan_swap_map of si %d failed to find offset\n",
 			 si->type);
+
 		spin_lock(&swap_avail_lock);
 nextsi:
 		/*
@@ -763,7 +838,8 @@ nextsi:
 		 * up between us dropping swap_avail_lock and taking si->lock.
 		 * Since we dropped the swap_avail_lock, the swap_avail_head
 		 * list may have been modified; so if next is still in the
-		 * swap_avail_head list then try it, otherwise start over.
+		 * swap_avail_head list then try it, otherwise start over
+		 * if we have not gotten any slots.
 		 */
 		if (plist_node_empty(&next->avail_list))
 			goto start_over;
@@ -771,9 +847,19 @@ nextsi:
 
 	spin_unlock(&swap_avail_lock);
 
-	atomic_long_inc(&nr_swap_pages);
+check_out:
+	if (n_ret < n_goal)
+		atomic_long_add((long) (n_goal-n_ret), &nr_swap_pages);
 noswap:
-	return (swp_entry_t) {0};
+	return n_ret;
+}
+
+swp_entry_t get_swap_page(void)
+{
+	swp_entry_t entry;
+
+	get_swap_pages(1, &entry);
+	return entry;
 }
 
 /* The only caller of this function is now suspend routine */