author    Dan Streetman <ddstreet@ieee.org>    2014-06-04 19:09:59 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>    2014-06-04 19:54:07 -0400
commit    18ab4d4ced0817421e6db6940374cc39d28d65da
tree      1fc3911f333a37b21c39e862a6df70140c2e6202 /mm
parent    a75f232ce0fe38bd01301899ecd97ffd0254316a
swap: change swap_list_head to plist, add swap_avail_head
Originally get_swap_page() started iterating through the singly-linked list of swap_info_structs using swap_list.next or highest_priority_index, which both were intended to point to the highest priority active swap target that was not full. The first patch in this series changed the singly-linked list to a doubly-linked list, and removed the logic to start at the highest priority non-full entry; it starts scanning at the highest priority entry each time, even if the entry is full.

Replace the manually ordered swap_list_head with a plist, swap_active_head. Add a new plist, swap_avail_head. The original swap_active_head plist contains all active swap_info_structs, as before, while the new swap_avail_head plist contains only swap_info_structs that are active and available, i.e. not full. Add a new spinlock, swap_avail_lock, to protect the swap_avail_head list.

Mel Gorman suggested using plists since they internally handle ordering the list entries based on priority, which is exactly what swap was doing manually. All the ordering code is now removed, and swap_info_struct entries are simply added to their corresponding plist and automatically ordered correctly.

Using a new plist for available swap_info_structs simplifies and optimizes get_swap_page(), which no longer has to iterate over full swap_info_structs. Using a new spinlock for the swap_avail_head plist allows each swap_info_struct to add or remove itself from the plist when it becomes full or not-full; previously it could not do so because the swap_info_struct->lock is held when it changes from full<->not-full, and the swap_lock protecting the main swap_active_head must be taken before any swap_info_struct->lock.

Signed-off-by: Dan Streetman <ddstreet@ieee.org>
Acked-by: Mel Gorman <mgorman@suse.de>
Cc: Shaohua Li <shli@fusionio.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Dan Streetman <ddstreet@ieee.org>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
Cc: Weijie Yang <weijieut@gmail.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Bob Liu <bob.liu@oracle.com>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
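For illustration, a minimal sketch (not from this patch) of the plist pattern the change relies on, using the <linux/plist.h> API (PLIST_HEAD, plist_node_init, plist_add, plist_requeue); the toy_swap_dev structure, toy_lock and the function names are hypothetical. It shows why the plist prio is negated: plists order entries from lowest to highest prio, while swap wants the highest-priority device considered first, and plist_requeue() gives the round-robin among equal-priority entries that get_swap_page() needs.

/*
 * Illustrative sketch only, not from this patch: a hypothetical
 * toy_swap_dev kept on a plist.  The negated priority mirrors what
 * _enable_swap_info() does, since plists order entries low-to-high
 * by node prio while swap priorities are used high-to-low.
 */
#include <linux/plist.h>
#include <linux/spinlock.h>

struct toy_swap_dev {
	int swap_prio;			/* user-visible swap priority */
	struct plist_node node;		/* node.prio = -swap_prio */
};

static PLIST_HEAD(toy_head);		/* ordered low-to-high by node.prio */
static DEFINE_SPINLOCK(toy_lock);

static void toy_add(struct toy_swap_dev *dev, int swap_prio)
{
	dev->swap_prio = swap_prio;
	/* negate so the highest swap priority sorts to the list head */
	plist_node_init(&dev->node, -swap_prio);
	spin_lock(&toy_lock);
	plist_add(&dev->node, &toy_head);
	spin_unlock(&toy_lock);
}

static struct toy_swap_dev *toy_pick(void)
{
	struct toy_swap_dev *dev;

	spin_lock(&toy_lock);
	plist_for_each_entry(dev, &toy_head, node) {
		/*
		 * first entry has the highest swap priority; requeue it
		 * behind any same-prio siblings so equal-priority devices
		 * are used round-robin, as get_swap_page() now does
		 */
		plist_requeue(&dev->node, &toy_head);
		spin_unlock(&toy_lock);
		return dev;
	}
	spin_unlock(&toy_lock);
	return NULL;
}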
Diffstat (limited to 'mm')
-rw-r--r--   mm/frontswap.c     6
-rw-r--r--   mm/swapfile.c    145
2 files changed, 94 insertions, 57 deletions
diff --git a/mm/frontswap.c b/mm/frontswap.c
index fae11602e8a9..c30eec536f03 100644
--- a/mm/frontswap.c
+++ b/mm/frontswap.c
@@ -331,7 +331,7 @@ static unsigned long __frontswap_curr_pages(void)
 	struct swap_info_struct *si = NULL;
 
 	assert_spin_locked(&swap_lock);
-	list_for_each_entry(si, &swap_list_head, list)
+	plist_for_each_entry(si, &swap_active_head, list)
 		totalpages += atomic_read(&si->frontswap_pages);
 	return totalpages;
 }
@@ -346,7 +346,7 @@ static int __frontswap_unuse_pages(unsigned long total, unsigned long *unused,
 	unsigned long pages = 0, pages_to_unuse = 0;
 
 	assert_spin_locked(&swap_lock);
-	list_for_each_entry(si, &swap_list_head, list) {
+	plist_for_each_entry(si, &swap_active_head, list) {
 		si_frontswap_pages = atomic_read(&si->frontswap_pages);
 		if (total_pages_to_unuse < si_frontswap_pages) {
 			pages = pages_to_unuse = total_pages_to_unuse;
@@ -408,7 +408,7 @@ void frontswap_shrink(unsigned long target_pages)
 	/*
 	 * we don't want to hold swap_lock while doing a very
 	 * lengthy try_to_unuse, but swap_list may change
-	 * so restart scan from swap_list_head each time
+	 * so restart scan from swap_active_head each time
 	 */
 	spin_lock(&swap_lock);
 	ret = __frontswap_shrink(target_pages, &pages_to_unuse, &type);
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 6c95a8c63b1a..beeeef8a1b2d 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -61,7 +61,22 @@ static const char Unused_offset[] = "Unused swap offset entry ";
  * all active swap_info_structs
  * protected with swap_lock, and ordered by priority.
  */
-LIST_HEAD(swap_list_head);
+PLIST_HEAD(swap_active_head);
+
+/*
+ * all available (active, not full) swap_info_structs
+ * protected with swap_avail_lock, ordered by priority.
+ * This is used by get_swap_page() instead of swap_active_head
+ * because swap_active_head includes all swap_info_structs,
+ * but get_swap_page() doesn't need to look at full ones.
+ * This uses its own lock instead of swap_lock because when a
+ * swap_info_struct changes between not-full/full, it needs to
+ * add/remove itself to/from this list, but the swap_info_struct->lock
+ * is held and the locking order requires swap_lock to be taken
+ * before any swap_info_struct->lock.
+ */
+static PLIST_HEAD(swap_avail_head);
+static DEFINE_SPINLOCK(swap_avail_lock);
 
 struct swap_info_struct *swap_info[MAX_SWAPFILES];
 
@@ -594,6 +609,9 @@ checks:
 	if (si->inuse_pages == si->pages) {
 		si->lowest_bit = si->max;
 		si->highest_bit = 0;
+		spin_lock(&swap_avail_lock);
+		plist_del(&si->avail_list, &swap_avail_head);
+		spin_unlock(&swap_avail_lock);
 	}
 	si->swap_map[offset] = usage;
 	inc_cluster_info_page(si, si->cluster_info, offset);
@@ -645,57 +663,63 @@ swp_entry_t get_swap_page(void)
 {
 	struct swap_info_struct *si, *next;
 	pgoff_t offset;
-	struct list_head *tmp;
 
-	spin_lock(&swap_lock);
 	if (atomic_long_read(&nr_swap_pages) <= 0)
 		goto noswap;
 	atomic_long_dec(&nr_swap_pages);
 
-	list_for_each(tmp, &swap_list_head) {
-		si = list_entry(tmp, typeof(*si), list);
+	spin_lock(&swap_avail_lock);
+
+start_over:
+	plist_for_each_entry_safe(si, next, &swap_avail_head, avail_list) {
+		/* requeue si to after same-priority siblings */
+		plist_requeue(&si->avail_list, &swap_avail_head);
+		spin_unlock(&swap_avail_lock);
 		spin_lock(&si->lock);
 		if (!si->highest_bit || !(si->flags & SWP_WRITEOK)) {
+			spin_lock(&swap_avail_lock);
+			if (plist_node_empty(&si->avail_list)) {
+				spin_unlock(&si->lock);
+				goto nextsi;
+			}
+			WARN(!si->highest_bit,
+			     "swap_info %d in list but !highest_bit\n",
+			     si->type);
+			WARN(!(si->flags & SWP_WRITEOK),
+			     "swap_info %d in list but !SWP_WRITEOK\n",
+			     si->type);
+			plist_del(&si->avail_list, &swap_avail_head);
 			spin_unlock(&si->lock);
-			continue;
+			goto nextsi;
 		}
 
-		/*
-		 * rotate the current swap_info that we're going to use
-		 * to after any other swap_info that have the same prio,
-		 * so that all equal-priority swap_info get used equally
-		 */
-		next = si;
-		list_for_each_entry_continue(next, &swap_list_head, list) {
-			if (si->prio != next->prio)
-				break;
-			list_rotate_left(&si->list);
-			next = si;
-		}
-
-		spin_unlock(&swap_lock);
 		/* This is called for allocating swap entry for cache */
 		offset = scan_swap_map(si, SWAP_HAS_CACHE);
 		spin_unlock(&si->lock);
 		if (offset)
 			return swp_entry(si->type, offset);
-		spin_lock(&swap_lock);
+		pr_debug("scan_swap_map of si %d failed to find offset\n",
+		       si->type);
+		spin_lock(&swap_avail_lock);
+nextsi:
 		/*
 		 * if we got here, it's likely that si was almost full before,
 		 * and since scan_swap_map() can drop the si->lock, multiple
 		 * callers probably all tried to get a page from the same si
-		 * and it filled up before we could get one. So we need to
-		 * try again. Since we dropped the swap_lock, there may now
-		 * be non-full higher priority swap_infos, and this si may have
-		 * even been removed from the list (although very unlikely).
-		 * Let's start over.
+		 * and it filled up before we could get one; or, the si filled
+		 * up between us dropping swap_avail_lock and taking si->lock.
+		 * Since we dropped the swap_avail_lock, the swap_avail_head
+		 * list may have been modified; so if next is still in the
+		 * swap_avail_head list then try it, otherwise start over.
 		 */
-		tmp = &swap_list_head;
+		if (plist_node_empty(&next->avail_list))
+			goto start_over;
 	}
 
+	spin_unlock(&swap_avail_lock);
+
 	atomic_long_inc(&nr_swap_pages);
 noswap:
-	spin_unlock(&swap_lock);
 	return (swp_entry_t) {0};
 }
 
@@ -798,8 +822,18 @@ static unsigned char swap_entry_free(struct swap_info_struct *p,
 	dec_cluster_info_page(p, p->cluster_info, offset);
 	if (offset < p->lowest_bit)
 		p->lowest_bit = offset;
-	if (offset > p->highest_bit)
+	if (offset > p->highest_bit) {
+		bool was_full = !p->highest_bit;
 		p->highest_bit = offset;
+		if (was_full && (p->flags & SWP_WRITEOK)) {
+			spin_lock(&swap_avail_lock);
+			WARN_ON(!plist_node_empty(&p->avail_list));
+			if (plist_node_empty(&p->avail_list))
+				plist_add(&p->avail_list,
+					  &swap_avail_head);
+			spin_unlock(&swap_avail_lock);
+		}
+	}
 	atomic_long_inc(&nr_swap_pages);
 	p->inuse_pages--;
 	frontswap_invalidate_page(p->type, offset);
@@ -1734,12 +1768,16 @@ static void _enable_swap_info(struct swap_info_struct *p, int prio,
 				unsigned char *swap_map,
 				struct swap_cluster_info *cluster_info)
 {
-	struct swap_info_struct *si;
-
 	if (prio >= 0)
 		p->prio = prio;
 	else
 		p->prio = --least_priority;
+	/*
+	 * the plist prio is negated because plist ordering is
+	 * low-to-high, while swap ordering is high-to-low
+	 */
+	p->list.prio = -p->prio;
+	p->avail_list.prio = -p->prio;
 	p->swap_map = swap_map;
 	p->cluster_info = cluster_info;
 	p->flags |= SWP_WRITEOK;
@@ -1747,27 +1785,20 @@ static void _enable_swap_info(struct swap_info_struct *p, int prio,
 	total_swap_pages += p->pages;
 
 	assert_spin_locked(&swap_lock);
-	BUG_ON(!list_empty(&p->list));
-	/*
-	 * insert into swap list; the list is in priority order,
-	 * so that get_swap_page() can get a page from the highest
-	 * priority swap_info_struct with available page(s), and
-	 * swapoff can adjust the auto-assigned (i.e. negative) prio
-	 * values for any lower-priority swap_info_structs when
-	 * removing a negative-prio swap_info_struct
-	 */
-	list_for_each_entry(si, &swap_list_head, list) {
-		if (p->prio >= si->prio) {
-			list_add_tail(&p->list, &si->list);
-			return;
-		}
-	}
 	/*
-	 * this covers two cases:
-	 * 1) p->prio is less than all existing prio
-	 * 2) the swap list is empty
+	 * both lists are plists, and thus priority ordered.
+	 * swap_active_head needs to be priority ordered for swapoff(),
+	 * which on removal of any swap_info_struct with an auto-assigned
+	 * (i.e. negative) priority increments the auto-assigned priority
+	 * of any lower-priority swap_info_structs.
+	 * swap_avail_head needs to be priority ordered for get_swap_page(),
+	 * which allocates swap pages from the highest available priority
+	 * swap_info_struct.
 	 */
-	list_add_tail(&p->list, &swap_list_head);
+	plist_add(&p->list, &swap_active_head);
+	spin_lock(&swap_avail_lock);
+	plist_add(&p->avail_list, &swap_avail_head);
+	spin_unlock(&swap_avail_lock);
 }
 
 static void enable_swap_info(struct swap_info_struct *p, int prio,
@@ -1821,7 +1852,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
 
 	mapping = victim->f_mapping;
 	spin_lock(&swap_lock);
-	list_for_each_entry(p, &swap_list_head, list) {
+	plist_for_each_entry(p, &swap_active_head, list) {
 		if (p->flags & SWP_WRITEOK) {
 			if (p->swap_file->f_mapping == mapping) {
 				found = 1;
@@ -1841,16 +1872,21 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
 		spin_unlock(&swap_lock);
 		goto out_dput;
 	}
+	spin_lock(&swap_avail_lock);
+	plist_del(&p->avail_list, &swap_avail_head);
+	spin_unlock(&swap_avail_lock);
 	spin_lock(&p->lock);
 	if (p->prio < 0) {
 		struct swap_info_struct *si = p;
 
-		list_for_each_entry_continue(si, &swap_list_head, list) {
+		plist_for_each_entry_continue(si, &swap_active_head, list) {
 			si->prio++;
+			si->list.prio--;
+			si->avail_list.prio--;
 		}
 		least_priority++;
 	}
-	list_del_init(&p->list);
+	plist_del(&p->list, &swap_active_head);
 	atomic_long_sub(p->pages, &nr_swap_pages);
 	total_swap_pages -= p->pages;
 	p->flags &= ~SWP_WRITEOK;
@@ -2115,7 +2151,8 @@ static struct swap_info_struct *alloc_swap_info(void)
 	 */
 	}
 	INIT_LIST_HEAD(&p->first_swap_extent.list);
-	INIT_LIST_HEAD(&p->list);
+	plist_node_init(&p->list, 0);
+	plist_node_init(&p->avail_list, 0);
 	p->flags = SWP_USED;
 	spin_unlock(&swap_lock);
 	spin_lock_init(&p->lock);