 include/linux/swap.h     |   3 +-
 include/linux/swapfile.h |   2 +-
 mm/frontswap.c           |   6 +++---
 mm/swapfile.c            | 145 +++++++++++++++++++++++++++++++++++++++----------------------------------
 4 files changed, 97 insertions(+), 59 deletions(-)
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 8bb85d6d65f0..9155bcdcce12 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -214,7 +214,8 @@ struct percpu_cluster {
 struct swap_info_struct {
 	unsigned long	flags;		/* SWP_USED etc: see above */
 	signed short	prio;		/* swap priority of this type */
-	struct list_head list;		/* entry in swap list */
+	struct plist_node list;		/* entry in swap_active_head */
+	struct plist_node avail_list;	/* entry in swap_avail_head */
 	signed char	type;		/* strange name for an index */
 	unsigned int	max;		/* extent of the swap_map */
 	unsigned char *swap_map;	/* vmalloc'ed array of usage counts */
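
A side note on this struct change: a plist_node records its own list membership, so embedding two nodes lets one swap_info_struct sit on swap_active_head and swap_avail_head independently. A minimal sketch (not part of the patch; the helper name is hypothetical) of testing membership with plist_node_empty():

	/* sketch: each embedded plist_node tracks its own list membership */
	static bool swap_info_is_avail(struct swap_info_struct *si)
	{
		bool avail;

		spin_lock(&swap_avail_lock);
		avail = !plist_node_empty(&si->avail_list);
		spin_unlock(&swap_avail_lock);
		return avail;
	}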
diff --git a/include/linux/swapfile.h b/include/linux/swapfile.h
index 2eab382d593d..388293a91e8c 100644
--- a/include/linux/swapfile.h
+++ b/include/linux/swapfile.h
@@ -6,7 +6,7 @@
  * want to expose them to the dozens of source files that include swap.h
  */
 extern spinlock_t swap_lock;
-extern struct list_head swap_list_head;
+extern struct plist_head swap_active_head;
 extern struct swap_info_struct *swap_info[];
 extern int try_to_unuse(unsigned int, bool, unsigned long);
 
diff --git a/mm/frontswap.c b/mm/frontswap.c
index fae11602e8a9..c30eec536f03 100644
--- a/mm/frontswap.c
+++ b/mm/frontswap.c
@@ -331,7 +331,7 @@ static unsigned long __frontswap_curr_pages(void)
 	struct swap_info_struct *si = NULL;
 
 	assert_spin_locked(&swap_lock);
-	list_for_each_entry(si, &swap_list_head, list)
+	plist_for_each_entry(si, &swap_active_head, list)
 		totalpages += atomic_read(&si->frontswap_pages);
 	return totalpages;
 }
@@ -346,7 +346,7 @@ static int __frontswap_unuse_pages(unsigned long total, unsigned long *unused,
 	unsigned long pages = 0, pages_to_unuse = 0;
 
 	assert_spin_locked(&swap_lock);
-	list_for_each_entry(si, &swap_list_head, list) {
+	plist_for_each_entry(si, &swap_active_head, list) {
 		si_frontswap_pages = atomic_read(&si->frontswap_pages);
 		if (total_pages_to_unuse < si_frontswap_pages) {
 			pages = pages_to_unuse = total_pages_to_unuse;
@@ -408,7 +408,7 @@ void frontswap_shrink(unsigned long target_pages)
 	/*
 	 * we don't want to hold swap_lock while doing a very
 	 * lengthy try_to_unuse, but swap_list may change
-	 * so restart scan from swap_list_head each time
+	 * so restart scan from swap_active_head each time
 	 */
 	spin_lock(&swap_lock);
 	ret = __frontswap_shrink(target_pages, &pages_to_unuse, &type);
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 6c95a8c63b1a..beeeef8a1b2d 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -61,7 +61,22 @@ static const char Unused_offset[] = "Unused swap offset entry ";
  * all active swap_info_structs
  * protected with swap_lock, and ordered by priority.
  */
-LIST_HEAD(swap_list_head);
+PLIST_HEAD(swap_active_head);
+
+/*
+ * all available (active, not full) swap_info_structs
+ * protected with swap_avail_lock, ordered by priority.
+ * This is used by get_swap_page() instead of swap_active_head
+ * because swap_active_head includes all swap_info_structs,
+ * but get_swap_page() doesn't need to look at full ones.
+ * This uses its own lock instead of swap_lock because when a
+ * swap_info_struct changes between not-full/full, it needs to
+ * add/remove itself to/from this list, but the swap_info_struct->lock
+ * is held and the locking order requires swap_lock to be taken
+ * before any swap_info_struct->lock.
+ */
+static PLIST_HEAD(swap_avail_head);
+static DEFINE_SPINLOCK(swap_avail_lock);
 
 struct swap_info_struct *swap_info[MAX_SWAPFILES];
 
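The lock-ordering comment is the crux of this hunk: swap_lock nests outside si->lock, so code already holding si->lock cannot take it, while the new swap_avail_lock nests innermost. A sketch of the pattern this enables (hypothetical helper, mirroring what scan_swap_map() and swap_entry_free() do in the hunks below):

	/* sketch: caller holds si->lock; swap_lock must NOT be taken here */
	static void __swap_avail_del(struct swap_info_struct *si)
	{
		assert_spin_locked(&si->lock);
		spin_lock(&swap_avail_lock);	/* innermost lock */
		if (!plist_node_empty(&si->avail_list))
			plist_del(&si->avail_list, &swap_avail_head);
		spin_unlock(&swap_avail_lock);
	}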
@@ -594,6 +609,9 @@ checks:
 	if (si->inuse_pages == si->pages) {
 		si->lowest_bit = si->max;
 		si->highest_bit = 0;
+		spin_lock(&swap_avail_lock);
+		plist_del(&si->avail_list, &swap_avail_head);
+		spin_unlock(&swap_avail_lock);
 	}
 	si->swap_map[offset] = usage;
 	inc_cluster_info_page(si, si->cluster_info, offset);
@@ -645,57 +663,63 @@ swp_entry_t get_swap_page(void)
 {
 	struct swap_info_struct *si, *next;
 	pgoff_t offset;
-	struct list_head *tmp;
 
-	spin_lock(&swap_lock);
 	if (atomic_long_read(&nr_swap_pages) <= 0)
 		goto noswap;
 	atomic_long_dec(&nr_swap_pages);
 
-	list_for_each(tmp, &swap_list_head) {
-		si = list_entry(tmp, typeof(*si), list);
+	spin_lock(&swap_avail_lock);
+
+start_over:
+	plist_for_each_entry_safe(si, next, &swap_avail_head, avail_list) {
+		/* requeue si to after same-priority siblings */
+		plist_requeue(&si->avail_list, &swap_avail_head);
+		spin_unlock(&swap_avail_lock);
 		spin_lock(&si->lock);
 		if (!si->highest_bit || !(si->flags & SWP_WRITEOK)) {
+			spin_lock(&swap_avail_lock);
+			if (plist_node_empty(&si->avail_list)) {
+				spin_unlock(&si->lock);
+				goto nextsi;
+			}
+			WARN(!si->highest_bit,
+			     "swap_info %d in list but !highest_bit\n",
+			     si->type);
+			WARN(!(si->flags & SWP_WRITEOK),
+			     "swap_info %d in list but !SWP_WRITEOK\n",
+			     si->type);
+			plist_del(&si->avail_list, &swap_avail_head);
 			spin_unlock(&si->lock);
-			continue;
+			goto nextsi;
 		}
 
-		/*
-		 * rotate the current swap_info that we're going to use
-		 * to after any other swap_info that have the same prio,
-		 * so that all equal-priority swap_info get used equally
-		 */
-		next = si;
-		list_for_each_entry_continue(next, &swap_list_head, list) {
-			if (si->prio != next->prio)
-				break;
-			list_rotate_left(&si->list);
-			next = si;
-		}
-
-		spin_unlock(&swap_lock);
 		/* This is called for allocating swap entry for cache */
 		offset = scan_swap_map(si, SWAP_HAS_CACHE);
 		spin_unlock(&si->lock);
 		if (offset)
 			return swp_entry(si->type, offset);
-		spin_lock(&swap_lock);
+		pr_debug("scan_swap_map of si %d failed to find offset\n",
+			 si->type);
+		spin_lock(&swap_avail_lock);
+nextsi:
 		/*
 		 * if we got here, it's likely that si was almost full before,
 		 * and since scan_swap_map() can drop the si->lock, multiple
 		 * callers probably all tried to get a page from the same si
-		 * and it filled up before we could get one. So we need to
-		 * try again. Since we dropped the swap_lock, there may now
-		 * be non-full higher priority swap_infos, and this si may have
-		 * even been removed from the list (although very unlikely).
-		 * Let's start over.
+		 * and it filled up before we could get one; or, the si filled
+		 * up between us dropping swap_avail_lock and taking si->lock.
+		 * Since we dropped the swap_avail_lock, the swap_avail_head
+		 * list may have been modified; so if next is still in the
+		 * swap_avail_head list then try it, otherwise start over.
 		 */
-		tmp = &swap_list_head;
+		if (plist_node_empty(&next->avail_list))
+			goto start_over;
 	}
 
+	spin_unlock(&swap_avail_lock);
+
 	atomic_long_inc(&nr_swap_pages);
 noswap:
-	spin_unlock(&swap_lock);
 	return (swp_entry_t) {0};
 }
 
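The plist_requeue() call replaces the old list_rotate_left() dance: it moves a node to the tail of its same-priority run, so equal-priority swap devices take turns while higher-priority devices still come first. A sketch of the effect plus a hypothetical debug helper (device names illustrative):

	/*
	 * with sda, sdb, sdc all at swap priority 1 (plist prio -1):
	 *   sda -> sdb -> sdc   allocate from sda, requeue it
	 *   sdb -> sdc -> sda   allocate from sdb, requeue it
	 *   sdc -> sda -> sdb   ...
	 */
	static void swap_avail_dump(void)
	{
		struct swap_info_struct *si;

		spin_lock(&swap_avail_lock);
		plist_for_each_entry(si, &swap_avail_head, avail_list)
			pr_info("type %d swap prio %d plist prio %d\n",
				si->type, si->prio, si->avail_list.prio);
		spin_unlock(&swap_avail_lock);
	}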
@@ -798,8 +822,18 @@ static unsigned char swap_entry_free(struct swap_info_struct *p,
 		dec_cluster_info_page(p, p->cluster_info, offset);
 		if (offset < p->lowest_bit)
 			p->lowest_bit = offset;
-		if (offset > p->highest_bit)
+		if (offset > p->highest_bit) {
+			bool was_full = !p->highest_bit;
 			p->highest_bit = offset;
+			if (was_full && (p->flags & SWP_WRITEOK)) {
+				spin_lock(&swap_avail_lock);
+				WARN_ON(!plist_node_empty(&p->avail_list));
+				if (plist_node_empty(&p->avail_list))
+					plist_add(&p->avail_list,
+						  &swap_avail_head);
+				spin_unlock(&swap_avail_lock);
+			}
+		}
 		atomic_long_inc(&nr_swap_pages);
 		p->inuse_pages--;
 		frontswap_invalidate_page(p->type, offset);
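Here highest_bit doubles as the "full" flag: scan_swap_map() zeroes it when the last page is allocated, so a zero highest_bit before the update means the device was full and must rejoin swap_avail_head. A one-line sketch of that invariant (hypothetical helper):

	/* sketch: a zero highest_bit marks a full swap device */
	static inline bool swap_info_full(struct swap_info_struct *si)
	{
		return !si->highest_bit;
	}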
@@ -1734,12 +1768,16 @@ static void _enable_swap_info(struct swap_info_struct *p, int prio,
 				unsigned char *swap_map,
 				struct swap_cluster_info *cluster_info)
 {
-	struct swap_info_struct *si;
-
 	if (prio >= 0)
 		p->prio = prio;
 	else
 		p->prio = --least_priority;
+	/*
+	 * the plist prio is negated because plist ordering is
+	 * low-to-high, while swap ordering is high-to-low
+	 */
+	p->list.prio = -p->prio;
+	p->avail_list.prio = -p->prio;
 	p->swap_map = swap_map;
 	p->cluster_info = cluster_info;
 	p->flags |= SWP_WRITEOK;
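The negation is the whole trick for reusing plist here: plists sort ascending while swap priorities sort descending. A worked sketch (device paths illustrative):

	/*
	 * swapon -p 10 /dev/sdb2  ->  p->prio = 10, plist prio = -10
	 * swapon /dev/sdc2 (auto) ->  p->prio = -1, plist prio =   1
	 *
	 * -10 sorts before 1, so the prio-10 device is walked first.
	 */
	p->list.prio = -p->prio;
	p->avail_list.prio = -p->prio;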
@@ -1747,27 +1785,20 @@ static void _enable_swap_info(struct swap_info_struct *p, int prio,
 	total_swap_pages += p->pages;
 
 	assert_spin_locked(&swap_lock);
-	BUG_ON(!list_empty(&p->list));
-	/*
-	 * insert into swap list; the list is in priority order,
-	 * so that get_swap_page() can get a page from the highest
-	 * priority swap_info_struct with available page(s), and
-	 * swapoff can adjust the auto-assigned (i.e. negative) prio
-	 * values for any lower-priority swap_info_structs when
-	 * removing a negative-prio swap_info_struct
-	 */
-	list_for_each_entry(si, &swap_list_head, list) {
-		if (p->prio >= si->prio) {
-			list_add_tail(&p->list, &si->list);
-			return;
-		}
-	}
 	/*
-	 * this covers two cases:
-	 * 1) p->prio is less than all existing prio
-	 * 2) the swap list is empty
+	 * both lists are plists, and thus priority ordered.
+	 * swap_active_head needs to be priority ordered for swapoff(),
+	 * which on removal of any swap_info_struct with an auto-assigned
+	 * (i.e. negative) priority increments the auto-assigned priority
+	 * of any lower-priority swap_info_structs.
+	 * swap_avail_head needs to be priority ordered for get_swap_page(),
+	 * which allocates swap pages from the highest available priority
+	 * swap_info_struct.
 	 */
-	list_add_tail(&p->list, &swap_list_head);
+	plist_add(&p->list, &swap_active_head);
+	spin_lock(&swap_avail_lock);
+	plist_add(&p->avail_list, &swap_avail_head);
+	spin_unlock(&swap_avail_lock);
 }
 
 static void enable_swap_info(struct swap_info_struct *p, int prio,
@@ -1821,7 +1852,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
 
 	mapping = victim->f_mapping;
 	spin_lock(&swap_lock);
-	list_for_each_entry(p, &swap_list_head, list) {
+	plist_for_each_entry(p, &swap_active_head, list) {
 		if (p->flags & SWP_WRITEOK) {
 			if (p->swap_file->f_mapping == mapping) {
 				found = 1;
@@ -1841,16 +1872,21 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
 		spin_unlock(&swap_lock);
 		goto out_dput;
 	}
+	spin_lock(&swap_avail_lock);
+	plist_del(&p->avail_list, &swap_avail_head);
+	spin_unlock(&swap_avail_lock);
 	spin_lock(&p->lock);
 	if (p->prio < 0) {
 		struct swap_info_struct *si = p;
 
-		list_for_each_entry_continue(si, &swap_list_head, list) {
+		plist_for_each_entry_continue(si, &swap_active_head, list) {
 			si->prio++;
+			si->list.prio--;
+			si->avail_list.prio--;
 		}
 		least_priority++;
 	}
-	list_del_init(&p->list);
+	plist_del(&p->list, &swap_active_head);
 	atomic_long_sub(p->pages, &nr_swap_pages);
 	total_swap_pages -= p->pages;
 	p->flags &= ~SWP_WRITEOK;
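Auto-assigned (negative) priorities stay densely packed: the continue-walk bumps every area after the one being removed, and the plist node priorities move in lockstep (negated). A worked sketch with three auto-prio areas A, B, C (names illustrative):

	/*
	 * before swapoff(B): A prio -1 (plist 1), B -2 (2), C -3 (3)
	 * the walk continues from B, so only C is adjusted:
	 * after:             A prio -1 (plist 1),          C -2 (2)
	 * least_priority steps back from -3 to -2 for the next auto swapon.
	 */
	plist_for_each_entry_continue(si, &swap_active_head, list) {
		si->prio++;		/* e.g. -3 -> -2 */
		si->list.prio--;	/*  3 ->  2      */
		si->avail_list.prio--;
	}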
@@ -2115,7 +2151,8 @@ static struct swap_info_struct *alloc_swap_info(void)
 	 */
 	}
 	INIT_LIST_HEAD(&p->first_swap_extent.list);
-	INIT_LIST_HEAD(&p->list);
+	plist_node_init(&p->list, 0);
+	plist_node_init(&p->avail_list, 0);
 	p->flags = SWP_USED;
 	spin_unlock(&swap_lock);
 	spin_lock_init(&p->lock);