about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- include/linux/swap.h 7
-rw-r--r-- include/linux/swapfile.h 2
-rw-r--r-- mm/frontswap.c 13
-rw-r--r-- mm/swapfile.c 171
4 files changed, 78 insertions, 115 deletions
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 5a14b928164e..8bb85d6d65f0 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -214,8 +214,8 @@ struct percpu_cluster {
214struct swap_info_struct { 214struct swap_info_struct {
215 unsigned long flags; /* SWP_USED etc: see above */ 215 unsigned long flags; /* SWP_USED etc: see above */
216 signed short prio; /* swap priority of this type */ 216 signed short prio; /* swap priority of this type */
217 struct list_head list; /* entry in swap list */
217 signed char type; /* strange name for an index */ 218 signed char type; /* strange name for an index */
218 signed char next; /* next type on the swap list */
219 unsigned int max; /* extent of the swap_map */ 219 unsigned int max; /* extent of the swap_map */
220 unsigned char *swap_map; /* vmalloc'ed array of usage counts */ 220 unsigned char *swap_map; /* vmalloc'ed array of usage counts */
221 struct swap_cluster_info *cluster_info; /* cluster info. Only for SSD */ 221 struct swap_cluster_info *cluster_info; /* cluster info. Only for SSD */
@@ -255,11 +255,6 @@ struct swap_info_struct {
255 struct swap_cluster_info discard_cluster_tail; /* list tail of discard clusters */ 255 struct swap_cluster_info discard_cluster_tail; /* list tail of discard clusters */
256}; 256};
257 257
258struct swap_list_t {
259 int head; /* head of priority-ordered swapfile list */
260 int next; /* swapfile to be used next */
261};
262
263/* linux/mm/workingset.c */ 258/* linux/mm/workingset.c */
264void *workingset_eviction(struct address_space *mapping, struct page *page); 259void *workingset_eviction(struct address_space *mapping, struct page *page);
265bool workingset_refault(void *shadow); 260bool workingset_refault(void *shadow);
diff --git a/include/linux/swapfile.h b/include/linux/swapfile.h
index e282624e8c10..2eab382d593d 100644
--- a/include/linux/swapfile.h
+++ b/include/linux/swapfile.h
@@ -6,7 +6,7 @@
6 * want to expose them to the dozens of source files that include swap.h 6 * want to expose them to the dozens of source files that include swap.h
7 */ 7 */
8extern spinlock_t swap_lock; 8extern spinlock_t swap_lock;
9extern struct swap_list_t swap_list; 9extern struct list_head swap_list_head;
10extern struct swap_info_struct *swap_info[]; 10extern struct swap_info_struct *swap_info[];
11extern int try_to_unuse(unsigned int, bool, unsigned long); 11extern int try_to_unuse(unsigned int, bool, unsigned long);
12 12
diff --git a/mm/frontswap.c b/mm/frontswap.c
index 1b24bdcb3197..fae11602e8a9 100644
--- a/mm/frontswap.c
+++ b/mm/frontswap.c
@@ -327,15 +327,12 @@ EXPORT_SYMBOL(__frontswap_invalidate_area);
327 327
328static unsigned long __frontswap_curr_pages(void) 328static unsigned long __frontswap_curr_pages(void)
329{ 329{
330 int type;
331 unsigned long totalpages = 0; 330 unsigned long totalpages = 0;
332 struct swap_info_struct *si = NULL; 331 struct swap_info_struct *si = NULL;
333 332
334 assert_spin_locked(&swap_lock); 333 assert_spin_locked(&swap_lock);
335 for (type = swap_list.head; type >= 0; type = si->next) { 334 list_for_each_entry(si, &swap_list_head, list)
336 si = swap_info[type];
337 totalpages += atomic_read(&si->frontswap_pages); 335 totalpages += atomic_read(&si->frontswap_pages);
338 }
339 return totalpages; 336 return totalpages;
340} 337}
341 338
@@ -347,11 +344,9 @@ static int __frontswap_unuse_pages(unsigned long total, unsigned long *unused,
347 int si_frontswap_pages; 344 int si_frontswap_pages;
348 unsigned long total_pages_to_unuse = total; 345 unsigned long total_pages_to_unuse = total;
349 unsigned long pages = 0, pages_to_unuse = 0; 346 unsigned long pages = 0, pages_to_unuse = 0;
350 int type;
351 347
352 assert_spin_locked(&swap_lock); 348 assert_spin_locked(&swap_lock);
353 for (type = swap_list.head; type >= 0; type = si->next) { 349 list_for_each_entry(si, &swap_list_head, list) {
354 si = swap_info[type];
355 si_frontswap_pages = atomic_read(&si->frontswap_pages); 350 si_frontswap_pages = atomic_read(&si->frontswap_pages);
356 if (total_pages_to_unuse < si_frontswap_pages) { 351 if (total_pages_to_unuse < si_frontswap_pages) {
357 pages = pages_to_unuse = total_pages_to_unuse; 352 pages = pages_to_unuse = total_pages_to_unuse;
@@ -366,7 +361,7 @@ static int __frontswap_unuse_pages(unsigned long total, unsigned long *unused,
366 } 361 }
367 vm_unacct_memory(pages); 362 vm_unacct_memory(pages);
368 *unused = pages_to_unuse; 363 *unused = pages_to_unuse;
369 *swapid = type; 364 *swapid = si->type;
370 ret = 0; 365 ret = 0;
371 break; 366 break;
372 } 367 }
@@ -413,7 +408,7 @@ void frontswap_shrink(unsigned long target_pages)
413 /* 408 /*
414 * we don't want to hold swap_lock while doing a very 409 * we don't want to hold swap_lock while doing a very
415 * lengthy try_to_unuse, but swap_list may change 410 * lengthy try_to_unuse, but swap_list may change
416 * so restart scan from swap_list.head each time 411 * so restart scan from swap_list_head each time
417 */ 412 */
418 spin_lock(&swap_lock); 413 spin_lock(&swap_lock);
419 ret = __frontswap_shrink(target_pages, &pages_to_unuse, &type); 414 ret = __frontswap_shrink(target_pages, &pages_to_unuse, &type);
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 4a7f7e6992b6..6c95a8c63b1a 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -51,14 +51,17 @@ atomic_long_t nr_swap_pages;
51/* protected with swap_lock. reading in vm_swap_full() doesn't need lock */ 51/* protected with swap_lock. reading in vm_swap_full() doesn't need lock */
52long total_swap_pages; 52long total_swap_pages;
53static int least_priority; 53static int least_priority;
54static atomic_t highest_priority_index = ATOMIC_INIT(-1);
55 54
56static const char Bad_file[] = "Bad swap file entry "; 55static const char Bad_file[] = "Bad swap file entry ";
57static const char Unused_file[] = "Unused swap file entry "; 56static const char Unused_file[] = "Unused swap file entry ";
58static const char Bad_offset[] = "Bad swap offset entry "; 57static const char Bad_offset[] = "Bad swap offset entry ";
59static const char Unused_offset[] = "Unused swap offset entry "; 58static const char Unused_offset[] = "Unused swap offset entry ";
60 59
61struct swap_list_t swap_list = {-1, -1}; 60/*
61 * all active swap_info_structs
62 * protected with swap_lock, and ordered by priority.
63 */
64LIST_HEAD(swap_list_head);
62 65
63struct swap_info_struct *swap_info[MAX_SWAPFILES]; 66struct swap_info_struct *swap_info[MAX_SWAPFILES];
64 67
@@ -640,66 +643,54 @@ no_page:
640 643
641swp_entry_t get_swap_page(void) 644swp_entry_t get_swap_page(void)
642{ 645{
643 struct swap_info_struct *si; 646 struct swap_info_struct *si, *next;
644 pgoff_t offset; 647 pgoff_t offset;
645 int type, next; 648 struct list_head *tmp;
646 int wrapped = 0;
647 int hp_index;
648 649
649 spin_lock(&swap_lock); 650 spin_lock(&swap_lock);
650 if (atomic_long_read(&nr_swap_pages) <= 0) 651 if (atomic_long_read(&nr_swap_pages) <= 0)
651 goto noswap; 652 goto noswap;
652 atomic_long_dec(&nr_swap_pages); 653 atomic_long_dec(&nr_swap_pages);
653 654
654 for (type = swap_list.next; type >= 0 && wrapped < 2; type = next) { 655 list_for_each(tmp, &swap_list_head) {
655 hp_index = atomic_xchg(&highest_priority_index, -1); 656 si = list_entry(tmp, typeof(*si), list);
656 /*
657 * highest_priority_index records current highest priority swap
658 * type which just frees swap entries. If its priority is
659 * higher than that of swap_list.next swap type, we use it. It
660 * isn't protected by swap_lock, so it can be an invalid value
661 * if the corresponding swap type is swapoff. We double check
662 * the flags here. It's even possible the swap type is swapoff
663 * and swapon again and its priority is changed. In such rare
664 * case, low prority swap type might be used, but eventually
665 * high priority swap will be used after several rounds of
666 * swap.
667 */
668 if (hp_index != -1 && hp_index != type &&
669 swap_info[type]->prio < swap_info[hp_index]->prio &&
670 (swap_info[hp_index]->flags & SWP_WRITEOK)) {
671 type = hp_index;
672 swap_list.next = type;
673 }
674
675 si = swap_info[type];
676 next = si->next;
677 if (next < 0 ||
678 (!wrapped && si->prio != swap_info[next]->prio)) {
679 next = swap_list.head;
680 wrapped++;
681 }
682
683 spin_lock(&si->lock); 657 spin_lock(&si->lock);
684 if (!si->highest_bit) { 658 if (!si->highest_bit || !(si->flags & SWP_WRITEOK)) {
685 spin_unlock(&si->lock);
686 continue;
687 }
688 if (!(si->flags & SWP_WRITEOK)) {
689 spin_unlock(&si->lock); 659 spin_unlock(&si->lock);
690 continue; 660 continue;
691 } 661 }
692 662
693 swap_list.next = next; 663 /*
664 * rotate the current swap_info that we're going to use
665 * to after any other swap_info that have the same prio,
666 * so that all equal-priority swap_info get used equally
667 */
668 next = si;
669 list_for_each_entry_continue(next, &swap_list_head, list) {
670 if (si->prio != next->prio)
671 break;
672 list_rotate_left(&si->list);
673 next = si;
674 }
694 675
695 spin_unlock(&swap_lock); 676 spin_unlock(&swap_lock);
696 /* This is called for allocating swap entry for cache */ 677 /* This is called for allocating swap entry for cache */
697 offset = scan_swap_map(si, SWAP_HAS_CACHE); 678 offset = scan_swap_map(si, SWAP_HAS_CACHE);
698 spin_unlock(&si->lock); 679 spin_unlock(&si->lock);
699 if (offset) 680 if (offset)
700 return swp_entry(type, offset); 681 return swp_entry(si->type, offset);
701 spin_lock(&swap_lock); 682 spin_lock(&swap_lock);
702 next = swap_list.next; 683 /*
684 * if we got here, it's likely that si was almost full before,
685 * and since scan_swap_map() can drop the si->lock, multiple
686 * callers probably all tried to get a page from the same si
687 * and it filled up before we could get one. So we need to
688 * try again. Since we dropped the swap_lock, there may now
689 * be non-full higher priority swap_infos, and this si may have
690 * even been removed from the list (although very unlikely).
691 * Let's start over.
692 */
693 tmp = &swap_list_head;
703 } 694 }
704 695
705 atomic_long_inc(&nr_swap_pages); 696 atomic_long_inc(&nr_swap_pages);
@@ -766,27 +757,6 @@ out:
766 return NULL; 757 return NULL;
767} 758}
768 759
769/*
770 * This swap type frees swap entry, check if it is the highest priority swap
771 * type which just frees swap entry. get_swap_page() uses
772 * highest_priority_index to search highest priority swap type. The
773 * swap_info_struct.lock can't protect us if there are multiple swap types
774 * active, so we use atomic_cmpxchg.
775 */
776static void set_highest_priority_index(int type)
777{
778 int old_hp_index, new_hp_index;
779
780 do {
781 old_hp_index = atomic_read(&highest_priority_index);
782 if (old_hp_index != -1 &&
783 swap_info[old_hp_index]->prio >= swap_info[type]->prio)
784 break;
785 new_hp_index = type;
786 } while (atomic_cmpxchg(&highest_priority_index,
787 old_hp_index, new_hp_index) != old_hp_index);
788}
789
790static unsigned char swap_entry_free(struct swap_info_struct *p, 760static unsigned char swap_entry_free(struct swap_info_struct *p,
791 swp_entry_t entry, unsigned char usage) 761 swp_entry_t entry, unsigned char usage)
792{ 762{
@@ -830,7 +800,6 @@ static unsigned char swap_entry_free(struct swap_info_struct *p,
830 p->lowest_bit = offset; 800 p->lowest_bit = offset;
831 if (offset > p->highest_bit) 801 if (offset > p->highest_bit)
832 p->highest_bit = offset; 802 p->highest_bit = offset;
833 set_highest_priority_index(p->type);
834 atomic_long_inc(&nr_swap_pages); 803 atomic_long_inc(&nr_swap_pages);
835 p->inuse_pages--; 804 p->inuse_pages--;
836 frontswap_invalidate_page(p->type, offset); 805 frontswap_invalidate_page(p->type, offset);
@@ -1765,7 +1734,7 @@ static void _enable_swap_info(struct swap_info_struct *p, int prio,
1765 unsigned char *swap_map, 1734 unsigned char *swap_map,
1766 struct swap_cluster_info *cluster_info) 1735 struct swap_cluster_info *cluster_info)
1767{ 1736{
1768 int i, prev; 1737 struct swap_info_struct *si;
1769 1738
1770 if (prio >= 0) 1739 if (prio >= 0)
1771 p->prio = prio; 1740 p->prio = prio;
@@ -1777,18 +1746,28 @@ static void _enable_swap_info(struct swap_info_struct *p, int prio,
1777 atomic_long_add(p->pages, &nr_swap_pages); 1746 atomic_long_add(p->pages, &nr_swap_pages);
1778 total_swap_pages += p->pages; 1747 total_swap_pages += p->pages;
1779 1748
1780 /* insert swap space into swap_list: */ 1749 assert_spin_locked(&swap_lock);
1781 prev = -1; 1750 BUG_ON(!list_empty(&p->list));
1782 for (i = swap_list.head; i >= 0; i = swap_info[i]->next) { 1751 /*
1783 if (p->prio >= swap_info[i]->prio) 1752 * insert into swap list; the list is in priority order,
1784 break; 1753 * so that get_swap_page() can get a page from the highest
1785 prev = i; 1754 * priority swap_info_struct with available page(s), and
1755 * swapoff can adjust the auto-assigned (i.e. negative) prio
1756 * values for any lower-priority swap_info_structs when
1757 * removing a negative-prio swap_info_struct
1758 */
1759 list_for_each_entry(si, &swap_list_head, list) {
1760 if (p->prio >= si->prio) {
1761 list_add_tail(&p->list, &si->list);
1762 return;
1763 }
1786 } 1764 }
1787 p->next = i; 1765 /*
1788 if (prev < 0) 1766 * this covers two cases:
1789 swap_list.head = swap_list.next = p->type; 1767 * 1) p->prio is less than all existing prio
1790 else 1768 * 2) the swap list is empty
1791 swap_info[prev]->next = p->type; 1769 */
1770 list_add_tail(&p->list, &swap_list_head);
1792} 1771}
1793 1772
1794static void enable_swap_info(struct swap_info_struct *p, int prio, 1773static void enable_swap_info(struct swap_info_struct *p, int prio,
@@ -1823,8 +1802,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
1823 struct address_space *mapping; 1802 struct address_space *mapping;
1824 struct inode *inode; 1803 struct inode *inode;
1825 struct filename *pathname; 1804 struct filename *pathname;
1826 int i, type, prev; 1805 int err, found = 0;
1827 int err;
1828 unsigned int old_block_size; 1806 unsigned int old_block_size;
1829 1807
1830 if (!capable(CAP_SYS_ADMIN)) 1808 if (!capable(CAP_SYS_ADMIN))
@@ -1842,17 +1820,16 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
1842 goto out; 1820 goto out;
1843 1821
1844 mapping = victim->f_mapping; 1822 mapping = victim->f_mapping;
1845 prev = -1;
1846 spin_lock(&swap_lock); 1823 spin_lock(&swap_lock);
1847 for (type = swap_list.head; type >= 0; type = swap_info[type]->next) { 1824 list_for_each_entry(p, &swap_list_head, list) {
1848 p = swap_info[type];
1849 if (p->flags & SWP_WRITEOK) { 1825 if (p->flags & SWP_WRITEOK) {
1850 if (p->swap_file->f_mapping == mapping) 1826 if (p->swap_file->f_mapping == mapping) {
1827 found = 1;
1851 break; 1828 break;
1829 }
1852 } 1830 }
1853 prev = type;
1854 } 1831 }
1855 if (type < 0) { 1832 if (!found) {
1856 err = -EINVAL; 1833 err = -EINVAL;
1857 spin_unlock(&swap_lock); 1834 spin_unlock(&swap_lock);
1858 goto out_dput; 1835 goto out_dput;
@@ -1864,20 +1841,16 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
1864 spin_unlock(&swap_lock); 1841 spin_unlock(&swap_lock);
1865 goto out_dput; 1842 goto out_dput;
1866 } 1843 }
1867 if (prev < 0)
1868 swap_list.head = p->next;
1869 else
1870 swap_info[prev]->next = p->next;
1871 if (type == swap_list.next) {
1872 /* just pick something that's safe... */
1873 swap_list.next = swap_list.head;
1874 }
1875 spin_lock(&p->lock); 1844 spin_lock(&p->lock);
1876 if (p->prio < 0) { 1845 if (p->prio < 0) {
1877 for (i = p->next; i >= 0; i = swap_info[i]->next) 1846 struct swap_info_struct *si = p;
1878 swap_info[i]->prio = p->prio--; 1847
1848 list_for_each_entry_continue(si, &swap_list_head, list) {
1849 si->prio++;
1850 }
1879 least_priority++; 1851 least_priority++;
1880 } 1852 }
1853 list_del_init(&p->list);
1881 atomic_long_sub(p->pages, &nr_swap_pages); 1854 atomic_long_sub(p->pages, &nr_swap_pages);
1882 total_swap_pages -= p->pages; 1855 total_swap_pages -= p->pages;
1883 p->flags &= ~SWP_WRITEOK; 1856 p->flags &= ~SWP_WRITEOK;
@@ -1885,7 +1858,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
1885 spin_unlock(&swap_lock); 1858 spin_unlock(&swap_lock);
1886 1859
1887 set_current_oom_origin(); 1860 set_current_oom_origin();
1888 err = try_to_unuse(type, false, 0); /* force all pages to be unused */ 1861 err = try_to_unuse(p->type, false, 0); /* force unuse all pages */
1889 clear_current_oom_origin(); 1862 clear_current_oom_origin();
1890 1863
1891 if (err) { 1864 if (err) {
@@ -1926,7 +1899,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
1926 frontswap_map = frontswap_map_get(p); 1899 frontswap_map = frontswap_map_get(p);
1927 spin_unlock(&p->lock); 1900 spin_unlock(&p->lock);
1928 spin_unlock(&swap_lock); 1901 spin_unlock(&swap_lock);
1929 frontswap_invalidate_area(type); 1902 frontswap_invalidate_area(p->type);
1930 frontswap_map_set(p, NULL); 1903 frontswap_map_set(p, NULL);
1931 mutex_unlock(&swapon_mutex); 1904 mutex_unlock(&swapon_mutex);
1932 free_percpu(p->percpu_cluster); 1905 free_percpu(p->percpu_cluster);
@@ -1935,7 +1908,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
1935 vfree(cluster_info); 1908 vfree(cluster_info);
1936 vfree(frontswap_map); 1909 vfree(frontswap_map);
1937 /* Destroy swap account information */ 1910 /* Destroy swap account information */
1938 swap_cgroup_swapoff(type); 1911 swap_cgroup_swapoff(p->type);
1939 1912
1940 inode = mapping->host; 1913 inode = mapping->host;
1941 if (S_ISBLK(inode->i_mode)) { 1914 if (S_ISBLK(inode->i_mode)) {
@@ -2142,8 +2115,8 @@ static struct swap_info_struct *alloc_swap_info(void)
2142 */ 2115 */
2143 } 2116 }
2144 INIT_LIST_HEAD(&p->first_swap_extent.list); 2117 INIT_LIST_HEAD(&p->first_swap_extent.list);
2118 INIT_LIST_HEAD(&p->list);
2145 p->flags = SWP_USED; 2119 p->flags = SWP_USED;
2146 p->next = -1;
2147 spin_unlock(&swap_lock); 2120 spin_unlock(&swap_lock);
2148 spin_lock_init(&p->lock); 2121 spin_lock_init(&p->lock);
2149 2122