aboutsummaryrefslogtreecommitdiffstats
path: root/mm/swapfile.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/swapfile.c')
-rw-r--r--mm/swapfile.c66
1 files changed, 45 insertions, 21 deletions
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 457b10baef59..71373d03fcee 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -31,6 +31,8 @@
31#include <linux/memcontrol.h> 31#include <linux/memcontrol.h>
32#include <linux/poll.h> 32#include <linux/poll.h>
33#include <linux/oom.h> 33#include <linux/oom.h>
34#include <linux/frontswap.h>
35#include <linux/swapfile.h>
34 36
35#include <asm/pgtable.h> 37#include <asm/pgtable.h>
36#include <asm/tlbflush.h> 38#include <asm/tlbflush.h>
@@ -42,7 +44,7 @@ static bool swap_count_continued(struct swap_info_struct *, pgoff_t,
42static void free_swap_count_continuations(struct swap_info_struct *); 44static void free_swap_count_continuations(struct swap_info_struct *);
43static sector_t map_swap_entry(swp_entry_t, struct block_device**); 45static sector_t map_swap_entry(swp_entry_t, struct block_device**);
44 46
45static DEFINE_SPINLOCK(swap_lock); 47DEFINE_SPINLOCK(swap_lock);
46static unsigned int nr_swapfiles; 48static unsigned int nr_swapfiles;
47long nr_swap_pages; 49long nr_swap_pages;
48long total_swap_pages; 50long total_swap_pages;
@@ -53,9 +55,9 @@ static const char Unused_file[] = "Unused swap file entry ";
53static const char Bad_offset[] = "Bad swap offset entry "; 55static const char Bad_offset[] = "Bad swap offset entry ";
54static const char Unused_offset[] = "Unused swap offset entry "; 56static const char Unused_offset[] = "Unused swap offset entry ";
55 57
56static struct swap_list_t swap_list = {-1, -1}; 58struct swap_list_t swap_list = {-1, -1};
57 59
58static struct swap_info_struct *swap_info[MAX_SWAPFILES]; 60struct swap_info_struct *swap_info[MAX_SWAPFILES];
59 61
60static DEFINE_MUTEX(swapon_mutex); 62static DEFINE_MUTEX(swapon_mutex);
61 63
@@ -556,6 +558,7 @@ static unsigned char swap_entry_free(struct swap_info_struct *p,
556 swap_list.next = p->type; 558 swap_list.next = p->type;
557 nr_swap_pages++; 559 nr_swap_pages++;
558 p->inuse_pages--; 560 p->inuse_pages--;
561 frontswap_invalidate_page(p->type, offset);
559 if ((p->flags & SWP_BLKDEV) && 562 if ((p->flags & SWP_BLKDEV) &&
560 disk->fops->swap_slot_free_notify) 563 disk->fops->swap_slot_free_notify)
561 disk->fops->swap_slot_free_notify(p->bdev, offset); 564 disk->fops->swap_slot_free_notify(p->bdev, offset);
@@ -985,11 +988,12 @@ static int unuse_mm(struct mm_struct *mm,
985} 988}
986 989
987/* 990/*
988 * Scan swap_map from current position to next entry still in use. 991 * Scan swap_map (or frontswap_map if frontswap parameter is true)
992 * from current position to next entry still in use.
989 * Recycle to start on reaching the end, returning 0 when empty. 993 * Recycle to start on reaching the end, returning 0 when empty.
990 */ 994 */
991static unsigned int find_next_to_unuse(struct swap_info_struct *si, 995static unsigned int find_next_to_unuse(struct swap_info_struct *si,
992 unsigned int prev) 996 unsigned int prev, bool frontswap)
993{ 997{
994 unsigned int max = si->max; 998 unsigned int max = si->max;
995 unsigned int i = prev; 999 unsigned int i = prev;
@@ -1015,6 +1019,12 @@ static unsigned int find_next_to_unuse(struct swap_info_struct *si,
1015 prev = 0; 1019 prev = 0;
1016 i = 1; 1020 i = 1;
1017 } 1021 }
1022 if (frontswap) {
1023 if (frontswap_test(si, i))
1024 break;
1025 else
1026 continue;
1027 }
1018 count = si->swap_map[i]; 1028 count = si->swap_map[i];
1019 if (count && swap_count(count) != SWAP_MAP_BAD) 1029 if (count && swap_count(count) != SWAP_MAP_BAD)
1020 break; 1030 break;
@@ -1026,8 +1036,12 @@ static unsigned int find_next_to_unuse(struct swap_info_struct *si,
1026 * We completely avoid races by reading each swap page in advance, 1036 * We completely avoid races by reading each swap page in advance,
1027 * and then search for the process using it. All the necessary 1037 * and then search for the process using it. All the necessary
1028 * page table adjustments can then be made atomically. 1038 * page table adjustments can then be made atomically.
1039 *
1040 * if the boolean frontswap is true, only unuse pages_to_unuse pages;
1041 * pages_to_unuse==0 means all pages; ignored if frontswap is false
1029 */ 1042 */
1030static int try_to_unuse(unsigned int type) 1043int try_to_unuse(unsigned int type, bool frontswap,
1044 unsigned long pages_to_unuse)
1031{ 1045{
1032 struct swap_info_struct *si = swap_info[type]; 1046 struct swap_info_struct *si = swap_info[type];
1033 struct mm_struct *start_mm; 1047 struct mm_struct *start_mm;
@@ -1060,7 +1074,7 @@ static int try_to_unuse(unsigned int type)
1060 * one pass through swap_map is enough, but not necessarily: 1074 * one pass through swap_map is enough, but not necessarily:
1061 * there are races when an instance of an entry might be missed. 1075 * there are races when an instance of an entry might be missed.
1062 */ 1076 */
1063 while ((i = find_next_to_unuse(si, i)) != 0) { 1077 while ((i = find_next_to_unuse(si, i, frontswap)) != 0) {
1064 if (signal_pending(current)) { 1078 if (signal_pending(current)) {
1065 retval = -EINTR; 1079 retval = -EINTR;
1066 break; 1080 break;
@@ -1227,6 +1241,10 @@ static int try_to_unuse(unsigned int type)
1227 * interactive performance. 1241 * interactive performance.
1228 */ 1242 */
1229 cond_resched(); 1243 cond_resched();
1244 if (frontswap && pages_to_unuse > 0) {
1245 if (!--pages_to_unuse)
1246 break;
1247 }
1230 } 1248 }
1231 1249
1232 mmput(start_mm); 1250 mmput(start_mm);
@@ -1486,7 +1504,8 @@ bad_bmap:
1486} 1504}
1487 1505
1488static void enable_swap_info(struct swap_info_struct *p, int prio, 1506static void enable_swap_info(struct swap_info_struct *p, int prio,
1489 unsigned char *swap_map) 1507 unsigned char *swap_map,
1508 unsigned long *frontswap_map)
1490{ 1509{
1491 int i, prev; 1510 int i, prev;
1492 1511
@@ -1496,6 +1515,7 @@ static void enable_swap_info(struct swap_info_struct *p, int prio,
1496 else 1515 else
1497 p->prio = --least_priority; 1516 p->prio = --least_priority;
1498 p->swap_map = swap_map; 1517 p->swap_map = swap_map;
1518 frontswap_map_set(p, frontswap_map);
1499 p->flags |= SWP_WRITEOK; 1519 p->flags |= SWP_WRITEOK;
1500 nr_swap_pages += p->pages; 1520 nr_swap_pages += p->pages;
1501 total_swap_pages += p->pages; 1521 total_swap_pages += p->pages;
@@ -1512,6 +1532,7 @@ static void enable_swap_info(struct swap_info_struct *p, int prio,
1512 swap_list.head = swap_list.next = p->type; 1532 swap_list.head = swap_list.next = p->type;
1513 else 1533 else
1514 swap_info[prev]->next = p->type; 1534 swap_info[prev]->next = p->type;
1535 frontswap_init(p->type);
1515 spin_unlock(&swap_lock); 1536 spin_unlock(&swap_lock);
1516} 1537}
1517 1538
@@ -1585,7 +1606,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
1585 spin_unlock(&swap_lock); 1606 spin_unlock(&swap_lock);
1586 1607
1587 oom_score_adj = test_set_oom_score_adj(OOM_SCORE_ADJ_MAX); 1608 oom_score_adj = test_set_oom_score_adj(OOM_SCORE_ADJ_MAX);
1588 err = try_to_unuse(type); 1609 err = try_to_unuse(type, false, 0); /* force all pages to be unused */
1589 compare_swap_oom_score_adj(OOM_SCORE_ADJ_MAX, oom_score_adj); 1610 compare_swap_oom_score_adj(OOM_SCORE_ADJ_MAX, oom_score_adj);
1590 1611
1591 if (err) { 1612 if (err) {
@@ -1596,7 +1617,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
1596 * sys_swapoff for this swap_info_struct at this point. 1617 * sys_swapoff for this swap_info_struct at this point.
1597 */ 1618 */
1598 /* re-insert swap space back into swap_list */ 1619 /* re-insert swap space back into swap_list */
1599 enable_swap_info(p, p->prio, p->swap_map); 1620 enable_swap_info(p, p->prio, p->swap_map, frontswap_map_get(p));
1600 goto out_dput; 1621 goto out_dput;
1601 } 1622 }
1602 1623
@@ -1622,9 +1643,11 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
1622 swap_map = p->swap_map; 1643 swap_map = p->swap_map;
1623 p->swap_map = NULL; 1644 p->swap_map = NULL;
1624 p->flags = 0; 1645 p->flags = 0;
1646 frontswap_invalidate_area(type);
1625 spin_unlock(&swap_lock); 1647 spin_unlock(&swap_lock);
1626 mutex_unlock(&swapon_mutex); 1648 mutex_unlock(&swapon_mutex);
1627 vfree(swap_map); 1649 vfree(swap_map);
1650 vfree(frontswap_map_get(p));
1628 /* Destroy swap account informatin */ 1651 /* Destroy swap account informatin */
1629 swap_cgroup_swapoff(type); 1652 swap_cgroup_swapoff(type);
1630 1653
@@ -1893,24 +1916,20 @@ static unsigned long read_swap_header(struct swap_info_struct *p,
1893 1916
1894 /* 1917 /*
1895 * Find out how many pages are allowed for a single swap 1918 * Find out how many pages are allowed for a single swap
1896 * device. There are three limiting factors: 1) the number 1919 * device. There are two limiting factors: 1) the number
1897 * of bits for the swap offset in the swp_entry_t type, and 1920 * of bits for the swap offset in the swp_entry_t type, and
1898 * 2) the number of bits in the swap pte as defined by the 1921 * 2) the number of bits in the swap pte as defined by the
1899 * the different architectures, and 3) the number of free bits 1922 * different architectures. In order to find the
1900 * in an exceptional radix_tree entry. In order to find the
1901 * largest possible bit mask, a swap entry with swap type 0 1923 * largest possible bit mask, a swap entry with swap type 0
1902 * and swap offset ~0UL is created, encoded to a swap pte, 1924 * and swap offset ~0UL is created, encoded to a swap pte,
1903 * decoded to a swp_entry_t again, and finally the swap 1925 * decoded to a swp_entry_t again, and finally the swap
1904 * offset is extracted. This will mask all the bits from 1926 * offset is extracted. This will mask all the bits from
1905 * the initial ~0UL mask that can't be encoded in either 1927 * the initial ~0UL mask that can't be encoded in either
1906 * the swp_entry_t or the architecture definition of a 1928 * the swp_entry_t or the architecture definition of a
1907 * swap pte. Then the same is done for a radix_tree entry. 1929 * swap pte.
1908 */ 1930 */
1909 maxpages = swp_offset(pte_to_swp_entry( 1931 maxpages = swp_offset(pte_to_swp_entry(
1910 swp_entry_to_pte(swp_entry(0, ~0UL)))); 1932 swp_entry_to_pte(swp_entry(0, ~0UL)))) + 1;
1911 maxpages = swp_offset(radix_to_swp_entry(
1912 swp_to_radix_entry(swp_entry(0, maxpages)))) + 1;
1913
1914 if (maxpages > swap_header->info.last_page) { 1933 if (maxpages > swap_header->info.last_page) {
1915 maxpages = swap_header->info.last_page + 1; 1934 maxpages = swap_header->info.last_page + 1;
1916 /* p->max is an unsigned int: don't overflow it */ 1935 /* p->max is an unsigned int: don't overflow it */
@@ -1988,6 +2007,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
1988 sector_t span; 2007 sector_t span;
1989 unsigned long maxpages; 2008 unsigned long maxpages;
1990 unsigned char *swap_map = NULL; 2009 unsigned char *swap_map = NULL;
2010 unsigned long *frontswap_map = NULL;
1991 struct page *page = NULL; 2011 struct page *page = NULL;
1992 struct inode *inode = NULL; 2012 struct inode *inode = NULL;
1993 2013
@@ -2071,6 +2091,9 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
2071 error = nr_extents; 2091 error = nr_extents;
2072 goto bad_swap; 2092 goto bad_swap;
2073 } 2093 }
2094 /* frontswap enabled? set up bit-per-page map for frontswap */
2095 if (frontswap_enabled)
2096 frontswap_map = vzalloc(maxpages / sizeof(long));
2074 2097
2075 if (p->bdev) { 2098 if (p->bdev) {
2076 if (blk_queue_nonrot(bdev_get_queue(p->bdev))) { 2099 if (blk_queue_nonrot(bdev_get_queue(p->bdev))) {
@@ -2086,14 +2109,15 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
2086 if (swap_flags & SWAP_FLAG_PREFER) 2109 if (swap_flags & SWAP_FLAG_PREFER)
2087 prio = 2110 prio =
2088 (swap_flags & SWAP_FLAG_PRIO_MASK) >> SWAP_FLAG_PRIO_SHIFT; 2111 (swap_flags & SWAP_FLAG_PRIO_MASK) >> SWAP_FLAG_PRIO_SHIFT;
2089 enable_swap_info(p, prio, swap_map); 2112 enable_swap_info(p, prio, swap_map, frontswap_map);
2090 2113
2091 printk(KERN_INFO "Adding %uk swap on %s. " 2114 printk(KERN_INFO "Adding %uk swap on %s. "
2092 "Priority:%d extents:%d across:%lluk %s%s\n", 2115 "Priority:%d extents:%d across:%lluk %s%s%s\n",
2093 p->pages<<(PAGE_SHIFT-10), name, p->prio, 2116 p->pages<<(PAGE_SHIFT-10), name, p->prio,
2094 nr_extents, (unsigned long long)span<<(PAGE_SHIFT-10), 2117 nr_extents, (unsigned long long)span<<(PAGE_SHIFT-10),
2095 (p->flags & SWP_SOLIDSTATE) ? "SS" : "", 2118 (p->flags & SWP_SOLIDSTATE) ? "SS" : "",
2096 (p->flags & SWP_DISCARDABLE) ? "D" : ""); 2119 (p->flags & SWP_DISCARDABLE) ? "D" : "",
2120 (frontswap_map) ? "FS" : "");
2097 2121
2098 mutex_unlock(&swapon_mutex); 2122 mutex_unlock(&swapon_mutex);
2099 atomic_inc(&proc_poll_event); 2123 atomic_inc(&proc_poll_event);