diff options
Diffstat (limited to 'mm/swapfile.c')
-rw-r--r-- | mm/swapfile.c | 66 |
1 files changed, 45 insertions, 21 deletions
diff --git a/mm/swapfile.c b/mm/swapfile.c index 457b10baef59..71373d03fcee 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c | |||
@@ -31,6 +31,8 @@ | |||
31 | #include <linux/memcontrol.h> | 31 | #include <linux/memcontrol.h> |
32 | #include <linux/poll.h> | 32 | #include <linux/poll.h> |
33 | #include <linux/oom.h> | 33 | #include <linux/oom.h> |
34 | #include <linux/frontswap.h> | ||
35 | #include <linux/swapfile.h> | ||
34 | 36 | ||
35 | #include <asm/pgtable.h> | 37 | #include <asm/pgtable.h> |
36 | #include <asm/tlbflush.h> | 38 | #include <asm/tlbflush.h> |
@@ -42,7 +44,7 @@ static bool swap_count_continued(struct swap_info_struct *, pgoff_t, | |||
42 | static void free_swap_count_continuations(struct swap_info_struct *); | 44 | static void free_swap_count_continuations(struct swap_info_struct *); |
43 | static sector_t map_swap_entry(swp_entry_t, struct block_device**); | 45 | static sector_t map_swap_entry(swp_entry_t, struct block_device**); |
44 | 46 | ||
45 | static DEFINE_SPINLOCK(swap_lock); | 47 | DEFINE_SPINLOCK(swap_lock); |
46 | static unsigned int nr_swapfiles; | 48 | static unsigned int nr_swapfiles; |
47 | long nr_swap_pages; | 49 | long nr_swap_pages; |
48 | long total_swap_pages; | 50 | long total_swap_pages; |
@@ -53,9 +55,9 @@ static const char Unused_file[] = "Unused swap file entry "; | |||
53 | static const char Bad_offset[] = "Bad swap offset entry "; | 55 | static const char Bad_offset[] = "Bad swap offset entry "; |
54 | static const char Unused_offset[] = "Unused swap offset entry "; | 56 | static const char Unused_offset[] = "Unused swap offset entry "; |
55 | 57 | ||
56 | static struct swap_list_t swap_list = {-1, -1}; | 58 | struct swap_list_t swap_list = {-1, -1}; |
57 | 59 | ||
58 | static struct swap_info_struct *swap_info[MAX_SWAPFILES]; | 60 | struct swap_info_struct *swap_info[MAX_SWAPFILES]; |
59 | 61 | ||
60 | static DEFINE_MUTEX(swapon_mutex); | 62 | static DEFINE_MUTEX(swapon_mutex); |
61 | 63 | ||
@@ -556,6 +558,7 @@ static unsigned char swap_entry_free(struct swap_info_struct *p, | |||
556 | swap_list.next = p->type; | 558 | swap_list.next = p->type; |
557 | nr_swap_pages++; | 559 | nr_swap_pages++; |
558 | p->inuse_pages--; | 560 | p->inuse_pages--; |
561 | frontswap_invalidate_page(p->type, offset); | ||
559 | if ((p->flags & SWP_BLKDEV) && | 562 | if ((p->flags & SWP_BLKDEV) && |
560 | disk->fops->swap_slot_free_notify) | 563 | disk->fops->swap_slot_free_notify) |
561 | disk->fops->swap_slot_free_notify(p->bdev, offset); | 564 | disk->fops->swap_slot_free_notify(p->bdev, offset); |
@@ -985,11 +988,12 @@ static int unuse_mm(struct mm_struct *mm, | |||
985 | } | 988 | } |
986 | 989 | ||
987 | /* | 990 | /* |
988 | * Scan swap_map from current position to next entry still in use. | 991 | * Scan swap_map (or frontswap_map if frontswap parameter is true) |
992 | * from current position to next entry still in use. | ||
989 | * Recycle to start on reaching the end, returning 0 when empty. | 993 | * Recycle to start on reaching the end, returning 0 when empty. |
990 | */ | 994 | */ |
991 | static unsigned int find_next_to_unuse(struct swap_info_struct *si, | 995 | static unsigned int find_next_to_unuse(struct swap_info_struct *si, |
992 | unsigned int prev) | 996 | unsigned int prev, bool frontswap) |
993 | { | 997 | { |
994 | unsigned int max = si->max; | 998 | unsigned int max = si->max; |
995 | unsigned int i = prev; | 999 | unsigned int i = prev; |
@@ -1015,6 +1019,12 @@ static unsigned int find_next_to_unuse(struct swap_info_struct *si, | |||
1015 | prev = 0; | 1019 | prev = 0; |
1016 | i = 1; | 1020 | i = 1; |
1017 | } | 1021 | } |
1022 | if (frontswap) { | ||
1023 | if (frontswap_test(si, i)) | ||
1024 | break; | ||
1025 | else | ||
1026 | continue; | ||
1027 | } | ||
1018 | count = si->swap_map[i]; | 1028 | count = si->swap_map[i]; |
1019 | if (count && swap_count(count) != SWAP_MAP_BAD) | 1029 | if (count && swap_count(count) != SWAP_MAP_BAD) |
1020 | break; | 1030 | break; |
@@ -1026,8 +1036,12 @@ static unsigned int find_next_to_unuse(struct swap_info_struct *si, | |||
1026 | * We completely avoid races by reading each swap page in advance, | 1036 | * We completely avoid races by reading each swap page in advance, |
1027 | * and then search for the process using it. All the necessary | 1037 | * and then search for the process using it. All the necessary |
1028 | * page table adjustments can then be made atomically. | 1038 | * page table adjustments can then be made atomically. |
1039 | * | ||
1040 | * If the boolean frontswap is true, unuse at most pages_to_unuse pages; | ||
1041 | * pages_to_unuse == 0 means all pages. Ignored if frontswap is false. | ||
1029 | */ | 1042 | */ |
1030 | static int try_to_unuse(unsigned int type) | 1043 | int try_to_unuse(unsigned int type, bool frontswap, |
1044 | unsigned long pages_to_unuse) | ||
1031 | { | 1045 | { |
1032 | struct swap_info_struct *si = swap_info[type]; | 1046 | struct swap_info_struct *si = swap_info[type]; |
1033 | struct mm_struct *start_mm; | 1047 | struct mm_struct *start_mm; |
@@ -1060,7 +1074,7 @@ static int try_to_unuse(unsigned int type) | |||
1060 | * one pass through swap_map is enough, but not necessarily: | 1074 | * one pass through swap_map is enough, but not necessarily: |
1061 | * there are races when an instance of an entry might be missed. | 1075 | * there are races when an instance of an entry might be missed. |
1062 | */ | 1076 | */ |
1063 | while ((i = find_next_to_unuse(si, i)) != 0) { | 1077 | while ((i = find_next_to_unuse(si, i, frontswap)) != 0) { |
1064 | if (signal_pending(current)) { | 1078 | if (signal_pending(current)) { |
1065 | retval = -EINTR; | 1079 | retval = -EINTR; |
1066 | break; | 1080 | break; |
@@ -1227,6 +1241,10 @@ static int try_to_unuse(unsigned int type) | |||
1227 | * interactive performance. | 1241 | * interactive performance. |
1228 | */ | 1242 | */ |
1229 | cond_resched(); | 1243 | cond_resched(); |
1244 | if (frontswap && pages_to_unuse > 0) { | ||
1245 | if (!--pages_to_unuse) | ||
1246 | break; | ||
1247 | } | ||
1230 | } | 1248 | } |
1231 | 1249 | ||
1232 | mmput(start_mm); | 1250 | mmput(start_mm); |
@@ -1486,7 +1504,8 @@ bad_bmap: | |||
1486 | } | 1504 | } |
1487 | 1505 | ||
1488 | static void enable_swap_info(struct swap_info_struct *p, int prio, | 1506 | static void enable_swap_info(struct swap_info_struct *p, int prio, |
1489 | unsigned char *swap_map) | 1507 | unsigned char *swap_map, |
1508 | unsigned long *frontswap_map) | ||
1490 | { | 1509 | { |
1491 | int i, prev; | 1510 | int i, prev; |
1492 | 1511 | ||
@@ -1496,6 +1515,7 @@ static void enable_swap_info(struct swap_info_struct *p, int prio, | |||
1496 | else | 1515 | else |
1497 | p->prio = --least_priority; | 1516 | p->prio = --least_priority; |
1498 | p->swap_map = swap_map; | 1517 | p->swap_map = swap_map; |
1518 | frontswap_map_set(p, frontswap_map); | ||
1499 | p->flags |= SWP_WRITEOK; | 1519 | p->flags |= SWP_WRITEOK; |
1500 | nr_swap_pages += p->pages; | 1520 | nr_swap_pages += p->pages; |
1501 | total_swap_pages += p->pages; | 1521 | total_swap_pages += p->pages; |
@@ -1512,6 +1532,7 @@ static void enable_swap_info(struct swap_info_struct *p, int prio, | |||
1512 | swap_list.head = swap_list.next = p->type; | 1532 | swap_list.head = swap_list.next = p->type; |
1513 | else | 1533 | else |
1514 | swap_info[prev]->next = p->type; | 1534 | swap_info[prev]->next = p->type; |
1535 | frontswap_init(p->type); | ||
1515 | spin_unlock(&swap_lock); | 1536 | spin_unlock(&swap_lock); |
1516 | } | 1537 | } |
1517 | 1538 | ||
@@ -1585,7 +1606,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) | |||
1585 | spin_unlock(&swap_lock); | 1606 | spin_unlock(&swap_lock); |
1586 | 1607 | ||
1587 | oom_score_adj = test_set_oom_score_adj(OOM_SCORE_ADJ_MAX); | 1608 | oom_score_adj = test_set_oom_score_adj(OOM_SCORE_ADJ_MAX); |
1588 | err = try_to_unuse(type); | 1609 | err = try_to_unuse(type, false, 0); /* force all pages to be unused */ |
1589 | compare_swap_oom_score_adj(OOM_SCORE_ADJ_MAX, oom_score_adj); | 1610 | compare_swap_oom_score_adj(OOM_SCORE_ADJ_MAX, oom_score_adj); |
1590 | 1611 | ||
1591 | if (err) { | 1612 | if (err) { |
@@ -1596,7 +1617,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) | |||
1596 | * sys_swapoff for this swap_info_struct at this point. | 1617 | * sys_swapoff for this swap_info_struct at this point. |
1597 | */ | 1618 | */ |
1598 | /* re-insert swap space back into swap_list */ | 1619 | /* re-insert swap space back into swap_list */ |
1599 | enable_swap_info(p, p->prio, p->swap_map); | 1620 | enable_swap_info(p, p->prio, p->swap_map, frontswap_map_get(p)); |
1600 | goto out_dput; | 1621 | goto out_dput; |
1601 | } | 1622 | } |
1602 | 1623 | ||
@@ -1622,9 +1643,11 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) | |||
1622 | swap_map = p->swap_map; | 1643 | swap_map = p->swap_map; |
1623 | p->swap_map = NULL; | 1644 | p->swap_map = NULL; |
1624 | p->flags = 0; | 1645 | p->flags = 0; |
1646 | frontswap_invalidate_area(type); | ||
1625 | spin_unlock(&swap_lock); | 1647 | spin_unlock(&swap_lock); |
1626 | mutex_unlock(&swapon_mutex); | 1648 | mutex_unlock(&swapon_mutex); |
1627 | vfree(swap_map); | 1649 | vfree(swap_map); |
1650 | vfree(frontswap_map_get(p)); | ||
1628 | /* Destroy swap account information */ | 1651 | /* Destroy swap account information */ |
1629 | swap_cgroup_swapoff(type); | 1652 | swap_cgroup_swapoff(type); |
1630 | 1653 | ||
@@ -1893,24 +1916,20 @@ static unsigned long read_swap_header(struct swap_info_struct *p, | |||
1893 | 1916 | ||
1894 | /* | 1917 | /* |
1895 | * Find out how many pages are allowed for a single swap | 1918 | * Find out how many pages are allowed for a single swap |
1896 | * device. There are three limiting factors: 1) the number | 1919 | * device. There are two limiting factors: 1) the number |
1897 | * of bits for the swap offset in the swp_entry_t type, and | 1920 | * of bits for the swap offset in the swp_entry_t type, and |
1898 | * 2) the number of bits in the swap pte as defined by the | 1921 | * 2) the number of bits in the swap pte as defined by the |
1899 | * the different architectures, and 3) the number of free bits | 1922 | * different architectures. In order to find the |
1900 | * in an exceptional radix_tree entry. In order to find the | ||
1901 | * largest possible bit mask, a swap entry with swap type 0 | 1923 | * largest possible bit mask, a swap entry with swap type 0 |
1902 | * and swap offset ~0UL is created, encoded to a swap pte, | 1924 | * and swap offset ~0UL is created, encoded to a swap pte, |
1903 | * decoded to a swp_entry_t again, and finally the swap | 1925 | * decoded to a swp_entry_t again, and finally the swap |
1904 | * offset is extracted. This will mask all the bits from | 1926 | * offset is extracted. This will mask all the bits from |
1905 | * the initial ~0UL mask that can't be encoded in either | 1927 | * the initial ~0UL mask that can't be encoded in either |
1906 | * the swp_entry_t or the architecture definition of a | 1928 | * the swp_entry_t or the architecture definition of a |
1907 | * swap pte. Then the same is done for a radix_tree entry. | 1929 | * swap pte. |
1908 | */ | 1930 | */ |
1909 | maxpages = swp_offset(pte_to_swp_entry( | 1931 | maxpages = swp_offset(pte_to_swp_entry( |
1910 | swp_entry_to_pte(swp_entry(0, ~0UL)))); | 1932 | swp_entry_to_pte(swp_entry(0, ~0UL)))) + 1; |
1911 | maxpages = swp_offset(radix_to_swp_entry( | ||
1912 | swp_to_radix_entry(swp_entry(0, maxpages)))) + 1; | ||
1913 | |||
1914 | if (maxpages > swap_header->info.last_page) { | 1933 | if (maxpages > swap_header->info.last_page) { |
1915 | maxpages = swap_header->info.last_page + 1; | 1934 | maxpages = swap_header->info.last_page + 1; |
1916 | /* p->max is an unsigned int: don't overflow it */ | 1935 | /* p->max is an unsigned int: don't overflow it */ |
@@ -1988,6 +2007,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) | |||
1988 | sector_t span; | 2007 | sector_t span; |
1989 | unsigned long maxpages; | 2008 | unsigned long maxpages; |
1990 | unsigned char *swap_map = NULL; | 2009 | unsigned char *swap_map = NULL; |
2010 | unsigned long *frontswap_map = NULL; | ||
1991 | struct page *page = NULL; | 2011 | struct page *page = NULL; |
1992 | struct inode *inode = NULL; | 2012 | struct inode *inode = NULL; |
1993 | 2013 | ||
@@ -2071,6 +2091,9 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) | |||
2071 | error = nr_extents; | 2091 | error = nr_extents; |
2072 | goto bad_swap; | 2092 | goto bad_swap; |
2073 | } | 2093 | } |
2094 | /* frontswap enabled? set up bit-per-page map for frontswap */ | ||
2095 | if (frontswap_enabled) | ||
2096 | frontswap_map = vzalloc(maxpages / sizeof(long)); | ||
2074 | 2097 | ||
2075 | if (p->bdev) { | 2098 | if (p->bdev) { |
2076 | if (blk_queue_nonrot(bdev_get_queue(p->bdev))) { | 2099 | if (blk_queue_nonrot(bdev_get_queue(p->bdev))) { |
@@ -2086,14 +2109,15 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) | |||
2086 | if (swap_flags & SWAP_FLAG_PREFER) | 2109 | if (swap_flags & SWAP_FLAG_PREFER) |
2087 | prio = | 2110 | prio = |
2088 | (swap_flags & SWAP_FLAG_PRIO_MASK) >> SWAP_FLAG_PRIO_SHIFT; | 2111 | (swap_flags & SWAP_FLAG_PRIO_MASK) >> SWAP_FLAG_PRIO_SHIFT; |
2089 | enable_swap_info(p, prio, swap_map); | 2112 | enable_swap_info(p, prio, swap_map, frontswap_map); |
2090 | 2113 | ||
2091 | printk(KERN_INFO "Adding %uk swap on %s. " | 2114 | printk(KERN_INFO "Adding %uk swap on %s. " |
2092 | "Priority:%d extents:%d across:%lluk %s%s\n", | 2115 | "Priority:%d extents:%d across:%lluk %s%s%s\n", |
2093 | p->pages<<(PAGE_SHIFT-10), name, p->prio, | 2116 | p->pages<<(PAGE_SHIFT-10), name, p->prio, |
2094 | nr_extents, (unsigned long long)span<<(PAGE_SHIFT-10), | 2117 | nr_extents, (unsigned long long)span<<(PAGE_SHIFT-10), |
2095 | (p->flags & SWP_SOLIDSTATE) ? "SS" : "", | 2118 | (p->flags & SWP_SOLIDSTATE) ? "SS" : "", |
2096 | (p->flags & SWP_DISCARDABLE) ? "D" : ""); | 2119 | (p->flags & SWP_DISCARDABLE) ? "D" : "", |
2120 | (frontswap_map) ? "FS" : ""); | ||
2097 | 2121 | ||
2098 | mutex_unlock(&swapon_mutex); | 2122 | mutex_unlock(&swapon_mutex); |
2099 | atomic_inc(&proc_poll_event); | 2123 | atomic_inc(&proc_poll_event); |