aboutsummaryrefslogtreecommitdiffstats
path: root/mm/swapfile.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2012-06-04 15:28:45 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2012-06-04 15:28:45 -0400
commita3fe778c7895cd847d23c25ad566d83346282a77 (patch)
tree53ba1b736dac08680a08753f7163a1890d7a0d03 /mm/swapfile.c
parent9171c670b4915e30360c2aed530b8377fbbcc852 (diff)
parent165c8aed5bbc6bdddbccae0ba9db451732558ff9 (diff)
Merge tag 'stable/frontswap.v16-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/konrad/mm
Pull frontswap feature from Konrad Rzeszutek Wilk: "Frontswap provides a "transcendent memory" interface for swap pages. In some environments, dramatic performance savings may be obtained because swapped pages are saved in RAM (or a RAM-like device) instead of a swap disk. This tag provides the basic infrastructure along with some changes to the existing backends." Fix up trivial conflict in mm/Makefile due to removal of swap token code changing a line next to the new frontswap entry. This pull request came in before the merge window even opened, it got delayed to after the merge window by me just wanting to make sure it had actual users. Apparently IBM is using this on their embedded side, and Jan Beulich says that it's already made available for SLES and OpenSUSE users. Also acked by Rik van Riel, and Konrad points to other people liking it too. So in it goes. By Dan Magenheimer (4) and Konrad Rzeszutek Wilk (2) via Konrad Rzeszutek Wilk * tag 'stable/frontswap.v16-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/konrad/mm: frontswap: s/put_page/store/g s/get_page/load MAINTAINER: Add myself for the frontswap API mm: frontswap: config and doc files mm: frontswap: core frontswap functionality mm: frontswap: core swap subsystem hooks and headers mm: frontswap: add frontswap header file
Diffstat (limited to 'mm/swapfile.c')
-rw-r--r--mm/swapfile.c54
1 files changed, 41 insertions, 13 deletions
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 457b10baef59..de5bc51c4a66 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -31,6 +31,8 @@
31#include <linux/memcontrol.h> 31#include <linux/memcontrol.h>
32#include <linux/poll.h> 32#include <linux/poll.h>
33#include <linux/oom.h> 33#include <linux/oom.h>
34#include <linux/frontswap.h>
35#include <linux/swapfile.h>
34 36
35#include <asm/pgtable.h> 37#include <asm/pgtable.h>
36#include <asm/tlbflush.h> 38#include <asm/tlbflush.h>
@@ -42,7 +44,7 @@ static bool swap_count_continued(struct swap_info_struct *, pgoff_t,
42static void free_swap_count_continuations(struct swap_info_struct *); 44static void free_swap_count_continuations(struct swap_info_struct *);
43static sector_t map_swap_entry(swp_entry_t, struct block_device**); 45static sector_t map_swap_entry(swp_entry_t, struct block_device**);
44 46
45static DEFINE_SPINLOCK(swap_lock); 47DEFINE_SPINLOCK(swap_lock);
46static unsigned int nr_swapfiles; 48static unsigned int nr_swapfiles;
47long nr_swap_pages; 49long nr_swap_pages;
48long total_swap_pages; 50long total_swap_pages;
@@ -53,9 +55,9 @@ static const char Unused_file[] = "Unused swap file entry ";
53static const char Bad_offset[] = "Bad swap offset entry "; 55static const char Bad_offset[] = "Bad swap offset entry ";
54static const char Unused_offset[] = "Unused swap offset entry "; 56static const char Unused_offset[] = "Unused swap offset entry ";
55 57
56static struct swap_list_t swap_list = {-1, -1}; 58struct swap_list_t swap_list = {-1, -1};
57 59
58static struct swap_info_struct *swap_info[MAX_SWAPFILES]; 60struct swap_info_struct *swap_info[MAX_SWAPFILES];
59 61
60static DEFINE_MUTEX(swapon_mutex); 62static DEFINE_MUTEX(swapon_mutex);
61 63
@@ -556,6 +558,7 @@ static unsigned char swap_entry_free(struct swap_info_struct *p,
556 swap_list.next = p->type; 558 swap_list.next = p->type;
557 nr_swap_pages++; 559 nr_swap_pages++;
558 p->inuse_pages--; 560 p->inuse_pages--;
561 frontswap_invalidate_page(p->type, offset);
559 if ((p->flags & SWP_BLKDEV) && 562 if ((p->flags & SWP_BLKDEV) &&
560 disk->fops->swap_slot_free_notify) 563 disk->fops->swap_slot_free_notify)
561 disk->fops->swap_slot_free_notify(p->bdev, offset); 564 disk->fops->swap_slot_free_notify(p->bdev, offset);
@@ -985,11 +988,12 @@ static int unuse_mm(struct mm_struct *mm,
985} 988}
986 989
987/* 990/*
988 * Scan swap_map from current position to next entry still in use. 991 * Scan swap_map (or frontswap_map if frontswap parameter is true)
992 * from current position to next entry still in use.
989 * Recycle to start on reaching the end, returning 0 when empty. 993 * Recycle to start on reaching the end, returning 0 when empty.
990 */ 994 */
991static unsigned int find_next_to_unuse(struct swap_info_struct *si, 995static unsigned int find_next_to_unuse(struct swap_info_struct *si,
992 unsigned int prev) 996 unsigned int prev, bool frontswap)
993{ 997{
994 unsigned int max = si->max; 998 unsigned int max = si->max;
995 unsigned int i = prev; 999 unsigned int i = prev;
@@ -1015,6 +1019,12 @@ static unsigned int find_next_to_unuse(struct swap_info_struct *si,
1015 prev = 0; 1019 prev = 0;
1016 i = 1; 1020 i = 1;
1017 } 1021 }
1022 if (frontswap) {
1023 if (frontswap_test(si, i))
1024 break;
1025 else
1026 continue;
1027 }
1018 count = si->swap_map[i]; 1028 count = si->swap_map[i];
1019 if (count && swap_count(count) != SWAP_MAP_BAD) 1029 if (count && swap_count(count) != SWAP_MAP_BAD)
1020 break; 1030 break;
@@ -1026,8 +1036,12 @@ static unsigned int find_next_to_unuse(struct swap_info_struct *si,
1026 * We completely avoid races by reading each swap page in advance, 1036 * We completely avoid races by reading each swap page in advance,
1027 * and then search for the process using it. All the necessary 1037 * and then search for the process using it. All the necessary
1028 * page table adjustments can then be made atomically. 1038 * page table adjustments can then be made atomically.
1039 *
1040 * if the boolean frontswap is true, only unuse pages_to_unuse pages;
1041 * pages_to_unuse==0 means all pages; ignored if frontswap is false
1029 */ 1042 */
1030static int try_to_unuse(unsigned int type) 1043int try_to_unuse(unsigned int type, bool frontswap,
1044 unsigned long pages_to_unuse)
1031{ 1045{
1032 struct swap_info_struct *si = swap_info[type]; 1046 struct swap_info_struct *si = swap_info[type];
1033 struct mm_struct *start_mm; 1047 struct mm_struct *start_mm;
@@ -1060,7 +1074,7 @@ static int try_to_unuse(unsigned int type)
1060 * one pass through swap_map is enough, but not necessarily: 1074 * one pass through swap_map is enough, but not necessarily:
1061 * there are races when an instance of an entry might be missed. 1075 * there are races when an instance of an entry might be missed.
1062 */ 1076 */
1063 while ((i = find_next_to_unuse(si, i)) != 0) { 1077 while ((i = find_next_to_unuse(si, i, frontswap)) != 0) {
1064 if (signal_pending(current)) { 1078 if (signal_pending(current)) {
1065 retval = -EINTR; 1079 retval = -EINTR;
1066 break; 1080 break;
@@ -1227,6 +1241,10 @@ static int try_to_unuse(unsigned int type)
1227 * interactive performance. 1241 * interactive performance.
1228 */ 1242 */
1229 cond_resched(); 1243 cond_resched();
1244 if (frontswap && pages_to_unuse > 0) {
1245 if (!--pages_to_unuse)
1246 break;
1247 }
1230 } 1248 }
1231 1249
1232 mmput(start_mm); 1250 mmput(start_mm);
@@ -1486,7 +1504,8 @@ bad_bmap:
1486} 1504}
1487 1505
1488static void enable_swap_info(struct swap_info_struct *p, int prio, 1506static void enable_swap_info(struct swap_info_struct *p, int prio,
1489 unsigned char *swap_map) 1507 unsigned char *swap_map,
1508 unsigned long *frontswap_map)
1490{ 1509{
1491 int i, prev; 1510 int i, prev;
1492 1511
@@ -1496,6 +1515,7 @@ static void enable_swap_info(struct swap_info_struct *p, int prio,
1496 else 1515 else
1497 p->prio = --least_priority; 1516 p->prio = --least_priority;
1498 p->swap_map = swap_map; 1517 p->swap_map = swap_map;
1518 frontswap_map_set(p, frontswap_map);
1499 p->flags |= SWP_WRITEOK; 1519 p->flags |= SWP_WRITEOK;
1500 nr_swap_pages += p->pages; 1520 nr_swap_pages += p->pages;
1501 total_swap_pages += p->pages; 1521 total_swap_pages += p->pages;
@@ -1512,6 +1532,7 @@ static void enable_swap_info(struct swap_info_struct *p, int prio,
1512 swap_list.head = swap_list.next = p->type; 1532 swap_list.head = swap_list.next = p->type;
1513 else 1533 else
1514 swap_info[prev]->next = p->type; 1534 swap_info[prev]->next = p->type;
1535 frontswap_init(p->type);
1515 spin_unlock(&swap_lock); 1536 spin_unlock(&swap_lock);
1516} 1537}
1517 1538
@@ -1585,7 +1606,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
1585 spin_unlock(&swap_lock); 1606 spin_unlock(&swap_lock);
1586 1607
1587 oom_score_adj = test_set_oom_score_adj(OOM_SCORE_ADJ_MAX); 1608 oom_score_adj = test_set_oom_score_adj(OOM_SCORE_ADJ_MAX);
1588 err = try_to_unuse(type); 1609 err = try_to_unuse(type, false, 0); /* force all pages to be unused */
1589 compare_swap_oom_score_adj(OOM_SCORE_ADJ_MAX, oom_score_adj); 1610 compare_swap_oom_score_adj(OOM_SCORE_ADJ_MAX, oom_score_adj);
1590 1611
1591 if (err) { 1612 if (err) {
@@ -1596,7 +1617,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
1596 * sys_swapoff for this swap_info_struct at this point. 1617 * sys_swapoff for this swap_info_struct at this point.
1597 */ 1618 */
1598 /* re-insert swap space back into swap_list */ 1619 /* re-insert swap space back into swap_list */
1599 enable_swap_info(p, p->prio, p->swap_map); 1620 enable_swap_info(p, p->prio, p->swap_map, frontswap_map_get(p));
1600 goto out_dput; 1621 goto out_dput;
1601 } 1622 }
1602 1623
@@ -1622,9 +1643,11 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
1622 swap_map = p->swap_map; 1643 swap_map = p->swap_map;
1623 p->swap_map = NULL; 1644 p->swap_map = NULL;
1624 p->flags = 0; 1645 p->flags = 0;
1646 frontswap_invalidate_area(type);
1625 spin_unlock(&swap_lock); 1647 spin_unlock(&swap_lock);
1626 mutex_unlock(&swapon_mutex); 1648 mutex_unlock(&swapon_mutex);
1627 vfree(swap_map); 1649 vfree(swap_map);
1650 vfree(frontswap_map_get(p));
1628 /* Destroy swap account informatin */ 1651 /* Destroy swap account informatin */
1629 swap_cgroup_swapoff(type); 1652 swap_cgroup_swapoff(type);
1630 1653
@@ -1988,6 +2011,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
1988 sector_t span; 2011 sector_t span;
1989 unsigned long maxpages; 2012 unsigned long maxpages;
1990 unsigned char *swap_map = NULL; 2013 unsigned char *swap_map = NULL;
2014 unsigned long *frontswap_map = NULL;
1991 struct page *page = NULL; 2015 struct page *page = NULL;
1992 struct inode *inode = NULL; 2016 struct inode *inode = NULL;
1993 2017
@@ -2071,6 +2095,9 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
2071 error = nr_extents; 2095 error = nr_extents;
2072 goto bad_swap; 2096 goto bad_swap;
2073 } 2097 }
2098 /* frontswap enabled? set up bit-per-page map for frontswap */
2099 if (frontswap_enabled)
2100 frontswap_map = vzalloc(maxpages / sizeof(long));
2074 2101
2075 if (p->bdev) { 2102 if (p->bdev) {
2076 if (blk_queue_nonrot(bdev_get_queue(p->bdev))) { 2103 if (blk_queue_nonrot(bdev_get_queue(p->bdev))) {
@@ -2086,14 +2113,15 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
2086 if (swap_flags & SWAP_FLAG_PREFER) 2113 if (swap_flags & SWAP_FLAG_PREFER)
2087 prio = 2114 prio =
2088 (swap_flags & SWAP_FLAG_PRIO_MASK) >> SWAP_FLAG_PRIO_SHIFT; 2115 (swap_flags & SWAP_FLAG_PRIO_MASK) >> SWAP_FLAG_PRIO_SHIFT;
2089 enable_swap_info(p, prio, swap_map); 2116 enable_swap_info(p, prio, swap_map, frontswap_map);
2090 2117
2091 printk(KERN_INFO "Adding %uk swap on %s. " 2118 printk(KERN_INFO "Adding %uk swap on %s. "
2092 "Priority:%d extents:%d across:%lluk %s%s\n", 2119 "Priority:%d extents:%d across:%lluk %s%s%s\n",
2093 p->pages<<(PAGE_SHIFT-10), name, p->prio, 2120 p->pages<<(PAGE_SHIFT-10), name, p->prio,
2094 nr_extents, (unsigned long long)span<<(PAGE_SHIFT-10), 2121 nr_extents, (unsigned long long)span<<(PAGE_SHIFT-10),
2095 (p->flags & SWP_SOLIDSTATE) ? "SS" : "", 2122 (p->flags & SWP_SOLIDSTATE) ? "SS" : "",
2096 (p->flags & SWP_DISCARDABLE) ? "D" : ""); 2123 (p->flags & SWP_DISCARDABLE) ? "D" : "",
2124 (frontswap_map) ? "FS" : "");
2097 2125
2098 mutex_unlock(&swapon_mutex); 2126 mutex_unlock(&swapon_mutex);
2099 atomic_inc(&proc_poll_event); 2127 atomic_inc(&proc_poll_event);