aboutsummaryrefslogtreecommitdiffstats
path: root/mm/swapfile.c
diff options
context:
space:
mode:
author: Dan Magenheimer <dan.magenheimer@oracle.com> 2012-04-09 19:08:06 -0400
committer: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> 2012-05-15 11:33:58 -0400
commit: 38b5faf4b178d5279b1fca5d7dadc68881342660 (patch)
tree: 1a5d10ff709abcefa96034d3f89739be8d7ef0c2 /mm/swapfile.c
parent: c3ba9698152b17fdc2c7cd0f7cbeb571e3367e9d (diff)
mm: frontswap: core swap subsystem hooks and headers
This patch, 2 of 4, contains the changes to the core swap subsystem. This includes: (1) makes available core swap data structures (swap_lock, swap_list and swap_info) that are needed by frontswap.c, but we don't need to expose them to the dozens of files that include swap.h, so we create a new swapfile.h just to extern-ify these and modify their declarations to non-static; (2) adds frontswap-related elements to swap_info_struct. Frontswap_map points to vzalloc'ed one-bit-per-swap-page metadata that indicates whether the swap page is in frontswap or in the device, and frontswap_pages counts how many pages are in frontswap; (3) adds hooks in the swap subsystem and extends try_to_unuse so that frontswap_shrink can do a "partial swapoff". Note that a failed frontswap_map allocation is safe... failure is noted by lack of "FS" in the subsequent printk.
---
[v14: rebase to 3.4-rc2]
[v10: no change]
[v9: akpm@linux-foundation.org: mark some statics __read_mostly]
[v9: akpm@linux-foundation.org: add clarifying comments]
[v9: akpm@linux-foundation.org: no need to loop repeating try_to_unuse]
[v9: error27@gmail.com: remove superfluous check for NULL]
[v8: rebase to 3.0-rc4]
[v8: kamezawa.hiroyu@jp.fujitsu.com: change counter to atomic_t to avoid races]
[v8: kamezawa.hiroyu@jp.fujitsu.com: comment to clarify informational counters]
[v7: rebase to 3.0-rc3]
[v7: JBeulich@novell.com: add new swap struct elements only if config'd]
[v6: rebase to 3.0-rc1]
[v6: lliubbo@gmail.com: fix null pointer deref if vzalloc fails]
[v6: konrad.wilk@oracle.com: various checks and code clarifications/comments]
[v5: no change from v4]
[v4: rebase to 2.6.39]
Signed-off-by: Dan Magenheimer <dan.magenheimer@oracle.com>
Reviewed-by: Kamezawa Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: Jan Beulich <JBeulich@novell.com>
Acked-by: Seth Jennings <sjenning@linux.vnet.ibm.com>
Cc: Jeremy Fitzhardinge <jeremy@goop.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Nitin Gupta <ngupta@vflare.org>
Cc: Matthew Wilcox <matthew@wil.cx>
Cc: Chris Mason <chris.mason@oracle.com>
Cc: Rik Riel <riel@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
[v11: Rebased, fixed mm/swapfile.c context change]
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Diffstat (limited to 'mm/swapfile.c')
-rw-r--r-- mm/swapfile.c 54
1 files changed, 41 insertions, 13 deletions
diff --git a/mm/swapfile.c b/mm/swapfile.c
index fafc26d1b1dc..9c7be87175c5 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -31,6 +31,8 @@
31#include <linux/memcontrol.h> 31#include <linux/memcontrol.h>
32#include <linux/poll.h> 32#include <linux/poll.h>
33#include <linux/oom.h> 33#include <linux/oom.h>
34#include <linux/frontswap.h>
35#include <linux/swapfile.h>
34 36
35#include <asm/pgtable.h> 37#include <asm/pgtable.h>
36#include <asm/tlbflush.h> 38#include <asm/tlbflush.h>
@@ -42,7 +44,7 @@ static bool swap_count_continued(struct swap_info_struct *, pgoff_t,
42static void free_swap_count_continuations(struct swap_info_struct *); 44static void free_swap_count_continuations(struct swap_info_struct *);
43static sector_t map_swap_entry(swp_entry_t, struct block_device**); 45static sector_t map_swap_entry(swp_entry_t, struct block_device**);
44 46
45static DEFINE_SPINLOCK(swap_lock); 47DEFINE_SPINLOCK(swap_lock);
46static unsigned int nr_swapfiles; 48static unsigned int nr_swapfiles;
47long nr_swap_pages; 49long nr_swap_pages;
48long total_swap_pages; 50long total_swap_pages;
@@ -53,9 +55,9 @@ static const char Unused_file[] = "Unused swap file entry ";
53static const char Bad_offset[] = "Bad swap offset entry "; 55static const char Bad_offset[] = "Bad swap offset entry ";
54static const char Unused_offset[] = "Unused swap offset entry "; 56static const char Unused_offset[] = "Unused swap offset entry ";
55 57
56static struct swap_list_t swap_list = {-1, -1}; 58struct swap_list_t swap_list = {-1, -1};
57 59
58static struct swap_info_struct *swap_info[MAX_SWAPFILES]; 60struct swap_info_struct *swap_info[MAX_SWAPFILES];
59 61
60static DEFINE_MUTEX(swapon_mutex); 62static DEFINE_MUTEX(swapon_mutex);
61 63
@@ -556,6 +558,7 @@ static unsigned char swap_entry_free(struct swap_info_struct *p,
556 swap_list.next = p->type; 558 swap_list.next = p->type;
557 nr_swap_pages++; 559 nr_swap_pages++;
558 p->inuse_pages--; 560 p->inuse_pages--;
561 frontswap_invalidate_page(p->type, offset);
559 if ((p->flags & SWP_BLKDEV) && 562 if ((p->flags & SWP_BLKDEV) &&
560 disk->fops->swap_slot_free_notify) 563 disk->fops->swap_slot_free_notify)
561 disk->fops->swap_slot_free_notify(p->bdev, offset); 564 disk->fops->swap_slot_free_notify(p->bdev, offset);
@@ -1016,11 +1019,12 @@ static int unuse_mm(struct mm_struct *mm,
1016} 1019}
1017 1020
1018/* 1021/*
1019 * Scan swap_map from current position to next entry still in use. 1022 * Scan swap_map (or frontswap_map if frontswap parameter is true)
1023 * from current position to next entry still in use.
1020 * Recycle to start on reaching the end, returning 0 when empty. 1024 * Recycle to start on reaching the end, returning 0 when empty.
1021 */ 1025 */
1022static unsigned int find_next_to_unuse(struct swap_info_struct *si, 1026static unsigned int find_next_to_unuse(struct swap_info_struct *si,
1023 unsigned int prev) 1027 unsigned int prev, bool frontswap)
1024{ 1028{
1025 unsigned int max = si->max; 1029 unsigned int max = si->max;
1026 unsigned int i = prev; 1030 unsigned int i = prev;
@@ -1046,6 +1050,12 @@ static unsigned int find_next_to_unuse(struct swap_info_struct *si,
1046 prev = 0; 1050 prev = 0;
1047 i = 1; 1051 i = 1;
1048 } 1052 }
1053 if (frontswap) {
1054 if (frontswap_test(si, i))
1055 break;
1056 else
1057 continue;
1058 }
1049 count = si->swap_map[i]; 1059 count = si->swap_map[i];
1050 if (count && swap_count(count) != SWAP_MAP_BAD) 1060 if (count && swap_count(count) != SWAP_MAP_BAD)
1051 break; 1061 break;
@@ -1057,8 +1067,12 @@ static unsigned int find_next_to_unuse(struct swap_info_struct *si,
1057 * We completely avoid races by reading each swap page in advance, 1067 * We completely avoid races by reading each swap page in advance,
1058 * and then search for the process using it. All the necessary 1068 * and then search for the process using it. All the necessary
1059 * page table adjustments can then be made atomically. 1069 * page table adjustments can then be made atomically.
1070 *
1071 * if the boolean frontswap is true, only unuse pages_to_unuse pages;
1072 * pages_to_unuse==0 means all pages; ignored if frontswap is false
1060 */ 1073 */
1061static int try_to_unuse(unsigned int type) 1074int try_to_unuse(unsigned int type, bool frontswap,
1075 unsigned long pages_to_unuse)
1062{ 1076{
1063 struct swap_info_struct *si = swap_info[type]; 1077 struct swap_info_struct *si = swap_info[type];
1064 struct mm_struct *start_mm; 1078 struct mm_struct *start_mm;
@@ -1091,7 +1105,7 @@ static int try_to_unuse(unsigned int type)
1091 * one pass through swap_map is enough, but not necessarily: 1105 * one pass through swap_map is enough, but not necessarily:
1092 * there are races when an instance of an entry might be missed. 1106 * there are races when an instance of an entry might be missed.
1093 */ 1107 */
1094 while ((i = find_next_to_unuse(si, i)) != 0) { 1108 while ((i = find_next_to_unuse(si, i, frontswap)) != 0) {
1095 if (signal_pending(current)) { 1109 if (signal_pending(current)) {
1096 retval = -EINTR; 1110 retval = -EINTR;
1097 break; 1111 break;
@@ -1258,6 +1272,10 @@ static int try_to_unuse(unsigned int type)
1258 * interactive performance. 1272 * interactive performance.
1259 */ 1273 */
1260 cond_resched(); 1274 cond_resched();
1275 if (frontswap && pages_to_unuse > 0) {
1276 if (!--pages_to_unuse)
1277 break;
1278 }
1261 } 1279 }
1262 1280
1263 mmput(start_mm); 1281 mmput(start_mm);
@@ -1517,7 +1535,8 @@ bad_bmap:
1517} 1535}
1518 1536
1519static void enable_swap_info(struct swap_info_struct *p, int prio, 1537static void enable_swap_info(struct swap_info_struct *p, int prio,
1520 unsigned char *swap_map) 1538 unsigned char *swap_map,
1539 unsigned long *frontswap_map)
1521{ 1540{
1522 int i, prev; 1541 int i, prev;
1523 1542
@@ -1527,6 +1546,7 @@ static void enable_swap_info(struct swap_info_struct *p, int prio,
1527 else 1546 else
1528 p->prio = --least_priority; 1547 p->prio = --least_priority;
1529 p->swap_map = swap_map; 1548 p->swap_map = swap_map;
1549 frontswap_map_set(p, frontswap_map);
1530 p->flags |= SWP_WRITEOK; 1550 p->flags |= SWP_WRITEOK;
1531 nr_swap_pages += p->pages; 1551 nr_swap_pages += p->pages;
1532 total_swap_pages += p->pages; 1552 total_swap_pages += p->pages;
@@ -1543,6 +1563,7 @@ static void enable_swap_info(struct swap_info_struct *p, int prio,
1543 swap_list.head = swap_list.next = p->type; 1563 swap_list.head = swap_list.next = p->type;
1544 else 1564 else
1545 swap_info[prev]->next = p->type; 1565 swap_info[prev]->next = p->type;
1566 frontswap_init(p->type);
1546 spin_unlock(&swap_lock); 1567 spin_unlock(&swap_lock);
1547} 1568}
1548 1569
@@ -1616,7 +1637,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
1616 spin_unlock(&swap_lock); 1637 spin_unlock(&swap_lock);
1617 1638
1618 oom_score_adj = test_set_oom_score_adj(OOM_SCORE_ADJ_MAX); 1639 oom_score_adj = test_set_oom_score_adj(OOM_SCORE_ADJ_MAX);
1619 err = try_to_unuse(type); 1640 err = try_to_unuse(type, false, 0); /* force all pages to be unused */
1620 compare_swap_oom_score_adj(OOM_SCORE_ADJ_MAX, oom_score_adj); 1641 compare_swap_oom_score_adj(OOM_SCORE_ADJ_MAX, oom_score_adj);
1621 1642
1622 if (err) { 1643 if (err) {
@@ -1627,7 +1648,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
1627 * sys_swapoff for this swap_info_struct at this point. 1648 * sys_swapoff for this swap_info_struct at this point.
1628 */ 1649 */
1629 /* re-insert swap space back into swap_list */ 1650 /* re-insert swap space back into swap_list */
1630 enable_swap_info(p, p->prio, p->swap_map); 1651 enable_swap_info(p, p->prio, p->swap_map, frontswap_map_get(p));
1631 goto out_dput; 1652 goto out_dput;
1632 } 1653 }
1633 1654
@@ -1653,9 +1674,11 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
1653 swap_map = p->swap_map; 1674 swap_map = p->swap_map;
1654 p->swap_map = NULL; 1675 p->swap_map = NULL;
1655 p->flags = 0; 1676 p->flags = 0;
1677 frontswap_invalidate_area(type);
1656 spin_unlock(&swap_lock); 1678 spin_unlock(&swap_lock);
1657 mutex_unlock(&swapon_mutex); 1679 mutex_unlock(&swapon_mutex);
1658 vfree(swap_map); 1680 vfree(swap_map);
1681 vfree(frontswap_map_get(p));
1659 /* Destroy swap account informatin */ 1682 /* Destroy swap account informatin */
1660 swap_cgroup_swapoff(type); 1683 swap_cgroup_swapoff(type);
1661 1684
@@ -2019,6 +2042,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
2019 sector_t span; 2042 sector_t span;
2020 unsigned long maxpages; 2043 unsigned long maxpages;
2021 unsigned char *swap_map = NULL; 2044 unsigned char *swap_map = NULL;
2045 unsigned long *frontswap_map = NULL;
2022 struct page *page = NULL; 2046 struct page *page = NULL;
2023 struct inode *inode = NULL; 2047 struct inode *inode = NULL;
2024 2048
@@ -2102,6 +2126,9 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
2102 error = nr_extents; 2126 error = nr_extents;
2103 goto bad_swap; 2127 goto bad_swap;
2104 } 2128 }
2129 /* frontswap enabled? set up bit-per-page map for frontswap */
2130 if (frontswap_enabled)
2131 frontswap_map = vzalloc(maxpages / sizeof(long));
2105 2132
2106 if (p->bdev) { 2133 if (p->bdev) {
2107 if (blk_queue_nonrot(bdev_get_queue(p->bdev))) { 2134 if (blk_queue_nonrot(bdev_get_queue(p->bdev))) {
@@ -2117,14 +2144,15 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
2117 if (swap_flags & SWAP_FLAG_PREFER) 2144 if (swap_flags & SWAP_FLAG_PREFER)
2118 prio = 2145 prio =
2119 (swap_flags & SWAP_FLAG_PRIO_MASK) >> SWAP_FLAG_PRIO_SHIFT; 2146 (swap_flags & SWAP_FLAG_PRIO_MASK) >> SWAP_FLAG_PRIO_SHIFT;
2120 enable_swap_info(p, prio, swap_map); 2147 enable_swap_info(p, prio, swap_map, frontswap_map);
2121 2148
2122 printk(KERN_INFO "Adding %uk swap on %s. " 2149 printk(KERN_INFO "Adding %uk swap on %s. "
2123 "Priority:%d extents:%d across:%lluk %s%s\n", 2150 "Priority:%d extents:%d across:%lluk %s%s%s\n",
2124 p->pages<<(PAGE_SHIFT-10), name, p->prio, 2151 p->pages<<(PAGE_SHIFT-10), name, p->prio,
2125 nr_extents, (unsigned long long)span<<(PAGE_SHIFT-10), 2152 nr_extents, (unsigned long long)span<<(PAGE_SHIFT-10),
2126 (p->flags & SWP_SOLIDSTATE) ? "SS" : "", 2153 (p->flags & SWP_SOLIDSTATE) ? "SS" : "",
2127 (p->flags & SWP_DISCARDABLE) ? "D" : ""); 2154 (p->flags & SWP_DISCARDABLE) ? "D" : "",
2155 (frontswap_map) ? "FS" : "");
2128 2156
2129 mutex_unlock(&swapon_mutex); 2157 mutex_unlock(&swapon_mutex);
2130 atomic_inc(&proc_poll_event); 2158 atomic_inc(&proc_poll_event);